1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost
= { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost
= { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost
= {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost
= {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost
= {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost
= {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost
= {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
/* Pointer to the processor cost table currently in effect; initialized
   to the Pentium costs.  Presumably reset by option handling to one of
   the per-processor tables defined above -- confirm at the use sites,
   which are outside this chunk.  */
348 const struct processor_costs
*ix86_cost
= &pentium_cost
;
350 /* Processor feature/optimization bitmasks. */
/* One bit per processor type.  These bits are OR-ed together below to
   build the per-feature x86_* tuning masks.  */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
/* Per-feature tuning masks.  Each x86_* constant below is a bitmask
   over the processor bits defined above (m_386 ... m_PENT4): a set bit
   enables the correspondingly named code-generation heuristic when
   tuning for that processor.  A complemented mask (~x) enables the
   heuristic on every processor except those listed.  The effect of
   each flag is defined at its use sites, which are outside this chunk
   of the file.  */
359 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
360 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
361 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
362 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
363 const int x86_double_with_add
= ~m_386
;
364 const int x86_use_bit_test
= m_386
;
365 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
366 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
367 const int x86_3dnow_a
= m_ATHLON
;
368 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
369 const int x86_branch_hints
= m_PENT4
;
370 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
371 const int x86_partial_reg_stall
= m_PPRO
;
372 const int x86_use_loop
= m_K6
;
373 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
374 const int x86_use_mov0
= m_K6
;
375 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
376 const int x86_read_modify_write
= ~m_PENT
;
377 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
378 const int x86_split_long_moves
= m_PPRO
;
379 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
;
380 const int x86_single_stringop
= m_386
| m_PENT4
;
381 const int x86_qimode_math
= ~(0); /* all processors */
382 const int x86_promote_qi_regs
= 0; /* no processors */
383 const int x86_himode_math
= ~(m_PPRO
);
384 const int x86_promote_hi_regs
= m_PPRO
;
385 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
386 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
387 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
388 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
389 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
);
390 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
391 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
392 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
393 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
394 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
395 const int x86_decompose_lea
= m_PENT4
;
396 const int x86_arch_always_fancy_math_387
= m_PENT
|m_PPRO
|m_ATHLON
|m_PENT4
;
398 /* In case the average insn count for a single function invocation is
399 lower than this constant, emit fast (but longer) prologue and
401 #define FAST_PROLOGUE_INSN_COUNT 30
402 /* Set by prologue expander and used by epilogue expander to determine
404 static int use_fast_prologue_epilogue
;
406 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
408 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
; /* names for 16 bit regs */
409 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
; /* names for 8 bit regs (low) */
410 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
; /* names for 8 bit regs (high) */
412 /* Array of the smallest class containing reg number REGNO, indexed by
413 REGNO. Used by REGNO_REG_CLASS in i386.h. */
415 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
418 AREG
, DREG
, CREG
, BREG
,
420 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
422 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
423 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
426 /* flags, fpsr, dirflag, frame */
427 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
428 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
430 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
432 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
433 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
434 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
438 /* The "default" register map used in 32bit mode. */
440 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
442 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
443 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
444 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
445 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
446 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
447 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
448 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* i386 register numbers of the registers used to pass the first six
   integer arguments in 64-bit mode: RDI, RSI, RDX, RCX, R8, R9.  */
451 static int const x86_64_int_parameter_registers
[6] = {5 /*RDI*/, 4 /*RSI*/,
452 1 /*RDX*/, 2 /*RCX*/,
453 FIRST_REX_INT_REG
/*R8 */,
454 FIRST_REX_INT_REG
+ 1 /*R9 */};
/* Registers used to return integer values in 64-bit mode.  NOTE: regno
   1 is RDX, not RDI -- the parameter table above establishes the
   regno->name mapping (5=RDI, 4=RSI, 1=RDX); the old comment here
   mislabeled it.  */
455 static int const x86_64_int_return_registers
[4] = {0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/};
457 /* The "default" register map used in 64bit mode. */
458 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
460 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
461 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
462 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
463 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
464 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
465 8,9,10,11,12,13,14,15, /* extended integer registers */
466 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
469 /* Define the register numbers to be used in Dwarf debugging information.
470 The SVR4 reference port C compiler uses the following register numbers
471 in its Dwarf output code:
472 0 for %eax (gcc regno = 0)
473 1 for %ecx (gcc regno = 2)
474 2 for %edx (gcc regno = 1)
475 3 for %ebx (gcc regno = 3)
476 4 for %esp (gcc regno = 7)
477 5 for %ebp (gcc regno = 6)
478 6 for %esi (gcc regno = 4)
479 7 for %edi (gcc regno = 5)
480 The following three DWARF register numbers are never generated by
481 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
482 believes these numbers have these meanings.
483 8 for %eip (no gcc equivalent)
484 9 for %eflags (gcc regno = 17)
485 10 for %trapno (no gcc equivalent)
486 It is not at all clear how we should number the FP stack registers
487 for the x86 architecture. If the version of SDB on x86/svr4 were
488 a bit less brain dead with respect to floating-point then we would
489 have a precedent to follow with respect to DWARF register numbers
490 for x86 FP registers, but the SDB on x86/svr4 is so completely
491 broken with respect to FP registers that it is hardly worth thinking
492 of it as something to strive for compatibility with.
493 The version of x86/svr4 SDB I have at the moment does (partially)
494 seem to believe that DWARF register number 11 is associated with
495 the x86 register %st(0), but that's about all. Higher DWARF
496 register numbers don't seem to be associated with anything in
497 particular, and even for DWARF regno 11, SDB only seems to under-
498 stand that it should say that a variable lives in %st(0) (when
499 asked via an `=' command) if we said it was in DWARF regno 11,
500 but SDB still prints garbage when asked for the value of the
501 variable in question (via a `/' command).
502 (Also note that the labels SDB prints for various FP stack regs
503 when doing an `x' command are all wrong.)
504 Note that these problems generally don't affect the native SVR4
505 C compiler because it doesn't allow the use of -O with -g and
506 because when it is *not* optimizing, it allocates a memory
507 location for each floating-point variable, and the memory
508 location is what gets described in the DWARF AT_location
509 attribute for the variable in question.
510 Regardless of the severe mental illness of the x86/svr4 SDB, we
511 do something sensible here and we use the following DWARF
512 register numbers. Note that these are all stack-top-relative
514 11 for %st(0) (gcc regno = 8)
515 12 for %st(1) (gcc regno = 9)
516 13 for %st(2) (gcc regno = 10)
517 14 for %st(3) (gcc regno = 11)
518 15 for %st(4) (gcc regno = 12)
519 16 for %st(5) (gcc regno = 13)
520 17 for %st(6) (gcc regno = 14)
521 18 for %st(7) (gcc regno = 15)
523 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
525 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
526 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
527 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
528 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
529 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
531 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
534 /* Test and compare insns in i386.md store the information needed to
535 generate branch and scc insns here. */
537 rtx ix86_compare_op0
= NULL_RTX
;
538 rtx ix86_compare_op1
= NULL_RTX
;
540 #define MAX_386_STACK_LOCALS 3
541 /* Size of the register save area. */
542 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
544 /* Define the structure for the machine field in struct function. */
545 struct machine_function
547 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
548 int save_varrargs_registers
;
549 int accesses_prev_frame
;
552 #define ix86_stack_locals (cfun->machine->stack_locals)
553 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
555 /* Structure describing stack frame layout.
556 Stack grows downward:
562 saved frame pointer if frame_pointer_needed
563 <- HARD_FRAME_POINTER
569 > to_allocate <- FRAME_POINTER
581 int outgoing_arguments_size
;
584 HOST_WIDE_INT to_allocate
;
585 /* The offsets relative to ARG_POINTER. */
586 HOST_WIDE_INT frame_pointer_offset
;
587 HOST_WIDE_INT hard_frame_pointer_offset
;
588 HOST_WIDE_INT stack_pointer_offset
;
591 /* Used to enable/disable debugging features. */
592 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
593 /* Code model option as passed by user. */
594 const char *ix86_cmodel_string
;
596 enum cmodel ix86_cmodel
;
598 const char *ix86_asm_string
;
599 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
601 /* which cpu are we scheduling for */
602 enum processor_type ix86_cpu
;
604 /* which unit we are generating floating point math for */
605 enum fpmath_unit ix86_fpmath
;
607 /* which instruction set architecture to use. */
610 /* Strings to hold which cpu and instruction set architecture to use. */
611 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
612 const char *ix86_arch_string
; /* for -march=<xxx> */
613 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
615 /* # of registers to use to pass arguments. */
616 const char *ix86_regparm_string
;
618 /* true if sse prefetch instruction is not NOOP. */
619 int x86_prefetch_sse
;
621 /* ix86_regparm_string as a number */
624 /* Alignment to use for loops and jumps: */
626 /* Power of two alignment for loops. */
627 const char *ix86_align_loops_string
;
629 /* Power of two alignment for non-loop jumps. */
630 const char *ix86_align_jumps_string
;
632 /* Power of two alignment for stack boundary in bytes. */
633 const char *ix86_preferred_stack_boundary_string
;
635 /* Preferred alignment for stack boundary in bits. */
636 int ix86_preferred_stack_boundary
;
638 /* Values 1-5: see jump.c */
639 int ix86_branch_cost
;
640 const char *ix86_branch_cost_string
;
642 /* Power of two alignment for functions. */
643 const char *ix86_align_funcs_string
;
645 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
646 static char internal_label_prefix
[16];
647 static int internal_label_prefix_len
;
649 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
650 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
651 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
653 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
654 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
656 static rtx gen_push
PARAMS ((rtx
));
657 static int memory_address_length
PARAMS ((rtx addr
));
658 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
659 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
660 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
661 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
662 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
663 static void ix86_init_machine_status
PARAMS ((struct function
*));
664 static void ix86_mark_machine_status
PARAMS ((struct function
*));
665 static void ix86_free_machine_status
PARAMS ((struct function
*));
666 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
667 static int ix86_nsaved_regs
PARAMS ((void));
668 static void ix86_emit_save_regs
PARAMS ((void));
669 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
670 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
671 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
672 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
673 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
674 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
675 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
676 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
677 static int ix86_issue_rate
PARAMS ((void));
678 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
679 static void ix86_sched_init
PARAMS ((FILE *, int, int));
680 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
681 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
682 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
683 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
684 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
688 rtx base
, index
, disp
;
692 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
694 static void i386_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
696 struct builtin_description
;
697 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
699 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
701 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
702 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
703 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
704 static rtx ix86_expand_timode_binop_builtin
PARAMS ((enum insn_code
,
706 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
707 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
708 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
709 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
713 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
715 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
716 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
717 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
718 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
719 static int ix86_save_reg
PARAMS ((unsigned int, int));
720 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
721 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
722 const struct attribute_spec ix86_attribute_table
[];
723 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
724 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
726 #ifdef DO_GLOBAL_CTORS_BODY
727 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
730 /* Register class used for passing given 64bit part of the argument.
731 These represent classes as documented by the PS ABI, with the exception
732 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
733 use SF or DFmode move instead of DImode to avoid reformatting penalties.
735 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
736 whenever possible (upper half does contain padding).
738 enum x86_64_reg_class
741 X86_64_INTEGER_CLASS
,
742 X86_64_INTEGERSI_CLASS
,
751 static const char * const x86_64_reg_class_name
[] =
752 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
754 #define MAX_CLASSES 4
755 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
756 enum x86_64_reg_class
[MAX_CLASSES
],
758 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
760 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
762 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
763 enum x86_64_reg_class
));
765 /* Initialize the GCC target structure. */
766 #undef TARGET_ATTRIBUTE_TABLE
767 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
768 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
769 # undef TARGET_MERGE_DECL_ATTRIBUTES
770 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
773 #undef TARGET_COMP_TYPE_ATTRIBUTES
774 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
776 #undef TARGET_INIT_BUILTINS
777 #define TARGET_INIT_BUILTINS ix86_init_builtins
779 #undef TARGET_EXPAND_BUILTIN
780 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
782 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
783 static void ix86_osf_output_function_prologue
PARAMS ((FILE *,
785 # undef TARGET_ASM_FUNCTION_PROLOGUE
786 # define TARGET_ASM_FUNCTION_PROLOGUE ix86_osf_output_function_prologue
789 #undef TARGET_ASM_OPEN_PAREN
790 #define TARGET_ASM_OPEN_PAREN ""
791 #undef TARGET_ASM_CLOSE_PAREN
792 #define TARGET_ASM_CLOSE_PAREN ""
794 #undef TARGET_ASM_ALIGNED_HI_OP
795 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
796 #undef TARGET_ASM_ALIGNED_SI_OP
797 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
799 #undef TARGET_ASM_ALIGNED_DI_OP
800 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
803 #undef TARGET_ASM_UNALIGNED_HI_OP
804 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
805 #undef TARGET_ASM_UNALIGNED_SI_OP
806 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
807 #undef TARGET_ASM_UNALIGNED_DI_OP
808 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
810 #undef TARGET_SCHED_ADJUST_COST
811 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
812 #undef TARGET_SCHED_ISSUE_RATE
813 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
814 #undef TARGET_SCHED_VARIABLE_ISSUE
815 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
816 #undef TARGET_SCHED_INIT
817 #define TARGET_SCHED_INIT ix86_sched_init
818 #undef TARGET_SCHED_REORDER
819 #define TARGET_SCHED_REORDER ix86_sched_reorder
820 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
821 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
822 ia32_use_dfa_pipeline_interface
823 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
824 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
825 ia32_multipass_dfa_lookahead
827 struct gcc_target targetm
= TARGET_INITIALIZER
;
829 /* Sometimes certain combinations of command options do not make
830 sense on a particular target machine. You can define a macro
831 `OVERRIDE_OPTIONS' to take account of this. This macro, if
832 defined, is executed once just after all the command options have
835 Don't use this macro to turn on various extra optimizations for
836 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
842 /* Comes from final.c -- no real reason to change it. */
843 #define MAX_CODE_ALIGN 16
847 const struct processor_costs
*cost
; /* Processor costs */
848 const int target_enable
; /* Target flags to enable. */
849 const int target_disable
; /* Target flags to disable. */
850 const int align_loop
; /* Default alignments. */
851 const int align_loop_max_skip
;
852 const int align_jump
;
853 const int align_jump_max_skip
;
854 const int align_func
;
855 const int branch_cost
;
857 const processor_target_table
[PROCESSOR_max
] =
859 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
860 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
861 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
862 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
863 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
864 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
865 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
/* Processor names usable as the configure-time default CPU; indexed by
   the TARGET_CPU_DEFAULT value (used below to pick ix86_cpu_string when
   the user gave neither -mcpu= nor -march=).  */
868 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
871 const char *const name
; /* processor name or nickname. */
872 const enum processor_type processor
;
878 PTA_PREFETCH_SSE
= 8,
883 const processor_alias_table
[] =
885 {"i386", PROCESSOR_I386
, 0},
886 {"i486", PROCESSOR_I486
, 0},
887 {"i586", PROCESSOR_PENTIUM
, 0},
888 {"pentium", PROCESSOR_PENTIUM
, 0},
889 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
890 {"i686", PROCESSOR_PENTIUMPRO
, 0},
891 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
892 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
893 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
894 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
895 PTA_MMX
| PTA_PREFETCH_SSE
},
896 {"k6", PROCESSOR_K6
, PTA_MMX
},
897 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
898 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
899 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
901 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
902 | PTA_3DNOW
| PTA_3DNOW_A
},
903 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
904 | PTA_3DNOW_A
| PTA_SSE
},
905 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
906 | PTA_3DNOW_A
| PTA_SSE
},
907 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
908 | PTA_3DNOW_A
| PTA_SSE
},
/* Number of entries in processor_alias_table; loop bound for matching
   the -march= and -mcpu= strings against the known alias names.  */
911 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
913 #ifdef SUBTARGET_OVERRIDE_OPTIONS
914 SUBTARGET_OVERRIDE_OPTIONS
;
917 if (!ix86_cpu_string
&& ix86_arch_string
)
918 ix86_cpu_string
= ix86_arch_string
;
919 if (!ix86_cpu_string
)
920 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
921 if (!ix86_arch_string
)
922 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
924 if (ix86_cmodel_string
!= 0)
926 if (!strcmp (ix86_cmodel_string
, "small"))
927 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
929 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
930 else if (!strcmp (ix86_cmodel_string
, "32"))
932 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
933 ix86_cmodel
= CM_KERNEL
;
934 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
935 ix86_cmodel
= CM_MEDIUM
;
936 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
937 ix86_cmodel
= CM_LARGE
;
939 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
945 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
947 if (ix86_asm_string
!= 0)
949 if (!strcmp (ix86_asm_string
, "intel"))
950 ix86_asm_dialect
= ASM_INTEL
;
951 else if (!strcmp (ix86_asm_string
, "att"))
952 ix86_asm_dialect
= ASM_ATT
;
954 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
956 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
957 error ("code model `%s' not supported in the %s bit mode",
958 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
959 if (ix86_cmodel
== CM_LARGE
)
960 sorry ("code model `large' not supported yet");
961 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
962 sorry ("%i-bit mode not compiled in",
963 (target_flags
& MASK_64BIT
) ? 64 : 32);
965 for (i
= 0; i
< pta_size
; i
++)
966 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
968 ix86_arch
= processor_alias_table
[i
].processor
;
969 /* Default cpu tuning to the architecture. */
970 ix86_cpu
= ix86_arch
;
971 if (processor_alias_table
[i
].flags
& PTA_MMX
972 && !(target_flags
& MASK_MMX_SET
))
973 target_flags
|= MASK_MMX
;
974 if (processor_alias_table
[i
].flags
& PTA_3DNOW
975 && !(target_flags
& MASK_3DNOW_SET
))
976 target_flags
|= MASK_3DNOW
;
977 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
978 && !(target_flags
& MASK_3DNOW_A_SET
))
979 target_flags
|= MASK_3DNOW_A
;
980 if (processor_alias_table
[i
].flags
& PTA_SSE
981 && !(target_flags
& MASK_SSE_SET
))
982 target_flags
|= MASK_SSE
;
983 if (processor_alias_table
[i
].flags
& PTA_SSE2
984 && !(target_flags
& MASK_SSE2_SET
))
985 target_flags
|= MASK_SSE2
;
986 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
987 x86_prefetch_sse
= true;
992 error ("bad value (%s) for -march= switch", ix86_arch_string
);
994 for (i
= 0; i
< pta_size
; i
++)
995 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
997 ix86_cpu
= processor_alias_table
[i
].processor
;
1000 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1001 x86_prefetch_sse
= true;
1003 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1006 ix86_cost
= &size_cost
;
1008 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1009 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1010 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1012 /* Arrange to set up i386_stack_locals for all functions. */
1013 init_machine_status
= ix86_init_machine_status
;
1014 mark_machine_status
= ix86_mark_machine_status
;
1015 free_machine_status
= ix86_free_machine_status
;
1017 /* Validate -mregparm= value. */
1018 if (ix86_regparm_string
)
1020 i
= atoi (ix86_regparm_string
);
1021 if (i
< 0 || i
> REGPARM_MAX
)
1022 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1028 ix86_regparm
= REGPARM_MAX
;
1030 /* If the user has provided any of the -malign-* options,
1031 warn and use that value only if -falign-* is not set.
1032 Remove this code in GCC 3.2 or later. */
1033 if (ix86_align_loops_string
)
1035 warning ("-malign-loops is obsolete, use -falign-loops");
1036 if (align_loops
== 0)
1038 i
= atoi (ix86_align_loops_string
);
1039 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1040 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1042 align_loops
= 1 << i
;
1046 if (ix86_align_jumps_string
)
1048 warning ("-malign-jumps is obsolete, use -falign-jumps");
1049 if (align_jumps
== 0)
1051 i
= atoi (ix86_align_jumps_string
);
1052 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1053 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1055 align_jumps
= 1 << i
;
1059 if (ix86_align_funcs_string
)
1061 warning ("-malign-functions is obsolete, use -falign-functions");
1062 if (align_functions
== 0)
1064 i
= atoi (ix86_align_funcs_string
);
1065 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1066 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1068 align_functions
= 1 << i
;
1072 /* Default align_* from the processor table. */
1073 if (align_loops
== 0)
1075 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1076 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1078 if (align_jumps
== 0)
1080 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1081 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1083 if (align_functions
== 0)
1085 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1088 /* Validate -mpreferred-stack-boundary= value, or provide default.
1089 The default of 128 bits is for Pentium III's SSE __m128, but we
1090 don't want additional code to keep the stack aligned when
1091 optimizing for code size. */
1092 ix86_preferred_stack_boundary
= (optimize_size
1093 ? TARGET_64BIT
? 64 : 32
1095 if (ix86_preferred_stack_boundary_string
)
1097 i
= atoi (ix86_preferred_stack_boundary_string
);
1098 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 12)
1099 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1100 TARGET_64BIT
? 3 : 2);
1102 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1105 /* Validate -mbranch-cost= value, or provide default. */
1106 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1107 if (ix86_branch_cost_string
)
1109 i
= atoi (ix86_branch_cost_string
);
1111 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1113 ix86_branch_cost
= i
;
1116 /* Keep nonleaf frame pointers. */
1117 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1118 flag_omit_frame_pointer
= 1;
1120 /* If we're doing fast math, we don't care about comparison order
1121 wrt NaNs. This lets us use a shorter comparison sequence. */
1122 if (flag_unsafe_math_optimizations
)
1123 target_flags
&= ~MASK_IEEE_FP
;
1125 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1126 since the insns won't need emulation. */
1127 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1128 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1132 if (TARGET_ALIGN_DOUBLE
)
1133 error ("-malign-double makes no sense in the 64bit mode");
1135 error ("-mrtd calling convention not supported in the 64bit mode");
1136 /* Enable by default the SSE and MMX builtins. */
1137 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1138 ix86_fpmath
= FPMATH_SSE
;
1141 ix86_fpmath
= FPMATH_387
;
1143 if (ix86_fpmath_string
!= 0)
1145 if (! strcmp (ix86_fpmath_string
, "387"))
1146 ix86_fpmath
= FPMATH_387
;
1147 else if (! strcmp (ix86_fpmath_string
, "sse"))
1151 warning ("SSE instruction set disabled, using 387 arithmetics");
1152 ix86_fpmath
= FPMATH_387
;
1155 ix86_fpmath
= FPMATH_SSE
;
1157 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1158 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1162 warning ("SSE instruction set disabled, using 387 arithmetics");
1163 ix86_fpmath
= FPMATH_387
;
1165 else if (!TARGET_80387
)
1167 warning ("387 instruction set disabled, using SSE arithmetics");
1168 ix86_fpmath
= FPMATH_SSE
;
1171 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1174 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1177 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1181 target_flags
|= MASK_MMX
;
1182 x86_prefetch_sse
= true;
1185 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1188 target_flags
|= MASK_MMX
;
1189 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1190 extensions it adds. */
1191 if (x86_3dnow_a
& (1 << ix86_arch
))
1192 target_flags
|= MASK_3DNOW_A
;
1194 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1195 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS_SET
)
1197 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1199 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1202 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1203 p
= strchr (internal_label_prefix
, 'X');
1204 internal_label_prefix_len
= p
- internal_label_prefix
;
1210 optimization_options (level
, size
)
1212 int size ATTRIBUTE_UNUSED
;
1214 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1215 make the problem with not enough registers even worse. */
1216 #ifdef INSN_SCHEDULING
1218 flag_schedule_insns
= 0;
1220 if (TARGET_64BIT
&& optimize
>= 1)
1221 flag_omit_frame_pointer
= 1;
1224 flag_pcc_struct_return
= 0;
1225 flag_asynchronous_unwind_tables
= 1;
1229 /* Table of valid machine attributes. */
1230 const struct attribute_spec ix86_attribute_table
[] =
1232 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1233 /* Stdcall attribute says callee is responsible for popping arguments
1234 if they are not variable. */
1235 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1236 /* Cdecl attribute says the callee is a normal C declaration */
1237 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1238 /* Regparm attribute specifies how many integer arguments are to be
1239 passed in registers. */
1240 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1241 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1242 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1243 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1244 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1246 { NULL
, 0, 0, false, false, false, NULL
}
1249 /* Handle a "cdecl" or "stdcall" attribute;
1250 arguments as in struct attribute_spec.handler. */
1252 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1255 tree args ATTRIBUTE_UNUSED
;
1256 int flags ATTRIBUTE_UNUSED
;
1259 if (TREE_CODE (*node
) != FUNCTION_TYPE
1260 && TREE_CODE (*node
) != METHOD_TYPE
1261 && TREE_CODE (*node
) != FIELD_DECL
1262 && TREE_CODE (*node
) != TYPE_DECL
)
1264 warning ("`%s' attribute only applies to functions",
1265 IDENTIFIER_POINTER (name
));
1266 *no_add_attrs
= true;
1271 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1272 *no_add_attrs
= true;
1278 /* Handle a "regparm" attribute;
1279 arguments as in struct attribute_spec.handler. */
1281 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1285 int flags ATTRIBUTE_UNUSED
;
1288 if (TREE_CODE (*node
) != FUNCTION_TYPE
1289 && TREE_CODE (*node
) != METHOD_TYPE
1290 && TREE_CODE (*node
) != FIELD_DECL
1291 && TREE_CODE (*node
) != TYPE_DECL
)
1293 warning ("`%s' attribute only applies to functions",
1294 IDENTIFIER_POINTER (name
));
1295 *no_add_attrs
= true;
1301 cst
= TREE_VALUE (args
);
1302 if (TREE_CODE (cst
) != INTEGER_CST
)
1304 warning ("`%s' attribute requires an integer constant argument",
1305 IDENTIFIER_POINTER (name
));
1306 *no_add_attrs
= true;
1308 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1310 warning ("argument to `%s' attribute larger than %d",
1311 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1312 *no_add_attrs
= true;
1319 #if defined (OSF_OS) || defined (TARGET_OSF1ELF)
1321 /* Generate the assembly code for function entry. FILE is a stdio
1322 stream to output the code to. SIZE is an int: how many units of
1323 temporary storage to allocate.
1325 Refer to the array `regs_ever_live' to determine which registers to
1326 save; `regs_ever_live[I]' is nonzero if register number I is ever
1327 used in the function. This function is responsible for knowing
1328 which registers should not be saved even if used.
1330 We override it here to allow for the new profiling code to go before
1331 the prologue and the old mcount code to go after the prologue (and
1332 after %ebx has been set up for ELF shared library support). */
1335 ix86_osf_output_function_prologue (file
, size
)
1339 const char *prefix
= "";
1340 const char *const lprefix
= LPREFIX
;
1341 int labelno
= current_function_profile_label_no
;
1345 if (TARGET_UNDERSCORES
)
1348 if (current_function_profile
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1350 if (!flag_pic
&& !HALF_PIC_P ())
1352 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1353 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1356 else if (HALF_PIC_P ())
1360 HALF_PIC_EXTERNAL ("_mcount_ptr");
1361 symref
= HALF_PIC_PTR (gen_rtx_SYMBOL_REF (Pmode
,
1364 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1365 fprintf (file
, "\tmovl %s%s,%%eax\n", prefix
,
1367 fprintf (file
, "\tcall *(%%eax)\n");
1372 static int call_no
= 0;
1374 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1375 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1376 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1377 lprefix
, call_no
++);
1378 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1380 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1382 fprintf (file
, "\tcall *(%%eax)\n");
1388 if (current_function_profile
&& OSF_PROFILE_BEFORE_PROLOGUE
)
1392 fprintf (file
, "\tmovl $%sP%d,%%edx\n", lprefix
, labelno
);
1393 fprintf (file
, "\tcall *%s_mcount_ptr\n", prefix
);
1398 static int call_no
= 0;
1400 fprintf (file
, "\tcall %sPc%d\n", lprefix
, call_no
);
1401 fprintf (file
, "%sPc%d:\tpopl %%eax\n", lprefix
, call_no
);
1402 fprintf (file
, "\taddl $_GLOBAL_OFFSET_TABLE_+[.-%sPc%d],%%eax\n",
1403 lprefix
, call_no
++);
1404 fprintf (file
, "\tleal %sP%d@GOTOFF(%%eax),%%edx\n",
1406 fprintf (file
, "\tmovl %s_mcount_ptr@GOT(%%eax),%%eax\n",
1408 fprintf (file
, "\tcall *(%%eax)\n");
1411 #endif /* !OSF_OS */
1413 function_prologue (file
, size
);
1416 #endif /* OSF_OS || TARGET_OSF1ELF */
1418 /* Return 0 if the attributes for two types are incompatible, 1 if they
1419 are compatible, and 2 if they are nearly compatible (which causes a
1420 warning to be generated). */
1423 ix86_comp_type_attributes (type1
, type2
)
1427 /* Check for mismatch of non-default calling convention. */
1428 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1430 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1433 /* Check for mismatched return types (cdecl vs stdcall). */
1434 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1435 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1440 /* Value is the number of bytes of arguments automatically
1441 popped when returning from a subroutine call.
1442 FUNDECL is the declaration node of the function (as a tree),
1443 FUNTYPE is the data type of the function (as a tree),
1444 or for a library call it is an identifier node for the subroutine name.
1445 SIZE is the number of bytes of arguments passed on the stack.
1447 On the 80386, the RTD insn may be used to pop them if the number
1448 of args is fixed, but if the number is variable then the caller
1449 must pop them all. RTD can't be used for library calls now
1450 because the library is compiled with the Unix compiler.
1451 Use of RTD is a selectable option, since it is incompatible with
1452 standard Unix calling sequences. If the option is not selected,
1453 the caller must always pop the args.
1455 The attribute stdcall is equivalent to RTD on a per module basis. */
1458 ix86_return_pops_args (fundecl
, funtype
, size
)
1463 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1465 /* Cdecl functions override -mrtd, and never pop the stack. */
1466 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1468 /* Stdcall functions will pop the stack if not variable args. */
1469 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1473 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1474 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1475 == void_type_node
)))
1479 /* Lose any fake structure return argument if it is passed on the stack. */
1480 if (aggregate_value_p (TREE_TYPE (funtype
))
1483 int nregs
= ix86_regparm
;
1487 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype
));
1490 nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1494 return GET_MODE_SIZE (Pmode
);
1500 /* Argument support functions. */
1502 /* Return true when register may be used to pass function parameters. */
1504 ix86_function_arg_regno_p (regno
)
1509 return (regno
< REGPARM_MAX
1510 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1511 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1513 /* RAX is used as hidden argument to va_arg functions. */
1516 for (i
= 0; i
< REGPARM_MAX
; i
++)
1517 if (regno
== x86_64_int_parameter_registers
[i
])
1522 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1523 for a call to a function whose data type is FNTYPE.
1524 For a library call, FNTYPE is 0. */
1527 init_cumulative_args (cum
, fntype
, libname
)
1528 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1529 tree fntype
; /* tree ptr for function decl */
1530 rtx libname
; /* SYMBOL_REF of library name or 0 */
1532 static CUMULATIVE_ARGS zero_cum
;
1533 tree param
, next_param
;
1535 if (TARGET_DEBUG_ARG
)
1537 fprintf (stderr
, "\ninit_cumulative_args (");
1539 fprintf (stderr
, "fntype code = %s, ret code = %s",
1540 tree_code_name
[(int) TREE_CODE (fntype
)],
1541 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1543 fprintf (stderr
, "no fntype");
1546 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1551 /* Set up the number of registers to use for passing arguments. */
1552 cum
->nregs
= ix86_regparm
;
1553 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1554 if (fntype
&& !TARGET_64BIT
)
1556 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1559 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1561 cum
->maybe_vaarg
= false;
1563 /* Determine if this function has variable arguments. This is
1564 indicated by the last argument being 'void_type_node' if there
1565 are no variable arguments. If there are variable arguments, then
1566 we won't pass anything in registers */
1570 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1571 param
!= 0; param
= next_param
)
1573 next_param
= TREE_CHAIN (param
);
1574 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1578 cum
->maybe_vaarg
= true;
1582 if ((!fntype
&& !libname
)
1583 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1584 cum
->maybe_vaarg
= 1;
1586 if (TARGET_DEBUG_ARG
)
1587 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1592 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1593 of this code is to classify each 8bytes of incoming argument by the register
1594 class and assign registers accordingly. */
1596 /* Return the union class of CLASS1 and CLASS2.
1597 See the x86-64 PS ABI for details. */
1599 static enum x86_64_reg_class
1600 merge_classes (class1
, class2
)
1601 enum x86_64_reg_class class1
, class2
;
1603 /* Rule #1: If both classes are equal, this is the resulting class. */
1604 if (class1
== class2
)
1607 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1609 if (class1
== X86_64_NO_CLASS
)
1611 if (class2
== X86_64_NO_CLASS
)
1614 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1615 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1616 return X86_64_MEMORY_CLASS
;
1618 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1619 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1620 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1621 return X86_64_INTEGERSI_CLASS
;
1622 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1623 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1624 return X86_64_INTEGER_CLASS
;
1626 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1627 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1628 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1629 return X86_64_MEMORY_CLASS
;
1631 /* Rule #6: Otherwise class SSE is used. */
1632 return X86_64_SSE_CLASS
;
1635 /* Classify the argument of type TYPE and mode MODE.
1636 CLASSES will be filled by the register class used to pass each word
1637 of the operand. The number of words is returned. In case the parameter
1638 should be passed in memory, 0 is returned. As a special case for zero
1639 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1641 BIT_OFFSET is used internally for handling records and specifies offset
1642 of the offset in bits modulo 256 to avoid overflow cases.
1644 See the x86-64 PS ABI for details.
1648 classify_argument (mode
, type
, classes
, bit_offset
)
1649 enum machine_mode mode
;
1651 enum x86_64_reg_class classes
[MAX_CLASSES
];
1655 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1656 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1658 if (type
&& AGGREGATE_TYPE_P (type
))
1662 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1664 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1668 for (i
= 0; i
< words
; i
++)
1669 classes
[i
] = X86_64_NO_CLASS
;
1671 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1672 signal the memory class, so handle it as a special case. */
1675 classes
[0] = X86_64_NO_CLASS
;
1679 /* Classify each field of record and merge classes. */
1680 if (TREE_CODE (type
) == RECORD_TYPE
)
1682 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1684 if (TREE_CODE (field
) == FIELD_DECL
)
1688 /* Bitfields are always classified as integer. Handle them
1689 early, since later code would consider them to be
1690 misaligned integers. */
1691 if (DECL_BIT_FIELD (field
))
1693 for (i
= int_bit_position (field
) / 8 / 8;
1694 i
< (int_bit_position (field
)
1695 + tree_low_cst (DECL_SIZE (field
), 0)
1698 merge_classes (X86_64_INTEGER_CLASS
,
1703 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1704 TREE_TYPE (field
), subclasses
,
1705 (int_bit_position (field
)
1706 + bit_offset
) % 256);
1709 for (i
= 0; i
< num
; i
++)
1712 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1714 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1720 /* Arrays are handled as small records. */
1721 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1724 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1725 TREE_TYPE (type
), subclasses
, bit_offset
);
1729 /* The partial classes are now full classes. */
1730 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1731 subclasses
[0] = X86_64_SSE_CLASS
;
1732 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1733 subclasses
[0] = X86_64_INTEGER_CLASS
;
1735 for (i
= 0; i
< words
; i
++)
1736 classes
[i
] = subclasses
[i
% num
];
1738 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1739 else if (TREE_CODE (type
) == UNION_TYPE
1740 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1742 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1744 if (TREE_CODE (field
) == FIELD_DECL
)
1747 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1748 TREE_TYPE (field
), subclasses
,
1752 for (i
= 0; i
< num
; i
++)
1753 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1760 /* Final merger cleanup. */
1761 for (i
= 0; i
< words
; i
++)
1763 /* If one class is MEMORY, everything should be passed in
1765 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1768 /* The X86_64_SSEUP_CLASS should be always preceded by
1769 X86_64_SSE_CLASS. */
1770 if (classes
[i
] == X86_64_SSEUP_CLASS
1771 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1772 classes
[i
] = X86_64_SSE_CLASS
;
1774 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1775 if (classes
[i
] == X86_64_X87UP_CLASS
1776 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1777 classes
[i
] = X86_64_SSE_CLASS
;
1782 /* Compute alignment needed. We align all types to natural boundaries with
1783 exception of XFmode that is aligned to 64bits. */
1784 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1786 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1789 mode_alignment
= 128;
1790 else if (mode
== XCmode
)
1791 mode_alignment
= 256;
1792 /* Misaligned fields are always returned in memory. */
1793 if (bit_offset
% mode_alignment
)
1797 /* Classification of atomic types. */
1807 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1808 classes
[0] = X86_64_INTEGERSI_CLASS
;
1810 classes
[0] = X86_64_INTEGER_CLASS
;
1814 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1817 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1818 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1821 if (!(bit_offset
% 64))
1822 classes
[0] = X86_64_SSESF_CLASS
;
1824 classes
[0] = X86_64_SSE_CLASS
;
1827 classes
[0] = X86_64_SSEDF_CLASS
;
1830 classes
[0] = X86_64_X87_CLASS
;
1831 classes
[1] = X86_64_X87UP_CLASS
;
1834 classes
[0] = X86_64_X87_CLASS
;
1835 classes
[1] = X86_64_X87UP_CLASS
;
1836 classes
[2] = X86_64_X87_CLASS
;
1837 classes
[3] = X86_64_X87UP_CLASS
;
1840 classes
[0] = X86_64_SSEDF_CLASS
;
1841 classes
[1] = X86_64_SSEDF_CLASS
;
1844 classes
[0] = X86_64_SSE_CLASS
;
1848 classes
[0] = X86_64_SSE_CLASS
;
1849 classes
[1] = X86_64_SSEUP_CLASS
;
1855 classes
[0] = X86_64_SSE_CLASS
;
1865 /* Examine the argument and return set number of register required in each
1866 class. Return 0 iff parameter should be passed in memory. */
1868 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1869 enum machine_mode mode
;
1871 int *int_nregs
, *sse_nregs
;
1874 enum x86_64_reg_class
class[MAX_CLASSES
];
1875 int n
= classify_argument (mode
, type
, class, 0);
1881 for (n
--; n
>= 0; n
--)
1884 case X86_64_INTEGER_CLASS
:
1885 case X86_64_INTEGERSI_CLASS
:
1888 case X86_64_SSE_CLASS
:
1889 case X86_64_SSESF_CLASS
:
1890 case X86_64_SSEDF_CLASS
:
1893 case X86_64_NO_CLASS
:
1894 case X86_64_SSEUP_CLASS
:
1896 case X86_64_X87_CLASS
:
1897 case X86_64_X87UP_CLASS
:
1901 case X86_64_MEMORY_CLASS
:
1906 /* Construct container for the argument used by GCC interface. See
1907 FUNCTION_ARG for the detailed description. */
1909 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1910 enum machine_mode mode
;
1913 int nintregs
, nsseregs
;
1917 enum machine_mode tmpmode
;
1919 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1920 enum x86_64_reg_class
class[MAX_CLASSES
];
1924 int needed_sseregs
, needed_intregs
;
1925 rtx exp
[MAX_CLASSES
];
1928 n
= classify_argument (mode
, type
, class, 0);
1929 if (TARGET_DEBUG_ARG
)
1932 fprintf (stderr
, "Memory class\n");
1935 fprintf (stderr
, "Classes:");
1936 for (i
= 0; i
< n
; i
++)
1938 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1940 fprintf (stderr
, "\n");
1945 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1947 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1950 /* First construct simple cases. Avoid SCmode, since we want to use
1951 single register to pass this type. */
1952 if (n
== 1 && mode
!= SCmode
)
1955 case X86_64_INTEGER_CLASS
:
1956 case X86_64_INTEGERSI_CLASS
:
1957 return gen_rtx_REG (mode
, intreg
[0]);
1958 case X86_64_SSE_CLASS
:
1959 case X86_64_SSESF_CLASS
:
1960 case X86_64_SSEDF_CLASS
:
1961 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1962 case X86_64_X87_CLASS
:
1963 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1964 case X86_64_NO_CLASS
:
1965 /* Zero sized array, struct or class. */
1970 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1971 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1973 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1974 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1975 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1976 && class[1] == X86_64_INTEGER_CLASS
1977 && (mode
== CDImode
|| mode
== TImode
)
1978 && intreg
[0] + 1 == intreg
[1])
1979 return gen_rtx_REG (mode
, intreg
[0]);
1981 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1982 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1983 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1985 /* Otherwise figure out the entries of the PARALLEL. */
1986 for (i
= 0; i
< n
; i
++)
1990 case X86_64_NO_CLASS
:
1992 case X86_64_INTEGER_CLASS
:
1993 case X86_64_INTEGERSI_CLASS
:
1994 /* Merge TImodes on aligned occasions here too. */
1995 if (i
* 8 + 8 > bytes
)
1996 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
1997 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2001 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2002 if (tmpmode
== BLKmode
)
2004 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2005 gen_rtx_REG (tmpmode
, *intreg
),
2009 case X86_64_SSESF_CLASS
:
2010 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2011 gen_rtx_REG (SFmode
,
2012 SSE_REGNO (sse_regno
)),
2016 case X86_64_SSEDF_CLASS
:
2017 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2018 gen_rtx_REG (DFmode
,
2019 SSE_REGNO (sse_regno
)),
2023 case X86_64_SSE_CLASS
:
2024 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
2025 tmpmode
= TImode
, i
++;
2028 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2029 gen_rtx_REG (tmpmode
,
2030 SSE_REGNO (sse_regno
)),
2038 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2039 for (i
= 0; i
< nexps
; i
++)
2040 XVECEXP (ret
, 0, i
) = exp
[i
];
2044 /* Update the data in CUM to advance over an argument
2045 of mode MODE and data type TYPE.
2046 (TYPE is null for libcalls where that information may not be available.) */
2049 function_arg_advance (cum
, mode
, type
, named
)
2050 CUMULATIVE_ARGS
*cum
; /* current arg information */
2051 enum machine_mode mode
; /* current arg mode */
2052 tree type
; /* type of the argument or 0 if lib support */
2053 int named
; /* whether or not the argument was named */
2056 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2057 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2059 if (TARGET_DEBUG_ARG
)
2061 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2062 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2065 int int_nregs
, sse_nregs
;
2066 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2067 cum
->words
+= words
;
2068 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2070 cum
->nregs
-= int_nregs
;
2071 cum
->sse_nregs
-= sse_nregs
;
2072 cum
->regno
+= int_nregs
;
2073 cum
->sse_regno
+= sse_nregs
;
2076 cum
->words
+= words
;
2080 if (TARGET_SSE
&& mode
== TImode
)
2082 cum
->sse_words
+= words
;
2083 cum
->sse_nregs
-= 1;
2084 cum
->sse_regno
+= 1;
2085 if (cum
->sse_nregs
<= 0)
2093 cum
->words
+= words
;
2094 cum
->nregs
-= words
;
2095 cum
->regno
+= words
;
2097 if (cum
->nregs
<= 0)
2107 /* Define where to put the arguments to a function.
2108 Value is zero to push the argument on the stack,
2109 or a hard register in which to store the argument.
2111 MODE is the argument's machine mode.
2112 TYPE is the data type of the argument (as a tree).
2113 This is null for libcalls where that information may
2115 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2116 the preceding args and about the function being called.
2117 NAMED is nonzero if this argument is a named parameter
2118 (otherwise it is an extra parameter matching an ellipsis). */
2121 function_arg (cum
, mode
, type
, named
)
2122 CUMULATIVE_ARGS
*cum
; /* current arg information */
2123 enum machine_mode mode
; /* current arg mode */
2124 tree type
; /* type of the argument or 0 if lib support */
2125 int named
; /* != 0 for normal args, == 0 for ... args */
2129 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2130 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2132 /* Handle an hidden AL argument containing number of registers for varargs
2133 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2135 if (mode
== VOIDmode
)
2138 return GEN_INT (cum
->maybe_vaarg
2139 ? (cum
->sse_nregs
< 0
2147 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2148 &x86_64_int_parameter_registers
[cum
->regno
],
2153 /* For now, pass fp/complex values on the stack. */
2162 if (words
<= cum
->nregs
)
2163 ret
= gen_rtx_REG (mode
, cum
->regno
);
2167 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2171 if (TARGET_DEBUG_ARG
)
2174 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
2175 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2178 fprintf (stderr
, ", reg=%%e%s", reg_names
[ REGNO (ret
) ]);
2180 fprintf (stderr
, ", stack");
2182 fprintf (stderr
, " )\n");
2188 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2192 ix86_function_arg_boundary (mode
, type
)
2193 enum machine_mode mode
;
2198 return PARM_BOUNDARY
;
2200 align
= TYPE_ALIGN (type
);
2202 align
= GET_MODE_ALIGNMENT (mode
);
2203 if (align
< PARM_BOUNDARY
)
2204 align
= PARM_BOUNDARY
;
2210 /* Return true if N is a possible register number of function value. */
2212 ix86_function_value_regno_p (regno
)
2217 return ((regno
) == 0
2218 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2219 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2221 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2222 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2223 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2226 /* Define how to find the value returned by a function.
2227 VALTYPE is the data type of the value (as a tree).
2228 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2229 otherwise, FUNC is 0. */
2231 ix86_function_value (valtype
)
2236 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2237 REGPARM_MAX
, SSE_REGPARM_MAX
,
2238 x86_64_int_return_registers
, 0);
2239 /* For zero sized structures, construct_continer return NULL, but we need
2240 to keep rest of compiler happy by returning meaningfull value. */
2242 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2246 return gen_rtx_REG (TYPE_MODE (valtype
), VALUE_REGNO (TYPE_MODE (valtype
)));
2249 /* Return false iff type is returned in memory. */
2251 ix86_return_in_memory (type
)
2254 int needed_intregs
, needed_sseregs
;
2257 return !examine_argument (TYPE_MODE (type
), type
, 1,
2258 &needed_intregs
, &needed_sseregs
);
2262 if (TYPE_MODE (type
) == BLKmode
2263 || (VECTOR_MODE_P (TYPE_MODE (type
))
2264 && int_size_in_bytes (type
) == 8)
2265 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2266 && TYPE_MODE (type
) != TFmode
2267 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2273 /* Define how to find the value returned by a library function
2274 assuming the value has mode MODE. */
2276 ix86_libcall_value (mode
)
2277 enum machine_mode mode
;
2287 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2290 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2292 return gen_rtx_REG (mode
, 0);
2296 return gen_rtx_REG (mode
, VALUE_REGNO (mode
));
2299 /* Create the va_list data type. */
2302 ix86_build_va_list ()
2304 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2306 /* For i386 we use plain pointer to argument area. */
2308 return build_pointer_type (char_type_node
);
2310 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2311 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2313 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2314 unsigned_type_node
);
2315 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2316 unsigned_type_node
);
2317 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2319 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2322 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2323 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2324 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2325 DECL_FIELD_CONTEXT (f_sav
) = record
;
2327 TREE_CHAIN (record
) = type_decl
;
2328 TYPE_NAME (record
) = type_decl
;
2329 TYPE_FIELDS (record
) = f_gpr
;
2330 TREE_CHAIN (f_gpr
) = f_fpr
;
2331 TREE_CHAIN (f_fpr
) = f_ovf
;
2332 TREE_CHAIN (f_ovf
) = f_sav
;
2334 layout_type (record
);
2336 /* The correct type is an array type of one element. */
2337 return build_array_type (record
, build_index_type (size_zero_node
));
2340 /* Perform any needed actions needed for a function that is receiving a
2341 variable number of arguments.
2345 MODE and TYPE are the mode and type of the current parameter.
2347 PRETEND_SIZE is a variable that should be set to the amount of stack
2348 that must be pushed by the prolog to pretend that our caller pushed
2351 Normally, this macro will push all remaining incoming registers on the
2352 stack and set PRETEND_SIZE to the length of the registers pushed. */
2355 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2356 CUMULATIVE_ARGS
*cum
;
2357 enum machine_mode mode
;
2359 int *pretend_size ATTRIBUTE_UNUSED
;
2363 CUMULATIVE_ARGS next_cum
;
2364 rtx save_area
= NULL_RTX
, mem
;
2377 /* Indicate to allocate space on the stack for varargs save area. */
2378 ix86_save_varrargs_registers
= 1;
2380 fntype
= TREE_TYPE (current_function_decl
);
2381 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2382 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2383 != void_type_node
));
2385 /* For varargs, we do not want to skip the dummy va_dcl argument.
2386 For stdargs, we do want to skip the last named argument. */
2389 function_arg_advance (&next_cum
, mode
, type
, 1);
2392 save_area
= frame_pointer_rtx
;
2394 set
= get_varargs_alias_set ();
2396 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2398 mem
= gen_rtx_MEM (Pmode
,
2399 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2400 set_mem_alias_set (mem
, set
);
2401 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2402 x86_64_int_parameter_registers
[i
]));
2405 if (next_cum
.sse_nregs
)
2407 /* Now emit code to save SSE registers. The AX parameter contains number
2408 of SSE parameter regsiters used to call this function. We use
2409 sse_prologue_save insn template that produces computed jump across
2410 SSE saves. We need some preparation work to get this working. */
2412 label
= gen_label_rtx ();
2413 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2415 /* Compute address to jump to :
2416 label - 5*eax + nnamed_sse_arguments*5 */
2417 tmp_reg
= gen_reg_rtx (Pmode
);
2418 nsse_reg
= gen_reg_rtx (Pmode
);
2419 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2420 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2421 gen_rtx_MULT (Pmode
, nsse_reg
,
2423 if (next_cum
.sse_regno
)
2426 gen_rtx_CONST (DImode
,
2427 gen_rtx_PLUS (DImode
,
2429 GEN_INT (next_cum
.sse_regno
* 4))));
2431 emit_move_insn (nsse_reg
, label_ref
);
2432 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2434 /* Compute address of memory block we save into. We always use pointer
2435 pointing 127 bytes after first byte to store - this is needed to keep
2436 instruction size limited by 4 bytes. */
2437 tmp_reg
= gen_reg_rtx (Pmode
);
2438 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2439 plus_constant (save_area
,
2440 8 * REGPARM_MAX
+ 127)));
2441 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2442 set_mem_alias_set (mem
, set
);
2443 set_mem_align (mem
, BITS_PER_WORD
);
2445 /* And finally do the dirty job! */
2446 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2447 GEN_INT (next_cum
.sse_regno
), label
));
2452 /* Implement va_start. */
2455 ix86_va_start (stdarg_p
, valist
, nextarg
)
2460 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2461 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2462 tree gpr
, fpr
, ovf
, sav
, t
;
2464 /* Only 64bit target needs something special. */
2467 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2471 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2472 f_fpr
= TREE_CHAIN (f_gpr
);
2473 f_ovf
= TREE_CHAIN (f_fpr
);
2474 f_sav
= TREE_CHAIN (f_ovf
);
2476 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2477 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2478 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2479 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2480 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2482 /* Count number of gp and fp argument registers used. */
2483 words
= current_function_args_info
.words
;
2484 n_gpr
= current_function_args_info
.regno
;
2485 n_fpr
= current_function_args_info
.sse_regno
;
2487 if (TARGET_DEBUG_ARG
)
2488 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2489 (int) words
, (int) n_gpr
, (int) n_fpr
);
2491 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2492 build_int_2 (n_gpr
* 8, 0));
2493 TREE_SIDE_EFFECTS (t
) = 1;
2494 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2496 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2497 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2498 TREE_SIDE_EFFECTS (t
) = 1;
2499 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2501 /* Find the overflow area. */
2502 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2504 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2505 build_int_2 (words
* UNITS_PER_WORD
, 0));
2506 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2507 TREE_SIDE_EFFECTS (t
) = 1;
2508 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2510 /* Find the register save area.
2511 Prologue of the function save it right above stack frame. */
2512 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2513 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2514 TREE_SIDE_EFFECTS (t
) = 1;
2515 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2518 /* Implement va_arg. */
2520 ix86_va_arg (valist
, type
)
2523 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2524 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2525 tree gpr
, fpr
, ovf
, sav
, t
;
2527 rtx lab_false
, lab_over
= NULL_RTX
;
2531 /* Only 64bit target needs something special. */
2534 return std_expand_builtin_va_arg (valist
, type
);
2537 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2538 f_fpr
= TREE_CHAIN (f_gpr
);
2539 f_ovf
= TREE_CHAIN (f_fpr
);
2540 f_sav
= TREE_CHAIN (f_ovf
);
2542 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2543 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2544 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2545 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2546 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2548 size
= int_size_in_bytes (type
);
2549 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2551 container
= construct_container (TYPE_MODE (type
), type
, 0,
2552 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2554 * Pull the value out of the saved registers ...
2557 addr_rtx
= gen_reg_rtx (Pmode
);
2561 rtx int_addr_rtx
, sse_addr_rtx
;
2562 int needed_intregs
, needed_sseregs
;
2565 lab_over
= gen_label_rtx ();
2566 lab_false
= gen_label_rtx ();
2568 examine_argument (TYPE_MODE (type
), type
, 0,
2569 &needed_intregs
, &needed_sseregs
);
2572 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2573 || TYPE_ALIGN (type
) > 128);
2575 /* In case we are passing structure, verify that it is consetuctive block
2576 on the register save area. If not we need to do moves. */
2577 if (!need_temp
&& !REG_P (container
))
2579 /* Verify that all registers are strictly consetuctive */
2580 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2584 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2586 rtx slot
= XVECEXP (container
, 0, i
);
2587 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2588 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2596 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2598 rtx slot
= XVECEXP (container
, 0, i
);
2599 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2600 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2607 int_addr_rtx
= addr_rtx
;
2608 sse_addr_rtx
= addr_rtx
;
2612 int_addr_rtx
= gen_reg_rtx (Pmode
);
2613 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2615 /* First ensure that we fit completely in registers. */
2618 emit_cmp_and_jump_insns (expand_expr
2619 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2620 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2621 1) * 8), GE
, const1_rtx
, SImode
,
2626 emit_cmp_and_jump_insns (expand_expr
2627 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2628 GEN_INT ((SSE_REGPARM_MAX
-
2629 needed_sseregs
+ 1) * 16 +
2630 REGPARM_MAX
* 8), GE
, const1_rtx
,
2631 SImode
, 1, lab_false
);
2634 /* Compute index to start of area used for integer regs. */
2637 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2638 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2639 if (r
!= int_addr_rtx
)
2640 emit_move_insn (int_addr_rtx
, r
);
2644 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2645 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2646 if (r
!= sse_addr_rtx
)
2647 emit_move_insn (sse_addr_rtx
, r
);
2654 /* Never use the memory itself, as it has the alias set. */
2655 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2656 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2657 set_mem_alias_set (mem
, get_varargs_alias_set ());
2658 set_mem_align (mem
, BITS_PER_UNIT
);
2660 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2662 rtx slot
= XVECEXP (container
, 0, i
);
2663 rtx reg
= XEXP (slot
, 0);
2664 enum machine_mode mode
= GET_MODE (reg
);
2670 if (SSE_REGNO_P (REGNO (reg
)))
2672 src_addr
= sse_addr_rtx
;
2673 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2677 src_addr
= int_addr_rtx
;
2678 src_offset
= REGNO (reg
) * 8;
2680 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2681 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2682 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2683 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2684 emit_move_insn (dest_mem
, src_mem
);
2691 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2692 build_int_2 (needed_intregs
* 8, 0));
2693 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2694 TREE_SIDE_EFFECTS (t
) = 1;
2695 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2700 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2701 build_int_2 (needed_sseregs
* 16, 0));
2702 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2703 TREE_SIDE_EFFECTS (t
) = 1;
2704 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2707 emit_jump_insn (gen_jump (lab_over
));
2709 emit_label (lab_false
);
2712 /* ... otherwise out of the overflow area. */
2714 /* Care for on-stack alignment if needed. */
2715 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2719 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2720 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2721 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2725 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2727 emit_move_insn (addr_rtx
, r
);
2730 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2731 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2732 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2733 TREE_SIDE_EFFECTS (t
) = 1;
2734 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2737 emit_label (lab_over
);
2742 /* Return nonzero if OP is general operand representable on x86_64. */
2745 x86_64_general_operand (op
, mode
)
2747 enum machine_mode mode
;
2750 return general_operand (op
, mode
);
2751 if (nonimmediate_operand (op
, mode
))
2753 return x86_64_sign_extended_value (op
);
2756 /* Return nonzero if OP is general operand representable on x86_64
2757 as either sign extended or zero extended constant. */
2760 x86_64_szext_general_operand (op
, mode
)
2762 enum machine_mode mode
;
2765 return general_operand (op
, mode
);
2766 if (nonimmediate_operand (op
, mode
))
2768 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2771 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2774 x86_64_nonmemory_operand (op
, mode
)
2776 enum machine_mode mode
;
2779 return nonmemory_operand (op
, mode
);
2780 if (register_operand (op
, mode
))
2782 return x86_64_sign_extended_value (op
);
2785 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2788 x86_64_movabs_operand (op
, mode
)
2790 enum machine_mode mode
;
2792 if (!TARGET_64BIT
|| !flag_pic
)
2793 return nonmemory_operand (op
, mode
);
2794 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2796 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2801 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2804 x86_64_szext_nonmemory_operand (op
, mode
)
2806 enum machine_mode mode
;
2809 return nonmemory_operand (op
, mode
);
2810 if (register_operand (op
, mode
))
2812 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2815 /* Return nonzero if OP is immediate operand representable on x86_64. */
2818 x86_64_immediate_operand (op
, mode
)
2820 enum machine_mode mode
;
2823 return immediate_operand (op
, mode
);
2824 return x86_64_sign_extended_value (op
);
2827 /* Return nonzero if OP is immediate operand representable on x86_64. */
2830 x86_64_zext_immediate_operand (op
, mode
)
2832 enum machine_mode mode ATTRIBUTE_UNUSED
;
2834 return x86_64_zero_extended_value (op
);
2837 /* Return nonzero if OP is (const_int 1), else return zero. */
2840 const_int_1_operand (op
, mode
)
2842 enum machine_mode mode ATTRIBUTE_UNUSED
;
2844 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2847 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2848 reference and a constant. */
2851 symbolic_operand (op
, mode
)
2853 enum machine_mode mode ATTRIBUTE_UNUSED
;
2855 switch (GET_CODE (op
))
2863 if (GET_CODE (op
) == SYMBOL_REF
2864 || GET_CODE (op
) == LABEL_REF
2865 || (GET_CODE (op
) == UNSPEC
2866 && (XINT (op
, 1) == UNSPEC_GOT
2867 || XINT (op
, 1) == UNSPEC_GOTOFF
2868 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
2870 if (GET_CODE (op
) != PLUS
2871 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2875 if (GET_CODE (op
) == SYMBOL_REF
2876 || GET_CODE (op
) == LABEL_REF
)
2878 /* Only @GOTOFF gets offsets. */
2879 if (GET_CODE (op
) != UNSPEC
2880 || XINT (op
, 1) != UNSPEC_GOTOFF
)
2883 op
= XVECEXP (op
, 0, 0);
2884 if (GET_CODE (op
) == SYMBOL_REF
2885 || GET_CODE (op
) == LABEL_REF
)
2894 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2897 pic_symbolic_operand (op
, mode
)
2899 enum machine_mode mode ATTRIBUTE_UNUSED
;
2901 if (GET_CODE (op
) != CONST
)
2906 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2911 if (GET_CODE (op
) == UNSPEC
)
2913 if (GET_CODE (op
) != PLUS
2914 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2917 if (GET_CODE (op
) == UNSPEC
)
2923 /* Return true if OP is a symbolic operand that resolves locally. */
2926 local_symbolic_operand (op
, mode
)
2928 enum machine_mode mode ATTRIBUTE_UNUSED
;
2930 if (GET_CODE (op
) == LABEL_REF
)
2933 if (GET_CODE (op
) == CONST
2934 && GET_CODE (XEXP (op
, 0)) == PLUS
2935 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2936 op
= XEXP (XEXP (op
, 0), 0);
2938 if (GET_CODE (op
) != SYMBOL_REF
)
2941 /* These we've been told are local by varasm and encode_section_info
2943 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2946 /* There is, however, a not insubstantial body of code in the rest of
2947 the compiler that assumes it can just stick the results of
2948 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2949 /* ??? This is a hack. Should update the body of the compiler to
2950 always create a DECL an invoke targetm.encode_section_info. */
2951 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2952 internal_label_prefix_len
) == 0)
2958 /* Test for a valid operand for a call instruction. Don't allow the
2959 arg pointer register or virtual regs since they may decay into
2960 reg + const, which the patterns can't handle. */
2963 call_insn_operand (op
, mode
)
2965 enum machine_mode mode ATTRIBUTE_UNUSED
;
2967 /* Disallow indirect through a virtual register. This leads to
2968 compiler aborts when trying to eliminate them. */
2969 if (GET_CODE (op
) == REG
2970 && (op
== arg_pointer_rtx
2971 || op
== frame_pointer_rtx
2972 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
2973 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
2976 /* Disallow `call 1234'. Due to varying assembler lameness this
2977 gets either rejected or translated to `call .+1234'. */
2978 if (GET_CODE (op
) == CONST_INT
)
2981 /* Explicitly allow SYMBOL_REF even if pic. */
2982 if (GET_CODE (op
) == SYMBOL_REF
)
2985 /* Half-pic doesn't allow anything but registers and constants.
2986 We've just taken care of the later. */
2988 return register_operand (op
, Pmode
);
2990 /* Otherwise we can allow any general_operand in the address. */
2991 return general_operand (op
, Pmode
);
2995 constant_call_address_operand (op
, mode
)
2997 enum machine_mode mode ATTRIBUTE_UNUSED
;
2999 if (GET_CODE (op
) == CONST
3000 && GET_CODE (XEXP (op
, 0)) == PLUS
3001 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3002 op
= XEXP (XEXP (op
, 0), 0);
3003 return GET_CODE (op
) == SYMBOL_REF
;
3006 /* Match exactly zero and one. */
3009 const0_operand (op
, mode
)
3011 enum machine_mode mode
;
3013 return op
== CONST0_RTX (mode
);
3017 const1_operand (op
, mode
)
3019 enum machine_mode mode ATTRIBUTE_UNUSED
;
3021 return op
== const1_rtx
;
3024 /* Match 2, 4, or 8. Used for leal multiplicands. */
3027 const248_operand (op
, mode
)
3029 enum machine_mode mode ATTRIBUTE_UNUSED
;
3031 return (GET_CODE (op
) == CONST_INT
3032 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3035 /* True if this is a constant appropriate for an increment or decremenmt. */
3038 incdec_operand (op
, mode
)
3040 enum machine_mode mode ATTRIBUTE_UNUSED
;
3042 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3043 registers, since carry flag is not set. */
3044 if (TARGET_PENTIUM4
&& !optimize_size
)
3046 return op
== const1_rtx
|| op
== constm1_rtx
;
3049 /* Return nonzero if OP is acceptable as operand of DImode shift
3053 shiftdi_operand (op
, mode
)
3055 enum machine_mode mode ATTRIBUTE_UNUSED
;
3058 return nonimmediate_operand (op
, mode
);
3060 return register_operand (op
, mode
);
3063 /* Return false if this is the stack pointer, or any other fake
3064 register eliminable to the stack pointer. Otherwise, this is
3067 This is used to prevent esp from being used as an index reg.
3068 Which would only happen in pathological cases. */
3071 reg_no_sp_operand (op
, mode
)
3073 enum machine_mode mode
;
3076 if (GET_CODE (t
) == SUBREG
)
3078 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3081 return register_operand (op
, mode
);
3085 mmx_reg_operand (op
, mode
)
3087 enum machine_mode mode ATTRIBUTE_UNUSED
;
3089 return MMX_REG_P (op
);
3092 /* Return false if this is any eliminable register. Otherwise
3096 general_no_elim_operand (op
, mode
)
3098 enum machine_mode mode
;
3101 if (GET_CODE (t
) == SUBREG
)
3103 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3104 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3105 || t
== virtual_stack_dynamic_rtx
)
3108 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3109 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3112 return general_operand (op
, mode
);
3115 /* Return false if this is any eliminable register. Otherwise
3116 register_operand or const_int. */
3119 nonmemory_no_elim_operand (op
, mode
)
3121 enum machine_mode mode
;
3124 if (GET_CODE (t
) == SUBREG
)
3126 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3127 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3128 || t
== virtual_stack_dynamic_rtx
)
3131 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3134 /* Return true if op is a Q_REGS class register. */
3137 q_regs_operand (op
, mode
)
3139 enum machine_mode mode
;
3141 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3143 if (GET_CODE (op
) == SUBREG
)
3144 op
= SUBREG_REG (op
);
3145 return ANY_QI_REG_P (op
);
3148 /* Return true if op is a NON_Q_REGS class register. */
3151 non_q_regs_operand (op
, mode
)
3153 enum machine_mode mode
;
3155 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3157 if (GET_CODE (op
) == SUBREG
)
3158 op
= SUBREG_REG (op
);
3159 return NON_QI_REG_P (op
);
3162 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3165 sse_comparison_operator (op
, mode
)
3167 enum machine_mode mode ATTRIBUTE_UNUSED
;
3169 enum rtx_code code
= GET_CODE (op
);
3172 /* Operations supported directly. */
3182 /* These are equivalent to ones above in non-IEEE comparisons. */
3189 return !TARGET_IEEE_FP
;
3194 /* Return 1 if OP is a valid comparison operator in valid mode. */
3196 ix86_comparison_operator (op
, mode
)
3198 enum machine_mode mode
;
3200 enum machine_mode inmode
;
3201 enum rtx_code code
= GET_CODE (op
);
3202 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3204 if (GET_RTX_CLASS (code
) != '<')
3206 inmode
= GET_MODE (XEXP (op
, 0));
3208 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3210 enum rtx_code second_code
, bypass_code
;
3211 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3212 return (bypass_code
== NIL
&& second_code
== NIL
);
3219 if (inmode
== CCmode
|| inmode
== CCGCmode
3220 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3223 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3224 if (inmode
== CCmode
)
3228 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3236 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3239 fcmov_comparison_operator (op
, mode
)
3241 enum machine_mode mode
;
3243 enum machine_mode inmode
;
3244 enum rtx_code code
= GET_CODE (op
);
3245 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3247 if (GET_RTX_CLASS (code
) != '<')
3249 inmode
= GET_MODE (XEXP (op
, 0));
3250 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3252 enum rtx_code second_code
, bypass_code
;
3253 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3254 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3256 code
= ix86_fp_compare_code_to_integer (code
);
3258 /* i387 supports just limited amount of conditional codes. */
3261 case LTU
: case GTU
: case LEU
: case GEU
:
3262 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3265 case ORDERED
: case UNORDERED
:
3273 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3276 promotable_binary_operator (op
, mode
)
3278 enum machine_mode mode ATTRIBUTE_UNUSED
;
3280 switch (GET_CODE (op
))
3283 /* Modern CPUs have same latency for HImode and SImode multiply,
3284 but 386 and 486 do HImode multiply faster. */
3285 return ix86_cpu
> PROCESSOR_I486
;
3297 /* Nearly general operand, but accept any const_double, since we wish
3298 to be able to drop them into memory rather than have them get pulled
3302 cmp_fp_expander_operand (op
, mode
)
3304 enum machine_mode mode
;
3306 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3308 if (GET_CODE (op
) == CONST_DOUBLE
)
3310 return general_operand (op
, mode
);
3313 /* Match an SI or HImode register for a zero_extract. */
3316 ext_register_operand (op
, mode
)
3318 enum machine_mode mode ATTRIBUTE_UNUSED
;
3321 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3322 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3325 if (!register_operand (op
, VOIDmode
))
3328 /* Be curefull to accept only registers having upper parts. */
3329 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3330 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3333 /* Return 1 if this is a valid binary floating-point operation.
3334 OP is the expression matched, and MODE is its mode. */
3337 binary_fp_operator (op
, mode
)
3339 enum machine_mode mode
;
3341 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3344 switch (GET_CODE (op
))
3350 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3358 mult_operator (op
, mode
)
3360 enum machine_mode mode ATTRIBUTE_UNUSED
;
3362 return GET_CODE (op
) == MULT
;
3366 div_operator (op
, mode
)
3368 enum machine_mode mode ATTRIBUTE_UNUSED
;
3370 return GET_CODE (op
) == DIV
;
3374 arith_or_logical_operator (op
, mode
)
3376 enum machine_mode mode
;
3378 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3379 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3380 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3383 /* Returns 1 if OP is memory operand with a displacement. */
3386 memory_displacement_operand (op
, mode
)
3388 enum machine_mode mode
;
3390 struct ix86_address parts
;
3392 if (! memory_operand (op
, mode
))
3395 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3398 return parts
.disp
!= NULL_RTX
;
3401 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3402 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3404 ??? It seems likely that this will only work because cmpsi is an
3405 expander, and no actual insns use this. */
3408 cmpsi_operand (op
, mode
)
3410 enum machine_mode mode
;
3412 if (nonimmediate_operand (op
, mode
))
3415 if (GET_CODE (op
) == AND
3416 && GET_MODE (op
) == SImode
3417 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3418 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3419 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3420 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3421 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3422 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3428 /* Returns 1 if OP is memory operand that can not be represented by the
3432 long_memory_operand (op
, mode
)
3434 enum machine_mode mode
;
3436 if (! memory_operand (op
, mode
))
3439 return memory_address_length (op
) != 0;
3442 /* Return nonzero if the rtx is known aligned. */
3445 aligned_operand (op
, mode
)
3447 enum machine_mode mode
;
3449 struct ix86_address parts
;
3451 if (!general_operand (op
, mode
))
3454 /* Registers and immediate operands are always "aligned". */
3455 if (GET_CODE (op
) != MEM
)
3458 /* Don't even try to do any aligned optimizations with volatiles. */
3459 if (MEM_VOLATILE_P (op
))
3464 /* Pushes and pops are only valid on the stack pointer. */
3465 if (GET_CODE (op
) == PRE_DEC
3466 || GET_CODE (op
) == POST_INC
)
3469 /* Decode the address. */
3470 if (! ix86_decompose_address (op
, &parts
))
3473 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3474 parts
.base
= SUBREG_REG (parts
.base
);
3475 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3476 parts
.index
= SUBREG_REG (parts
.index
);
3478 /* Look for some component that isn't known to be aligned. */
3482 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3487 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3492 if (GET_CODE (parts
.disp
) != CONST_INT
3493 || (INTVAL (parts
.disp
) & 3) != 0)
3497 /* Didn't find one -- this must be an aligned address. */
3501 /* Return true if the constant is something that can be loaded with
3502 a special instruction. Only handle 0.0 and 1.0; others are less
3506 standard_80387_constant_p (x
)
3509 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3511 /* Note that on the 80387, other constants, such as pi, that we should support
3512 too. On some machines, these are much slower to load as standard constant,
3513 than to load from doubles in memory. */
3514 if (x
== CONST0_RTX (GET_MODE (x
)))
3516 if (x
== CONST1_RTX (GET_MODE (x
)))
3521 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3524 standard_sse_constant_p (x
)
3527 if (GET_CODE (x
) != CONST_DOUBLE
)
3529 return (x
== CONST0_RTX (GET_MODE (x
)));
3532 /* Returns 1 if OP contains a symbol reference */
3535 symbolic_reference_mentioned_p (op
)
3538 register const char *fmt
;
3541 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3544 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3545 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3551 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3552 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3556 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3563 /* Return 1 if it is appropriate to emit `ret' instructions in the
3564 body of a function. Do this only if the epilogue is simple, needing a
3565 couple of insns. Prior to reloading, we can't tell how many registers
3566 must be saved, so return 0 then. Return 0 if there is no frame
3567 marker to de-allocate.
3569 If NON_SAVING_SETJMP is defined and true, then it is not possible
3570 for the epilogue to be simple, so return 0. This is a special case
3571 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3572 until final, but jump_optimize may need to know sooner if a
3576 ix86_can_use_return_insn_p ()
3578 struct ix86_frame frame
;
3580 #ifdef NON_SAVING_SETJMP
3581 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3585 if (! reload_completed
|| frame_pointer_needed
)
3588 /* Don't allow more than 32 pop, since that's all we can do
3589 with one instruction. */
3590 if (current_function_pops_args
3591 && current_function_args_size
>= 32768)
3594 ix86_compute_frame_layout (&frame
);
3595 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3598 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3600 x86_64_sign_extended_value (value
)
3603 switch (GET_CODE (value
))
3605 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3606 to be at least 32 and this all acceptable constants are
3607 represented as CONST_INT. */
3609 if (HOST_BITS_PER_WIDE_INT
== 32)
3613 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3614 return trunc_int_for_mode (val
, SImode
) == val
;
3618 /* For certain code models, the symbolic references are known to fit. */
3620 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3622 /* For certain code models, the code is near as well. */
3624 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3626 /* We also may accept the offsetted memory references in certain special
3629 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3630 && XINT (XEXP (value
, 0), 1) == UNSPEC_GOTPCREL
)
3632 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3634 rtx op1
= XEXP (XEXP (value
, 0), 0);
3635 rtx op2
= XEXP (XEXP (value
, 0), 1);
3636 HOST_WIDE_INT offset
;
3638 if (ix86_cmodel
== CM_LARGE
)
3640 if (GET_CODE (op2
) != CONST_INT
)
3642 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3643 switch (GET_CODE (op1
))
3646 /* For CM_SMALL assume that latest object is 1MB before
3647 end of 31bits boundary. We may also accept pretty
3648 large negative constants knowing that all objects are
3649 in the positive half of address space. */
3650 if (ix86_cmodel
== CM_SMALL
3651 && offset
< 1024*1024*1024
3652 && trunc_int_for_mode (offset
, SImode
) == offset
)
3654 /* For CM_KERNEL we know that all object resist in the
3655 negative half of 32bits address space. We may not
3656 accept negative offsets, since they may be just off
3657 and we may accept pretty large positive ones. */
3658 if (ix86_cmodel
== CM_KERNEL
3660 && trunc_int_for_mode (offset
, SImode
) == offset
)
3664 /* These conditions are similar to SYMBOL_REF ones, just the
3665 constraints for code models differ. */
3666 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3667 && offset
< 1024*1024*1024
3668 && trunc_int_for_mode (offset
, SImode
) == offset
)
3670 if (ix86_cmodel
== CM_KERNEL
3672 && trunc_int_for_mode (offset
, SImode
) == offset
)
3685 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3687 x86_64_zero_extended_value (value
)
3690 switch (GET_CODE (value
))
3693 if (HOST_BITS_PER_WIDE_INT
== 32)
3694 return (GET_MODE (value
) == VOIDmode
3695 && !CONST_DOUBLE_HIGH (value
));
3699 if (HOST_BITS_PER_WIDE_INT
== 32)
3700 return INTVAL (value
) >= 0;
3702 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3705 /* For certain code models, the symbolic references are known to fit. */
3707 return ix86_cmodel
== CM_SMALL
;
3709 /* For certain code models, the code is near as well. */
3711 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3713 /* We also may accept the offsetted memory references in certain special
3716 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3718 rtx op1
= XEXP (XEXP (value
, 0), 0);
3719 rtx op2
= XEXP (XEXP (value
, 0), 1);
3721 if (ix86_cmodel
== CM_LARGE
)
3723 switch (GET_CODE (op1
))
3727 /* For small code model we may accept pretty large positive
3728 offsets, since one bit is available for free. Negative
3729 offsets are limited by the size of NULL pointer area
3730 specified by the ABI. */
3731 if (ix86_cmodel
== CM_SMALL
3732 && GET_CODE (op2
) == CONST_INT
3733 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3734 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3737 /* ??? For the kernel, we may accept adjustment of
3738 -0x10000000, since we know that it will just convert
3739 negative address space to positive, but perhaps this
3740 is not worthwhile. */
3743 /* These conditions are similar to SYMBOL_REF ones, just the
3744 constraints for code models differ. */
3745 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3746 && GET_CODE (op2
) == CONST_INT
3747 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3748 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3762 /* Value should be nonzero if functions must have frame pointers.
3763 Zero means the frame pointer need not be set up (and parms may
3764 be accessed via the stack pointer) in functions that seem suitable. */
3767 ix86_frame_pointer_required ()
3769 /* If we accessed previous frames, then the generated code expects
3770 to be able to access the saved ebp value in our frame. */
3771 if (cfun
->machine
->accesses_prev_frame
)
3774 /* Several x86 os'es need a frame pointer for other reasons,
3775 usually pertaining to setjmp. */
3776 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3779 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3780 the frame pointer by default. Turn it back on now if we've not
3781 got a leaf function. */
3782 if (TARGET_OMIT_LEAF_FRAME_POINTER
&& ! leaf_function_p ())
3788 /* Record that the current function accesses previous call frames. */
3791 ix86_setup_frame_addresses ()
3793 cfun
->machine
->accesses_prev_frame
= 1;
3796 static char pic_label_name
[32];
3798 /* This function generates code for -fpic that loads %ebx with
3799 the return address of the caller and then returns. */
3802 ix86_asm_file_end (file
)
3807 if (! TARGET_DEEP_BRANCH_PREDICTION
|| pic_label_name
[0] == 0)
3810 /* ??? Binutils 2.10 and earlier has a linkonce elimination bug related
3811 to updating relocations to a section being discarded such that this
3812 doesn't work. Ought to detect this at configure time. */
3814 /* The trick here is to create a linkonce section containing the
3815 pic label thunk, but to refer to it with an internal label.
3816 Because the label is internal, we don't have inter-dso name
3817 binding issues on hosts that don't support ".hidden".
3819 In order to use these macros, however, we must create a fake
3821 if (targetm
.have_named_sections
)
3823 tree decl
= build_decl (FUNCTION_DECL
,
3824 get_identifier ("i686.get_pc_thunk"),
3826 DECL_ONE_ONLY (decl
) = 1;
3827 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3828 named_section (decl
, NULL
);
3835 /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
3836 internal (non-global) label that's being emitted, it didn't make
3837 sense to have .type information for local labels. This caused
3838 the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
3839 me debug info for a label that you're declaring non-global?) this
3840 was changed to call ASM_OUTPUT_LABEL() instead. */
3842 ASM_OUTPUT_LABEL (file
, pic_label_name
);
3844 xops
[0] = pic_offset_table_rtx
;
3845 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3846 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3847 output_asm_insn ("ret", xops
);
3851 load_pic_register ()
3858 gotsym
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3860 if (TARGET_DEEP_BRANCH_PREDICTION
)
3862 if (! pic_label_name
[0])
3863 ASM_GENERATE_INTERNAL_LABEL (pic_label_name
, "LPR", 0);
3864 pclab
= gen_rtx_MEM (QImode
, gen_rtx_SYMBOL_REF (Pmode
, pic_label_name
));
3868 pclab
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
3871 emit_insn (gen_prologue_get_pc (pic_offset_table_rtx
, pclab
));
3873 if (! TARGET_DEEP_BRANCH_PREDICTION
)
3874 emit_insn (gen_popsi1 (pic_offset_table_rtx
));
3876 emit_insn (gen_prologue_set_got (pic_offset_table_rtx
, gotsym
, pclab
));
3879 /* Generate an "push" pattern for input ARG. */
3885 return gen_rtx_SET (VOIDmode
,
3887 gen_rtx_PRE_DEC (Pmode
,
3888 stack_pointer_rtx
)),
3892 /* Return 1 if we need to save REGNO. */
3894 ix86_save_reg (regno
, maybe_eh_return
)
3896 int maybe_eh_return
;
3898 if (regno
== PIC_OFFSET_TABLE_REGNUM
3899 && (current_function_uses_pic_offset_table
3900 || current_function_uses_const_pool
3901 || current_function_calls_eh_return
))
3904 if (current_function_calls_eh_return
&& maybe_eh_return
)
3909 unsigned test
= EH_RETURN_DATA_REGNO (i
);
3910 if (test
== INVALID_REGNUM
)
3917 return (regs_ever_live
[regno
]
3918 && !call_used_regs
[regno
]
3919 && !fixed_regs
[regno
]
3920 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
3923 /* Return number of registers to be saved on the stack. */
3931 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
3932 if (ix86_save_reg (regno
, true))
3937 /* Return the offset between two registers, one to be eliminated, and the other
3938 its replacement, at the start of a routine. */
3941 ix86_initial_elimination_offset (from
, to
)
3945 struct ix86_frame frame
;
3946 ix86_compute_frame_layout (&frame
);
3948 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3949 return frame
.hard_frame_pointer_offset
;
3950 else if (from
== FRAME_POINTER_REGNUM
3951 && to
== HARD_FRAME_POINTER_REGNUM
)
3952 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
3955 if (to
!= STACK_POINTER_REGNUM
)
3957 else if (from
== ARG_POINTER_REGNUM
)
3958 return frame
.stack_pointer_offset
;
3959 else if (from
!= FRAME_POINTER_REGNUM
)
3962 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
3966 /* Fill structure ix86_frame about frame of currently computed function. */
3969 ix86_compute_frame_layout (frame
)
3970 struct ix86_frame
*frame
;
3972 HOST_WIDE_INT total_size
;
3973 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
3975 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
3976 HOST_WIDE_INT size
= get_frame_size ();
3978 frame
->nregs
= ix86_nsaved_regs ();
3981 /* Skip return address and saved base pointer. */
3982 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
3984 frame
->hard_frame_pointer_offset
= offset
;
3986 /* Do some sanity checking of stack_alignment_needed and
3987 preferred_alignment, since i386 port is the only using those features
3988 that may break easily. */
3990 if (size
&& !stack_alignment_needed
)
3992 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
3994 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3996 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
3999 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4000 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4002 /* Register save area */
4003 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4006 if (ix86_save_varrargs_registers
)
4008 offset
+= X86_64_VARARGS_SIZE
;
4009 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4012 frame
->va_arg_size
= 0;
4014 /* Align start of frame for local function. */
4015 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4016 & -stack_alignment_needed
) - offset
;
4018 offset
+= frame
->padding1
;
4020 /* Frame pointer points here. */
4021 frame
->frame_pointer_offset
= offset
;
4025 /* Add outgoing arguments area. */
4026 if (ACCUMULATE_OUTGOING_ARGS
)
4028 offset
+= current_function_outgoing_args_size
;
4029 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4032 frame
->outgoing_arguments_size
= 0;
4034 /* Align stack boundary. */
4035 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4036 & -preferred_alignment
) - offset
;
4038 offset
+= frame
->padding2
;
4040 /* We've reached end of stack frame. */
4041 frame
->stack_pointer_offset
= offset
;
4043 /* Size prologue needs to allocate. */
4044 frame
->to_allocate
=
4045 (size
+ frame
->padding1
+ frame
->padding2
4046 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4048 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4049 && current_function_is_leaf
)
4051 frame
->red_zone_size
= frame
->to_allocate
;
4052 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4053 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4056 frame
->red_zone_size
= 0;
4057 frame
->to_allocate
-= frame
->red_zone_size
;
4058 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4060 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4061 fprintf (stderr
, "size: %i\n", size
);
4062 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4063 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4064 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4065 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4066 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4067 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4068 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4069 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4070 frame
->hard_frame_pointer_offset
);
4071 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4075 /* Emit code to save registers in the prologue. */
4078 ix86_emit_save_regs ()
4083 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4084 if (ix86_save_reg (regno
, true))
4086 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4087 RTX_FRAME_RELATED_P (insn
) = 1;
4091 /* Emit code to save registers using MOV insns. First register
4092 is restored from POINTER + OFFSET. */
4094 ix86_emit_save_regs_using_mov (pointer
, offset
)
4096 HOST_WIDE_INT offset
;
4101 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4102 if (ix86_save_reg (regno
, true))
4104 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4106 gen_rtx_REG (Pmode
, regno
));
4107 RTX_FRAME_RELATED_P (insn
) = 1;
4108 offset
+= UNITS_PER_WORD
;
4112 /* Expand the prologue into a bunch of separate insns. */
4115 ix86_expand_prologue ()
4118 int pic_reg_used
= (flag_pic
&& (current_function_uses_pic_offset_table
4119 || current_function_uses_const_pool
)
4121 struct ix86_frame frame
;
4123 HOST_WIDE_INT allocate
;
4127 use_fast_prologue_epilogue
4128 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4129 if (TARGET_PROLOGUE_USING_MOVE
)
4130 use_mov
= use_fast_prologue_epilogue
;
4132 ix86_compute_frame_layout (&frame
);
4134 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4135 slower on all targets. Also sdb doesn't like it. */
4137 if (frame_pointer_needed
)
4139 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4140 RTX_FRAME_RELATED_P (insn
) = 1;
4142 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4143 RTX_FRAME_RELATED_P (insn
) = 1;
4146 allocate
= frame
.to_allocate
;
4147 /* In case we are dealing only with single register and empty frame,
4148 push is equivalent of the mov+add sequence. */
4149 if (allocate
== 0 && frame
.nregs
<= 1)
4153 ix86_emit_save_regs ();
4155 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4159 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4161 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4162 (stack_pointer_rtx
, stack_pointer_rtx
,
4163 GEN_INT (-allocate
)));
4164 RTX_FRAME_RELATED_P (insn
) = 1;
4168 /* ??? Is this only valid for Win32? */
4175 arg0
= gen_rtx_REG (SImode
, 0);
4176 emit_move_insn (arg0
, GEN_INT (allocate
));
4178 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4179 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4180 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4182 CALL_INSN_FUNCTION_USAGE (insn
)
4183 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4184 CALL_INSN_FUNCTION_USAGE (insn
));
4188 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4189 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4191 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4192 -frame
.nregs
* UNITS_PER_WORD
);
4195 #ifdef SUBTARGET_PROLOGUE
4200 load_pic_register ();
4202 /* If we are profiling, make sure no instructions are scheduled before
4203 the call to mcount. However, if -fpic, the above call will have
4205 if (current_function_profile
&& ! pic_reg_used
)
4206 emit_insn (gen_blockage ());
4209 /* Emit code to restore saved registers using MOV insns. First register
4210 is restored from POINTER + OFFSET. */
4212 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4215 int maybe_eh_return
;
4219 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4220 if (ix86_save_reg (regno
, maybe_eh_return
))
4222 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4223 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4225 offset
+= UNITS_PER_WORD
;
4229 /* Restore function stack, frame, and registers. */
4232 ix86_expand_epilogue (style
)
4236 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4237 struct ix86_frame frame
;
4238 HOST_WIDE_INT offset
;
4240 ix86_compute_frame_layout (&frame
);
4242 /* Calculate start of saved registers relative to ebp. Special care
4243 must be taken for the normal return case of a function using
4244 eh_return: the eax and edx registers are marked as saved, but not
4245 restored along this path. */
4246 offset
= frame
.nregs
;
4247 if (current_function_calls_eh_return
&& style
!= 2)
4249 offset
*= -UNITS_PER_WORD
;
4251 /* If we're only restoring one register and sp is not valid then
4252 using a move instruction to restore the register since it's
4253 less work than reloading sp and popping the register.
4255 The default code result in stack adjustment using add/lea instruction,
4256 while this code results in LEAVE instruction (or discrete equivalent),
4257 so it is profitable in some other cases as well. Especially when there
4258 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4259 and there is exactly one register to pop. This heruistic may need some
4260 tuning in future. */
4261 if ((!sp_valid
&& frame
.nregs
<= 1)
4262 || (TARGET_EPILOGUE_USING_MOVE
4263 && use_fast_prologue_epilogue
4264 && (frame
.nregs
> 1 || frame
.to_allocate
))
4265 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4266 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4267 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4268 || current_function_calls_eh_return
)
4270 /* Restore registers. We can use ebp or esp to address the memory
4271 locations. If both are available, default to ebp, since offsets
4272 are known to be small. Only exception is esp pointing directly to the
4273 end of block of saved registers, where we may simplify addressing
4276 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4277 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4278 frame
.to_allocate
, style
== 2);
4280 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4281 offset
, style
== 2);
4283 /* eh_return epilogues need %ecx added to the stack pointer. */
4286 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4288 if (frame_pointer_needed
)
4290 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4291 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4292 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4294 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4295 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4297 emit_insn (gen_pro_epilogue_adjust_stack
4298 (stack_pointer_rtx
, sa
, const0_rtx
));
4302 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4303 tmp
= plus_constant (tmp
, (frame
.to_allocate
4304 + frame
.nregs
* UNITS_PER_WORD
));
4305 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4308 else if (!frame_pointer_needed
)
4309 emit_insn (gen_pro_epilogue_adjust_stack
4310 (stack_pointer_rtx
, stack_pointer_rtx
,
4311 GEN_INT (frame
.to_allocate
4312 + frame
.nregs
* UNITS_PER_WORD
)));
4313 /* If not an i386, mov & pop is faster than "leave". */
4314 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4315 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4318 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4319 hard_frame_pointer_rtx
,
4322 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4324 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4329 /* First step is to deallocate the stack frame so that we can
4330 pop the registers. */
4333 if (!frame_pointer_needed
)
4335 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4336 hard_frame_pointer_rtx
,
4339 else if (frame
.to_allocate
)
4340 emit_insn (gen_pro_epilogue_adjust_stack
4341 (stack_pointer_rtx
, stack_pointer_rtx
,
4342 GEN_INT (frame
.to_allocate
)));
4344 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4345 if (ix86_save_reg (regno
, false))
4348 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4350 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4352 if (frame_pointer_needed
)
4354 /* Leave results in shorter dependency chains on CPUs that are
4355 able to grok it fast. */
4356 if (TARGET_USE_LEAVE
)
4357 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4358 else if (TARGET_64BIT
)
4359 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4361 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4365 /* Sibcall epilogues don't want a return instruction. */
4369 if (current_function_pops_args
&& current_function_args_size
)
4371 rtx popc
= GEN_INT (current_function_pops_args
);
4373 /* i386 can only pop 64K bytes. If asked to pop more, pop
4374 return address, do explicit add, and jump indirectly to the
4377 if (current_function_pops_args
>= 65536)
4379 rtx ecx
= gen_rtx_REG (SImode
, 2);
4381 /* There are is no "pascal" calling convention in 64bit ABI. */
4385 emit_insn (gen_popsi1 (ecx
));
4386 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4387 emit_jump_insn (gen_return_indirect_internal (ecx
));
4390 emit_jump_insn (gen_return_pop_internal (popc
));
4393 emit_jump_insn (gen_return_internal ());
4396 /* Extract the parts of an RTL expression that is a valid memory address
4397 for an instruction. Return 0 if the structure of the address is
4398 grossly off. Return -1 if the address contains ASHIFT, so it is not
4399 strictly valid, but still used for computing length of lea instruction.
4403 ix86_decompose_address (addr
, out
)
4405 struct ix86_address
*out
;
4407 rtx base
= NULL_RTX
;
4408 rtx index
= NULL_RTX
;
4409 rtx disp
= NULL_RTX
;
4410 HOST_WIDE_INT scale
= 1;
4411 rtx scale_rtx
= NULL_RTX
;
4414 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4416 else if (GET_CODE (addr
) == PLUS
)
4418 rtx op0
= XEXP (addr
, 0);
4419 rtx op1
= XEXP (addr
, 1);
4420 enum rtx_code code0
= GET_CODE (op0
);
4421 enum rtx_code code1
= GET_CODE (op1
);
4423 if (code0
== REG
|| code0
== SUBREG
)
4425 if (code1
== REG
|| code1
== SUBREG
)
4426 index
= op0
, base
= op1
; /* index + base */
4428 base
= op0
, disp
= op1
; /* base + displacement */
4430 else if (code0
== MULT
)
4432 index
= XEXP (op0
, 0);
4433 scale_rtx
= XEXP (op0
, 1);
4434 if (code1
== REG
|| code1
== SUBREG
)
4435 base
= op1
; /* index*scale + base */
4437 disp
= op1
; /* index*scale + disp */
4439 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4441 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4442 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4443 base
= XEXP (op0
, 1);
4446 else if (code0
== PLUS
)
4448 index
= XEXP (op0
, 0); /* index + base + disp */
4449 base
= XEXP (op0
, 1);
4455 else if (GET_CODE (addr
) == MULT
)
4457 index
= XEXP (addr
, 0); /* index*scale */
4458 scale_rtx
= XEXP (addr
, 1);
4460 else if (GET_CODE (addr
) == ASHIFT
)
4464 /* We're called for lea too, which implements ashift on occasion. */
4465 index
= XEXP (addr
, 0);
4466 tmp
= XEXP (addr
, 1);
4467 if (GET_CODE (tmp
) != CONST_INT
)
4469 scale
= INTVAL (tmp
);
4470 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4476 disp
= addr
; /* displacement */
4478 /* Extract the integral value of scale. */
4481 if (GET_CODE (scale_rtx
) != CONST_INT
)
4483 scale
= INTVAL (scale_rtx
);
4486 /* Allow arg pointer and stack pointer as index if there is not scaling */
4487 if (base
&& index
&& scale
== 1
4488 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4489 || index
== stack_pointer_rtx
))
4496 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4497 if ((base
== hard_frame_pointer_rtx
4498 || base
== frame_pointer_rtx
4499 || base
== arg_pointer_rtx
) && !disp
)
4502 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4503 Avoid this by transforming to [%esi+0]. */
4504 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4505 && base
&& !index
&& !disp
4507 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4510 /* Special case: encode reg+reg instead of reg*2. */
4511 if (!base
&& index
&& scale
&& scale
== 2)
4512 base
= index
, scale
= 1;
4514 /* Special case: scaling cannot be encoded without base or displacement. */
4515 if (!base
&& !disp
&& index
&& scale
!= 1)
4526 /* Return cost of the memory address x.
4527 For i386, it is better to use a complex address than let gcc copy
4528 the address into a reg and make a new pseudo. But not if the address
4529 requires to two regs - that would mean more pseudos with longer
4532 ix86_address_cost (x
)
4535 struct ix86_address parts
;
4538 if (!ix86_decompose_address (x
, &parts
))
4541 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4542 parts
.base
= SUBREG_REG (parts
.base
);
4543 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4544 parts
.index
= SUBREG_REG (parts
.index
);
4546 /* More complex memory references are better. */
4547 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4550 /* Attempt to minimize number of registers in the address. */
4552 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4554 && (!REG_P (parts
.index
)
4555 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4559 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4561 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4562 && parts
.base
!= parts
.index
)
4565 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4566 since it's predecode logic can't detect the length of instructions
4567 and it degenerates to vector decoded. Increase cost of such
4568 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4569 to split such addresses or even refuse such addresses at all.
4571 Following addressing modes are affected:
4576 The first and last case may be avoidable by explicitly coding the zero in
4577 memory address, but I don't have AMD-K6 machine handy to check this
4581 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4582 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4583 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4589 /* If X is a machine specific address (i.e. a symbol or label being
4590 referenced as a displacement from the GOT implemented using an
4591 UNSPEC), then return the base term. Otherwise return X. */
4594 ix86_find_base_term (x
)
4601 if (GET_CODE (x
) != CONST
)
4604 if (GET_CODE (term
) == PLUS
4605 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4606 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4607 term
= XEXP (term
, 0);
4608 if (GET_CODE (term
) != UNSPEC
4609 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4612 term
= XVECEXP (term
, 0, 0);
4614 if (GET_CODE (term
) != SYMBOL_REF
4615 && GET_CODE (term
) != LABEL_REF
)
4621 if (GET_CODE (x
) != PLUS
4622 || XEXP (x
, 0) != pic_offset_table_rtx
4623 || GET_CODE (XEXP (x
, 1)) != CONST
)
4626 term
= XEXP (XEXP (x
, 1), 0);
4628 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4629 term
= XEXP (term
, 0);
4631 if (GET_CODE (term
) != UNSPEC
4632 || XINT (term
, 1) != UNSPEC_GOTOFF
)
4635 term
= XVECEXP (term
, 0, 0);
4637 if (GET_CODE (term
) != SYMBOL_REF
4638 && GET_CODE (term
) != LABEL_REF
)
4644 /* Determine if a given CONST RTX is a valid memory displacement
4648 legitimate_pic_address_disp_p (disp
)
4651 /* In 64bit mode we can allow direct addresses of symbols and labels
4652 when they are not dynamic symbols. */
4656 if (GET_CODE (disp
) == CONST
)
4658 /* ??? Handle PIC code models */
4659 if (GET_CODE (x
) == PLUS
4660 && (GET_CODE (XEXP (x
, 1)) == CONST_INT
4661 && ix86_cmodel
== CM_SMALL_PIC
4662 && INTVAL (XEXP (x
, 1)) < 1024*1024*1024
4663 && INTVAL (XEXP (x
, 1)) > -1024*1024*1024))
4665 if (local_symbolic_operand (x
, Pmode
))
4668 if (GET_CODE (disp
) != CONST
)
4670 disp
= XEXP (disp
, 0);
4674 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4675 of GOT tables. We should not need these anyway. */
4676 if (GET_CODE (disp
) != UNSPEC
4677 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
4680 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4681 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4686 if (GET_CODE (disp
) == PLUS
)
4688 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
4690 disp
= XEXP (disp
, 0);
4693 if (GET_CODE (disp
) != UNSPEC
)
4696 /* Must be @GOT or @GOTOFF. */
4697 switch (XINT (disp
, 1))
4700 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
4702 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4708 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4709 memory address for an instruction. The MODE argument is the machine mode
4710 for the MEM expression that wants to use this address.
4712 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4713 convert common non-canonical forms to canonical form so that they will
4717 legitimate_address_p (mode
, addr
, strict
)
4718 enum machine_mode mode
;
4722 struct ix86_address parts
;
4723 rtx base
, index
, disp
;
4724 HOST_WIDE_INT scale
;
4725 const char *reason
= NULL
;
4726 rtx reason_rtx
= NULL_RTX
;
4728 if (TARGET_DEBUG_ADDR
)
4731 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
4732 GET_MODE_NAME (mode
), strict
);
4736 if (ix86_decompose_address (addr
, &parts
) <= 0)
4738 reason
= "decomposition failed";
4743 index
= parts
.index
;
4745 scale
= parts
.scale
;
4747 /* Validate base register.
4749 Don't allow SUBREG's here, it can lead to spill failures when the base
4750 is one word out of a two word structure, which is represented internally
4758 if (GET_CODE (base
) == SUBREG
)
4759 reg
= SUBREG_REG (base
);
4763 if (GET_CODE (reg
) != REG
)
4765 reason
= "base is not a register";
4769 if (GET_MODE (base
) != Pmode
)
4771 reason
= "base is not in Pmode";
4775 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
4776 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
4778 reason
= "base is not valid";
4783 /* Validate index register.
4785 Don't allow SUBREG's here, it can lead to spill failures when the index
4786 is one word out of a two word structure, which is represented internally
4794 if (GET_CODE (index
) == SUBREG
)
4795 reg
= SUBREG_REG (index
);
4799 if (GET_CODE (reg
) != REG
)
4801 reason
= "index is not a register";
4805 if (GET_MODE (index
) != Pmode
)
4807 reason
= "index is not in Pmode";
4811 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
4812 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
4814 reason
= "index is not valid";
4819 /* Validate scale factor. */
4822 reason_rtx
= GEN_INT (scale
);
4825 reason
= "scale without index";
4829 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
4831 reason
= "scale is not a valid multiplier";
4836 /* Validate displacement. */
4841 if (!CONSTANT_ADDRESS_P (disp
))
4843 reason
= "displacement is not constant";
4849 if (!x86_64_sign_extended_value (disp
))
4851 reason
= "displacement is out of range";
4857 if (GET_CODE (disp
) == CONST_DOUBLE
)
4859 reason
= "displacement is a const_double";
4864 if (flag_pic
&& SYMBOLIC_CONST (disp
))
4866 if (TARGET_64BIT
&& (index
|| base
))
4868 reason
= "non-constant pic memory reference";
4871 if (! legitimate_pic_address_disp_p (disp
))
4873 reason
= "displacement is an invalid pic construct";
4877 /* This code used to verify that a symbolic pic displacement
4878 includes the pic_offset_table_rtx register.
4880 While this is good idea, unfortunately these constructs may
4881 be created by "adds using lea" optimization for incorrect
4890 This code is nonsensical, but results in addressing
4891 GOT table with pic_offset_table_rtx base. We can't
4892 just refuse it easily, since it gets matched by
4893 "addsi3" pattern, that later gets split to lea in the
4894 case output register differs from input. While this
4895 can be handled by separate addsi pattern for this case
4896 that never results in lea, this seems to be easier and
4897 correct fix for crash to disable this test. */
4899 else if (HALF_PIC_P ())
4901 if (! HALF_PIC_ADDRESS_P (disp
)
4902 || (base
!= NULL_RTX
|| index
!= NULL_RTX
))
4904 reason
= "displacement is an invalid half-pic reference";
4910 /* Everything looks valid. */
4911 if (TARGET_DEBUG_ADDR
)
4912 fprintf (stderr
, "Success.\n");
4916 if (TARGET_DEBUG_ADDR
)
4918 fprintf (stderr
, "Error: %s\n", reason
);
4919 debug_rtx (reason_rtx
);
4924 /* Return an unique alias set for the GOT. */
4926 static HOST_WIDE_INT
4927 ix86_GOT_alias_set ()
4929 static HOST_WIDE_INT set
= -1;
4931 set
= new_alias_set ();
4935 /* Return a legitimate reference for ORIG (an address) using the
4936 register REG. If REG is 0, a new pseudo is generated.
4938 There are two types of references that must be handled:
4940 1. Global data references must load the address from the GOT, via
4941 the PIC reg. An insn is emitted to do this load, and the reg is
4944 2. Static data references, constant pool addresses, and code labels
4945 compute the address as an offset from the GOT, whose base is in
4946 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
4947 differentiate them from global data objects. The returned
4948 address is the PIC reg + an unspec constant.
4950 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
4951 reg also appears in the address. */
4954 legitimize_pic_address (orig
, reg
)
4962 if (local_symbolic_operand (addr
, Pmode
))
4964 /* In 64bit mode we can address such objects directly. */
4969 /* This symbol may be referenced via a displacement from the PIC
4970 base address (@GOTOFF). */
4972 current_function_uses_pic_offset_table
= 1;
4973 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
4974 new = gen_rtx_CONST (Pmode
, new);
4975 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
4979 emit_move_insn (reg
, new);
4984 else if (GET_CODE (addr
) == SYMBOL_REF
)
4988 current_function_uses_pic_offset_table
= 1;
4989 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
4990 new = gen_rtx_CONST (Pmode
, new);
4991 new = gen_rtx_MEM (Pmode
, new);
4992 RTX_UNCHANGING_P (new) = 1;
4993 set_mem_alias_set (new, ix86_GOT_alias_set ());
4996 reg
= gen_reg_rtx (Pmode
);
4997 /* Use directly gen_movsi, otherwise the address is loaded
4998 into register for CSE. We don't want to CSE this addresses,
4999 instead we CSE addresses from the GOT table, so skip this. */
5000 emit_insn (gen_movsi (reg
, new));
5005 /* This symbol must be referenced via a load from the
5006 Global Offset Table (@GOT). */
5008 current_function_uses_pic_offset_table
= 1;
5009 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5010 new = gen_rtx_CONST (Pmode
, new);
5011 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5012 new = gen_rtx_MEM (Pmode
, new);
5013 RTX_UNCHANGING_P (new) = 1;
5014 set_mem_alias_set (new, ix86_GOT_alias_set ());
5017 reg
= gen_reg_rtx (Pmode
);
5018 emit_move_insn (reg
, new);
5024 if (GET_CODE (addr
) == CONST
)
5026 addr
= XEXP (addr
, 0);
5028 /* We must match stuff we generate before. Assume the only
5029 unspecs that can get here are ours. Not that we could do
5030 anything with them anyway... */
5031 if (GET_CODE (addr
) == UNSPEC
5032 || (GET_CODE (addr
) == PLUS
5033 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5035 if (GET_CODE (addr
) != PLUS
)
5038 if (GET_CODE (addr
) == PLUS
)
5040 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5042 /* Check first to see if this is a constant offset from a @GOTOFF
5043 symbol reference. */
5044 if (local_symbolic_operand (op0
, Pmode
)
5045 && GET_CODE (op1
) == CONST_INT
)
5049 current_function_uses_pic_offset_table
= 1;
5050 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5052 new = gen_rtx_PLUS (Pmode
, new, op1
);
5053 new = gen_rtx_CONST (Pmode
, new);
5054 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5058 emit_move_insn (reg
, new);
5064 /* ??? We need to limit offsets here. */
5069 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5070 new = legitimize_pic_address (XEXP (addr
, 1),
5071 base
== reg
? NULL_RTX
: reg
);
5073 if (GET_CODE (new) == CONST_INT
)
5074 new = plus_constant (base
, INTVAL (new));
5077 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5079 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5080 new = XEXP (new, 1);
5082 new = gen_rtx_PLUS (Pmode
, base
, new);
5090 /* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
5091 may access it directly in the GOT. */
5094 i386_encode_section_info (decl
, first
)
5096 int first ATTRIBUTE_UNUSED
;
5100 rtx rtl
= (TREE_CODE_CLASS (TREE_CODE (decl
)) != 'd'
5101 ? TREE_CST_RTL (decl
) : DECL_RTL (decl
));
5103 if (GET_CODE (rtl
) == MEM
)
5105 if (TARGET_DEBUG_ADDR
5106 && TREE_CODE_CLASS (TREE_CODE (decl
)) == 'd')
5108 fprintf (stderr
, "Encode %s, public = %d\n",
5109 IDENTIFIER_POINTER (DECL_NAME (decl
)),
5110 TREE_PUBLIC (decl
));
5113 SYMBOL_REF_FLAG (XEXP (rtl
, 0))
5114 = (TREE_CODE_CLASS (TREE_CODE (decl
)) != 'd'
5115 || ! TREE_PUBLIC (decl
)
5116 || MODULE_LOCAL_P (decl
));
5121 /* Try machine-dependent ways of modifying an illegitimate address
5122 to be legitimate. If we find one, return the new, valid address.
5123 This macro is used in only one place: `memory_address' in explow.c.
5125 OLDX is the address as it was before break_out_memory_refs was called.
5126 In some cases it is useful to look at this to decide what needs to be done.
5128 MODE and WIN are passed so that this macro can use
5129 GO_IF_LEGITIMATE_ADDRESS.
5131 It is always safe for this macro to do nothing. It exists to recognize
5132 opportunities to optimize the output.
5134 For the 80386, we handle X+REG by loading X into a register R and
5135 using R+REG. R will go in a general reg and indexing will be used.
5136 However, if REG is a broken-out memory address or multiplication,
5137 nothing needs to be done because REG can certainly go in a general reg.
5139 When -fpic is used, special handling is needed for symbolic references.
5140 See comments by legitimize_pic_address in i386.c for details. */
5143 legitimize_address (x
, oldx
, mode
)
5145 register rtx oldx ATTRIBUTE_UNUSED
;
5146 enum machine_mode mode
;
5151 if (TARGET_DEBUG_ADDR
)
5153 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5154 GET_MODE_NAME (mode
));
5158 if (flag_pic
&& SYMBOLIC_CONST (x
))
5159 return legitimize_pic_address (x
, 0);
5161 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5162 if (GET_CODE (x
) == ASHIFT
5163 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5164 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5167 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5168 GEN_INT (1 << log
));
5171 if (GET_CODE (x
) == PLUS
)
5173 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5175 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5176 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5177 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5180 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5181 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5182 GEN_INT (1 << log
));
5185 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5186 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5187 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5190 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5191 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5192 GEN_INT (1 << log
));
5195 /* Put multiply first if it isn't already. */
5196 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5198 rtx tmp
= XEXP (x
, 0);
5199 XEXP (x
, 0) = XEXP (x
, 1);
5204 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5205 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5206 created by virtual register instantiation, register elimination, and
5207 similar optimizations. */
5208 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5211 x
= gen_rtx_PLUS (Pmode
,
5212 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5213 XEXP (XEXP (x
, 1), 0)),
5214 XEXP (XEXP (x
, 1), 1));
5218 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5219 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5220 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5221 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5222 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5223 && CONSTANT_P (XEXP (x
, 1)))
5226 rtx other
= NULL_RTX
;
5228 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5230 constant
= XEXP (x
, 1);
5231 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5233 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5235 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5236 other
= XEXP (x
, 1);
5244 x
= gen_rtx_PLUS (Pmode
,
5245 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5246 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5247 plus_constant (other
, INTVAL (constant
)));
5251 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5254 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5257 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5260 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5263 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5267 && GET_CODE (XEXP (x
, 1)) == REG
5268 && GET_CODE (XEXP (x
, 0)) == REG
)
5271 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5274 x
= legitimize_pic_address (x
, 0);
5277 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5280 if (GET_CODE (XEXP (x
, 0)) == REG
)
5282 register rtx temp
= gen_reg_rtx (Pmode
);
5283 register rtx val
= force_operand (XEXP (x
, 1), temp
);
5285 emit_move_insn (temp
, val
);
5291 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5293 register rtx temp
= gen_reg_rtx (Pmode
);
5294 register rtx val
= force_operand (XEXP (x
, 0), temp
);
5296 emit_move_insn (temp
, val
);
5306 /* Print an integer constant expression in assembler syntax. Addition
5307 and subtraction are the only arithmetic that may appear in these
5308 expressions. FILE is the stdio stream to write to, X is the rtx, and
5309 CODE is the operand print code from the output string. */
5312 output_pic_addr_const (file
, x
, code
)
5319 switch (GET_CODE (x
))
5329 assemble_name (file
, XSTR (x
, 0));
5330 if (code
== 'P' && ! SYMBOL_REF_FLAG (x
))
5331 fputs ("@PLT", file
);
5338 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5339 assemble_name (asm_out_file
, buf
);
5343 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5347 /* This used to output parentheses around the expression,
5348 but that does not work on the 386 (either ATT or BSD assembler). */
5349 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5353 if (GET_MODE (x
) == VOIDmode
)
5355 /* We can use %d if the number is <32 bits and positive. */
5356 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5357 fprintf (file
, "0x%lx%08lx",
5358 (unsigned long) CONST_DOUBLE_HIGH (x
),
5359 (unsigned long) CONST_DOUBLE_LOW (x
));
5361 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5364 /* We can't handle floating point constants;
5365 PRINT_OPERAND must handle them. */
5366 output_operand_lossage ("floating constant misused");
5370 /* Some assemblers need integer constants to appear first. */
5371 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5373 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5375 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5377 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5379 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5381 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5388 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
5389 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5391 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5392 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
5396 if (XVECLEN (x
, 0) != 1)
5398 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5399 switch (XINT (x
, 1))
5402 fputs ("@GOT", file
);
5405 fputs ("@GOTOFF", file
);
5408 fputs ("@PLT", file
);
5410 case UNSPEC_GOTPCREL
:
5411 fputs ("@GOTPCREL(%RIP)", file
);
5414 output_operand_lossage ("invalid UNSPEC as operand");
5420 output_operand_lossage ("invalid expression as operand");
5424 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5425 We need to handle our special PIC relocations. */
5428 i386_dwarf_output_addr_const (file
, x
)
5433 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
5437 fprintf (file
, "%s", ASM_LONG
);
5440 output_pic_addr_const (file
, x
, '\0');
5442 output_addr_const (file
, x
);
5446 /* In the name of slightly smaller debug output, and to cater to
5447 general assembler losage, recognize PIC+GOTOFF and turn it back
5448 into a direct symbol reference. */
5451 i386_simplify_dwarf_addr (orig_x
)
5456 if (GET_CODE (x
) == MEM
)
5461 if (GET_CODE (x
) != CONST
5462 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5463 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
5464 || GET_CODE (orig_x
) != MEM
)
5466 return XVECEXP (XEXP (x
, 0), 0, 0);
5469 if (GET_CODE (x
) != PLUS
5470 || GET_CODE (XEXP (x
, 1)) != CONST
)
5473 if (GET_CODE (XEXP (x
, 0)) == REG
5474 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5475 /* %ebx + GOT/GOTOFF */
5477 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
5479 /* %ebx + %reg * scale + GOT/GOTOFF */
5481 if (GET_CODE (XEXP (y
, 0)) == REG
5482 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5484 else if (GET_CODE (XEXP (y
, 1)) == REG
5485 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
5489 if (GET_CODE (y
) != REG
5490 && GET_CODE (y
) != MULT
5491 && GET_CODE (y
) != ASHIFT
)
5497 x
= XEXP (XEXP (x
, 1), 0);
5498 if (GET_CODE (x
) == UNSPEC
5499 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
5500 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
5503 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
5504 return XVECEXP (x
, 0, 0);
5507 if (GET_CODE (x
) == PLUS
5508 && GET_CODE (XEXP (x
, 0)) == UNSPEC
5509 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5510 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
5511 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
5512 && GET_CODE (orig_x
) != MEM
)))
5514 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
5516 return gen_rtx_PLUS (Pmode
, y
, x
);
5524 put_condition_code (code
, mode
, reverse
, fp
, file
)
5526 enum machine_mode mode
;
5532 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
5534 enum rtx_code second_code
, bypass_code
;
5535 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
5536 if (bypass_code
!= NIL
|| second_code
!= NIL
)
5538 code
= ix86_fp_compare_code_to_integer (code
);
5542 code
= reverse_condition (code
);
5553 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
5558 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5559 Those same assemblers have the same but opposite losage on cmov. */
5562 suffix
= fp
? "nbe" : "a";
5565 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
5567 else if (mode
== CCmode
|| mode
== CCGCmode
)
5578 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
5580 else if (mode
== CCmode
|| mode
== CCGCmode
)
5589 suffix
= fp
? "nb" : "ae";
5592 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
5602 suffix
= fp
? "u" : "p";
5605 suffix
= fp
? "nu" : "np";
5610 fputs (suffix
, file
);
5614 print_reg (x
, code
, file
)
5619 if (REGNO (x
) == ARG_POINTER_REGNUM
5620 || REGNO (x
) == FRAME_POINTER_REGNUM
5621 || REGNO (x
) == FLAGS_REG
5622 || REGNO (x
) == FPSR_REG
)
5625 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
5628 if (code
== 'w' || MMX_REG_P (x
))
5630 else if (code
== 'b')
5632 else if (code
== 'k')
5634 else if (code
== 'q')
5636 else if (code
== 'y')
5638 else if (code
== 'h')
5641 code
= GET_MODE_SIZE (GET_MODE (x
));
5643 /* Irritatingly, AMD extended registers use different naming convention
5644 from the normal registers. */
5645 if (REX_INT_REG_P (x
))
5652 error ("extended registers have no high halves");
5655 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5658 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5661 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5664 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
5667 error ("unsupported operand size for extended register");
5675 if (STACK_TOP_P (x
))
5677 fputs ("st(0)", file
);
5684 if (! ANY_FP_REG_P (x
))
5685 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
5689 fputs (hi_reg_name
[REGNO (x
)], file
);
5692 fputs (qi_reg_name
[REGNO (x
)], file
);
5695 fputs (qi_high_reg_name
[REGNO (x
)], file
);
5703 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
5704 C -- print opcode suffix for set/cmov insn.
5705 c -- like C, but print reversed condition
5706 F,f -- likewise, but for floating-point.
5707 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
5709 R -- print the prefix for register names.
5710 z -- print the opcode suffix for the size of the current operand.
5711 * -- print a star (in certain assembler syntax)
5712 A -- print an absolute memory reference.
5713 w -- print the operand as if it's a "word" (HImode) even if it isn't.
5714 s -- print a shift double count, followed by the assemblers argument
5716 b -- print the QImode name of the register for the indicated operand.
5717 %b0 would print %al if operands[0] is reg 0.
5718 w -- likewise, print the HImode name of the register.
5719 k -- likewise, print the SImode name of the register.
5720 q -- likewise, print the DImode name of the register.
5721 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
5722 y -- print "st(0)" instead of "st" as a register.
5723 D -- print condition for SSE cmp instruction.
5724 P -- if PIC, print an @PLT suffix.
5725 X -- don't print any sort of PIC '@' suffix for a symbol.
5729 print_operand (file
, x
, code
)
5739 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5744 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5746 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
5748 /* Intel syntax. For absolute addresses, registers should not
5749 be surrounded by braces. */
5750 if (GET_CODE (x
) != REG
)
5753 PRINT_OPERAND (file
, x
, 0);
5761 PRINT_OPERAND (file
, x
, 0);
5766 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5771 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5776 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5781 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5786 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5791 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5796 /* 387 opcodes don't get size suffixes if the operands are
5798 if (STACK_REG_P (x
))
5801 /* Likewise if using Intel opcodes. */
5802 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
5805 /* This is the size of op from size of operand. */
5806 switch (GET_MODE_SIZE (GET_MODE (x
)))
5809 #ifdef HAVE_GAS_FILDS_FISTS
5815 if (GET_MODE (x
) == SFmode
)
5830 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5832 #ifdef GAS_MNEMONICS
5858 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
5860 PRINT_OPERAND (file
, x
, 0);
5866 /* Little bit of braindamage here. The SSE compare instructions
5867 does use completely different names for the comparisons that the
5868 fp conditional moves. */
5869 switch (GET_CODE (x
))
5884 fputs ("unord", file
);
5888 fputs ("neq", file
);
5892 fputs ("nlt", file
);
5896 fputs ("nle", file
);
5899 fputs ("ord", file
);
5907 #ifdef CMOV_SUN_AS_SYNTAX
5908 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5910 switch (GET_MODE (x
))
5912 case HImode
: putc ('w', file
); break;
5914 case SFmode
: putc ('l', file
); break;
5916 case DFmode
: putc ('q', file
); break;
5924 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
5927 #ifdef CMOV_SUN_AS_SYNTAX
5928 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5931 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
5934 /* Like above, but reverse condition */
5936 /* Check to see if argument to %c is really a constant
5937 and not a condition code which needs to be reversed. */
5938 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
5940 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
5943 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
5946 #ifdef CMOV_SUN_AS_SYNTAX
5947 if (ASSEMBLER_DIALECT
== ASM_ATT
)
5950 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
5956 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
5959 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
5962 int pred_val
= INTVAL (XEXP (x
, 0));
5964 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
5965 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
5967 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
5968 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
5970 /* Emit hints only in the case default branch prediction
5971 heruistics would fail. */
5972 if (taken
!= cputaken
)
5974 /* We use 3e (DS) prefix for taken branches and
5975 2e (CS) prefix for not taken branches. */
5977 fputs ("ds ; ", file
);
5979 fputs ("cs ; ", file
);
5986 output_operand_lossage ("invalid operand code `%c'", code
);
5990 if (GET_CODE (x
) == REG
)
5992 PRINT_REG (x
, code
, file
);
5995 else if (GET_CODE (x
) == MEM
)
5997 /* No `byte ptr' prefix for call instructions. */
5998 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6001 switch (GET_MODE_SIZE (GET_MODE (x
)))
6003 case 1: size
= "BYTE"; break;
6004 case 2: size
= "WORD"; break;
6005 case 4: size
= "DWORD"; break;
6006 case 8: size
= "QWORD"; break;
6007 case 12: size
= "XWORD"; break;
6008 case 16: size
= "XMMWORD"; break;
6013 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6016 else if (code
== 'w')
6018 else if (code
== 'k')
6022 fputs (" PTR ", file
);
6026 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6027 output_pic_addr_const (file
, x
, code
);
6028 /* Avoid (%rip) for call operands. */
6029 else if (CONSTANT_ADDRESS_P (x
) && code
=='P'
6030 && GET_CODE (x
) != CONST_INT
)
6031 output_addr_const (file
, x
);
6032 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6033 output_operand_lossage ("invalid constraints for operand");
6038 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6043 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6044 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6046 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6048 fprintf (file
, "0x%lx", l
);
6051 /* These float cases don't actually occur as immediate operands. */
6052 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6057 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6058 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
6059 fprintf (file
, "%s", dstr
);
6062 else if (GET_CODE (x
) == CONST_DOUBLE
6063 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6068 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6069 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
6070 fprintf (file
, "%s", dstr
);
6076 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6078 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6081 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6082 || GET_CODE (x
) == LABEL_REF
)
6084 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6087 fputs ("OFFSET FLAT:", file
);
6090 if (GET_CODE (x
) == CONST_INT
)
6091 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6093 output_pic_addr_const (file
, x
, code
);
6095 output_addr_const (file
, x
);
6099 /* Print a memory operand whose address is ADDR. */
6102 print_operand_address (file
, addr
)
6106 struct ix86_address parts
;
6107 rtx base
, index
, disp
;
6110 if (! ix86_decompose_address (addr
, &parts
))
6114 index
= parts
.index
;
6116 scale
= parts
.scale
;
6118 if (!base
&& !index
)
6120 /* Displacement only requires special attention. */
6122 if (GET_CODE (disp
) == CONST_INT
)
6124 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6126 if (USER_LABEL_PREFIX
[0] == 0)
6128 fputs ("ds:", file
);
6130 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6133 output_pic_addr_const (file
, addr
, 0);
6135 output_addr_const (file
, addr
);
6137 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6138 if (GET_CODE (disp
) != CONST_INT
&& TARGET_64BIT
)
6139 fputs ("(%rip)", file
);
6143 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6148 output_pic_addr_const (file
, disp
, 0);
6149 else if (GET_CODE (disp
) == LABEL_REF
)
6150 output_asm_label (disp
);
6152 output_addr_const (file
, disp
);
6157 PRINT_REG (base
, 0, file
);
6161 PRINT_REG (index
, 0, file
);
6163 fprintf (file
, ",%d", scale
);
6169 rtx offset
= NULL_RTX
;
6173 /* Pull out the offset of a symbol; print any symbol itself. */
6174 if (GET_CODE (disp
) == CONST
6175 && GET_CODE (XEXP (disp
, 0)) == PLUS
6176 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6178 offset
= XEXP (XEXP (disp
, 0), 1);
6179 disp
= gen_rtx_CONST (VOIDmode
,
6180 XEXP (XEXP (disp
, 0), 0));
6184 output_pic_addr_const (file
, disp
, 0);
6185 else if (GET_CODE (disp
) == LABEL_REF
)
6186 output_asm_label (disp
);
6187 else if (GET_CODE (disp
) == CONST_INT
)
6190 output_addr_const (file
, disp
);
6196 PRINT_REG (base
, 0, file
);
6199 if (INTVAL (offset
) >= 0)
6201 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6205 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6212 PRINT_REG (index
, 0, file
);
6214 fprintf (file
, "*%d", scale
);
6221 /* Split one or more DImode RTL references into pairs of SImode
6222 references. The RTL can be REG, offsettable MEM, integer constant, or
6223 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6224 split and "num" is its length. lo_half and hi_half are output arrays
6225 that parallel "operands". */
6228 split_di (operands
, num
, lo_half
, hi_half
)
6231 rtx lo_half
[], hi_half
[];
6235 rtx op
= operands
[num
];
6237 /* simplify_subreg refuse to split volatile memory addresses,
6238 but we still have to handle it. */
6239 if (GET_CODE (op
) == MEM
)
6241 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6242 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6246 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6247 GET_MODE (op
) == VOIDmode
6248 ? DImode
: GET_MODE (op
), 0);
6249 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6250 GET_MODE (op
) == VOIDmode
6251 ? DImode
: GET_MODE (op
), 4);
6255 /* Split one or more TImode RTL references into pairs of SImode
6256 references. The RTL can be REG, offsettable MEM, integer constant, or
6257 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6258 split and "num" is its length. lo_half and hi_half are output arrays
6259 that parallel "operands". */
6262 split_ti (operands
, num
, lo_half
, hi_half
)
6265 rtx lo_half
[], hi_half
[];
6269 rtx op
= operands
[num
];
6271 /* simplify_subreg refuse to split volatile memory addresses, but we
6272 still have to handle it. */
6273 if (GET_CODE (op
) == MEM
)
6275 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6276 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6280 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6281 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
6286 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6287 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6288 is the expression of the binary operation. The output may either be
6289 emitted here, or returned to the caller, like all output_* functions.
6291 There is no guarantee that the operands are the same mode, as they
6292 might be within FLOAT or FLOAT_EXTEND expressions. */
6294 #ifndef SYSV386_COMPAT
6295 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6296 wants to fix the assemblers because that causes incompatibility
6297 with gcc. No-one wants to fix gcc because that causes
6298 incompatibility with assemblers... You can use the option of
6299 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6300 #define SYSV386_COMPAT 1
6304 output_387_binary_op (insn
, operands
)
6308 static char buf
[30];
6311 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
6313 #ifdef ENABLE_CHECKING
6314 /* Even if we do not want to check the inputs, this documents input
6315 constraints. Which helps in understanding the following code. */
6316 if (STACK_REG_P (operands
[0])
6317 && ((REG_P (operands
[1])
6318 && REGNO (operands
[0]) == REGNO (operands
[1])
6319 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6320 || (REG_P (operands
[2])
6321 && REGNO (operands
[0]) == REGNO (operands
[2])
6322 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6323 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6329 switch (GET_CODE (operands
[3]))
6332 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6333 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6341 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6342 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6350 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6351 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6359 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6360 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6374 if (GET_MODE (operands
[0]) == SFmode
)
6375 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6377 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6382 switch (GET_CODE (operands
[3]))
6386 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6388 rtx temp
= operands
[2];
6389 operands
[2] = operands
[1];
6393 /* know operands[0] == operands[1]. */
6395 if (GET_CODE (operands
[2]) == MEM
)
6401 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6403 if (STACK_TOP_P (operands
[0]))
6404 /* How is it that we are storing to a dead operand[2]?
6405 Well, presumably operands[1] is dead too. We can't
6406 store the result to st(0) as st(0) gets popped on this
6407 instruction. Instead store to operands[2] (which I
6408 think has to be st(1)). st(1) will be popped later.
6409 gcc <= 2.8.1 didn't have this check and generated
6410 assembly code that the Unixware assembler rejected. */
6411 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6413 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6417 if (STACK_TOP_P (operands
[0]))
6418 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6420 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6425 if (GET_CODE (operands
[1]) == MEM
)
6431 if (GET_CODE (operands
[2]) == MEM
)
6437 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6440 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6441 derived assemblers, confusingly reverse the direction of
6442 the operation for fsub{r} and fdiv{r} when the
6443 destination register is not st(0). The Intel assembler
6444 doesn't have this brain damage. Read !SYSV386_COMPAT to
6445 figure out what the hardware really does. */
6446 if (STACK_TOP_P (operands
[0]))
6447 p
= "{p\t%0, %2|rp\t%2, %0}";
6449 p
= "{rp\t%2, %0|p\t%0, %2}";
6451 if (STACK_TOP_P (operands
[0]))
6452 /* As above for fmul/fadd, we can't store to st(0). */
6453 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6455 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6460 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
6463 if (STACK_TOP_P (operands
[0]))
6464 p
= "{rp\t%0, %1|p\t%1, %0}";
6466 p
= "{p\t%1, %0|rp\t%0, %1}";
6468 if (STACK_TOP_P (operands
[0]))
6469 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
6471 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
6476 if (STACK_TOP_P (operands
[0]))
6478 if (STACK_TOP_P (operands
[1]))
6479 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6481 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
6484 else if (STACK_TOP_P (operands
[1]))
6487 p
= "{\t%1, %0|r\t%0, %1}";
6489 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
6495 p
= "{r\t%2, %0|\t%0, %2}";
6497 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6510 /* Output code to initialize control word copies used by
6511 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
6512 is set to control word rounding downwards. */
6514 emit_i387_cw_initialization (normal
, round_down
)
6515 rtx normal
, round_down
;
6517 rtx reg
= gen_reg_rtx (HImode
);
6519 emit_insn (gen_x86_fnstcw_1 (normal
));
6520 emit_move_insn (reg
, normal
);
6521 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
6523 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
6525 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
6526 emit_move_insn (round_down
, reg
);
6529 /* Output code for INSN to convert a float to a signed int. OPERANDS
6530 are the insn operands. The output may be [HSD]Imode and the input
6531 operand may be [SDX]Fmode. */
6534 output_fix_trunc (insn
, operands
)
6538 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
6539 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
6541 /* Jump through a hoop or two for DImode, since the hardware has no
6542 non-popping instruction. We used to do this a different way, but
6543 that was somewhat fragile and broke with post-reload splitters. */
6544 if (dimode_p
&& !stack_top_dies
)
6545 output_asm_insn ("fld\t%y1", operands
);
6547 if (!STACK_TOP_P (operands
[1]))
6550 if (GET_CODE (operands
[0]) != MEM
)
6553 output_asm_insn ("fldcw\t%3", operands
);
6554 if (stack_top_dies
|| dimode_p
)
6555 output_asm_insn ("fistp%z0\t%0", operands
);
6557 output_asm_insn ("fist%z0\t%0", operands
);
6558 output_asm_insn ("fldcw\t%2", operands
);
6563 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
6564 should be used and 2 when fnstsw should be used. UNORDERED_P is true
6565 when fucom should be used. */
6568 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
6571 int eflags_p
, unordered_p
;
6574 rtx cmp_op0
= operands
[0];
6575 rtx cmp_op1
= operands
[1];
6576 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
6581 cmp_op1
= operands
[2];
6585 if (GET_MODE (operands
[0]) == SFmode
)
6587 return "ucomiss\t{%1, %0|%0, %1}";
6589 return "comiss\t{%1, %0|%0, %y}";
6592 return "ucomisd\t{%1, %0|%0, %1}";
6594 return "comisd\t{%1, %0|%0, %y}";
6597 if (! STACK_TOP_P (cmp_op0
))
6600 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
6602 if (STACK_REG_P (cmp_op1
)
6604 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
6605 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
6607 /* If both the top of the 387 stack dies, and the other operand
6608 is also a stack register that dies, then this must be a
6609 `fcompp' float compare */
6613 /* There is no double popping fcomi variant. Fortunately,
6614 eflags is immune from the fstp's cc clobbering. */
6616 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
6618 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
6626 return "fucompp\n\tfnstsw\t%0";
6628 return "fcompp\n\tfnstsw\t%0";
6641 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
6643 static const char * const alt
[24] =
6655 "fcomi\t{%y1, %0|%0, %y1}",
6656 "fcomip\t{%y1, %0|%0, %y1}",
6657 "fucomi\t{%y1, %0|%0, %y1}",
6658 "fucomip\t{%y1, %0|%0, %y1}",
6665 "fcom%z2\t%y2\n\tfnstsw\t%0",
6666 "fcomp%z2\t%y2\n\tfnstsw\t%0",
6667 "fucom%z2\t%y2\n\tfnstsw\t%0",
6668 "fucomp%z2\t%y2\n\tfnstsw\t%0",
6670 "ficom%z2\t%y2\n\tfnstsw\t%0",
6671 "ficomp%z2\t%y2\n\tfnstsw\t%0",
6679 mask
= eflags_p
<< 3;
6680 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
6681 mask
|= unordered_p
<< 1;
6682 mask
|= stack_top_dies
;
6695 ix86_output_addr_vec_elt (file
, value
)
6699 const char *directive
= ASM_LONG
;
6704 directive
= ASM_QUAD
;
6710 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
6714 ix86_output_addr_diff_elt (file
, value
, rel
)
6719 fprintf (file
, "%s%s%d-%s%d\n",
6720 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
6721 else if (HAVE_AS_GOTOFF_IN_DATA
)
6722 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
6724 asm_fprintf (file
, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
6725 ASM_LONG
, LPREFIX
, value
);
6728 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
6732 ix86_expand_clear (dest
)
6737 /* We play register width games, which are only valid after reload. */
6738 if (!reload_completed
)
6741 /* Avoid HImode and its attendant prefix byte. */
6742 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
6743 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
6745 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
6747 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
6748 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
6750 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
6751 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
6758 ix86_expand_move (mode
, operands
)
6759 enum machine_mode mode
;
6762 int strict
= (reload_in_progress
|| reload_completed
);
6765 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (operands
[1], Pmode
))
6767 /* Emit insns to move operands[1] into operands[0]. */
6769 if (GET_CODE (operands
[0]) == MEM
)
6770 operands
[1] = force_reg (Pmode
, operands
[1]);
6773 rtx temp
= operands
[0];
6774 if (GET_CODE (temp
) != REG
)
6775 temp
= gen_reg_rtx (Pmode
);
6776 temp
= legitimize_pic_address (operands
[1], temp
);
6777 if (temp
== operands
[0])
6784 if (GET_CODE (operands
[0]) == MEM
6785 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
6786 || !push_operand (operands
[0], mode
))
6787 && GET_CODE (operands
[1]) == MEM
)
6788 operands
[1] = force_reg (mode
, operands
[1]);
6790 if (push_operand (operands
[0], mode
)
6791 && ! general_no_elim_operand (operands
[1], mode
))
6792 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
6794 /* Force large constants in 64bit compilation into register
6795 to get them CSEed. */
6796 if (TARGET_64BIT
&& mode
== DImode
6797 && immediate_operand (operands
[1], mode
)
6798 && !x86_64_zero_extended_value (operands
[1])
6799 && !register_operand (operands
[0], mode
)
6800 && optimize
&& !reload_completed
&& !reload_in_progress
)
6801 operands
[1] = copy_to_mode_reg (mode
, operands
[1]);
6803 if (FLOAT_MODE_P (mode
))
6805 /* If we are loading a floating point constant to a register,
6806 force the value to memory now, since we'll get better code
6807 out the back end. */
6811 else if (GET_CODE (operands
[1]) == CONST_DOUBLE
6812 && register_operand (operands
[0], mode
))
6813 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
6817 insn
= gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]);
6823 ix86_expand_vector_move (mode
, operands
)
6824 enum machine_mode mode
;
6827 /* Force constants other than zero into memory. We do not know how
6828 the instructions used to build constants modify the upper 64 bits
6829 of the register, once we have that information we may be able
6830 to handle some of them more efficiently. */
6831 if ((reload_in_progress
| reload_completed
) == 0
6832 && register_operand (operands
[0], mode
)
6833 && CONSTANT_P (operands
[1]))
6835 rtx addr
= gen_reg_rtx (Pmode
);
6836 emit_move_insn (addr
, XEXP (force_const_mem (mode
, operands
[1]), 0));
6837 operands
[1] = gen_rtx_MEM (mode
, addr
);
6840 /* Make operand1 a register if it isn't already. */
6841 if ((reload_in_progress
| reload_completed
) == 0
6842 && !register_operand (operands
[0], mode
)
6843 && !register_operand (operands
[1], mode
)
6844 && operands
[1] != CONST0_RTX (mode
))
6846 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
6847 emit_move_insn (operands
[0], temp
);
6851 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
6854 /* Attempt to expand a binary operator. Make the expansion closer to the
6855 actual machine, then just general_operand, which will allow 3 separate
6856 memory references (one output, two input) in a single insn. */
6859 ix86_expand_binary_operator (code
, mode
, operands
)
6861 enum machine_mode mode
;
6864 int matching_memory
;
6865 rtx src1
, src2
, dst
, op
, clob
;
6871 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
6872 if (GET_RTX_CLASS (code
) == 'c'
6873 && (rtx_equal_p (dst
, src2
)
6874 || immediate_operand (src1
, mode
)))
6881 /* If the destination is memory, and we do not have matching source
6882 operands, do things in registers. */
6883 matching_memory
= 0;
6884 if (GET_CODE (dst
) == MEM
)
6886 if (rtx_equal_p (dst
, src1
))
6887 matching_memory
= 1;
6888 else if (GET_RTX_CLASS (code
) == 'c'
6889 && rtx_equal_p (dst
, src2
))
6890 matching_memory
= 2;
6892 dst
= gen_reg_rtx (mode
);
6895 /* Both source operands cannot be in memory. */
6896 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
6898 if (matching_memory
!= 2)
6899 src2
= force_reg (mode
, src2
);
6901 src1
= force_reg (mode
, src1
);
6904 /* If the operation is not commutable, source 1 cannot be a constant
6905 or non-matching memory. */
6906 if ((CONSTANT_P (src1
)
6907 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
6908 && GET_RTX_CLASS (code
) != 'c')
6909 src1
= force_reg (mode
, src1
);
6911 /* If optimizing, copy to regs to improve CSE */
6912 if (optimize
&& ! no_new_pseudos
)
6914 if (GET_CODE (dst
) == MEM
)
6915 dst
= gen_reg_rtx (mode
);
6916 if (GET_CODE (src1
) == MEM
)
6917 src1
= force_reg (mode
, src1
);
6918 if (GET_CODE (src2
) == MEM
)
6919 src2
= force_reg (mode
, src2
);
6922 /* Emit the instruction. */
6924 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
6925 if (reload_in_progress
)
6927 /* Reload doesn't know about the flags register, and doesn't know that
6928 it doesn't want to clobber it. We can only do this with PLUS. */
6935 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
6936 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
6939 /* Fix up the destination if needed. */
6940 if (dst
!= operands
[0])
6941 emit_move_insn (operands
[0], dst
);
6944 /* Return TRUE or FALSE depending on whether the binary operator meets the
6945 appropriate constraints. */
6948 ix86_binary_operator_ok (code
, mode
, operands
)
6950 enum machine_mode mode ATTRIBUTE_UNUSED
;
6953 /* Both source operands cannot be in memory. */
6954 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
6956 /* If the operation is not commutable, source 1 cannot be a constant. */
6957 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
6959 /* If the destination is memory, we must have a matching source operand. */
6960 if (GET_CODE (operands
[0]) == MEM
6961 && ! (rtx_equal_p (operands
[0], operands
[1])
6962 || (GET_RTX_CLASS (code
) == 'c'
6963 && rtx_equal_p (operands
[0], operands
[2]))))
6965 /* If the operation is not commutable and the source 1 is memory, we must
6966 have a matching destination. */
6967 if (GET_CODE (operands
[1]) == MEM
6968 && GET_RTX_CLASS (code
) != 'c'
6969 && ! rtx_equal_p (operands
[0], operands
[1]))
6974 /* Attempt to expand a unary operator. Make the expansion closer to the
6975 actual machine, then just general_operand, which will allow 2 separate
6976 memory references (one output, one input) in a single insn. */
6979 ix86_expand_unary_operator (code
, mode
, operands
)
6981 enum machine_mode mode
;
6984 int matching_memory
;
6985 rtx src
, dst
, op
, clob
;
6990 /* If the destination is memory, and we do not have matching source
6991 operands, do things in registers. */
6992 matching_memory
= 0;
6993 if (GET_CODE (dst
) == MEM
)
6995 if (rtx_equal_p (dst
, src
))
6996 matching_memory
= 1;
6998 dst
= gen_reg_rtx (mode
);
7001 /* When source operand is memory, destination must match. */
7002 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7003 src
= force_reg (mode
, src
);
7005 /* If optimizing, copy to regs to improve CSE */
7006 if (optimize
&& ! no_new_pseudos
)
7008 if (GET_CODE (dst
) == MEM
)
7009 dst
= gen_reg_rtx (mode
);
7010 if (GET_CODE (src
) == MEM
)
7011 src
= force_reg (mode
, src
);
7014 /* Emit the instruction. */
7016 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7017 if (reload_in_progress
|| code
== NOT
)
7019 /* Reload doesn't know about the flags register, and doesn't know that
7020 it doesn't want to clobber it. */
7027 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7028 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7031 /* Fix up the destination if needed. */
7032 if (dst
!= operands
[0])
7033 emit_move_insn (operands
[0], dst
);
7036 /* Return TRUE or FALSE depending on whether the unary operator meets the
7037 appropriate constraints. */
7040 ix86_unary_operator_ok (code
, mode
, operands
)
7041 enum rtx_code code ATTRIBUTE_UNUSED
;
7042 enum machine_mode mode ATTRIBUTE_UNUSED
;
7043 rtx operands
[2] ATTRIBUTE_UNUSED
;
7045 /* If one of operands is memory, source and destination must match. */
7046 if ((GET_CODE (operands
[0]) == MEM
7047 || GET_CODE (operands
[1]) == MEM
)
7048 && ! rtx_equal_p (operands
[0], operands
[1]))
7053 /* Return TRUE or FALSE depending on whether the first SET in INSN
7054 has source and destination with matching CC modes, and that the
7055 CC mode is at least as constrained as REQ_MODE. */
7058 ix86_match_ccmode (insn
, req_mode
)
7060 enum machine_mode req_mode
;
7063 enum machine_mode set_mode
;
7065 set
= PATTERN (insn
);
7066 if (GET_CODE (set
) == PARALLEL
)
7067 set
= XVECEXP (set
, 0, 0);
7068 if (GET_CODE (set
) != SET
)
7070 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7073 set_mode
= GET_MODE (SET_DEST (set
));
7077 if (req_mode
!= CCNOmode
7078 && (req_mode
!= CCmode
7079 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7083 if (req_mode
== CCGCmode
)
7087 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7091 if (req_mode
== CCZmode
)
7101 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7104 /* Generate insn patterns to do an integer compare of OPERANDS. */
7107 ix86_expand_int_compare (code
, op0
, op1
)
7111 enum machine_mode cmpmode
;
7114 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7115 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7117 /* This is very simple, but making the interface the same as in the
7118 FP case makes the rest of the code easier. */
7119 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7120 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7122 /* Return the test that should be put into the flags user, i.e.
7123 the bcc, scc, or cmov instruction. */
7124 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7127 /* Figure out whether to use ordered or unordered fp comparisons.
7128 Return the appropriate mode to use. */
7131 ix86_fp_compare_mode (code
)
7132 enum rtx_code code ATTRIBUTE_UNUSED
;
7134 /* ??? In order to make all comparisons reversible, we do all comparisons
7135 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7136 all forms trapping and nontrapping comparisons, we can make inequality
7137 comparisons trapping again, since it results in better code when using
7138 FCOM based compares. */
7139 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
7143 ix86_cc_mode (code
, op0
, op1
)
7147 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7148 return ix86_fp_compare_mode (code
);
7151 /* Only zero flag is needed. */
7153 case NE
: /* ZF!=0 */
7155 /* Codes needing carry flag. */
7156 case GEU
: /* CF=0 */
7157 case GTU
: /* CF=0 & ZF=0 */
7158 case LTU
: /* CF=1 */
7159 case LEU
: /* CF=1 | ZF=1 */
7161 /* Codes possibly doable only with sign flag when
7162 comparing against zero. */
7163 case GE
: /* SF=OF or SF=0 */
7164 case LT
: /* SF<>OF or SF=1 */
7165 if (op1
== const0_rtx
)
7168 /* For other cases Carry flag is not required. */
7170 /* Codes doable only with sign flag when comparing
7171 against zero, but we miss jump instruction for it
7172 so we need to use relational tests agains overflow
7173 that thus needs to be zero. */
7174 case GT
: /* ZF=0 & SF=OF */
7175 case LE
: /* ZF=1 | SF<>OF */
7176 if (op1
== const0_rtx
)
7180 /* strcmp pattern do (use flags) and combine may ask us for proper
7189 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7192 ix86_use_fcomi_compare (code
)
7193 enum rtx_code code ATTRIBUTE_UNUSED
;
7195 enum rtx_code swapped_code
= swap_condition (code
);
7196 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7197 || (ix86_fp_comparison_cost (swapped_code
)
7198 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
7201 /* Swap, force into registers, or otherwise massage the two operands
7202 to a fp comparison. The operands are updated in place; the new
7203 comparsion code is returned. */
7205 static enum rtx_code
7206 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
7210 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7211 rtx op0
= *pop0
, op1
= *pop1
;
7212 enum machine_mode op_mode
= GET_MODE (op0
);
7213 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7215 /* All of the unordered compare instructions only work on registers.
7216 The same is true of the XFmode compare instructions. The same is
7217 true of the fcomi compare instructions. */
7220 && (fpcmp_mode
== CCFPUmode
7221 || op_mode
== XFmode
7222 || op_mode
== TFmode
7223 || ix86_use_fcomi_compare (code
)))
7225 op0
= force_reg (op_mode
, op0
);
7226 op1
= force_reg (op_mode
, op1
);
7230 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7231 things around if they appear profitable, otherwise force op0
7234 if (standard_80387_constant_p (op0
) == 0
7235 || (GET_CODE (op0
) == MEM
7236 && ! (standard_80387_constant_p (op1
) == 0
7237 || GET_CODE (op1
) == MEM
)))
7240 tmp
= op0
, op0
= op1
, op1
= tmp
;
7241 code
= swap_condition (code
);
7244 if (GET_CODE (op0
) != REG
)
7245 op0
= force_reg (op_mode
, op0
);
7247 if (CONSTANT_P (op1
))
7249 if (standard_80387_constant_p (op1
))
7250 op1
= force_reg (op_mode
, op1
);
7252 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7256 /* Try to rearrange the comparison to make it cheaper. */
7257 if (ix86_fp_comparison_cost (code
)
7258 > ix86_fp_comparison_cost (swap_condition (code
))
7259 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
7262 tmp
= op0
, op0
= op1
, op1
= tmp
;
7263 code
= swap_condition (code
);
7264 if (GET_CODE (op0
) != REG
)
7265 op0
= force_reg (op_mode
, op0
);
7273 /* Convert comparison codes we use to represent FP comparison to integer
7274 code that will result in proper branch. Return UNKNOWN if no such code
7276 static enum rtx_code
7277 ix86_fp_compare_code_to_integer (code
)
7307 /* Split comparison code CODE into comparisons we can do using branch
7308 instructions. BYPASS_CODE is comparison code for branch that will
7309 branch around FIRST_CODE and SECOND_CODE. If some of branches
7310 is not required, set value to NIL.
7311 We never require more than two branches. */
7313 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
7314 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
7320 /* The fcomi comparison sets flags as follows:
7330 case GT
: /* GTU - CF=0 & ZF=0 */
7331 case GE
: /* GEU - CF=0 */
7332 case ORDERED
: /* PF=0 */
7333 case UNORDERED
: /* PF=1 */
7334 case UNEQ
: /* EQ - ZF=1 */
7335 case UNLT
: /* LTU - CF=1 */
7336 case UNLE
: /* LEU - CF=1 | ZF=1 */
7337 case LTGT
: /* EQ - ZF=0 */
7339 case LT
: /* LTU - CF=1 - fails on unordered */
7341 *bypass_code
= UNORDERED
;
7343 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
7345 *bypass_code
= UNORDERED
;
7347 case EQ
: /* EQ - ZF=1 - fails on unordered */
7349 *bypass_code
= UNORDERED
;
7351 case NE
: /* NE - ZF=0 - fails on unordered */
7353 *second_code
= UNORDERED
;
7355 case UNGE
: /* GEU - CF=0 - fails on unordered */
7357 *second_code
= UNORDERED
;
7359 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
7361 *second_code
= UNORDERED
;
7366 if (!TARGET_IEEE_FP
)
7373 /* Return cost of comparison done fcom + arithmetics operations on AX.
7374 All following functions do use number of instructions as an cost metrics.
7375 In future this should be tweaked to compute bytes for optimize_size and
7376 take into account performance of various instructions on various CPUs. */
7378 ix86_fp_comparison_arithmetics_cost (code
)
7381 if (!TARGET_IEEE_FP
)
7383 /* The cost of code output by ix86_expand_fp_compare. */
7411 /* Return cost of comparison done using fcomi operation.
7412 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7414 ix86_fp_comparison_fcomi_cost (code
)
7417 enum rtx_code bypass_code
, first_code
, second_code
;
7418 /* Return arbitarily high cost when instruction is not supported - this
7419 prevents gcc from using it. */
7422 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7423 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
7426 /* Return cost of comparison done using sahf operation.
7427 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7429 ix86_fp_comparison_sahf_cost (code
)
7432 enum rtx_code bypass_code
, first_code
, second_code
;
7433 /* Return arbitarily high cost when instruction is not preferred - this
7434 avoids gcc from using it. */
7435 if (!TARGET_USE_SAHF
&& !optimize_size
)
7437 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7438 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
7441 /* Compute cost of the comparison done using any method.
7442 See ix86_fp_comparison_arithmetics_cost for the metrics. */
7444 ix86_fp_comparison_cost (code
)
7447 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
7450 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
7451 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
7453 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
7454 if (min
> sahf_cost
)
7456 if (min
> fcomi_cost
)
7461 /* Generate insn patterns to do a floating point compare of OPERANDS. */
7464 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
7466 rtx op0
, op1
, scratch
;
7470 enum machine_mode fpcmp_mode
, intcmp_mode
;
7472 int cost
= ix86_fp_comparison_cost (code
);
7473 enum rtx_code bypass_code
, first_code
, second_code
;
7475 fpcmp_mode
= ix86_fp_compare_mode (code
);
7476 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
7479 *second_test
= NULL_RTX
;
7481 *bypass_test
= NULL_RTX
;
7483 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7485 /* Do fcomi/sahf based test when profitable. */
7486 if ((bypass_code
== NIL
|| bypass_test
)
7487 && (second_code
== NIL
|| second_test
)
7488 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
7492 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7493 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
7499 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7500 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
7502 scratch
= gen_reg_rtx (HImode
);
7503 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7504 emit_insn (gen_x86_sahf_1 (scratch
));
7507 /* The FP codes work out to act like unsigned. */
7508 intcmp_mode
= fpcmp_mode
;
7510 if (bypass_code
!= NIL
)
7511 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
7512 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7514 if (second_code
!= NIL
)
7515 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
7516 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7521 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
7522 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
7523 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
7525 scratch
= gen_reg_rtx (HImode
);
7526 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
7528 /* In the unordered case, we have to check C2 for NaN's, which
7529 doesn't happen to work out to anything nice combination-wise.
7530 So do some bit twiddling on the value we've got in AH to come
7531 up with an appropriate set of condition codes. */
7533 intcmp_mode
= CCNOmode
;
7538 if (code
== GT
|| !TARGET_IEEE_FP
)
7540 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7545 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7546 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7547 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
7548 intcmp_mode
= CCmode
;
7554 if (code
== LT
&& TARGET_IEEE_FP
)
7556 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7557 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
7558 intcmp_mode
= CCmode
;
7563 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
7569 if (code
== GE
|| !TARGET_IEEE_FP
)
7571 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
7576 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7577 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7584 if (code
== LE
&& TARGET_IEEE_FP
)
7586 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7587 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
7588 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7589 intcmp_mode
= CCmode
;
7594 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
7600 if (code
== EQ
&& TARGET_IEEE_FP
)
7602 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7603 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
7604 intcmp_mode
= CCmode
;
7609 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7616 if (code
== NE
&& TARGET_IEEE_FP
)
7618 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
7619 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
7625 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
7631 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7635 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
7644 /* Return the test that should be put into the flags user, i.e.
7645 the bcc, scc, or cmov instruction. */
7646 return gen_rtx_fmt_ee (code
, VOIDmode
,
7647 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
7652 ix86_expand_compare (code
, second_test
, bypass_test
)
7654 rtx
*second_test
, *bypass_test
;
7657 op0
= ix86_compare_op0
;
7658 op1
= ix86_compare_op1
;
7661 *second_test
= NULL_RTX
;
7663 *bypass_test
= NULL_RTX
;
7665 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7666 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
7667 second_test
, bypass_test
);
7669 ret
= ix86_expand_int_compare (code
, op0
, op1
);
7674 /* Return true if the CODE will result in nontrivial jump sequence. */
7676 ix86_fp_jump_nontrivial_p (code
)
7679 enum rtx_code bypass_code
, first_code
, second_code
;
7682 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7683 return bypass_code
!= NIL
|| second_code
!= NIL
;
7687 ix86_expand_branch (code
, label
)
7693 switch (GET_MODE (ix86_compare_op0
))
7699 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
7700 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7701 gen_rtx_LABEL_REF (VOIDmode
, label
),
7703 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
7713 enum rtx_code bypass_code
, first_code
, second_code
;
7715 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
7718 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
7720 /* Check whether we will use the natural sequence with one jump. If
7721 so, we can expand jump early. Otherwise delay expansion by
7722 creating compound insn to not confuse optimizers. */
7723 if (bypass_code
== NIL
&& second_code
== NIL
7726 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
7727 gen_rtx_LABEL_REF (VOIDmode
, label
),
7732 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
7733 ix86_compare_op0
, ix86_compare_op1
);
7734 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
7735 gen_rtx_LABEL_REF (VOIDmode
, label
),
7737 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
7739 use_fcomi
= ix86_use_fcomi_compare (code
);
7740 vec
= rtvec_alloc (3 + !use_fcomi
);
7741 RTVEC_ELT (vec
, 0) = tmp
;
7743 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
7745 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
7748 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
7750 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
7758 /* Expand DImode branch into multiple compare+branch. */
7760 rtx lo
[2], hi
[2], label2
;
7761 enum rtx_code code1
, code2
, code3
;
7763 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
7765 tmp
= ix86_compare_op0
;
7766 ix86_compare_op0
= ix86_compare_op1
;
7767 ix86_compare_op1
= tmp
;
7768 code
= swap_condition (code
);
7770 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
7771 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
7773 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
7774 avoid two branches. This costs one extra insn, so disable when
7775 optimizing for size. */
7777 if ((code
== EQ
|| code
== NE
)
7779 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
7784 if (hi
[1] != const0_rtx
)
7785 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
7786 NULL_RTX
, 0, OPTAB_WIDEN
);
7789 if (lo
[1] != const0_rtx
)
7790 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
7791 NULL_RTX
, 0, OPTAB_WIDEN
);
7793 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
7794 NULL_RTX
, 0, OPTAB_WIDEN
);
7796 ix86_compare_op0
= tmp
;
7797 ix86_compare_op1
= const0_rtx
;
7798 ix86_expand_branch (code
, label
);
7802 /* Otherwise, if we are doing less-than or greater-or-equal-than,
7803 op1 is a constant and the low word is zero, then we can just
7804 examine the high word. */
7806 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
7809 case LT
: case LTU
: case GE
: case GEU
:
7810 ix86_compare_op0
= hi
[0];
7811 ix86_compare_op1
= hi
[1];
7812 ix86_expand_branch (code
, label
);
7818 /* Otherwise, we need two or three jumps. */
7820 label2
= gen_label_rtx ();
7823 code2
= swap_condition (code
);
7824 code3
= unsigned_condition (code
);
7828 case LT
: case GT
: case LTU
: case GTU
:
7831 case LE
: code1
= LT
; code2
= GT
; break;
7832 case GE
: code1
= GT
; code2
= LT
; break;
7833 case LEU
: code1
= LTU
; code2
= GTU
; break;
7834 case GEU
: code1
= GTU
; code2
= LTU
; break;
7836 case EQ
: code1
= NIL
; code2
= NE
; break;
7837 case NE
: code2
= NIL
; break;
7845 * if (hi(a) < hi(b)) goto true;
7846 * if (hi(a) > hi(b)) goto false;
7847 * if (lo(a) < lo(b)) goto true;
7851 ix86_compare_op0
= hi
[0];
7852 ix86_compare_op1
= hi
[1];
7855 ix86_expand_branch (code1
, label
);
7857 ix86_expand_branch (code2
, label2
);
7859 ix86_compare_op0
= lo
[0];
7860 ix86_compare_op1
= lo
[1];
7861 ix86_expand_branch (code3
, label
);
7864 emit_label (label2
);
7873 /* Split branch based on floating point condition. */
7875 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
7877 rtx op1
, op2
, target1
, target2
, tmp
;
7880 rtx label
= NULL_RTX
;
7882 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
7885 if (target2
!= pc_rtx
)
7888 code
= reverse_condition_maybe_unordered (code
);
7893 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
7894 tmp
, &second
, &bypass
);
7896 if (split_branch_probability
>= 0)
7898 /* Distribute the probabilities across the jumps.
7899 Assume the BYPASS and SECOND to be always test
7901 probability
= split_branch_probability
;
7903 /* Value of 1 is low enough to make no need for probability
7904 to be updated. Later we may run some experiments and see
7905 if unordered values are more frequent in practice. */
7907 bypass_probability
= 1;
7909 second_probability
= 1;
7911 if (bypass
!= NULL_RTX
)
7913 label
= gen_label_rtx ();
7914 i
= emit_jump_insn (gen_rtx_SET
7916 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7918 gen_rtx_LABEL_REF (VOIDmode
,
7921 if (bypass_probability
>= 0)
7923 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7924 GEN_INT (bypass_probability
),
7927 i
= emit_jump_insn (gen_rtx_SET
7929 gen_rtx_IF_THEN_ELSE (VOIDmode
,
7930 condition
, target1
, target2
)));
7931 if (probability
>= 0)
7933 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7934 GEN_INT (probability
),
7936 if (second
!= NULL_RTX
)
7938 i
= emit_jump_insn (gen_rtx_SET
7940 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
7942 if (second_probability
>= 0)
7944 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
7945 GEN_INT (second_probability
),
7948 if (label
!= NULL_RTX
)
7953 ix86_expand_setcc (code
, dest
)
7957 rtx ret
, tmp
, tmpreg
;
7958 rtx second_test
, bypass_test
;
7960 if (GET_MODE (ix86_compare_op0
) == DImode
7962 return 0; /* FAIL */
7964 if (GET_MODE (dest
) != QImode
)
7967 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
7968 PUT_MODE (ret
, QImode
);
7973 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
7974 if (bypass_test
|| second_test
)
7976 rtx test
= second_test
;
7978 rtx tmp2
= gen_reg_rtx (QImode
);
7985 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
7987 PUT_MODE (test
, QImode
);
7988 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
7991 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
7993 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
7996 return 1; /* DONE */
8000 ix86_expand_int_movcc (operands
)
8003 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8004 rtx compare_seq
, compare_op
;
8005 rtx second_test
, bypass_test
;
8006 enum machine_mode mode
= GET_MODE (operands
[0]);
8008 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8009 In case comparsion is done with immediate, we can convert it to LTU or
8010 GEU by altering the integer. */
8012 if ((code
== LEU
|| code
== GTU
)
8013 && GET_CODE (ix86_compare_op1
) == CONST_INT
8015 && (unsigned int) INTVAL (ix86_compare_op1
) != 0xffffffff
8016 /* The operand still must be representable as sign extended value. */
8018 || GET_MODE (ix86_compare_op0
) != DImode
8019 || (unsigned int) INTVAL (ix86_compare_op1
) != 0x7fffffff)
8020 && GET_CODE (operands
[2]) == CONST_INT
8021 && GET_CODE (operands
[3]) == CONST_INT
)
8028 = gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
8029 GET_MODE (ix86_compare_op0
));
8033 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8034 compare_seq
= gen_sequence ();
8037 compare_code
= GET_CODE (compare_op
);
8039 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8040 HImode insns, we'd be swallowed in word prefix ops. */
8043 && (mode
!= DImode
|| TARGET_64BIT
)
8044 && GET_CODE (operands
[2]) == CONST_INT
8045 && GET_CODE (operands
[3]) == CONST_INT
)
8047 rtx out
= operands
[0];
8048 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
8049 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
8052 if ((compare_code
== LTU
|| compare_code
== GEU
)
8053 && !second_test
&& !bypass_test
)
8056 /* Detect overlap between destination and compare sources. */
8059 /* To simplify rest of code, restrict to the GEU case. */
8060 if (compare_code
== LTU
)
8065 compare_code
= reverse_condition (compare_code
);
8066 code
= reverse_condition (code
);
8070 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
8071 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
8072 tmp
= gen_reg_rtx (mode
);
8074 emit_insn (compare_seq
);
8076 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
8078 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
8090 tmp
= expand_simple_binop (mode
, PLUS
,
8092 tmp
, 1, OPTAB_DIRECT
);
8103 tmp
= expand_simple_binop (mode
, IOR
,
8105 tmp
, 1, OPTAB_DIRECT
);
8107 else if (diff
== -1 && ct
)
8117 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
8119 tmp
= expand_simple_binop (mode
, PLUS
,
8121 tmp
, 1, OPTAB_DIRECT
);
8128 * andl cf - ct, dest
8133 tmp
= expand_simple_binop (mode
, AND
,
8135 gen_int_mode (cf
- ct
, mode
),
8136 tmp
, 1, OPTAB_DIRECT
);
8138 tmp
= expand_simple_binop (mode
, PLUS
,
8140 tmp
, 1, OPTAB_DIRECT
);
8144 emit_move_insn (out
, tmp
);
8146 return 1; /* DONE */
8153 tmp
= ct
, ct
= cf
, cf
= tmp
;
8155 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8157 /* We may be reversing unordered compare to normal compare, that
8158 is not valid in general (we may convert non-trapping condition
8159 to trapping one), however on i386 we currently emit all
8160 comparisons unordered. */
8161 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8162 code
= reverse_condition_maybe_unordered (code
);
8166 compare_code
= reverse_condition (compare_code
);
8167 code
= reverse_condition (code
);
8172 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
8173 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
8175 if (ix86_compare_op1
== const0_rtx
8176 && (code
== LT
|| code
== GE
))
8177 compare_code
= code
;
8178 else if (ix86_compare_op1
== constm1_rtx
)
8182 else if (code
== GT
)
8187 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8188 if (compare_code
!= NIL
8189 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
8190 && (cf
== -1 || ct
== -1))
8192 /* If lea code below could be used, only optimize
8193 if it results in a 2 insn sequence. */
8195 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8196 || diff
== 3 || diff
== 5 || diff
== 9)
8197 || (compare_code
== LT
&& ct
== -1)
8198 || (compare_code
== GE
&& cf
== -1))
8201 * notl op1 (if necessary)
8209 code
= reverse_condition (code
);
8212 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8213 ix86_compare_op1
, VOIDmode
, 0, -1);
8215 out
= expand_simple_binop (mode
, IOR
,
8217 out
, 1, OPTAB_DIRECT
);
8218 if (out
!= operands
[0])
8219 emit_move_insn (operands
[0], out
);
8221 return 1; /* DONE */
8225 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8226 || diff
== 3 || diff
== 5 || diff
== 9)
8227 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
8233 * lea cf(dest*(ct-cf)),dest
8237 * This also catches the degenerate setcc-only case.
8243 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8244 ix86_compare_op1
, VOIDmode
, 0, 1);
8247 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8248 done in proper mode to match. */
8255 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
8259 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
8265 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
8269 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
8275 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
8276 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
8278 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
8279 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8283 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
8285 if (out
!= operands
[0])
8286 emit_move_insn (operands
[0], out
);
8288 return 1; /* DONE */
8292 * General case: Jumpful:
8293 * xorl dest,dest cmpl op1, op2
8294 * cmpl op1, op2 movl ct, dest
8296 * decl dest movl cf, dest
8297 * andl (cf-ct),dest 1:
8302 * This is reasonably steep, but branch mispredict costs are
8303 * high on modern cpus, so consider failing only if optimizing
8306 * %%% Parameterize branch_cost on the tuning architecture, then
8307 * use that. The 80386 couldn't care less about mispredicts.
8310 if (!optimize_size
&& !TARGET_CMOVE
)
8316 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8317 /* We may be reversing unordered compare to normal compare,
8318 that is not valid in general (we may convert non-trapping
8319 condition to trapping one), however on i386 we currently
8320 emit all comparisons unordered. */
8321 code
= reverse_condition_maybe_unordered (code
);
8324 code
= reverse_condition (code
);
8325 if (compare_code
!= NIL
)
8326 compare_code
= reverse_condition (compare_code
);
8330 if (compare_code
!= NIL
)
8332 /* notl op1 (if needed)
8337 For x < 0 (resp. x <= -1) there will be no notl,
8338 so if possible swap the constants to get rid of the
8340 True/false will be -1/0 while code below (store flag
8341 followed by decrement) is 0/-1, so the constants need
8342 to be exchanged once more. */
8344 if (compare_code
== GE
|| !cf
)
8346 code
= reverse_condition (code
);
8351 HOST_WIDE_INT tmp
= cf
;
8356 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8357 ix86_compare_op1
, VOIDmode
, 0, -1);
8361 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8362 ix86_compare_op1
, VOIDmode
, 0, 1);
8364 out
= expand_simple_binop (mode
, PLUS
,
8366 out
, 1, OPTAB_DIRECT
);
8369 out
= expand_simple_binop (mode
, AND
,
8371 gen_int_mode (cf
- ct
, mode
),
8372 out
, 1, OPTAB_DIRECT
);
8373 out
= expand_simple_binop (mode
, PLUS
,
8375 out
, 1, OPTAB_DIRECT
);
8376 if (out
!= operands
[0])
8377 emit_move_insn (operands
[0], out
);
8379 return 1; /* DONE */
8385 /* Try a few things more with specific constants and a variable. */
8388 rtx var
, orig_out
, out
, tmp
;
8391 return 0; /* FAIL */
8393 /* If one of the two operands is an interesting constant, load a
8394 constant with the above and mask it in with a logical operation. */
8396 if (GET_CODE (operands
[2]) == CONST_INT
)
8399 if (INTVAL (operands
[2]) == 0)
8400 operands
[3] = constm1_rtx
, op
= and_optab
;
8401 else if (INTVAL (operands
[2]) == -1)
8402 operands
[3] = const0_rtx
, op
= ior_optab
;
8404 return 0; /* FAIL */
8406 else if (GET_CODE (operands
[3]) == CONST_INT
)
8409 if (INTVAL (operands
[3]) == 0)
8410 operands
[2] = constm1_rtx
, op
= and_optab
;
8411 else if (INTVAL (operands
[3]) == -1)
8412 operands
[2] = const0_rtx
, op
= ior_optab
;
8414 return 0; /* FAIL */
8417 return 0; /* FAIL */
8419 orig_out
= operands
[0];
8420 tmp
= gen_reg_rtx (mode
);
8423 /* Recurse to get the constant loaded. */
8424 if (ix86_expand_int_movcc (operands
) == 0)
8425 return 0; /* FAIL */
8427 /* Mask in the interesting variable. */
8428 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
8430 if (out
!= orig_out
)
8431 emit_move_insn (orig_out
, out
);
8433 return 1; /* DONE */
8437 * For comparison with above,
8447 if (! nonimmediate_operand (operands
[2], mode
))
8448 operands
[2] = force_reg (mode
, operands
[2]);
8449 if (! nonimmediate_operand (operands
[3], mode
))
8450 operands
[3] = force_reg (mode
, operands
[3]);
8452 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8454 rtx tmp
= gen_reg_rtx (mode
);
8455 emit_move_insn (tmp
, operands
[3]);
8458 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8460 rtx tmp
= gen_reg_rtx (mode
);
8461 emit_move_insn (tmp
, operands
[2]);
8464 if (! register_operand (operands
[2], VOIDmode
)
8465 && ! register_operand (operands
[3], VOIDmode
))
8466 operands
[2] = force_reg (mode
, operands
[2]);
8468 emit_insn (compare_seq
);
8469 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8470 gen_rtx_IF_THEN_ELSE (mode
,
8471 compare_op
, operands
[2],
8474 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8475 gen_rtx_IF_THEN_ELSE (mode
,
8480 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8481 gen_rtx_IF_THEN_ELSE (mode
,
8486 return 1; /* DONE */
8490 ix86_expand_fp_movcc (operands
)
8495 rtx compare_op
, second_test
, bypass_test
;
8497 /* For SF/DFmode conditional moves based on comparisons
8498 in same mode, we may want to use SSE min/max instructions. */
8499 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
8500 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
8501 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
8502 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
8504 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
8505 /* We may be called from the post-reload splitter. */
8506 && (!REG_P (operands
[0])
8507 || SSE_REG_P (operands
[0])
8508 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
8510 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
8511 code
= GET_CODE (operands
[1]);
8513 /* See if we have (cross) match between comparison operands and
8514 conditional move operands. */
8515 if (rtx_equal_p (operands
[2], op1
))
8520 code
= reverse_condition_maybe_unordered (code
);
8522 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
8524 /* Check for min operation. */
8527 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8528 if (memory_operand (op0
, VOIDmode
))
8529 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8530 if (GET_MODE (operands
[0]) == SFmode
)
8531 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
8533 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
8536 /* Check for max operation. */
8539 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
8540 if (memory_operand (op0
, VOIDmode
))
8541 op0
= force_reg (GET_MODE (operands
[0]), op0
);
8542 if (GET_MODE (operands
[0]) == SFmode
)
8543 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
8545 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
8549 /* Manage condition to be sse_comparison_operator. In case we are
8550 in non-ieee mode, try to canonicalize the destination operand
8551 to be first in the comparison - this helps reload to avoid extra
8553 if (!sse_comparison_operator (operands
[1], VOIDmode
)
8554 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
8556 rtx tmp
= ix86_compare_op0
;
8557 ix86_compare_op0
= ix86_compare_op1
;
8558 ix86_compare_op1
= tmp
;
8559 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
8560 VOIDmode
, ix86_compare_op0
,
8563 /* Similary try to manage result to be first operand of conditional
8564 move. We also don't support the NE comparison on SSE, so try to
8566 if ((rtx_equal_p (operands
[0], operands
[3])
8567 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
8568 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
8570 rtx tmp
= operands
[2];
8571 operands
[2] = operands
[3];
8573 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
8574 (GET_CODE (operands
[1])),
8575 VOIDmode
, ix86_compare_op0
,
8578 if (GET_MODE (operands
[0]) == SFmode
)
8579 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
8580 operands
[2], operands
[3],
8581 ix86_compare_op0
, ix86_compare_op1
));
8583 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
8584 operands
[2], operands
[3],
8585 ix86_compare_op0
, ix86_compare_op1
));
8589 /* The floating point conditional move instructions don't directly
8590 support conditions resulting from a signed integer comparison. */
8592 code
= GET_CODE (operands
[1]);
8593 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8595 /* The floating point conditional move instructions don't directly
8596 support signed integer comparisons. */
8598 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
8600 if (second_test
!= NULL
|| bypass_test
!= NULL
)
8602 tmp
= gen_reg_rtx (QImode
);
8603 ix86_expand_setcc (code
, tmp
);
8605 ix86_compare_op0
= tmp
;
8606 ix86_compare_op1
= const0_rtx
;
8607 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8609 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
8611 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8612 emit_move_insn (tmp
, operands
[3]);
8615 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
8617 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
8618 emit_move_insn (tmp
, operands
[2]);
8622 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8623 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8628 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8629 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8634 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
8635 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
8643 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
8644 works for floating pointer parameters and nonoffsetable memories.
8645 For pushes, it returns just stack offsets; the values will be saved
8646 in the right order. Maximally three parts are generated. */
8649 ix86_split_to_parts (operand
, parts
, mode
)
8652 enum machine_mode mode
;
8657 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
8659 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
8661 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
8663 if (size
< 2 || size
> 3)
8666 /* Optimize constant pool reference to immediates. This is used by fp moves,
8667 that force all constants to memory to allow combining. */
8669 if (GET_CODE (operand
) == MEM
8670 && GET_CODE (XEXP (operand
, 0)) == SYMBOL_REF
8671 && CONSTANT_POOL_ADDRESS_P (XEXP (operand
, 0)))
8672 operand
= get_pool_constant (XEXP (operand
, 0));
8674 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
8676 /* The only non-offsetable memories we handle are pushes. */
8677 if (! push_operand (operand
, VOIDmode
))
8680 operand
= copy_rtx (operand
);
8681 PUT_MODE (operand
, Pmode
);
8682 parts
[0] = parts
[1] = parts
[2] = operand
;
8684 else if (!TARGET_64BIT
)
8687 split_di (&operand
, 1, &parts
[0], &parts
[1]);
8690 if (REG_P (operand
))
8692 if (!reload_completed
)
8694 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
8695 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8697 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
8699 else if (offsettable_memref_p (operand
))
8701 operand
= adjust_address (operand
, SImode
, 0);
8703 parts
[1] = adjust_address (operand
, SImode
, 4);
8705 parts
[2] = adjust_address (operand
, SImode
, 8);
8707 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8712 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8717 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8718 parts
[2] = gen_int_mode (l
[2], SImode
);
8721 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
8726 parts
[1] = gen_int_mode (l
[1], SImode
);
8727 parts
[0] = gen_int_mode (l
[0], SImode
);
8736 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
8737 if (mode
== XFmode
|| mode
== TFmode
)
8739 if (REG_P (operand
))
8741 if (!reload_completed
)
8743 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
8744 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
8746 else if (offsettable_memref_p (operand
))
8748 operand
= adjust_address (operand
, DImode
, 0);
8750 parts
[1] = adjust_address (operand
, SImode
, 8);
8752 else if (GET_CODE (operand
) == CONST_DOUBLE
)
8757 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
8758 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
8759 /* Do not use shift by 32 to avoid warning on 32bit systems. */
8760 if (HOST_BITS_PER_WIDE_INT
>= 64)
8763 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
8764 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
8767 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
8768 parts
[1] = gen_int_mode (l
[2], SImode
);
8778 /* Emit insns to perform a move or push of DI, DF, and XF values.
8779 Return false when normal moves are needed; true when all required
8780 insns have been emitted. Operands 2-4 contain the input values
8781 int the correct order; operands 5-7 contain the output values. */
8784 ix86_split_long_move (operands
)
8791 enum machine_mode mode
= GET_MODE (operands
[0]);
8793 /* The DFmode expanders may ask us to move double.
8794 For 64bit target this is single move. By hiding the fact
8795 here we simplify i386.md splitters. */
8796 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
8798 /* Optimize constant pool reference to immediates. This is used by
8799 fp moves, that force all constants to memory to allow combining. */
8801 if (GET_CODE (operands
[1]) == MEM
8802 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
8803 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
8804 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
8805 if (push_operand (operands
[0], VOIDmode
))
8807 operands
[0] = copy_rtx (operands
[0]);
8808 PUT_MODE (operands
[0], Pmode
);
8811 operands
[0] = gen_lowpart (DImode
, operands
[0]);
8812 operands
[1] = gen_lowpart (DImode
, operands
[1]);
8813 emit_move_insn (operands
[0], operands
[1]);
8817 /* The only non-offsettable memory we handle is push. */
8818 if (push_operand (operands
[0], VOIDmode
))
8820 else if (GET_CODE (operands
[0]) == MEM
8821 && ! offsettable_memref_p (operands
[0]))
8824 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
8825 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
8827 /* When emitting push, take care for source operands on the stack. */
8828 if (push
&& GET_CODE (operands
[1]) == MEM
8829 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
8832 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
8833 XEXP (part
[1][2], 0));
8834 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
8835 XEXP (part
[1][1], 0));
8838 /* We need to do copy in the right order in case an address register
8839 of the source overlaps the destination. */
8840 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
8842 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
8844 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8847 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
8850 /* Collision in the middle part can be handled by reordering. */
8851 if (collisions
== 1 && nparts
== 3
8852 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
8855 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
8856 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
8859 /* If there are more collisions, we can't handle it by reordering.
8860 Do an lea to the last part and use only one colliding move. */
8861 else if (collisions
> 1)
8864 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
8865 XEXP (part
[1][0], 0)));
8866 part
[1][0] = change_address (part
[1][0],
8867 TARGET_64BIT
? DImode
: SImode
,
8868 part
[0][nparts
- 1]);
8869 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
8871 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
8881 /* We use only first 12 bytes of TFmode value, but for pushing we
8882 are required to adjust stack as if we were pushing real 16byte
8884 if (mode
== TFmode
&& !TARGET_64BIT
)
8885 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
8887 emit_move_insn (part
[0][2], part
[1][2]);
8892 /* In 64bit mode we don't have 32bit push available. In case this is
8893 register, it is OK - we will just use larger counterpart. We also
8894 retype memory - these comes from attempt to avoid REX prefix on
8895 moving of second half of TFmode value. */
8896 if (GET_MODE (part
[1][1]) == SImode
)
8898 if (GET_CODE (part
[1][1]) == MEM
)
8899 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
8900 else if (REG_P (part
[1][1]))
8901 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
8904 if (GET_MODE (part
[1][0]) == SImode
)
8905 part
[1][0] = part
[1][1];
8908 emit_move_insn (part
[0][1], part
[1][1]);
8909 emit_move_insn (part
[0][0], part
[1][0]);
8913 /* Choose correct order to not overwrite the source before it is copied. */
8914 if ((REG_P (part
[0][0])
8915 && REG_P (part
[1][1])
8916 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
8918 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
8920 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
8924 operands
[2] = part
[0][2];
8925 operands
[3] = part
[0][1];
8926 operands
[4] = part
[0][0];
8927 operands
[5] = part
[1][2];
8928 operands
[6] = part
[1][1];
8929 operands
[7] = part
[1][0];
8933 operands
[2] = part
[0][1];
8934 operands
[3] = part
[0][0];
8935 operands
[5] = part
[1][1];
8936 operands
[6] = part
[1][0];
8943 operands
[2] = part
[0][0];
8944 operands
[3] = part
[0][1];
8945 operands
[4] = part
[0][2];
8946 operands
[5] = part
[1][0];
8947 operands
[6] = part
[1][1];
8948 operands
[7] = part
[1][2];
8952 operands
[2] = part
[0][0];
8953 operands
[3] = part
[0][1];
8954 operands
[5] = part
[1][0];
8955 operands
[6] = part
[1][1];
8958 emit_move_insn (operands
[2], operands
[5]);
8959 emit_move_insn (operands
[3], operands
[6]);
8961 emit_move_insn (operands
[4], operands
[7]);
8967 ix86_split_ashldi (operands
, scratch
)
8968 rtx
*operands
, scratch
;
8970 rtx low
[2], high
[2];
8973 if (GET_CODE (operands
[2]) == CONST_INT
)
8975 split_di (operands
, 2, low
, high
);
8976 count
= INTVAL (operands
[2]) & 63;
8980 emit_move_insn (high
[0], low
[1]);
8981 emit_move_insn (low
[0], const0_rtx
);
8984 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
8988 if (!rtx_equal_p (operands
[0], operands
[1]))
8989 emit_move_insn (operands
[0], operands
[1]);
8990 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
8991 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
8996 if (!rtx_equal_p (operands
[0], operands
[1]))
8997 emit_move_insn (operands
[0], operands
[1]);
8999 split_di (operands
, 1, low
, high
);
9001 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9002 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9004 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9006 if (! no_new_pseudos
)
9007 scratch
= force_reg (SImode
, const0_rtx
);
9009 emit_move_insn (scratch
, const0_rtx
);
9011 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9015 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
9020 ix86_split_ashrdi (operands
, scratch
)
9021 rtx
*operands
, scratch
;
9023 rtx low
[2], high
[2];
9026 if (GET_CODE (operands
[2]) == CONST_INT
)
9028 split_di (operands
, 2, low
, high
);
9029 count
= INTVAL (operands
[2]) & 63;
9033 emit_move_insn (low
[0], high
[1]);
9035 if (! reload_completed
)
9036 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
9039 emit_move_insn (high
[0], low
[0]);
9040 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9044 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9048 if (!rtx_equal_p (operands
[0], operands
[1]))
9049 emit_move_insn (operands
[0], operands
[1]);
9050 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9051 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
9056 if (!rtx_equal_p (operands
[0], operands
[1]))
9057 emit_move_insn (operands
[0], operands
[1]);
9059 split_di (operands
, 1, low
, high
);
9061 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9062 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
9064 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9066 if (! no_new_pseudos
)
9067 scratch
= gen_reg_rtx (SImode
);
9068 emit_move_insn (scratch
, high
[0]);
9069 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
9070 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9074 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
9079 ix86_split_lshrdi (operands
, scratch
)
9080 rtx
*operands
, scratch
;
9082 rtx low
[2], high
[2];
9085 if (GET_CODE (operands
[2]) == CONST_INT
)
9087 split_di (operands
, 2, low
, high
);
9088 count
= INTVAL (operands
[2]) & 63;
9092 emit_move_insn (low
[0], high
[1]);
9093 emit_move_insn (high
[0], const0_rtx
);
9096 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9100 if (!rtx_equal_p (operands
[0], operands
[1]))
9101 emit_move_insn (operands
[0], operands
[1]);
9102 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9103 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
9108 if (!rtx_equal_p (operands
[0], operands
[1]))
9109 emit_move_insn (operands
[0], operands
[1]);
9111 split_di (operands
, 1, low
, high
);
9113 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9114 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
9116 /* Heh. By reversing the arguments, we can reuse this pattern. */
9117 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9119 if (! no_new_pseudos
)
9120 scratch
= force_reg (SImode
, const0_rtx
);
9122 emit_move_insn (scratch
, const0_rtx
);
9124 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9128 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
9132 /* Helper function for the string operations below. Dest VARIABLE whether
9133 it is aligned to VALUE bytes. If true, jump to the label. */
9135 ix86_expand_aligntest (variable
, value
)
9139 rtx label
= gen_label_rtx ();
9140 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
9141 if (GET_MODE (variable
) == DImode
)
9142 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
9144 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
9145 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
9150 /* Adjust COUNTER by the VALUE. */
9152 ix86_adjust_counter (countreg
, value
)
9154 HOST_WIDE_INT value
;
9156 if (GET_MODE (countreg
) == DImode
)
9157 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
9159 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
9162 /* Zero extend possibly SImode EXP to Pmode register. */
9164 ix86_zero_extend_to_Pmode (exp
)
9168 if (GET_MODE (exp
) == VOIDmode
)
9169 return force_reg (Pmode
, exp
);
9170 if (GET_MODE (exp
) == Pmode
)
9171 return copy_to_mode_reg (Pmode
, exp
);
9172 r
= gen_reg_rtx (Pmode
);
9173 emit_insn (gen_zero_extendsidi2 (r
, exp
));
9177 /* Expand string move (memcpy) operation. Use i386 string operations when
9178 profitable. expand_clrstr contains similar code. */
9180 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
9181 rtx dst
, src
, count_exp
, align_exp
;
9183 rtx srcreg
, destreg
, countreg
;
9184 enum machine_mode counter_mode
;
9185 HOST_WIDE_INT align
= 0;
9186 unsigned HOST_WIDE_INT count
= 0;
9191 if (GET_CODE (align_exp
) == CONST_INT
)
9192 align
= INTVAL (align_exp
);
9194 /* This simple hack avoids all inlining code and simplifies code below. */
9195 if (!TARGET_ALIGN_STRINGOPS
)
9198 if (GET_CODE (count_exp
) == CONST_INT
)
9199 count
= INTVAL (count_exp
);
9201 /* Figure out proper mode for counter. For 32bits it is always SImode,
9202 for 64bits use SImode when possible, otherwise DImode.
9203 Set count to number of bytes copied when known at compile time. */
9204 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
9205 || x86_64_zero_extended_value (count_exp
))
9206 counter_mode
= SImode
;
9208 counter_mode
= DImode
;
9210 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
9213 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
9214 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9216 emit_insn (gen_cld ());
9218 /* When optimizing for size emit simple rep ; movsb instruction for
9219 counts not divisible by 4. */
9221 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9223 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9225 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
9226 destreg
, srcreg
, countreg
));
9228 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
9229 destreg
, srcreg
, countreg
));
9232 /* For constant aligned (or small unaligned) copies use rep movsl
9233 followed by code copying the rest. For PentiumPro ensure 8 byte
9234 alignment to allow rep movsl acceleration. */
9238 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9239 || optimize_size
|| count
< (unsigned int) 64))
9241 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9242 if (count
& ~(size
- 1))
9244 countreg
= copy_to_mode_reg (counter_mode
,
9245 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9246 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9247 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9251 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
9252 destreg
, srcreg
, countreg
));
9254 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
9255 destreg
, srcreg
, countreg
));
9258 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
9259 destreg
, srcreg
, countreg
));
9261 if (size
== 8 && (count
& 0x04))
9262 emit_insn (gen_strmovsi (destreg
, srcreg
));
9264 emit_insn (gen_strmovhi (destreg
, srcreg
));
9266 emit_insn (gen_strmovqi (destreg
, srcreg
));
9268 /* The generic code based on the glibc implementation:
9269 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9270 allowing accelerated copying there)
9271 - copy the data using rep movsl
9277 int desired_alignment
= (TARGET_PENTIUMPRO
9278 && (count
== 0 || count
>= (unsigned int) 260)
9279 ? 8 : UNITS_PER_WORD
);
9281 /* In case we don't know anything about the alignment, default to
9282 library version, since it is usually equally fast and result in
9284 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9290 if (TARGET_SINGLE_STRINGOP
)
9291 emit_insn (gen_cld ());
9293 countreg2
= gen_reg_rtx (Pmode
);
9294 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9296 /* We don't use loops to align destination and to copy parts smaller
9297 than 4 bytes, because gcc is able to optimize such code better (in
9298 the case the destination or the count really is aligned, gcc is often
9299 able to predict the branches) and also it is friendlier to the
9300 hardware branch prediction.
9302 Using loops is benefical for generic case, because we can
9303 handle small counts using the loops. Many CPUs (such as Athlon)
9304 have large REP prefix setup costs.
9306 This is quite costy. Maybe we can revisit this decision later or
9307 add some customizability to this code. */
9309 if (count
== 0 && align
< desired_alignment
)
9311 label
= gen_label_rtx ();
9312 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9313 LEU
, 0, counter_mode
, 1, label
);
9317 rtx label
= ix86_expand_aligntest (destreg
, 1);
9318 emit_insn (gen_strmovqi (destreg
, srcreg
));
9319 ix86_adjust_counter (countreg
, 1);
9321 LABEL_NUSES (label
) = 1;
9325 rtx label
= ix86_expand_aligntest (destreg
, 2);
9326 emit_insn (gen_strmovhi (destreg
, srcreg
));
9327 ix86_adjust_counter (countreg
, 2);
9329 LABEL_NUSES (label
) = 1;
9331 if (align
<= 4 && desired_alignment
> 4)
9333 rtx label
= ix86_expand_aligntest (destreg
, 4);
9334 emit_insn (gen_strmovsi (destreg
, srcreg
));
9335 ix86_adjust_counter (countreg
, 4);
9337 LABEL_NUSES (label
) = 1;
9340 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
9343 LABEL_NUSES (label
) = 1;
9346 if (!TARGET_SINGLE_STRINGOP
)
9347 emit_insn (gen_cld ());
9350 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9352 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
9353 destreg
, srcreg
, countreg2
));
9357 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9358 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
9359 destreg
, srcreg
, countreg2
));
9365 LABEL_NUSES (label
) = 1;
9367 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9368 emit_insn (gen_strmovsi (destreg
, srcreg
));
9369 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
9371 rtx label
= ix86_expand_aligntest (countreg
, 4);
9372 emit_insn (gen_strmovsi (destreg
, srcreg
));
9374 LABEL_NUSES (label
) = 1;
9376 if (align
> 2 && count
!= 0 && (count
& 2))
9377 emit_insn (gen_strmovhi (destreg
, srcreg
));
9378 if (align
<= 2 || count
== 0)
9380 rtx label
= ix86_expand_aligntest (countreg
, 2);
9381 emit_insn (gen_strmovhi (destreg
, srcreg
));
9383 LABEL_NUSES (label
) = 1;
9385 if (align
> 1 && count
!= 0 && (count
& 1))
9386 emit_insn (gen_strmovqi (destreg
, srcreg
));
9387 if (align
<= 1 || count
== 0)
9389 rtx label
= ix86_expand_aligntest (countreg
, 1);
9390 emit_insn (gen_strmovqi (destreg
, srcreg
));
9392 LABEL_NUSES (label
) = 1;
9396 insns
= get_insns ();
9399 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
9404 /* Expand string clear operation (bzero). Use i386 string operations when
9405 profitable. expand_movstr contains similar code. */
9407 ix86_expand_clrstr (src
, count_exp
, align_exp
)
9408 rtx src
, count_exp
, align_exp
;
9410 rtx destreg
, zeroreg
, countreg
;
9411 enum machine_mode counter_mode
;
9412 HOST_WIDE_INT align
= 0;
9413 unsigned HOST_WIDE_INT count
= 0;
9415 if (GET_CODE (align_exp
) == CONST_INT
)
9416 align
= INTVAL (align_exp
);
9418 /* This simple hack avoids all inlining code and simplifies code below. */
9419 if (!TARGET_ALIGN_STRINGOPS
)
9422 if (GET_CODE (count_exp
) == CONST_INT
)
9423 count
= INTVAL (count_exp
);
9424 /* Figure out proper mode for counter. For 32bits it is always SImode,
9425 for 64bits use SImode when possible, otherwise DImode.
9426 Set count to number of bytes copied when known at compile time. */
9427 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
9428 || x86_64_zero_extended_value (count_exp
))
9429 counter_mode
= SImode
;
9431 counter_mode
= DImode
;
9433 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9435 emit_insn (gen_cld ());
9437 /* When optimizing for size emit simple rep ; movsb instruction for
9438 counts not divisible by 4. */
9440 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9442 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9443 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
9445 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
9446 destreg
, countreg
));
9448 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
9449 destreg
, countreg
));
9453 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9454 || optimize_size
|| count
< (unsigned int) 64))
9456 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9457 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
9458 if (count
& ~(size
- 1))
9460 countreg
= copy_to_mode_reg (counter_mode
,
9461 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9462 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9463 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9467 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
9468 destreg
, countreg
));
9470 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
9471 destreg
, countreg
));
9474 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
9475 destreg
, countreg
));
9477 if (size
== 8 && (count
& 0x04))
9478 emit_insn (gen_strsetsi (destreg
,
9479 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9481 emit_insn (gen_strsethi (destreg
,
9482 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9484 emit_insn (gen_strsetqi (destreg
,
9485 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9491 /* Compute desired alignment of the string operation. */
9492 int desired_alignment
= (TARGET_PENTIUMPRO
9493 && (count
== 0 || count
>= (unsigned int) 260)
9494 ? 8 : UNITS_PER_WORD
);
9496 /* In case we don't know anything about the alignment, default to
9497 library version, since it is usually equally fast and result in
9499 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9502 if (TARGET_SINGLE_STRINGOP
)
9503 emit_insn (gen_cld ());
9505 countreg2
= gen_reg_rtx (Pmode
);
9506 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9507 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
9509 if (count
== 0 && align
< desired_alignment
)
9511 label
= gen_label_rtx ();
9512 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
9513 LEU
, 0, counter_mode
, 1, label
);
9517 rtx label
= ix86_expand_aligntest (destreg
, 1);
9518 emit_insn (gen_strsetqi (destreg
,
9519 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9520 ix86_adjust_counter (countreg
, 1);
9522 LABEL_NUSES (label
) = 1;
9526 rtx label
= ix86_expand_aligntest (destreg
, 2);
9527 emit_insn (gen_strsethi (destreg
,
9528 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9529 ix86_adjust_counter (countreg
, 2);
9531 LABEL_NUSES (label
) = 1;
9533 if (align
<= 4 && desired_alignment
> 4)
9535 rtx label
= ix86_expand_aligntest (destreg
, 4);
9536 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
9537 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
9539 ix86_adjust_counter (countreg
, 4);
9541 LABEL_NUSES (label
) = 1;
9544 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
9547 LABEL_NUSES (label
) = 1;
9551 if (!TARGET_SINGLE_STRINGOP
)
9552 emit_insn (gen_cld ());
9555 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9557 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
9558 destreg
, countreg2
));
9562 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9563 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
9564 destreg
, countreg2
));
9569 LABEL_NUSES (label
) = 1;
9572 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9573 emit_insn (gen_strsetsi (destreg
,
9574 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9575 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
9577 rtx label
= ix86_expand_aligntest (countreg
, 2);
9578 emit_insn (gen_strsetsi (destreg
,
9579 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
9581 LABEL_NUSES (label
) = 1;
9583 if (align
> 2 && count
!= 0 && (count
& 2))
9584 emit_insn (gen_strsethi (destreg
,
9585 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9586 if (align
<= 2 || count
== 0)
9588 rtx label
= ix86_expand_aligntest (countreg
, 2);
9589 emit_insn (gen_strsethi (destreg
,
9590 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
9592 LABEL_NUSES (label
) = 1;
9594 if (align
> 1 && count
!= 0 && (count
& 1))
9595 emit_insn (gen_strsetqi (destreg
,
9596 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9597 if (align
<= 1 || count
== 0)
9599 rtx label
= ix86_expand_aligntest (countreg
, 1);
9600 emit_insn (gen_strsetqi (destreg
,
9601 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
9603 LABEL_NUSES (label
) = 1;
9608 /* Expand strlen. */
9610 ix86_expand_strlen (out
, src
, eoschar
, align
)
9611 rtx out
, src
, eoschar
, align
;
9613 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
9615 /* The generic case of strlen expander is long. Avoid it's
9616 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
9618 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
9619 && !TARGET_INLINE_ALL_STRINGOPS
9621 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
9624 addr
= force_reg (Pmode
, XEXP (src
, 0));
9625 scratch1
= gen_reg_rtx (Pmode
);
9627 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
9630 /* Well it seems that some optimizer does not combine a call like
9631 foo(strlen(bar), strlen(bar));
9632 when the move and the subtraction is done here. It does calculate
9633 the length just once when these instructions are done inside of
9634 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
9635 often used and I use one fewer register for the lifetime of
9636 output_strlen_unroll() this is better. */
9638 emit_move_insn (out
, addr
);
9640 ix86_expand_strlensi_unroll_1 (out
, align
);
9642 /* strlensi_unroll_1 returns the address of the zero at the end of
9643 the string, like memchr(), so compute the length by subtracting
9644 the start address. */
9646 emit_insn (gen_subdi3 (out
, out
, addr
));
9648 emit_insn (gen_subsi3 (out
, out
, addr
));
9652 scratch2
= gen_reg_rtx (Pmode
);
9653 scratch3
= gen_reg_rtx (Pmode
);
9654 scratch4
= force_reg (Pmode
, constm1_rtx
);
9656 emit_move_insn (scratch3
, addr
);
9657 eoschar
= force_reg (QImode
, eoschar
);
9659 emit_insn (gen_cld ());
9662 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
9663 align
, scratch4
, scratch3
));
9664 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
9665 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
9669 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
9670 align
, scratch4
, scratch3
));
9671 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
9672 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
9678 /* Expand the appropriate insns for doing strlen if not just doing
9681 out = result, initialized with the start address
9682 align_rtx = alignment of the address.
9683 scratch = scratch register, initialized with the startaddress when
9684 not aligned, otherwise undefined
9686 This is just the body. It needs the initialisations mentioned above and
9687 some address computing at the end. These things are done in i386.md. */
9690 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
9695 rtx align_2_label
= NULL_RTX
;
9696 rtx align_3_label
= NULL_RTX
;
9697 rtx align_4_label
= gen_label_rtx ();
9698 rtx end_0_label
= gen_label_rtx ();
9700 rtx tmpreg
= gen_reg_rtx (SImode
);
9701 rtx scratch
= gen_reg_rtx (SImode
);
9704 if (GET_CODE (align_rtx
) == CONST_INT
)
9705 align
= INTVAL (align_rtx
);
9707 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
9709 /* Is there a known alignment and is it less than 4? */
9712 rtx scratch1
= gen_reg_rtx (Pmode
);
9713 emit_move_insn (scratch1
, out
);
9714 /* Is there a known alignment and is it not 2? */
9717 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
9718 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
9720 /* Leave just the 3 lower bits. */
9721 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
9722 NULL_RTX
, 0, OPTAB_WIDEN
);
9724 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
9725 Pmode
, 1, align_4_label
);
9726 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
9727 Pmode
, 1, align_2_label
);
9728 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
9729 Pmode
, 1, align_3_label
);
9733 /* Since the alignment is 2, we have to check 2 or 0 bytes;
9734 check if is aligned to 4 - byte. */
9736 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
9737 NULL_RTX
, 0, OPTAB_WIDEN
);
9739 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
9740 Pmode
, 1, align_4_label
);
9743 mem
= gen_rtx_MEM (QImode
, out
);
9745 /* Now compare the bytes. */
9747 /* Compare the first n unaligned byte on a byte per byte basis. */
9748 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
9749 QImode
, 1, end_0_label
);
9751 /* Increment the address. */
9753 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9755 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9757 /* Not needed with an alignment of 2 */
9760 emit_label (align_2_label
);
9762 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
9766 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9768 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9770 emit_label (align_3_label
);
9773 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
9777 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
9779 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
9782 /* Generate loop to check 4 bytes at a time. It is not a good idea to
9783 align this loop. It gives only huge programs, but does not help to
9785 emit_label (align_4_label
);
9787 mem
= gen_rtx_MEM (SImode
, out
);
9788 emit_move_insn (scratch
, mem
);
9790 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
9792 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
9794 /* This formula yields a nonzero result iff one of the bytes is zero.
9795 This saves three branches inside loop and many cycles. */
9797 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
9798 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
9799 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
9800 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
9801 gen_int_mode (0x80808080, SImode
)));
9802 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
9807 rtx reg
= gen_reg_rtx (SImode
);
9808 rtx reg2
= gen_reg_rtx (Pmode
);
9809 emit_move_insn (reg
, tmpreg
);
9810 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
9812 /* If zero is not in the first two bytes, move two bytes forward. */
9813 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
9814 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9815 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
9816 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
9817 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
9820 /* Emit lea manually to avoid clobbering of flags. */
9821 emit_insn (gen_rtx_SET (SImode
, reg2
,
9822 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
9824 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9825 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
9826 emit_insn (gen_rtx_SET (VOIDmode
, out
,
9827 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
9834 rtx end_2_label
= gen_label_rtx ();
9835 /* Is zero in the first two bytes? */
9837 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
9838 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
9839 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
9840 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9841 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
9843 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9844 JUMP_LABEL (tmp
) = end_2_label
;
9846 /* Not in the first two. Move two bytes forward. */
9847 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
9849 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
9851 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
9853 emit_label (end_2_label
);
9857 /* Avoid branch in fixing the byte. */
9858 tmpreg
= gen_lowpart (QImode
, tmpreg
);
9859 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
9861 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
9863 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
9865 emit_label (end_0_label
);
9868 /* Clear stack slot assignments remembered from previous functions.
9869 This is called from INIT_EXPANDERS once before RTL is emitted for each
9873 ix86_init_machine_status (p
)
9876 p
->machine
= (struct machine_function
*)
9877 xcalloc (1, sizeof (struct machine_function
));
9880 /* Mark machine specific bits of P for GC. */
9882 ix86_mark_machine_status (p
)
9885 struct machine_function
*machine
= p
->machine
;
9886 enum machine_mode mode
;
9892 for (mode
= VOIDmode
; (int) mode
< (int) MAX_MACHINE_MODE
;
9893 mode
= (enum machine_mode
) ((int) mode
+ 1))
9894 for (n
= 0; n
< MAX_386_STACK_LOCALS
; n
++)
9895 ggc_mark_rtx (machine
->stack_locals
[(int) mode
][n
]);
9899 ix86_free_machine_status (p
)
9906 /* Return a MEM corresponding to a stack slot with mode MODE.
9907 Allocate a new slot if necessary.
9909 The RTL for a function can have several slots available: N is
9910 which slot to use. */
9913 assign_386_stack_local (mode
, n
)
9914 enum machine_mode mode
;
9917 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
9920 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
9921 ix86_stack_locals
[(int) mode
][n
]
9922 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
9924 return ix86_stack_locals
[(int) mode
][n
];
9927 /* Calculate the length of the memory address in the instruction
9928 encoding. Does not include the one-byte modrm, opcode, or prefix. */
9931 memory_address_length (addr
)
9934 struct ix86_address parts
;
9935 rtx base
, index
, disp
;
9938 if (GET_CODE (addr
) == PRE_DEC
9939 || GET_CODE (addr
) == POST_INC
9940 || GET_CODE (addr
) == PRE_MODIFY
9941 || GET_CODE (addr
) == POST_MODIFY
)
9944 if (! ix86_decompose_address (addr
, &parts
))
9948 index
= parts
.index
;
9952 /* Register Indirect. */
9953 if (base
&& !index
&& !disp
)
9955 /* Special cases: ebp and esp need the two-byte modrm form. */
9956 if (addr
== stack_pointer_rtx
9957 || addr
== arg_pointer_rtx
9958 || addr
== frame_pointer_rtx
9959 || addr
== hard_frame_pointer_rtx
)
9963 /* Direct Addressing. */
9964 else if (disp
&& !base
&& !index
)
9969 /* Find the length of the displacement constant. */
9972 if (GET_CODE (disp
) == CONST_INT
9973 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
9979 /* An index requires the two-byte modrm form. */
9987 /* Compute default value for "length_immediate" attribute. When SHORTFORM is set
9988 expect that insn have 8bit immediate alternative. */
9990 ix86_attr_length_immediate_default (insn
, shortform
)
9996 extract_insn_cached (insn
);
9997 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
9998 if (CONSTANT_P (recog_data
.operand
[i
]))
10003 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
10004 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
10008 switch (get_attr_mode (insn
))
10019 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10024 fatal_insn ("unknown insn mode", insn
);
10030 /* Compute default value for "length_address" attribute. */
10032 ix86_attr_length_address_default (insn
)
10036 extract_insn_cached (insn
);
10037 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10038 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10040 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
10046 /* Return the maximum number of instructions a cpu can issue. */
10053 case PROCESSOR_PENTIUM
:
10057 case PROCESSOR_PENTIUMPRO
:
10058 case PROCESSOR_PENTIUM4
:
10059 case PROCESSOR_ATHLON
:
10067 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10068 by DEP_INSN and nothing set by DEP_INSN. */
10071 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
10072 rtx insn
, dep_insn
;
10073 enum attr_type insn_type
;
10077 /* Simplify the test for uninteresting insns. */
10078 if (insn_type
!= TYPE_SETCC
10079 && insn_type
!= TYPE_ICMOV
10080 && insn_type
!= TYPE_FCMOV
10081 && insn_type
!= TYPE_IBR
)
10084 if ((set
= single_set (dep_insn
)) != 0)
10086 set
= SET_DEST (set
);
10089 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
10090 && XVECLEN (PATTERN (dep_insn
), 0) == 2
10091 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
10092 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
10094 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10095 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10100 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
10103 /* This test is true if the dependent insn reads the flags but
10104 not any other potentially set register. */
10105 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
10108 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
10114 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10115 address with operands set by DEP_INSN. */
10118 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
10119 rtx insn
, dep_insn
;
10120 enum attr_type insn_type
;
10124 if (insn_type
== TYPE_LEA
10127 addr
= PATTERN (insn
);
10128 if (GET_CODE (addr
) == SET
)
10130 else if (GET_CODE (addr
) == PARALLEL
10131 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
10132 addr
= XVECEXP (addr
, 0, 0);
10135 addr
= SET_SRC (addr
);
10140 extract_insn_cached (insn
);
10141 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10142 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10144 addr
= XEXP (recog_data
.operand
[i
], 0);
10151 return modified_in_p (addr
, dep_insn
);
10155 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
10156 rtx insn
, link
, dep_insn
;
10159 enum attr_type insn_type
, dep_insn_type
;
10160 enum attr_memory memory
, dep_memory
;
10162 int dep_insn_code_number
;
10164 /* Anti and output depenancies have zero cost on all CPUs. */
10165 if (REG_NOTE_KIND (link
) != 0)
10168 dep_insn_code_number
= recog_memoized (dep_insn
);
10170 /* If we can't recognize the insns, we can't really do anything. */
10171 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
10174 insn_type
= get_attr_type (insn
);
10175 dep_insn_type
= get_attr_type (dep_insn
);
10179 case PROCESSOR_PENTIUM
:
10180 /* Address Generation Interlock adds a cycle of latency. */
10181 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10184 /* ??? Compares pair with jump/setcc. */
10185 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
10188 /* Floating point stores require value to be ready one cycle ealier. */
10189 if (insn_type
== TYPE_FMOV
10190 && get_attr_memory (insn
) == MEMORY_STORE
10191 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10195 case PROCESSOR_PENTIUMPRO
:
10196 memory
= get_attr_memory (insn
);
10197 dep_memory
= get_attr_memory (dep_insn
);
10199 /* Since we can't represent delayed latencies of load+operation,
10200 increase the cost here for non-imov insns. */
10201 if (dep_insn_type
!= TYPE_IMOV
10202 && dep_insn_type
!= TYPE_FMOV
10203 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
10206 /* INT->FP conversion is expensive. */
10207 if (get_attr_fp_int_src (dep_insn
))
10210 /* There is one cycle extra latency between an FP op and a store. */
10211 if (insn_type
== TYPE_FMOV
10212 && (set
= single_set (dep_insn
)) != NULL_RTX
10213 && (set2
= single_set (insn
)) != NULL_RTX
10214 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
10215 && GET_CODE (SET_DEST (set2
)) == MEM
)
10218 /* Show ability of reorder buffer to hide latency of load by executing
10219 in parallel with previous instruction in case
10220 previous instruction is not needed to compute the address. */
10221 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10222 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10224 /* Claim moves to take one cycle, as core can issue one load
10225 at time and the next load can start cycle later. */
10226 if (dep_insn_type
== TYPE_IMOV
10227 || dep_insn_type
== TYPE_FMOV
)
10235 memory
= get_attr_memory (insn
);
10236 dep_memory
= get_attr_memory (dep_insn
);
10237 /* The esp dependency is resolved before the instruction is really
10239 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
10240 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
10243 /* Since we can't represent delayed latencies of load+operation,
10244 increase the cost here for non-imov insns. */
10245 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10246 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
10248 /* INT->FP conversion is expensive. */
10249 if (get_attr_fp_int_src (dep_insn
))
10252 /* Show ability of reorder buffer to hide latency of load by executing
10253 in parallel with previous instruction in case
10254 previous instruction is not needed to compute the address. */
10255 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10256 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10258 /* Claim moves to take one cycle, as core can issue one load
10259 at time and the next load can start cycle later. */
10260 if (dep_insn_type
== TYPE_IMOV
10261 || dep_insn_type
== TYPE_FMOV
)
10270 case PROCESSOR_ATHLON
:
10271 memory
= get_attr_memory (insn
);
10272 dep_memory
= get_attr_memory (dep_insn
);
10274 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10276 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
10281 /* Show ability of reorder buffer to hide latency of load by executing
10282 in parallel with previous instruction in case
10283 previous instruction is not needed to compute the address. */
10284 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10285 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10287 /* Claim moves to take one cycle, as core can issue one load
10288 at time and the next load can start cycle later. */
10289 if (dep_insn_type
== TYPE_IMOV
10290 || dep_insn_type
== TYPE_FMOV
)
10292 else if (cost
>= 3)
10307 struct ppro_sched_data
10310 int issued_this_cycle
;
10314 static enum attr_ppro_uops
10315 ix86_safe_ppro_uops (insn
)
10318 if (recog_memoized (insn
) >= 0)
10319 return get_attr_ppro_uops (insn
);
10321 return PPRO_UOPS_MANY
;
10325 ix86_dump_ppro_packet (dump
)
10328 if (ix86_sched_data
.ppro
.decode
[0])
10330 fprintf (dump
, "PPRO packet: %d",
10331 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
10332 if (ix86_sched_data
.ppro
.decode
[1])
10333 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
10334 if (ix86_sched_data
.ppro
.decode
[2])
10335 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
10336 fputc ('\n', dump
);
10340 /* We're beginning a new block. Initialize data structures as necessary. */
10343 ix86_sched_init (dump
, sched_verbose
, veclen
)
10344 FILE *dump ATTRIBUTE_UNUSED
;
10345 int sched_verbose ATTRIBUTE_UNUSED
;
10346 int veclen ATTRIBUTE_UNUSED
;
10348 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
10351 /* Shift INSN to SLOT, and shift everything else down. */
10354 ix86_reorder_insn (insnp
, slot
)
10361 insnp
[0] = insnp
[1];
10362 while (++insnp
!= slot
);
10368 ix86_sched_reorder_ppro (ready
, e_ready
)
10373 enum attr_ppro_uops cur_uops
;
10374 int issued_this_cycle
;
10378 /* At this point .ppro.decode contains the state of the three
10379 decoders from last "cycle". That is, those insns that were
10380 actually independent. But here we're scheduling for the
10381 decoder, and we may find things that are decodable in the
10384 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
10385 issued_this_cycle
= 0;
10388 cur_uops
= ix86_safe_ppro_uops (*insnp
);
10390 /* If the decoders are empty, and we've a complex insn at the
10391 head of the priority queue, let it issue without complaint. */
10392 if (decode
[0] == NULL
)
10394 if (cur_uops
== PPRO_UOPS_MANY
)
10396 decode
[0] = *insnp
;
10400 /* Otherwise, search for a 2-4 uop unsn to issue. */
10401 while (cur_uops
!= PPRO_UOPS_FEW
)
10403 if (insnp
== ready
)
10405 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
10408 /* If so, move it to the head of the line. */
10409 if (cur_uops
== PPRO_UOPS_FEW
)
10410 ix86_reorder_insn (insnp
, e_ready
);
10412 /* Issue the head of the queue. */
10413 issued_this_cycle
= 1;
10414 decode
[0] = *e_ready
--;
10417 /* Look for simple insns to fill in the other two slots. */
10418 for (i
= 1; i
< 3; ++i
)
10419 if (decode
[i
] == NULL
)
10421 if (ready
>= e_ready
)
10425 cur_uops
= ix86_safe_ppro_uops (*insnp
);
10426 while (cur_uops
!= PPRO_UOPS_ONE
)
10428 if (insnp
== ready
)
10430 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
10433 /* Found one. Move it to the head of the queue and issue it. */
10434 if (cur_uops
== PPRO_UOPS_ONE
)
10436 ix86_reorder_insn (insnp
, e_ready
);
10437 decode
[i
] = *e_ready
--;
10438 issued_this_cycle
++;
10442 /* ??? Didn't find one. Ideally, here we would do a lazy split
10443 of 2-uop insns, issue one and queue the other. */
10447 if (issued_this_cycle
== 0)
10448 issued_this_cycle
= 1;
10449 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
10452 /* We are about to being issuing insns for this clock cycle.
10453 Override the default sort algorithm to better slot instructions. */
10455 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
10456 FILE *dump ATTRIBUTE_UNUSED
;
10457 int sched_verbose ATTRIBUTE_UNUSED
;
10460 int clock_var ATTRIBUTE_UNUSED
;
10462 int n_ready
= *n_readyp
;
10463 rtx
*e_ready
= ready
+ n_ready
- 1;
10473 case PROCESSOR_PENTIUMPRO
:
10474 ix86_sched_reorder_ppro (ready
, e_ready
);
10479 return ix86_issue_rate ();
10482 /* We are about to issue INSN. Return the number of insns left on the
10483 ready queue that can be issued this cycle. */
10486 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
10490 int can_issue_more
;
10496 return can_issue_more
- 1;
10498 case PROCESSOR_PENTIUMPRO
:
10500 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
10502 if (uops
== PPRO_UOPS_MANY
)
10505 ix86_dump_ppro_packet (dump
);
10506 ix86_sched_data
.ppro
.decode
[0] = insn
;
10507 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10508 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10510 ix86_dump_ppro_packet (dump
);
10511 ix86_sched_data
.ppro
.decode
[0] = NULL
;
10513 else if (uops
== PPRO_UOPS_FEW
)
10516 ix86_dump_ppro_packet (dump
);
10517 ix86_sched_data
.ppro
.decode
[0] = insn
;
10518 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10519 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10523 for (i
= 0; i
< 3; ++i
)
10524 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
10526 ix86_sched_data
.ppro
.decode
[i
] = insn
;
10534 ix86_dump_ppro_packet (dump
);
10535 ix86_sched_data
.ppro
.decode
[0] = NULL
;
10536 ix86_sched_data
.ppro
.decode
[1] = NULL
;
10537 ix86_sched_data
.ppro
.decode
[2] = NULL
;
10541 return --ix86_sched_data
.ppro
.issued_this_cycle
;
10546 ia32_use_dfa_pipeline_interface ()
10548 if (ix86_cpu
== PROCESSOR_PENTIUM
)
10553 /* How many alternative schedules to try. This should be as wide as the
10554 scheduling freedom in the DFA, but no wider. Making this value too
10555 large results extra work for the scheduler. */
10558 ia32_multipass_dfa_lookahead ()
10560 if (ix86_cpu
== PROCESSOR_PENTIUM
)
10567 /* Walk through INSNS and look for MEM references whose address is DSTREG or
10568 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
10572 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
10574 rtx dstref
, srcref
, dstreg
, srcreg
;
10578 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
10580 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
10584 /* Subroutine of above to actually do the updating by recursively walking
10588 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
10590 rtx dstref
, srcref
, dstreg
, srcreg
;
10592 enum rtx_code code
= GET_CODE (x
);
10593 const char *format_ptr
= GET_RTX_FORMAT (code
);
10596 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
10597 MEM_COPY_ATTRIBUTES (x
, dstref
);
10598 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
10599 MEM_COPY_ATTRIBUTES (x
, srcref
);
10601 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
10603 if (*format_ptr
== 'e')
10604 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
10606 else if (*format_ptr
== 'E')
10607 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
10608 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
10613 /* Compute the alignment given to a constant that is being placed in memory.
10614 EXP is the constant and ALIGN is the alignment that the object would
10616 The value of this function is used instead of that alignment to align
10620 ix86_constant_alignment (exp
, align
)
10624 if (TREE_CODE (exp
) == REAL_CST
)
10626 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
10628 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
10631 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
10638 /* Compute the alignment for a static variable.
10639 TYPE is the data type, and ALIGN is the alignment that
10640 the object would ordinarily have. The value of this function is used
10641 instead of that alignment to align the object. */
10644 ix86_data_alignment (type
, align
)
10648 if (AGGREGATE_TYPE_P (type
)
10649 && TYPE_SIZE (type
)
10650 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10651 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
10652 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
10655 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10656 to 16byte boundary. */
10659 if (AGGREGATE_TYPE_P (type
)
10660 && TYPE_SIZE (type
)
10661 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10662 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
10663 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
10667 if (TREE_CODE (type
) == ARRAY_TYPE
)
10669 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
10671 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
10674 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
10677 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
10679 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
10682 else if ((TREE_CODE (type
) == RECORD_TYPE
10683 || TREE_CODE (type
) == UNION_TYPE
10684 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
10685 && TYPE_FIELDS (type
))
10687 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
10689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
10692 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
10693 || TREE_CODE (type
) == INTEGER_TYPE
)
10695 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
10697 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
10704 /* Compute the alignment for a local variable.
10705 TYPE is the data type, and ALIGN is the alignment that
10706 the object would ordinarily have. The value of this macro is used
10707 instead of that alignment to align the object. */
10710 ix86_local_alignment (type
, align
)
10714 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
10715 to 16byte boundary. */
10718 if (AGGREGATE_TYPE_P (type
)
10719 && TYPE_SIZE (type
)
10720 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
10721 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
10722 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
10725 if (TREE_CODE (type
) == ARRAY_TYPE
)
10727 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
10729 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
10732 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
10734 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
10736 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
10739 else if ((TREE_CODE (type
) == RECORD_TYPE
10740 || TREE_CODE (type
) == UNION_TYPE
10741 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
10742 && TYPE_FIELDS (type
))
10744 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
10746 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
10749 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
10750 || TREE_CODE (type
) == INTEGER_TYPE
)
10753 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
10755 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
10761 /* Emit RTL insns to initialize the variable parts of a trampoline.
10762 FNADDR is an RTX for the address of the function's pure code.
10763 CXT is an RTX for the static chain value for the function. */
10765 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
10766 rtx tramp
, fnaddr
, cxt
;
10770 /* Compute offset from the end of the jmp to the target function. */
10771 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
10772 plus_constant (tramp
, 10),
10773 NULL_RTX
, 1, OPTAB_DIRECT
);
10774 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
10775 gen_int_mode (0xb9, QImode
));
10776 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
10777 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
10778 gen_int_mode (0xe9, QImode
));
10779 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
10784 /* Try to load address using shorter movl instead of movabs.
10785 We may want to support movq for kernel mode, but kernel does not use
10786 trampolines at the moment. */
10787 if (x86_64_zero_extended_value (fnaddr
))
10789 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
10790 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10791 gen_int_mode (0xbb41, HImode
));
10792 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
10793 gen_lowpart (SImode
, fnaddr
));
10798 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10799 gen_int_mode (0xbb49, HImode
));
10800 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
10804 /* Load static chain using movabs to r10. */
10805 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10806 gen_int_mode (0xba49, HImode
));
10807 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
10810 /* Jump to the r11 */
10811 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
10812 gen_int_mode (0xff49, HImode
));
10813 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
10814 gen_int_mode (0xe3, QImode
));
10816 if (offset
> TRAMPOLINE_SIZE
)
/* Register builtin NAME with type TYPE and code CODE, but only when the
   target flags in MASK are enabled.  do/while(0) makes the macro safe
   as a single statement.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL); \
} while (0)
10827 struct builtin_description
10829 const unsigned int mask
;
10830 const enum insn_code icode
;
10831 const char *const name
;
10832 const enum ix86_builtins code
;
10833 const enum rtx_code comparison
;
10834 const unsigned int flag
;
10837 /* Used for builtins that are enabled both by -msse and -msse2. */
10838 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
10840 static const struct builtin_description bdesc_comi
[] =
10842 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
10843 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
10844 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
10845 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
10846 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
10847 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
10848 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
10849 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
10850 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
10851 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
10852 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
10853 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 },
10854 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, EQ
, 0 },
10855 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, LT
, 0 },
10856 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, LE
, 0 },
10857 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, LT
, 1 },
10858 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, LE
, 1 },
10859 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, NE
, 0 },
10860 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, EQ
, 0 },
10861 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, LT
, 0 },
10862 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, LE
, 0 },
10863 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, LT
, 1 },
10864 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, LE
, 1 },
10865 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, NE
, 0 },
10868 static const struct builtin_description bdesc_2arg
[] =
10871 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
10872 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
10873 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
10874 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
10875 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
10876 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
10877 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
10878 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
10880 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
10881 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
10882 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
10883 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
10884 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
10885 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
10886 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
10887 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
10888 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
10889 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
10890 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
10891 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
10892 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
10893 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
10894 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
10895 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS
, LT
, 1 },
10896 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS
, LE
, 1 },
10897 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
10898 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
10899 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
10900 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
10901 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, LT
, 1 },
10902 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, LE
, 1 },
10903 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
10905 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
10906 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
10907 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
10908 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
10910 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
10911 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
10912 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
10913 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
10914 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
10917 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
10918 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
10919 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
10920 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
10921 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
10922 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
10924 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
10925 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
10926 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
10927 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
10928 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
10929 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
10930 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
10931 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
10933 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
10934 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
10935 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
10937 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
10938 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
10939 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
10940 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
10942 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
10943 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
10945 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
10946 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
10947 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
10948 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
10949 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
10950 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
10952 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
10953 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
10954 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
10955 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
10957 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
10958 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
10959 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
10960 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
10961 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
10962 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
10965 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
10966 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
10967 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
10969 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
10970 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
10972 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
10973 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
10974 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
10975 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
10976 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
10977 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
10979 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
10980 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
10981 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
10982 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
10983 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
10984 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
10986 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
10987 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
10988 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
10989 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
10991 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
10992 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
10995 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
10996 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
10997 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
10998 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
10999 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
11000 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
11001 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
11002 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
11004 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
11005 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
11006 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
11007 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
11008 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
11009 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
11010 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
11011 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
11012 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
11013 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
11014 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
11015 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
11016 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
11017 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
11018 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
11019 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD
, LT
, 1 },
11020 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD
, LE
, 1 },
11021 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
11022 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
11023 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
11024 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
11025 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD
, LT
, 1 },
11026 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD
, LE
, 1 },
11027 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
11029 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
11030 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
11031 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
11032 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
11034 { MASK_SSE2
, CODE_FOR_sse2_anddf3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
11035 { MASK_SSE2
, CODE_FOR_sse2_nanddf3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
11036 { MASK_SSE2
, CODE_FOR_sse2_iordf3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
11037 { MASK_SSE2
, CODE_FOR_sse2_xordf3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
11039 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
11040 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
11041 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
11044 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
11045 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
11046 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
11047 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
11048 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
11049 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
11050 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
11051 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
11053 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
11054 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
11055 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
11056 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
11057 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
11058 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
11059 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
11060 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
11062 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
11063 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
11064 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
11065 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
11067 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
11068 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
11069 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
11070 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
11072 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
11073 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
11075 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
11076 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
11077 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
11078 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
11079 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
11080 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
11082 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
11083 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
11084 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
11085 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
11087 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
11088 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
11089 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
11090 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
11091 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
11092 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
11094 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
11095 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
11096 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
11098 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
11099 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
11101 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
11102 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
11103 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
11104 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
11105 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
11106 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
11108 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
11109 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
11110 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
11111 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
11112 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
11113 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
11115 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
11116 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
11117 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
11118 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
11120 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
11122 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
11123 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
11124 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
11127 static const struct builtin_description bdesc_1arg
[] =
11129 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
11130 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
11132 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
11133 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
11134 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
11136 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
11137 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
11138 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
11139 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
11141 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
11142 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
11143 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
11145 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
11147 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
11148 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
11150 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
11151 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
11152 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
11153 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
11154 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
11156 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
11158 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
11159 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
11161 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
11162 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
11163 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 }
11167 ix86_init_builtins ()
11170 ix86_init_mmx_sse_builtins ();
11173 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11174 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11177 ix86_init_mmx_sse_builtins ()
11179 const struct builtin_description
* d
;
11181 tree endlink
= void_list_node
;
11183 tree pchar_type_node
= build_pointer_type (char_type_node
);
11184 tree pfloat_type_node
= build_pointer_type (float_type_node
);
11185 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
11186 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
11187 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
11190 tree int_ftype_v4sf_v4sf
11191 = build_function_type (integer_type_node
,
11192 tree_cons (NULL_TREE
, V4SF_type_node
,
11193 tree_cons (NULL_TREE
,
11196 tree v4si_ftype_v4sf_v4sf
11197 = build_function_type (V4SI_type_node
,
11198 tree_cons (NULL_TREE
, V4SF_type_node
,
11199 tree_cons (NULL_TREE
,
11202 /* MMX/SSE/integer conversions. */
11203 tree int_ftype_v4sf
11204 = build_function_type (integer_type_node
,
11205 tree_cons (NULL_TREE
, V4SF_type_node
,
11207 tree int_ftype_v8qi
11208 = build_function_type (integer_type_node
,
11209 tree_cons (NULL_TREE
, V8QI_type_node
,
11211 tree v4sf_ftype_v4sf_int
11212 = build_function_type (V4SF_type_node
,
11213 tree_cons (NULL_TREE
, V4SF_type_node
,
11214 tree_cons (NULL_TREE
, integer_type_node
,
11216 tree v4sf_ftype_v4sf_v2si
11217 = build_function_type (V4SF_type_node
,
11218 tree_cons (NULL_TREE
, V4SF_type_node
,
11219 tree_cons (NULL_TREE
, V2SI_type_node
,
11221 tree int_ftype_v4hi_int
11222 = build_function_type (integer_type_node
,
11223 tree_cons (NULL_TREE
, V4HI_type_node
,
11224 tree_cons (NULL_TREE
, integer_type_node
,
11226 tree v4hi_ftype_v4hi_int_int
11227 = build_function_type (V4HI_type_node
,
11228 tree_cons (NULL_TREE
, V4HI_type_node
,
11229 tree_cons (NULL_TREE
, integer_type_node
,
11230 tree_cons (NULL_TREE
,
11233 /* Miscellaneous. */
11234 tree v8qi_ftype_v4hi_v4hi
11235 = build_function_type (V8QI_type_node
,
11236 tree_cons (NULL_TREE
, V4HI_type_node
,
11237 tree_cons (NULL_TREE
, V4HI_type_node
,
11239 tree v4hi_ftype_v2si_v2si
11240 = build_function_type (V4HI_type_node
,
11241 tree_cons (NULL_TREE
, V2SI_type_node
,
11242 tree_cons (NULL_TREE
, V2SI_type_node
,
11244 tree v4sf_ftype_v4sf_v4sf_int
11245 = build_function_type (V4SF_type_node
,
11246 tree_cons (NULL_TREE
, V4SF_type_node
,
11247 tree_cons (NULL_TREE
, V4SF_type_node
,
11248 tree_cons (NULL_TREE
,
11251 tree v2si_ftype_v4hi_v4hi
11252 = build_function_type (V2SI_type_node
,
11253 tree_cons (NULL_TREE
, V4HI_type_node
,
11254 tree_cons (NULL_TREE
, V4HI_type_node
,
11256 tree v4hi_ftype_v4hi_int
11257 = build_function_type (V4HI_type_node
,
11258 tree_cons (NULL_TREE
, V4HI_type_node
,
11259 tree_cons (NULL_TREE
, integer_type_node
,
11261 tree v4hi_ftype_v4hi_di
11262 = build_function_type (V4HI_type_node
,
11263 tree_cons (NULL_TREE
, V4HI_type_node
,
11264 tree_cons (NULL_TREE
,
11265 long_long_integer_type_node
,
11267 tree v2si_ftype_v2si_di
11268 = build_function_type (V2SI_type_node
,
11269 tree_cons (NULL_TREE
, V2SI_type_node
,
11270 tree_cons (NULL_TREE
,
11271 long_long_integer_type_node
,
11273 tree void_ftype_void
11274 = build_function_type (void_type_node
, endlink
);
11275 tree void_ftype_unsigned
11276 = build_function_type (void_type_node
,
11277 tree_cons (NULL_TREE
, unsigned_type_node
,
11279 tree unsigned_ftype_void
11280 = build_function_type (unsigned_type_node
, endlink
);
11282 = build_function_type (long_long_unsigned_type_node
, endlink
);
11283 tree v4sf_ftype_void
11284 = build_function_type (V4SF_type_node
, endlink
);
11285 tree v2si_ftype_v4sf
11286 = build_function_type (V2SI_type_node
,
11287 tree_cons (NULL_TREE
, V4SF_type_node
,
11289 /* Loads/stores. */
11290 tree maskmovq_args
= tree_cons (NULL_TREE
, V8QI_type_node
,
11291 tree_cons (NULL_TREE
, V8QI_type_node
,
11292 tree_cons (NULL_TREE
,
11295 tree void_ftype_v8qi_v8qi_pchar
11296 = build_function_type (void_type_node
, maskmovq_args
);
11297 tree v4sf_ftype_pfloat
11298 = build_function_type (V4SF_type_node
,
11299 tree_cons (NULL_TREE
, pfloat_type_node
,
11301 /* @@@ the type is bogus */
11302 tree v4sf_ftype_v4sf_pv2si
11303 = build_function_type (V4SF_type_node
,
11304 tree_cons (NULL_TREE
, V4SF_type_node
,
11305 tree_cons (NULL_TREE
, pv2si_type_node
,
11307 tree void_ftype_pv2si_v4sf
11308 = build_function_type (void_type_node
,
11309 tree_cons (NULL_TREE
, pv2si_type_node
,
11310 tree_cons (NULL_TREE
, V4SF_type_node
,
11312 tree void_ftype_pfloat_v4sf
11313 = build_function_type (void_type_node
,
11314 tree_cons (NULL_TREE
, pfloat_type_node
,
11315 tree_cons (NULL_TREE
, V4SF_type_node
,
11317 tree void_ftype_pdi_di
11318 = build_function_type (void_type_node
,
11319 tree_cons (NULL_TREE
, pdi_type_node
,
11320 tree_cons (NULL_TREE
,
11321 long_long_unsigned_type_node
,
11323 tree void_ftype_pv2di_v2di
11324 = build_function_type (void_type_node
,
11325 tree_cons (NULL_TREE
, pv2di_type_node
,
11326 tree_cons (NULL_TREE
,
11329 /* Normal vector unops. */
11330 tree v4sf_ftype_v4sf
11331 = build_function_type (V4SF_type_node
,
11332 tree_cons (NULL_TREE
, V4SF_type_node
,
11335 /* Normal vector binops. */
11336 tree v4sf_ftype_v4sf_v4sf
11337 = build_function_type (V4SF_type_node
,
11338 tree_cons (NULL_TREE
, V4SF_type_node
,
11339 tree_cons (NULL_TREE
, V4SF_type_node
,
11341 tree v8qi_ftype_v8qi_v8qi
11342 = build_function_type (V8QI_type_node
,
11343 tree_cons (NULL_TREE
, V8QI_type_node
,
11344 tree_cons (NULL_TREE
, V8QI_type_node
,
11346 tree v4hi_ftype_v4hi_v4hi
11347 = build_function_type (V4HI_type_node
,
11348 tree_cons (NULL_TREE
, V4HI_type_node
,
11349 tree_cons (NULL_TREE
, V4HI_type_node
,
11351 tree v2si_ftype_v2si_v2si
11352 = build_function_type (V2SI_type_node
,
11353 tree_cons (NULL_TREE
, V2SI_type_node
,
11354 tree_cons (NULL_TREE
, V2SI_type_node
,
11356 tree di_ftype_di_di
11357 = build_function_type (long_long_unsigned_type_node
,
11358 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
11359 tree_cons (NULL_TREE
,
11360 long_long_unsigned_type_node
,
11363 tree v2si_ftype_v2sf
11364 = build_function_type (V2SI_type_node
,
11365 tree_cons (NULL_TREE
, V2SF_type_node
,
11367 tree v2sf_ftype_v2si
11368 = build_function_type (V2SF_type_node
,
11369 tree_cons (NULL_TREE
, V2SI_type_node
,
11371 tree v2si_ftype_v2si
11372 = build_function_type (V2SI_type_node
,
11373 tree_cons (NULL_TREE
, V2SI_type_node
,
11375 tree v2sf_ftype_v2sf
11376 = build_function_type (V2SF_type_node
,
11377 tree_cons (NULL_TREE
, V2SF_type_node
,
11379 tree v2sf_ftype_v2sf_v2sf
11380 = build_function_type (V2SF_type_node
,
11381 tree_cons (NULL_TREE
, V2SF_type_node
,
11382 tree_cons (NULL_TREE
,
11385 tree v2si_ftype_v2sf_v2sf
11386 = build_function_type (V2SI_type_node
,
11387 tree_cons (NULL_TREE
, V2SF_type_node
,
11388 tree_cons (NULL_TREE
,
11391 tree pint_type_node
= build_pointer_type (integer_type_node
);
11392 tree pdouble_type_node
= build_pointer_type (double_type_node
);
11393 tree int_ftype_v2df_v2df
11394 = build_function_type (integer_type_node
,
11395 tree_cons (NULL_TREE
, V2DF_type_node
,
11396 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
)));
11399 = build_function_type (intTI_type_node
, endlink
);
11400 tree ti_ftype_ti_ti
11401 = build_function_type (intTI_type_node
,
11402 tree_cons (NULL_TREE
, intTI_type_node
,
11403 tree_cons (NULL_TREE
, intTI_type_node
,
11405 tree void_ftype_pvoid
11406 = build_function_type (void_type_node
,
11407 tree_cons (NULL_TREE
, ptr_type_node
, endlink
));
11409 = build_function_type (V2DI_type_node
,
11410 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
11412 tree v4sf_ftype_v4si
11413 = build_function_type (V4SF_type_node
,
11414 tree_cons (NULL_TREE
, V4SI_type_node
, endlink
));
11415 tree v4si_ftype_v4sf
11416 = build_function_type (V4SI_type_node
,
11417 tree_cons (NULL_TREE
, V4SF_type_node
, endlink
));
11418 tree v2df_ftype_v4si
11419 = build_function_type (V2DF_type_node
,
11420 tree_cons (NULL_TREE
, V4SI_type_node
, endlink
));
11421 tree v4si_ftype_v2df
11422 = build_function_type (V4SI_type_node
,
11423 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
11424 tree v2si_ftype_v2df
11425 = build_function_type (V2SI_type_node
,
11426 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
11427 tree v4sf_ftype_v2df
11428 = build_function_type (V4SF_type_node
,
11429 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
11430 tree v2df_ftype_v2si
11431 = build_function_type (V2DF_type_node
,
11432 tree_cons (NULL_TREE
, V2SI_type_node
, endlink
));
11433 tree v2df_ftype_v4sf
11434 = build_function_type (V2DF_type_node
,
11435 tree_cons (NULL_TREE
, V4SF_type_node
, endlink
));
11436 tree int_ftype_v2df
11437 = build_function_type (integer_type_node
,
11438 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
11439 tree v2df_ftype_v2df_int
11440 = build_function_type (V2DF_type_node
,
11441 tree_cons (NULL_TREE
, V2DF_type_node
,
11442 tree_cons (NULL_TREE
, integer_type_node
,
11444 tree v4sf_ftype_v4sf_v2df
11445 = build_function_type (V4SF_type_node
,
11446 tree_cons (NULL_TREE
, V4SF_type_node
,
11447 tree_cons (NULL_TREE
, V2DF_type_node
,
11449 tree v2df_ftype_v2df_v4sf
11450 = build_function_type (V2DF_type_node
,
11451 tree_cons (NULL_TREE
, V2DF_type_node
,
11452 tree_cons (NULL_TREE
, V4SF_type_node
,
11454 tree v2df_ftype_v2df_v2df_int
11455 = build_function_type (V2DF_type_node
,
11456 tree_cons (NULL_TREE
, V2DF_type_node
,
11457 tree_cons (NULL_TREE
, V2DF_type_node
,
11458 tree_cons (NULL_TREE
,
11461 tree v2df_ftype_v2df_pv2si
11462 = build_function_type (V2DF_type_node
,
11463 tree_cons (NULL_TREE
, V2DF_type_node
,
11464 tree_cons (NULL_TREE
, pv2si_type_node
,
11466 tree void_ftype_pv2si_v2df
11467 = build_function_type (void_type_node
,
11468 tree_cons (NULL_TREE
, pv2si_type_node
,
11469 tree_cons (NULL_TREE
, V2DF_type_node
,
11471 tree void_ftype_pdouble_v2df
11472 = build_function_type (void_type_node
,
11473 tree_cons (NULL_TREE
, pdouble_type_node
,
11474 tree_cons (NULL_TREE
, V2DF_type_node
,
11476 tree void_ftype_pint_int
11477 = build_function_type (void_type_node
,
11478 tree_cons (NULL_TREE
, pint_type_node
,
11479 tree_cons (NULL_TREE
, integer_type_node
,
11481 tree maskmovdqu_args
= tree_cons (NULL_TREE
, V16QI_type_node
,
11482 tree_cons (NULL_TREE
, V16QI_type_node
,
11483 tree_cons (NULL_TREE
,
11486 tree void_ftype_v16qi_v16qi_pchar
11487 = build_function_type (void_type_node
, maskmovdqu_args
);
11488 tree v2df_ftype_pdouble
11489 = build_function_type (V2DF_type_node
,
11490 tree_cons (NULL_TREE
, pdouble_type_node
,
11492 tree v2df_ftype_v2df_v2df
11493 = build_function_type (V2DF_type_node
,
11494 tree_cons (NULL_TREE
, V2DF_type_node
,
11495 tree_cons (NULL_TREE
, V2DF_type_node
,
11497 tree v16qi_ftype_v16qi_v16qi
11498 = build_function_type (V16QI_type_node
,
11499 tree_cons (NULL_TREE
, V16QI_type_node
,
11500 tree_cons (NULL_TREE
, V16QI_type_node
,
11502 tree v8hi_ftype_v8hi_v8hi
11503 = build_function_type (V8HI_type_node
,
11504 tree_cons (NULL_TREE
, V8HI_type_node
,
11505 tree_cons (NULL_TREE
, V8HI_type_node
,
11507 tree v4si_ftype_v4si_v4si
11508 = build_function_type (V4SI_type_node
,
11509 tree_cons (NULL_TREE
, V4SI_type_node
,
11510 tree_cons (NULL_TREE
, V4SI_type_node
,
11512 tree v2di_ftype_v2di_v2di
11513 = build_function_type (V2DI_type_node
,
11514 tree_cons (NULL_TREE
, V2DI_type_node
,
11515 tree_cons (NULL_TREE
, V2DI_type_node
,
11517 tree v2di_ftype_v2df_v2df
11518 = build_function_type (V2DI_type_node
,
11519 tree_cons (NULL_TREE
, V2DF_type_node
,
11520 tree_cons (NULL_TREE
, V2DF_type_node
,
11522 tree v2df_ftype_v2df
11523 = build_function_type (V2DF_type_node
,
11524 tree_cons (NULL_TREE
, V2DF_type_node
,
11526 tree v2df_ftype_double
11527 = build_function_type (V2DF_type_node
,
11528 tree_cons (NULL_TREE
, double_type_node
,
11530 tree v2df_ftype_double_double
11531 = build_function_type (V2DF_type_node
,
11532 tree_cons (NULL_TREE
, double_type_node
,
11533 tree_cons (NULL_TREE
, double_type_node
,
11535 tree int_ftype_v8hi_int
11536 = build_function_type (integer_type_node
,
11537 tree_cons (NULL_TREE
, V8HI_type_node
,
11538 tree_cons (NULL_TREE
, integer_type_node
,
11540 tree v8hi_ftype_v8hi_int_int
11541 = build_function_type (V8HI_type_node
,
11542 tree_cons (NULL_TREE
, V8HI_type_node
,
11543 tree_cons (NULL_TREE
, integer_type_node
,
11544 tree_cons (NULL_TREE
,
11547 tree v2di_ftype_v2di_int
11548 = build_function_type (V2DI_type_node
,
11549 tree_cons (NULL_TREE
, V2DI_type_node
,
11550 tree_cons (NULL_TREE
, integer_type_node
,
11552 tree v4si_ftype_v4si_int
11553 = build_function_type (V4SI_type_node
,
11554 tree_cons (NULL_TREE
, V4SI_type_node
,
11555 tree_cons (NULL_TREE
, integer_type_node
,
11557 tree v8hi_ftype_v8hi_int
11558 = build_function_type (V8HI_type_node
,
11559 tree_cons (NULL_TREE
, V8HI_type_node
,
11560 tree_cons (NULL_TREE
, integer_type_node
,
11562 tree v8hi_ftype_v8hi_v2di
11563 = build_function_type (V8HI_type_node
,
11564 tree_cons (NULL_TREE
, V8HI_type_node
,
11565 tree_cons (NULL_TREE
, V2DI_type_node
,
11567 tree v4si_ftype_v4si_v2di
11568 = build_function_type (V4SI_type_node
,
11569 tree_cons (NULL_TREE
, V4SI_type_node
,
11570 tree_cons (NULL_TREE
, V2DI_type_node
,
11572 tree v4si_ftype_v8hi_v8hi
11573 = build_function_type (V4SI_type_node
,
11574 tree_cons (NULL_TREE
, V8HI_type_node
,
11575 tree_cons (NULL_TREE
, V8HI_type_node
,
11577 tree di_ftype_v8qi_v8qi
11578 = build_function_type (long_long_unsigned_type_node
,
11579 tree_cons (NULL_TREE
, V8QI_type_node
,
11580 tree_cons (NULL_TREE
, V8QI_type_node
,
11582 tree v2di_ftype_v16qi_v16qi
11583 = build_function_type (V2DI_type_node
,
11584 tree_cons (NULL_TREE
, V16QI_type_node
,
11585 tree_cons (NULL_TREE
, V16QI_type_node
,
11587 tree int_ftype_v16qi
11588 = build_function_type (integer_type_node
,
11589 tree_cons (NULL_TREE
, V16QI_type_node
, endlink
));
11591 /* Add all builtins that are more or less simple operations on two
11593 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
11595 /* Use one of the operands; the target can have a different mode for
11596 mask-generating compares. */
11597 enum machine_mode mode
;
11602 mode
= insn_data
[d
->icode
].operand
[1].mode
;
11607 type
= v16qi_ftype_v16qi_v16qi
;
11610 type
= v8hi_ftype_v8hi_v8hi
;
11613 type
= v4si_ftype_v4si_v4si
;
11616 type
= v2di_ftype_v2di_v2di
;
11619 type
= v2df_ftype_v2df_v2df
;
11622 type
= ti_ftype_ti_ti
;
11625 type
= v4sf_ftype_v4sf_v4sf
;
11628 type
= v8qi_ftype_v8qi_v8qi
;
11631 type
= v4hi_ftype_v4hi_v4hi
;
11634 type
= v2si_ftype_v2si_v2si
;
11637 type
= di_ftype_di_di
;
11644 /* Override for comparisons. */
11645 if (d
->icode
== CODE_FOR_maskcmpv4sf3
11646 || d
->icode
== CODE_FOR_maskncmpv4sf3
11647 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
11648 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
11649 type
= v4si_ftype_v4sf_v4sf
;
11651 if (d
->icode
== CODE_FOR_maskcmpv2df3
11652 || d
->icode
== CODE_FOR_maskncmpv2df3
11653 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
11654 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
11655 type
= v2di_ftype_v2df_v2df
;
11657 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
11660 /* Add the remaining MMX insns with somewhat more complicated types. */
11661 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
11662 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
11663 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
11664 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
11665 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
11666 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
11667 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
11669 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
11670 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
11671 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
11673 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
11674 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
11676 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
11677 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
11679 /* comi/ucomi insns. */
11680 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
11681 if (d
->mask
== MASK_SSE2
)
11682 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
11684 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
11686 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
11687 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
11688 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
11690 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
11691 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
11692 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
11693 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
11694 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
11695 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
11697 def_builtin (MASK_SSE1
, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDPS
);
11698 def_builtin (MASK_SSE1
, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDNPS
);
11699 def_builtin (MASK_SSE1
, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ORPS
);
11700 def_builtin (MASK_SSE1
, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_XORPS
);
11702 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
11703 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
11705 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
11707 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
11708 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
11709 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
11710 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
11711 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
11712 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
11714 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
11715 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
11716 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
11717 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
11719 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
11720 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
11721 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
11722 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
11724 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
11726 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
11728 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
11729 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
11730 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
11731 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
11732 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
11733 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
11735 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
11737 /* Original 3DNow! */
11738 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
11739 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
11740 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
11741 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
11742 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
11743 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
11744 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
11745 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
11746 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
11747 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
11748 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
11749 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
11750 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
11751 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
11752 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
11753 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
11754 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
11755 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
11756 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
11757 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
11759 /* 3DNow! extension as used in the Athlon CPU. */
11760 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
11761 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
11762 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
11763 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
11764 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
11765 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
11767 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
11770 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
11771 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
11773 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
11774 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
11776 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
11777 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
11778 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
11779 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
11780 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
11781 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
11783 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
11784 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
11785 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
11786 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
11788 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
11789 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
11790 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
11791 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
11792 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
11794 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
11795 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
11796 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
11797 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
11799 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
11800 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
11802 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
11804 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
11805 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
11807 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
11808 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
11809 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
11810 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
11811 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
11813 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
11815 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
11816 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
11818 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
11819 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
11820 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
11822 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
11823 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
11824 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
11826 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
11827 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
11828 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
11829 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
11830 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
11831 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
11832 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
11834 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
11835 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
11836 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
11838 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
11839 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
11840 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
11842 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
11843 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
11844 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
11846 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
11847 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
11849 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
11850 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
11851 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
11853 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
11854 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
11855 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
11857 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
11858 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
11860 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
11863 /* Errors in the source file can cause expand_expr to return const0_rtx
11864 where we expect a vector. To avoid crashing, use one of the vector
11865 clear instructions. */
11867 safe_vector_operand (x
, mode
)
11869 enum machine_mode mode
;
11871 if (x
!= const0_rtx
)
11873 x
= gen_reg_rtx (mode
);
11875 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
11876 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
11877 : gen_rtx_SUBREG (DImode
, x
, 0)));
11879 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
11880 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
11884 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
11887 ix86_expand_binop_builtin (icode
, arglist
, target
)
11888 enum insn_code icode
;
11893 tree arg0
= TREE_VALUE (arglist
);
11894 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11895 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11896 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11897 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
11898 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
11899 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
11901 if (VECTOR_MODE_P (mode0
))
11902 op0
= safe_vector_operand (op0
, mode0
);
11903 if (VECTOR_MODE_P (mode1
))
11904 op1
= safe_vector_operand (op1
, mode1
);
11907 || GET_MODE (target
) != tmode
11908 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
11909 target
= gen_reg_rtx (tmode
);
11911 /* In case the insn wants input operands in modes different from
11912 the result, abort. */
11913 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
11916 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
11917 op0
= copy_to_mode_reg (mode0
, op0
);
11918 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
11919 op1
= copy_to_mode_reg (mode1
, op1
);
11921 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11922 yet one of the two must not be a memory. This is normally enforced
11923 by expanders, but we didn't bother to create one here. */
11924 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
11925 op0
= copy_to_mode_reg (mode0
, op0
);
11927 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11934 /* In type_for_mode we restrict the ability to create TImode types
11935 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
11936 to have a V4SFmode signature. Convert them in-place to TImode. */
11939 ix86_expand_timode_binop_builtin (icode
, arglist
, target
)
11940 enum insn_code icode
;
11945 tree arg0
= TREE_VALUE (arglist
);
11946 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11947 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11948 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11950 op0
= gen_lowpart (TImode
, op0
);
11951 op1
= gen_lowpart (TImode
, op1
);
11952 target
= gen_reg_rtx (TImode
);
11954 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, TImode
))
11955 op0
= copy_to_mode_reg (TImode
, op0
);
11956 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
11957 op1
= copy_to_mode_reg (TImode
, op1
);
11959 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
11960 yet one of the two must not be a memory. This is normally enforced
11961 by expanders, but we didn't bother to create one here. */
11962 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
11963 op0
= copy_to_mode_reg (TImode
, op0
);
11965 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
11970 return gen_lowpart (V4SFmode
, target
);
11973 /* Subroutine of ix86_expand_builtin to take care of stores. */
11976 ix86_expand_store_builtin (icode
, arglist
)
11977 enum insn_code icode
;
11981 tree arg0
= TREE_VALUE (arglist
);
11982 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
11983 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
11984 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
11985 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
11986 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
11988 if (VECTOR_MODE_P (mode1
))
11989 op1
= safe_vector_operand (op1
, mode1
);
11991 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
11993 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
11994 op1
= copy_to_mode_reg (mode1
, op1
);
11996 pat
= GEN_FCN (icode
) (op0
, op1
);
12002 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12005 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
12006 enum insn_code icode
;
12012 tree arg0
= TREE_VALUE (arglist
);
12013 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12014 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12015 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12018 || GET_MODE (target
) != tmode
12019 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12020 target
= gen_reg_rtx (tmode
);
12022 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12025 if (VECTOR_MODE_P (mode0
))
12026 op0
= safe_vector_operand (op0
, mode0
);
12028 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12029 op0
= copy_to_mode_reg (mode0
, op0
);
12032 pat
= GEN_FCN (icode
) (target
, op0
);
12039 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12040 sqrtss, rsqrtss, rcpss. */
12043 ix86_expand_unop1_builtin (icode
, arglist
, target
)
12044 enum insn_code icode
;
12049 tree arg0
= TREE_VALUE (arglist
);
12050 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12051 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12052 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12055 || GET_MODE (target
) != tmode
12056 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12057 target
= gen_reg_rtx (tmode
);
12059 if (VECTOR_MODE_P (mode0
))
12060 op0
= safe_vector_operand (op0
, mode0
);
12062 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12063 op0
= copy_to_mode_reg (mode0
, op0
);
12066 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12067 op1
= copy_to_mode_reg (mode0
, op1
);
12069 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12076 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12079 ix86_expand_sse_compare (d
, arglist
, target
)
12080 const struct builtin_description
*d
;
12085 tree arg0
= TREE_VALUE (arglist
);
12086 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12087 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12088 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12090 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12091 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12092 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12093 enum rtx_code comparison
= d
->comparison
;
12095 if (VECTOR_MODE_P (mode0
))
12096 op0
= safe_vector_operand (op0
, mode0
);
12097 if (VECTOR_MODE_P (mode1
))
12098 op1
= safe_vector_operand (op1
, mode1
);
12100 /* Swap operands if we have a comparison that isn't available in
12104 rtx tmp
= gen_reg_rtx (mode1
);
12105 emit_move_insn (tmp
, op1
);
12111 || GET_MODE (target
) != tmode
12112 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12113 target
= gen_reg_rtx (tmode
);
12115 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12116 op0
= copy_to_mode_reg (mode0
, op0
);
12117 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12118 op1
= copy_to_mode_reg (mode1
, op1
);
12120 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12121 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12128 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12131 ix86_expand_sse_comi (d
, arglist
, target
)
12132 const struct builtin_description
*d
;
12137 tree arg0
= TREE_VALUE (arglist
);
12138 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12139 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12140 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12142 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
12143 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
12144 enum rtx_code comparison
= d
->comparison
;
12146 if (VECTOR_MODE_P (mode0
))
12147 op0
= safe_vector_operand (op0
, mode0
);
12148 if (VECTOR_MODE_P (mode1
))
12149 op1
= safe_vector_operand (op1
, mode1
);
12151 /* Swap operands if we have a comparison that isn't available in
12160 target
= gen_reg_rtx (SImode
);
12161 emit_move_insn (target
, const0_rtx
);
12162 target
= gen_rtx_SUBREG (QImode
, target
, 0);
12164 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
12165 op0
= copy_to_mode_reg (mode0
, op0
);
12166 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
12167 op1
= copy_to_mode_reg (mode1
, op1
);
12169 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12170 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
12174 emit_insn (gen_rtx_SET (VOIDmode
,
12175 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
12176 gen_rtx_fmt_ee (comparison
, QImode
,
12177 gen_rtx_REG (CCmode
, FLAGS_REG
),
12180 return SUBREG_REG (target
);
12183 /* Expand an expression EXP that calls a built-in function,
12184 with result going to TARGET if that's convenient
12185 (and in mode MODE if that's convenient).
12186 SUBTARGET may be used as the target for computing one of EXP's operands.
12187 IGNORE is nonzero if the value is to be ignored. */
12190 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
12193 rtx subtarget ATTRIBUTE_UNUSED
;
12194 enum machine_mode mode ATTRIBUTE_UNUSED
;
12195 int ignore ATTRIBUTE_UNUSED
;
12197 const struct builtin_description
*d
;
12199 enum insn_code icode
;
12200 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
12201 tree arglist
= TREE_OPERAND (exp
, 1);
12202 tree arg0
, arg1
, arg2
;
12203 rtx op0
, op1
, op2
, pat
;
12204 enum machine_mode tmode
, mode0
, mode1
, mode2
;
12205 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
12209 case IX86_BUILTIN_EMMS
:
12210 emit_insn (gen_emms ());
12213 case IX86_BUILTIN_SFENCE
:
12214 emit_insn (gen_sfence ());
12217 case IX86_BUILTIN_PEXTRW
:
12218 case IX86_BUILTIN_PEXTRW128
:
12219 icode
= (fcode
== IX86_BUILTIN_PEXTRW
12220 ? CODE_FOR_mmx_pextrw
12221 : CODE_FOR_sse2_pextrw
);
12222 arg0
= TREE_VALUE (arglist
);
12223 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12224 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12225 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12226 tmode
= insn_data
[icode
].operand
[0].mode
;
12227 mode0
= insn_data
[icode
].operand
[1].mode
;
12228 mode1
= insn_data
[icode
].operand
[2].mode
;
12230 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12231 op0
= copy_to_mode_reg (mode0
, op0
);
12232 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12234 /* @@@ better error message */
12235 error ("selector must be an immediate");
12236 return gen_reg_rtx (tmode
);
12239 || GET_MODE (target
) != tmode
12240 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12241 target
= gen_reg_rtx (tmode
);
12242 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12248 case IX86_BUILTIN_PINSRW
:
12249 case IX86_BUILTIN_PINSRW128
:
12250 icode
= (fcode
== IX86_BUILTIN_PINSRW
12251 ? CODE_FOR_mmx_pinsrw
12252 : CODE_FOR_sse2_pinsrw
);
12253 arg0
= TREE_VALUE (arglist
);
12254 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12255 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12256 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12257 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12258 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
12259 tmode
= insn_data
[icode
].operand
[0].mode
;
12260 mode0
= insn_data
[icode
].operand
[1].mode
;
12261 mode1
= insn_data
[icode
].operand
[2].mode
;
12262 mode2
= insn_data
[icode
].operand
[3].mode
;
12264 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12265 op0
= copy_to_mode_reg (mode0
, op0
);
12266 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12267 op1
= copy_to_mode_reg (mode1
, op1
);
12268 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
12270 /* @@@ better error message */
12271 error ("selector must be an immediate");
12275 || GET_MODE (target
) != tmode
12276 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12277 target
= gen_reg_rtx (tmode
);
12278 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
12284 case IX86_BUILTIN_MASKMOVQ
:
12285 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
12286 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
12287 : CODE_FOR_sse2_maskmovdqu
);
12288 /* Note the arg order is different from the operand order. */
12289 arg1
= TREE_VALUE (arglist
);
12290 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
12291 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12292 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12293 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12294 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
12295 mode0
= insn_data
[icode
].operand
[0].mode
;
12296 mode1
= insn_data
[icode
].operand
[1].mode
;
12297 mode2
= insn_data
[icode
].operand
[2].mode
;
12299 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
12300 op0
= copy_to_mode_reg (mode0
, op0
);
12301 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12302 op1
= copy_to_mode_reg (mode1
, op1
);
12303 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
12304 op2
= copy_to_mode_reg (mode2
, op2
);
12305 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
12311 case IX86_BUILTIN_SQRTSS
:
12312 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
12313 case IX86_BUILTIN_RSQRTSS
:
12314 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
12315 case IX86_BUILTIN_RCPSS
:
12316 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
12318 case IX86_BUILTIN_ANDPS
:
12319 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3
,
12321 case IX86_BUILTIN_ANDNPS
:
12322 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3
,
12324 case IX86_BUILTIN_ORPS
:
12325 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3
,
12327 case IX86_BUILTIN_XORPS
:
12328 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3
,
12331 case IX86_BUILTIN_LOADAPS
:
12332 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
12334 case IX86_BUILTIN_LOADUPS
:
12335 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
12337 case IX86_BUILTIN_STOREAPS
:
12338 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
12339 case IX86_BUILTIN_STOREUPS
:
12340 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
12342 case IX86_BUILTIN_LOADSS
:
12343 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
12345 case IX86_BUILTIN_STORESS
:
12346 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
12348 case IX86_BUILTIN_LOADHPS
:
12349 case IX86_BUILTIN_LOADLPS
:
12350 case IX86_BUILTIN_LOADHPD
:
12351 case IX86_BUILTIN_LOADLPD
:
12352 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
12353 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
12354 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
12355 : CODE_FOR_sse2_movlpd
);
12356 arg0
= TREE_VALUE (arglist
);
12357 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12358 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12359 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12360 tmode
= insn_data
[icode
].operand
[0].mode
;
12361 mode0
= insn_data
[icode
].operand
[1].mode
;
12362 mode1
= insn_data
[icode
].operand
[2].mode
;
12364 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12365 op0
= copy_to_mode_reg (mode0
, op0
);
12366 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
12368 || GET_MODE (target
) != tmode
12369 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12370 target
= gen_reg_rtx (tmode
);
12371 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12377 case IX86_BUILTIN_STOREHPS
:
12378 case IX86_BUILTIN_STORELPS
:
12379 case IX86_BUILTIN_STOREHPD
:
12380 case IX86_BUILTIN_STORELPD
:
12381 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
12382 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
12383 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
12384 : CODE_FOR_sse2_movlpd
);
12385 arg0
= TREE_VALUE (arglist
);
12386 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12387 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12388 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12389 mode0
= insn_data
[icode
].operand
[1].mode
;
12390 mode1
= insn_data
[icode
].operand
[2].mode
;
12392 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12393 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12394 op1
= copy_to_mode_reg (mode1
, op1
);
12396 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
12402 case IX86_BUILTIN_MOVNTPS
:
12403 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
12404 case IX86_BUILTIN_MOVNTQ
:
12405 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
12407 case IX86_BUILTIN_LDMXCSR
:
12408 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
12409 target
= assign_386_stack_local (SImode
, 0);
12410 emit_move_insn (target
, op0
);
12411 emit_insn (gen_ldmxcsr (target
));
12414 case IX86_BUILTIN_STMXCSR
:
12415 target
= assign_386_stack_local (SImode
, 0);
12416 emit_insn (gen_stmxcsr (target
));
12417 return copy_to_mode_reg (SImode
, target
);
12419 case IX86_BUILTIN_SHUFPS
:
12420 case IX86_BUILTIN_SHUFPD
:
12421 icode
= (fcode
== IX86_BUILTIN_SHUFPS
12422 ? CODE_FOR_sse_shufps
12423 : CODE_FOR_sse2_shufpd
);
12424 arg0
= TREE_VALUE (arglist
);
12425 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12426 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12427 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12428 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12429 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
12430 tmode
= insn_data
[icode
].operand
[0].mode
;
12431 mode0
= insn_data
[icode
].operand
[1].mode
;
12432 mode1
= insn_data
[icode
].operand
[2].mode
;
12433 mode2
= insn_data
[icode
].operand
[3].mode
;
12435 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12436 op0
= copy_to_mode_reg (mode0
, op0
);
12437 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12438 op1
= copy_to_mode_reg (mode1
, op1
);
12439 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
12441 /* @@@ better error message */
12442 error ("mask must be an immediate");
12443 return gen_reg_rtx (tmode
);
12446 || GET_MODE (target
) != tmode
12447 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12448 target
= gen_reg_rtx (tmode
);
12449 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
12455 case IX86_BUILTIN_PSHUFW
:
12456 case IX86_BUILTIN_PSHUFD
:
12457 case IX86_BUILTIN_PSHUFHW
:
12458 case IX86_BUILTIN_PSHUFLW
:
12459 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
12460 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
12461 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
12462 : CODE_FOR_mmx_pshufw
);
12463 arg0
= TREE_VALUE (arglist
);
12464 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12465 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12466 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12467 tmode
= insn_data
[icode
].operand
[0].mode
;
12468 mode1
= insn_data
[icode
].operand
[1].mode
;
12469 mode2
= insn_data
[icode
].operand
[2].mode
;
12471 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
12472 op0
= copy_to_mode_reg (mode1
, op0
);
12473 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
12475 /* @@@ better error message */
12476 error ("mask must be an immediate");
12480 || GET_MODE (target
) != tmode
12481 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12482 target
= gen_reg_rtx (tmode
);
12483 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12489 case IX86_BUILTIN_FEMMS
:
12490 emit_insn (gen_femms ());
12493 case IX86_BUILTIN_PAVGUSB
:
12494 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
12496 case IX86_BUILTIN_PF2ID
:
12497 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
12499 case IX86_BUILTIN_PFACC
:
12500 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
12502 case IX86_BUILTIN_PFADD
:
12503 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
12505 case IX86_BUILTIN_PFCMPEQ
:
12506 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
12508 case IX86_BUILTIN_PFCMPGE
:
12509 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
12511 case IX86_BUILTIN_PFCMPGT
:
12512 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
12514 case IX86_BUILTIN_PFMAX
:
12515 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
12517 case IX86_BUILTIN_PFMIN
:
12518 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
12520 case IX86_BUILTIN_PFMUL
:
12521 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
12523 case IX86_BUILTIN_PFRCP
:
12524 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
12526 case IX86_BUILTIN_PFRCPIT1
:
12527 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
12529 case IX86_BUILTIN_PFRCPIT2
:
12530 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
12532 case IX86_BUILTIN_PFRSQIT1
:
12533 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
12535 case IX86_BUILTIN_PFRSQRT
:
12536 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
12538 case IX86_BUILTIN_PFSUB
:
12539 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
12541 case IX86_BUILTIN_PFSUBR
:
12542 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
12544 case IX86_BUILTIN_PI2FD
:
12545 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
12547 case IX86_BUILTIN_PMULHRW
:
12548 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
12550 case IX86_BUILTIN_PF2IW
:
12551 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
12553 case IX86_BUILTIN_PFNACC
:
12554 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
12556 case IX86_BUILTIN_PFPNACC
:
12557 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
12559 case IX86_BUILTIN_PI2FW
:
12560 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
12562 case IX86_BUILTIN_PSWAPDSI
:
12563 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
12565 case IX86_BUILTIN_PSWAPDSF
:
12566 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
12568 case IX86_BUILTIN_SSE_ZERO
:
12569 target
= gen_reg_rtx (V4SFmode
);
12570 emit_insn (gen_sse_clrv4sf (target
));
12573 case IX86_BUILTIN_MMX_ZERO
:
12574 target
= gen_reg_rtx (DImode
);
12575 emit_insn (gen_mmx_clrdi (target
));
12578 case IX86_BUILTIN_SQRTSD
:
12579 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
12580 case IX86_BUILTIN_LOADAPD
:
12581 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
12582 case IX86_BUILTIN_LOADUPD
:
12583 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
12585 case IX86_BUILTIN_STOREAPD
:
12586 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
12587 case IX86_BUILTIN_STOREUPD
:
12588 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
12590 case IX86_BUILTIN_LOADSD
:
12591 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
12593 case IX86_BUILTIN_STORESD
:
12594 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
12596 case IX86_BUILTIN_SETPD1
:
12597 target
= assign_386_stack_local (DFmode
, 0);
12598 arg0
= TREE_VALUE (arglist
);
12599 emit_move_insn (adjust_address (target
, DFmode
, 0),
12600 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
12601 op0
= gen_reg_rtx (V2DFmode
);
12602 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
12603 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
12606 case IX86_BUILTIN_SETPD
:
12607 target
= assign_386_stack_local (V2DFmode
, 0);
12608 arg0
= TREE_VALUE (arglist
);
12609 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12610 emit_move_insn (adjust_address (target
, DFmode
, 0),
12611 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
12612 emit_move_insn (adjust_address (target
, DFmode
, 8),
12613 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
12614 op0
= gen_reg_rtx (V2DFmode
);
12615 emit_insn (gen_sse2_movapd (op0
, target
));
12618 case IX86_BUILTIN_LOADRPD
:
12619 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
12620 gen_reg_rtx (V2DFmode
), 1);
12621 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
12624 case IX86_BUILTIN_LOADPD1
:
12625 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
12626 gen_reg_rtx (V2DFmode
), 1);
12627 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
12630 case IX86_BUILTIN_STOREPD1
:
12631 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
12632 case IX86_BUILTIN_STORERPD
:
12633 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
12635 case IX86_BUILTIN_MFENCE
:
12636 emit_insn (gen_sse2_mfence ());
12638 case IX86_BUILTIN_LFENCE
:
12639 emit_insn (gen_sse2_lfence ());
12642 case IX86_BUILTIN_CLFLUSH
:
12643 arg0
= TREE_VALUE (arglist
);
12644 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12645 icode
= CODE_FOR_sse2_clflush
;
12646 mode0
= insn_data
[icode
].operand
[0].mode
;
12647 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
12648 op0
= copy_to_mode_reg (mode0
, op0
);
12650 emit_insn (gen_sse2_clflush (op0
));
12653 case IX86_BUILTIN_MOVNTPD
:
12654 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
12655 case IX86_BUILTIN_MOVNTDQ
:
12656 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
12657 case IX86_BUILTIN_MOVNTI
:
12658 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
12664 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12665 if (d
->code
== fcode
)
12667 /* Compares are treated specially. */
12668 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12669 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12670 || d
->icode
== CODE_FOR_maskncmpv4sf3
12671 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
12672 || d
->icode
== CODE_FOR_maskcmpv2df3
12673 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12674 || d
->icode
== CODE_FOR_maskncmpv2df3
12675 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12676 return ix86_expand_sse_compare (d
, arglist
, target
);
12678 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
12681 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
12682 if (d
->code
== fcode
)
12683 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
12685 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12686 if (d
->code
== fcode
)
12687 return ix86_expand_sse_comi (d
, arglist
, target
);
12689 /* @@@ Should really do something sensible here. */
/* NOTE(review): this chunk is a garbled extraction -- the original file's
   line numbers (e.g. "12693") are fused into the text, statements are split
   across lines, and several source lines are missing (gaps in the embedded
   numbering).  The code is NOT compilable as-is; comments below annotate
   only what the visible fragments establish.  */
12693 /* Store OPERAND to the memory after reload is completed. This means
12694 that we can't easily use assign_stack_local. */
/* Emits a store of OPERAND onto the stack and builds a MEM rtx (`result`)
   referring to it.  Return type and local declarations were lost in the
   extraction -- presumably returns `rtx`; confirm against original i386.c.  */
12696 ix86_force_to_memory (mode
, operand
)
12697 enum machine_mode mode
;
/* Guard: visible check on reload_completed; the consequent (presumably an
   abort) was dropped by the extraction -- TODO confirm.  */
12701 if (!reload_completed
)
/* 64-bit with red zone: address memory below the stack pointer
   (SP - RED_ZONE_SIZE) and move the operand there directly.  */
12703 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
12705 result
= gen_rtx_MEM (mode
,
12706 gen_rtx_PLUS (Pmode
,
12708 GEN_INT (-RED_ZONE_SIZE
)));
12709 emit_move_insn (result
, operand
);
/* 64-bit without red zone: widen to DImode and push via PRE_DEC of the
   stack pointer; result then aliases the new stack top.  */
12711 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
12717 operand
= gen_lowpart (DImode
, operand
);
12721 gen_rtx_SET (VOIDmode
,
12722 gen_rtx_MEM (DImode
,
12723 gen_rtx_PRE_DEC (DImode
,
12724 stack_pointer_rtx
)),
12730 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* Presumably the 32-bit DImode case: split into two SImode halves and
   push both (two PRE_DEC stores) -- surrounding mode dispatch lost.  */
12739 split_di (&operand
, 1, operands
, operands
+ 1);
12741 gen_rtx_SET (VOIDmode
,
12742 gen_rtx_MEM (SImode
,
12743 gen_rtx_PRE_DEC (Pmode
,
12744 stack_pointer_rtx
)),
12747 gen_rtx_SET (VOIDmode
,
12748 gen_rtx_MEM (SImode
,
12749 gen_rtx_PRE_DEC (Pmode
,
12750 stack_pointer_rtx
)),
12755 /* It is better to store HImodes as SImodes. */
12756 if (!TARGET_PARTIAL_REG_STALL
)
12757 operand
= gen_lowpart (SImode
, operand
);
/* Push the (possibly widened) operand; result again aliases stack top.
   The final `return result;` was dropped by the extraction.  */
12761 gen_rtx_SET (VOIDmode
,
12762 gen_rtx_MEM (GET_MODE (operand
),
12763 gen_rtx_PRE_DEC (SImode
,
12764 stack_pointer_rtx
)),
12770 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* NOTE(review): garbled extraction -- embedded original line numbers and
   missing lines (e.g. the `size = ...` assignments between the mode tests
   and the emit).  Not compilable as-is.  */
12775 /* Free operand from the memory. */
/* Counterpart of ix86_force_to_memory: releases the stack slot.  No-op
   when the 64-bit red zone was used (nothing was pushed in that case,
   per the store path above).  */
12777 ix86_free_from_memory (mode
)
12778 enum machine_mode mode
;
12780 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
/* The two mode tests presumably select the deallocation size
   (8 for DImode/64-bit, 2 for HImode with partial-reg stalls, else 4);
   the assignments themselves were lost -- TODO confirm.  */
12784 if (mode
== DImode
|| TARGET_64BIT
)
12786 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
12790 /* Use LEA to deallocate stack space. In peephole2 it will be converted
12791 to pop or add instruction if registers are available. */
12792 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12793 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* NOTE(review): garbled extraction -- embedded line numbers, split
   statements, and missing return statements for several branches
   (e.g. after the SSE and MMX tests).  Not compilable as-is.  */
12798 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
12799 QImode must go into class Q_REGS.
12800 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
12801 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS for x86: given rtx X and a candidate
   register CLASS, returns a (possibly narrower) class to reload into.  */
12803 ix86_preferred_reload_class (x
, class)
12805 enum reg_class
class;
/* Non-VOIDmode CONST_DOUBLE: a floating-point constant.  */
12807 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
12809 /* SSE can't load any constant directly yet. */
/* Return value for the SSE-class branch was dropped by the extraction
   (presumably NO_REGS -- force to constant pool); TODO confirm.  */
12810 if (SSE_CLASS_P (class))
12812 /* Floats can load 0 and 1. */
12813 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
12815 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
12816 if (MAYBE_SSE_CLASS_P (class))
12817 return (reg_class_subset_p (class, GENERAL_REGS
)
12818 ? GENERAL_REGS
: FLOAT_REGS
);
12822 /* General regs can load everything. */
12823 if (reg_class_subset_p (class, GENERAL_REGS
))
12824 return GENERAL_REGS
;
12825 /* In case we haven't resolved FLOAT or SSE yet, give up. */
12826 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX cannot hold arbitrary constants; QImode values must narrow to
   Q_REGS -- the returns for these tests were lost in extraction.  */
12829 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
12831 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
/* NOTE(review): garbled extraction -- embedded line numbers and missing
   lines (return type, braces, the strict-mode abort at original lines
   12858-12863).  Not compilable as-is.  */
12836 /* If we are copying between general and FP registers, we need a memory
12837 location. The same is true for SSE and MMX registers.
12839 The macro can't work reliably when one of the CLASSES is class containing
12840 registers from multiple units (SSE, MMX, integer). We avoid this by never
12841 combining those units in single alternative in the machine description.
12842 Ensure that this constraint holds to avoid unexpected surprises.
12844 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
12845 enforce these sanity checks. */
/* Implements SECONDARY_MEMORY_NEEDED; nonzero when a CLASS1<->CLASS2 copy
   in MODE must go through memory.  */
12847 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
12848 enum reg_class class1
, class2
;
12849 enum machine_mode mode
;
/* Sanity check: mixed-unit classes (MAYBE_* true but exact predicate
   false) are rejected; the consequent (presumably an abort under STRICT)
   was dropped by the extraction.  */
12852 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
12853 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
12854 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
12855 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
12856 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
12857 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
/* Memory is needed between x87 and anything else, and between SSE/MMX
   and the other units except for SImode moves.  */
12864 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
12865 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
12866 && (mode
) != SImode
)
12867 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
12868 && (mode
) != SImode
));
/* NOTE(review): garbled extraction -- embedded line numbers; missing lines
   include the secondary-memory cost expression (original 12886-12887) and
   the final default return (12903-12905).  Not compilable as-is.  */
12870 /* Return the cost of moving data from a register in class CLASS1 to
12871 one in class CLASS2.
12873 It is not required that the cost always equal 2 when FROM is the same as TO;
12874 on some machines it is expensive to move between registers if they are not
12875 general registers. */
/* Implements REGISTER_MOVE_COST, reading per-processor costs from the
   global ix86_cost table.  */
12877 ix86_register_move_cost (mode
, class1
, class2
)
12878 enum machine_mode mode
;
12879 enum reg_class class1
, class2
;
12881 /* In case we require secondary memory, compute cost of the store followed
12882 by load. In case of copying from general_purpose_register we may emit
12883 multiple stores followed by single load causing memory size mismatch
12884 stall. Count this as arbitarily high cost of 20. */
/* Note strict == 0 here: called from cost computation, so the sanity
   checks in ix86_secondary_memory_needed are not enforced.  */
12885 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* The "cost 20" path for the multi-store case was dropped by the
   extraction; the visible branch sums store + load + add_cost.  */
12888 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
12890 return (MEMORY_MOVE_COST (mode
, class1
, 0)
12891 + MEMORY_MOVE_COST (mode
, class2
, 1) + add_cost
);
12893 /* Moves between SSE/MMX and integer unit are expensive. */
12894 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
12895 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
12896 return ix86_cost
->mmxsse_to_integer
;
/* Intra-unit moves: cost by the unit of CLASS1.  */
12897 if (MAYBE_FLOAT_CLASS_P (class1
))
12898 return ix86_cost
->fp_move
;
12899 if (MAYBE_SSE_CLASS_P (class1
))
12900 return ix86_cost
->sse_move
;
12901 if (MAYBE_MMX_CLASS_P (class1
))
12902 return ix86_cost
->mmx_move
;
/* NOTE(review): garbled extraction -- embedded line numbers; missing lines
   include the return type, the `return 0;` bodies of the reject branches
   (original 12918, 12928) and `return 1;` at 12932.  Not compilable as-is.  */
12906 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements HARD_REGNO_MODE_OK: dispatches on the register bank
   (flags, x87, SSE, MMX, general) and validates MODE against it.  */
12908 ix86_hard_regno_mode_ok (regno
, mode
)
12910 enum machine_mode mode
;
12912 /* Flags and only flags can only hold CCmode values. */
12913 if (CC_REGNO_P (regno
))
12914 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Conversely, CC/RANDOM/PARTIAL_INT modes fit no other register
   (the `return 0;` was dropped by the extraction).  */
12915 if (GET_MODE_CLASS (mode
) == MODE_CC
12916 || GET_MODE_CLASS (mode
) == MODE_RANDOM
12917 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
12919 if (FP_REGNO_P (regno
))
12920 return VALID_FP_MODE_P (mode
);
12921 if (SSE_REGNO_P (regno
))
12922 return VALID_SSE_REG_MODE (mode
);
12923 if (MMX_REGNO_P (regno
))
12924 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
12925 /* We handle both integer and floats in the general purpose registers.
12926 In future we should be able to handle vector modes as well. */
12927 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
12929 /* Take care for QImode values - they can be in non-QI regs, but then
12930 they do cause partial register stalls. */
/* regno < 4 covers eax/ebx/ecx/edx, which have byte subregisters;
   in 64-bit mode all registers do.  */
12931 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
/* QImode in a non-Q register: allow only during/after reload or when
   the target does not suffer partial-register stalls.  */
12933 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
/* NOTE(review): garbled extraction -- embedded line numbers; large spans
   are missing, notably the `index` computations for each class (original
   12954-12970, 12977-12989, 12996-13005) and the switch-case labels for
   the integer sizes.  Not compilable as-is.  */
12936 /* Return the cost of moving data of mode M between a
12937 register and memory. A value of 2 is the default; this cost is
12938 relative to those in `REGISTER_MOVE_COST'.
12940 If moving between registers and memory is more expensive than
12941 between two registers, you should define this macro to express the
12944 Model also increased moving costs of QImode registers in non
/* Implements MEMORY_MOVE_COST; IN nonzero means a load, zero a store.
   Costs come from the per-processor ix86_cost table.  */
12948 ix86_memory_move_cost (mode
, class, in
)
12949 enum machine_mode mode
;
12950 enum reg_class
class;
/* x87 classes: `index` (computed from MODE, computation lost in
   extraction) selects among SF/DF/XF load/store costs.  */
12953 if (FLOAT_CLASS_P (class))
12971 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE classes: index by GET_MODE_SIZE (mapping lost in extraction).  */
12973 if (SSE_CLASS_P (class))
12976 switch (GET_MODE_SIZE (mode
))
12990 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX classes: likewise indexed by mode size.  */
12992 if (MMX_CLASS_P (class))
12995 switch (GET_MODE_SIZE (mode
))
13006 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* Integer registers: byte accesses need movzbl (or a penalty of 4 when
   the class lacks byte subregisters); wider sizes use int_load/int_store.  */
13008 switch (GET_MODE_SIZE (mode
))
13012 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13013 : ix86_cost
->movzbl_load
);
13015 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13016 : ix86_cost
->int_store
[0] + 4);
13019 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13021 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
/* Default case: scale the 32-bit move cost by the number of 32-bit
   chunks; the TFmode-as-XFmode adjustment body was dropped.  */
13022 if (mode
== TFmode
)
13024 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13025 * (int) GET_MODE_SIZE (mode
) / 4);
/* NOTE(review): garbled extraction -- embedded line numbers; the return
   type, `rtx symbol` parameter declaration, braces, and the closing
   `#endif` were dropped.  Not compilable as-is.  */
13029 #ifdef DO_GLOBAL_CTORS_BODY
/* SVR3-style constructor output: emits `pushl $<symbol>` into the asm
   output file so DO_GLOBAL_CTORS_BODY can pop and call each entry.
   PRIORITY is accepted but unused.  */
13031 ix86_svr3_asm_out_constructor (symbol
, priority
)
13033 int priority ATTRIBUTE_UNUSED
;
13036 fputs ("\tpushl $", asm_out_file
);
13037 assemble_name (asm_out_file
, XSTR (symbol
, 0));
13038 fputc ('\n', asm_out_file
);
/* NOTE(review): garbled extraction -- embedded line numbers; missing lines
   include the return type, the declarations of `i` and `pos` (presumably
   `int i, pos = 0;`), braces, and the tails of two comments (13060-13061,
   13080-13081).  Not compilable as-is.  */
13042 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved GPRs first, then callee-saved
   GPRs, then x87/SSE ordered by whether SSE does the FP math, then MMX;
   the remainder is zero-filled.  */
13045 x86_order_regs_for_local_alloc ()
13050 /* First allocate the local general purpose registers. */
13051 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
13052 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
13053 reg_alloc_order
[pos
++] = i
;
13055 /* Global general purpose registers. */
13056 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
13057 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
13058 reg_alloc_order
[pos
++] = i
;
/* Prefer x87 before SSE when the x87 unit does FP math (comment tail
   lost in extraction).  */
13060 /* x87 registers come first in case we are doing FP math
13062 if (!TARGET_SSE_MATH
)
13063 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
13064 reg_alloc_order
[pos
++] = i
;
13066 /* SSE registers. */
13067 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
13068 reg_alloc_order
[pos
++] = i
;
/* REX-extended SSE registers (xmm8-xmm15, 64-bit only).  */
13069 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
13070 reg_alloc_order
[pos
++] = i
;
/* x87 registers last when SSE does the FP math ("registerts" is a typo
   in the original source).  */
13072 /* x87 registerts. */
13073 if (TARGET_SSE_MATH
)
13074 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
13075 reg_alloc_order
[pos
++] = i
;
13077 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
13078 reg_alloc_order
[pos
++] = i
;
/* Zero-fill the slots for registers never handed to the allocator
   (comment tail lost in extraction).  */
13080 /* Initialize the rest of array as we do not allocate some registers
13082 while (pos
< FIRST_PSEUDO_REGISTER
)
13083 reg_alloc_order
[pos
++] = 0;
13087 x86_output_mi_thunk (file
, delta
, function
)
13095 if (ix86_regparm
> 0)
13096 parm
= TYPE_ARG_TYPES (TREE_TYPE (function
));
13099 for (; parm
; parm
= TREE_CHAIN (parm
))
13100 if (TREE_VALUE (parm
) == void_type_node
)
13103 xops
[0] = GEN_INT (delta
);
13106 int n
= aggregate_value_p (TREE_TYPE (TREE_TYPE (function
))) != 0;
13107 xops
[1] = gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
13108 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops
);
13111 fprintf (file
, "\tjmp *");
13112 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13113 fprintf (file
, "@GOTPCREL(%%rip)\n");
13117 fprintf (file
, "\tjmp ");
13118 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13119 fprintf (file
, "\n");
13125 xops
[1] = gen_rtx_REG (SImode
, 0);
13126 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
))))
13127 xops
[1] = gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
13129 xops
[1] = gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
13130 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops
);
13134 xops
[0] = pic_offset_table_rtx
;
13135 xops
[1] = gen_label_rtx ();
13136 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
13138 if (ix86_regparm
> 2)
13140 output_asm_insn ("push{l}\t%0", xops
);
13141 output_asm_insn ("call\t%P1", xops
);
13142 ASM_OUTPUT_INTERNAL_LABEL (file
, "L", CODE_LABEL_NUMBER (xops
[1]));
13143 output_asm_insn ("pop{l}\t%0", xops
);
13145 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops
);
13146 xops
[0] = gen_rtx_MEM (SImode
, XEXP (DECL_RTL (function
), 0));
13148 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops
);
13149 asm_fprintf (file
, "\tpop{l\t%%ebx|\t%%ebx}\n");
13150 asm_fprintf (file
, "\tjmp\t{*%%ecx|%%ecx}\n");
13154 fprintf (file
, "\tjmp ");
13155 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13156 fprintf (file
, "\n");