1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tunning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
90 struct processor_costs i386_cost
= { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
127 struct processor_costs i486_cost
= { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
164 struct processor_costs pentium_cost
= {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
201 struct processor_costs pentiumpro_cost
= {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
238 struct processor_costs k6_cost
= {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
275 struct processor_costs athlon_cost
= {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
312 struct processor_costs pentium4_cost
= {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs
*ix86_cost
= &pentium_cost
;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
360 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
361 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
362 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
363 const int x86_double_with_add
= ~m_386
;
364 const int x86_use_bit_test
= m_386
;
365 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
366 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
367 const int x86_3dnow_a
= m_ATHLON
;
368 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
369 const int x86_branch_hints
= m_PENT4
;
370 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
371 const int x86_partial_reg_stall
= m_PPRO
;
372 const int x86_use_loop
= m_K6
;
373 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
374 const int x86_use_mov0
= m_K6
;
375 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
376 const int x86_read_modify_write
= ~m_PENT
;
377 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
378 const int x86_split_long_moves
= m_PPRO
;
379 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON
;
380 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
381 const int x86_single_stringop
= m_386
| m_PENT4
;
382 const int x86_qimode_math
= ~(0);
383 const int x86_promote_qi_regs
= 0;
384 const int x86_himode_math
= ~(m_PPRO
);
385 const int x86_promote_hi_regs
= m_PPRO
;
386 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
387 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
388 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
389 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
390 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
);
391 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
392 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
393 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
394 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
395 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
396 const int x86_decompose_lea
= m_PENT4
;
397 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON
| m_PENT4
;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30
/* Set by prologue expander and used by epilogue expander to determine
   the prologue/epilogue style to use.  NOTE(review): the tail of the
   original comment was lost; confirm the exact wording upstream.  */
static int use_fast_prologue_epilogue;
408 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
410 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
411 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
412 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
413 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
415 /* Array of the smallest class containing reg number REGNO, indexed by
416 REGNO. Used by REGNO_REG_CLASS in i386.h. */
418 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
421 AREG
, DREG
, CREG
, BREG
,
423 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
425 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
426 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
429 /* flags, fpsr, dirflag, frame */
430 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
431 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
433 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
435 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
436 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
437 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
441 /* The "default" register map used in 32bit mode. */
443 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
445 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
446 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
447 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
448 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
449 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
454 static int const x86_64_int_parameter_registers
[6] =
456 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
457 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
460 static int const x86_64_int_return_registers
[4] =
462 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
465 /* The "default" register map used in 64bit mode. */
466 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
468 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
469 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
470 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
471 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
472 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
473 8,9,10,11,12,13,14,15, /* extended integer registers */
474 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
477 /* Define the register numbers to be used in Dwarf debugging information.
478 The SVR4 reference port C compiler uses the following register numbers
479 in its Dwarf output code:
480 0 for %eax (gcc regno = 0)
481 1 for %ecx (gcc regno = 2)
482 2 for %edx (gcc regno = 1)
483 3 for %ebx (gcc regno = 3)
484 4 for %esp (gcc regno = 7)
485 5 for %ebp (gcc regno = 6)
486 6 for %esi (gcc regno = 4)
487 7 for %edi (gcc regno = 5)
488 The following three DWARF register numbers are never generated by
489 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
490 believes these numbers have these meanings.
491 8 for %eip (no gcc equivalent)
492 9 for %eflags (gcc regno = 17)
493 10 for %trapno (no gcc equivalent)
494 It is not at all clear how we should number the FP stack registers
495 for the x86 architecture. If the version of SDB on x86/svr4 were
496 a bit less brain dead with respect to floating-point then we would
497 have a precedent to follow with respect to DWARF register numbers
498 for x86 FP registers, but the SDB on x86/svr4 is so completely
499 broken with respect to FP registers that it is hardly worth thinking
500 of it as something to strive for compatibility with.
501 The version of x86/svr4 SDB I have at the moment does (partially)
502 seem to believe that DWARF register number 11 is associated with
503 the x86 register %st(0), but that's about all. Higher DWARF
504 register numbers don't seem to be associated with anything in
505 particular, and even for DWARF regno 11, SDB only seems to under-
506 stand that it should say that a variable lives in %st(0) (when
507 asked via an `=' command) if we said it was in DWARF regno 11,
508 but SDB still prints garbage when asked for the value of the
509 variable in question (via a `/' command).
510 (Also note that the labels SDB prints for various FP stack regs
511 when doing an `x' command are all wrong.)
512 Note that these problems generally don't affect the native SVR4
513 C compiler because it doesn't allow the use of -O with -g and
514 because when it is *not* optimizing, it allocates a memory
515 location for each floating-point variable, and the memory
516 location is what gets described in the DWARF AT_location
517 attribute for the variable in question.
518 Regardless of the severe mental illness of the x86/svr4 SDB, we
519 do something sensible here and we use the following DWARF
520 register numbers. Note that these are all stack-top-relative
522 11 for %st(0) (gcc regno = 8)
523 12 for %st(1) (gcc regno = 9)
524 13 for %st(2) (gcc regno = 10)
525 14 for %st(3) (gcc regno = 11)
526 15 for %st(4) (gcc regno = 12)
527 16 for %st(5) (gcc regno = 13)
528 17 for %st(6) (gcc regno = 14)
	18 for %st(7)  (gcc regno = 15)  */
531 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
533 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
534 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
535 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
536 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
537 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
538 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
539 -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
542 /* Test and compare insns in i386.md store the information needed to
543 generate branch and scc insns here. */
545 rtx ix86_compare_op0
= NULL_RTX
;
546 rtx ix86_compare_op1
= NULL_RTX
;
/* The encoding characters for the four TLS models present in ELF,
   indexed by model number (index 0 is unused).  */
static char const tls_model_chars[] = " GLil";
/* Number of per-function stack-local slots tracked per machine mode.  */
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
556 /* Define the structure for the machine field in struct function. */
557 struct machine_function
GTY(())
559 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
560 const char *some_ld_name
;
561 int save_varrargs_registers
;
562 int accesses_prev_frame
;
/* Accessors for fields of the current function's machine_function
   record (the "varrargs" spelling mirrors the underlying field name).  */
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
568 /* Structure describing stack frame layout.
569 Stack grows downward:
575 saved frame pointer if frame_pointer_needed
576 <- HARD_FRAME_POINTER
	> to_allocate			<- FRAME_POINTER  */
594 int outgoing_arguments_size
;
597 HOST_WIDE_INT to_allocate
;
598 /* The offsets relative to ARG_POINTER. */
599 HOST_WIDE_INT frame_pointer_offset
;
600 HOST_WIDE_INT hard_frame_pointer_offset
;
601 HOST_WIDE_INT stack_pointer_offset
;
604 /* Used to enable/disable debugging features. */
605 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
606 /* Code model option as passed by user. */
607 const char *ix86_cmodel_string
;
609 enum cmodel ix86_cmodel
;
611 const char *ix86_asm_string
;
612 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
614 const char *ix86_tls_dialect_string
;
615 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
617 /* Which unit we are generating floating point math for. */
618 enum fpmath_unit ix86_fpmath
;
620 /* Which cpu are we scheduling for. */
621 enum processor_type ix86_cpu
;
622 /* Which instruction set architecture to use. */
623 enum processor_type ix86_arch
;
625 /* Strings to hold which cpu and instruction set architecture to use. */
626 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
627 const char *ix86_arch_string
; /* for -march=<xxx> */
628 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
630 /* # of registers to use to pass arguments. */
631 const char *ix86_regparm_string
;
633 /* true if sse prefetch instruction is not NOOP. */
634 int x86_prefetch_sse
;
636 /* ix86_regparm_string as a number */
639 /* Alignment to use for loops and jumps: */
641 /* Power of two alignment for loops. */
642 const char *ix86_align_loops_string
;
644 /* Power of two alignment for non-loop jumps. */
645 const char *ix86_align_jumps_string
;
647 /* Power of two alignment for stack boundary in bytes. */
648 const char *ix86_preferred_stack_boundary_string
;
650 /* Preferred alignment for stack boundary in bits. */
651 int ix86_preferred_stack_boundary
;
653 /* Values 1-5: see jump.c */
654 int ix86_branch_cost
;
655 const char *ix86_branch_cost_string
;
657 /* Power of two alignment for functions. */
658 const char *ix86_align_funcs_string
;
660 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
661 static char internal_label_prefix
[16];
662 static int internal_label_prefix_len
;
664 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
665 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
666 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
667 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
669 static const char *get_some_local_dynamic_name
PARAMS ((void));
670 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
671 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
672 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
673 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
675 static rtx get_thread_pointer
PARAMS ((void));
676 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
677 static rtx gen_push
PARAMS ((rtx
));
678 static int memory_address_length
PARAMS ((rtx addr
));
679 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
680 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
681 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
682 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
683 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
684 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
685 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
686 static int ix86_nsaved_regs
PARAMS ((void));
687 static void ix86_emit_save_regs
PARAMS ((void));
688 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
689 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
690 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
691 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
692 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
693 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
694 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
695 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
696 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
697 static int ix86_issue_rate
PARAMS ((void));
698 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
699 static void ix86_sched_init
PARAMS ((FILE *, int, int));
700 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
701 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
702 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
703 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
704 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
708 rtx base
, index
, disp
;
712 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
714 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
715 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
718 struct builtin_description
;
719 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
721 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
723 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
724 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
725 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
726 static rtx ix86_expand_timode_binop_builtin
PARAMS ((enum insn_code
,
728 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
729 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
730 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
731 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
735 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
737 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
738 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
739 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
740 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
741 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
742 static int ix86_save_reg
PARAMS ((unsigned int, int));
743 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
744 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
745 const struct attribute_spec ix86_attribute_table
[];
746 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
747 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
749 #ifdef DO_GLOBAL_CTORS_BODY
750 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
761 enum x86_64_reg_class
764 X86_64_INTEGER_CLASS
,
765 X86_64_INTEGERSI_CLASS
,
/* Printable names for the x86-64 register classes, indexed by the
   x86_64_reg_class enumeration.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF",
   "sseup", "x87", "x87up", "no"};

/* Maximum number of 64bit parts a single argument can be split into.  */
#define MAX_CLASSES 4
778 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
779 enum x86_64_reg_class
[MAX_CLASSES
],
781 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
783 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
785 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
786 enum x86_64_reg_class
));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
851 struct gcc_target targetm
= TARGET_INITIALIZER
;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
871 const struct processor_costs
*cost
; /* Processor costs */
872 const int target_enable
; /* Target flags to enable. */
873 const int target_disable
; /* Target flags to disable. */
874 const int align_loop
; /* Default alignments. */
875 const int align_loop_max_skip
;
876 const int align_jump
;
877 const int align_jump_max_skip
;
878 const int align_func
;
879 const int branch_cost
;
881 const processor_target_table
[PROCESSOR_max
] =
883 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
895 const char *const name
; /* processor name or nickname. */
896 const enum processor_type processor
;
902 PTA_PREFETCH_SSE
= 8,
907 const processor_alias_table
[] =
909 {"i386", PROCESSOR_I386
, 0},
910 {"i486", PROCESSOR_I486
, 0},
911 {"i586", PROCESSOR_PENTIUM
, 0},
912 {"pentium", PROCESSOR_PENTIUM
, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
914 {"i686", PROCESSOR_PENTIUMPRO
, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
917 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
918 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
919 PTA_MMX
| PTA_PREFETCH_SSE
},
920 {"k6", PROCESSOR_K6
, PTA_MMX
},
921 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
922 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
923 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
925 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
926 | PTA_3DNOW
| PTA_3DNOW_A
},
927 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
928 | PTA_3DNOW_A
| PTA_SSE
},
929 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
930 | PTA_3DNOW_A
| PTA_SSE
},
931 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
932 | PTA_3DNOW_A
| PTA_SSE
},
935 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS
;
941 if (!ix86_cpu_string
&& ix86_arch_string
)
942 ix86_cpu_string
= ix86_arch_string
;
943 if (!ix86_cpu_string
)
944 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
945 if (!ix86_arch_string
)
946 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
948 if (ix86_cmodel_string
!= 0)
950 if (!strcmp (ix86_cmodel_string
, "small"))
951 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
954 else if (!strcmp (ix86_cmodel_string
, "32"))
956 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
957 ix86_cmodel
= CM_KERNEL
;
958 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
959 ix86_cmodel
= CM_MEDIUM
;
960 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
961 ix86_cmodel
= CM_LARGE
;
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
969 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
971 if (ix86_asm_string
!= 0)
973 if (!strcmp (ix86_asm_string
, "intel"))
974 ix86_asm_dialect
= ASM_INTEL
;
975 else if (!strcmp (ix86_asm_string
, "att"))
976 ix86_asm_dialect
= ASM_ATT
;
978 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
980 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
983 if (ix86_cmodel
== CM_LARGE
)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags
& MASK_64BIT
) ? 64 : 32);
989 for (i
= 0; i
< pta_size
; i
++)
990 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
992 ix86_arch
= processor_alias_table
[i
].processor
;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu
= ix86_arch
;
995 if (processor_alias_table
[i
].flags
& PTA_MMX
996 && !(target_flags
& MASK_MMX_SET
))
997 target_flags
|= MASK_MMX
;
998 if (processor_alias_table
[i
].flags
& PTA_3DNOW
999 && !(target_flags
& MASK_3DNOW_SET
))
1000 target_flags
|= MASK_3DNOW
;
1001 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1002 && !(target_flags
& MASK_3DNOW_A_SET
))
1003 target_flags
|= MASK_3DNOW_A
;
1004 if (processor_alias_table
[i
].flags
& PTA_SSE
1005 && !(target_flags
& MASK_SSE_SET
))
1006 target_flags
|= MASK_SSE
;
1007 if (processor_alias_table
[i
].flags
& PTA_SSE2
1008 && !(target_flags
& MASK_SSE2_SET
))
1009 target_flags
|= MASK_SSE2
;
1010 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1011 x86_prefetch_sse
= true;
1016 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1018 for (i
= 0; i
< pta_size
; i
++)
1019 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1021 ix86_cpu
= processor_alias_table
[i
].processor
;
1024 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1025 x86_prefetch_sse
= true;
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1030 ix86_cost
= &size_cost
;
1032 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1033 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1034 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status
= ix86_init_machine_status
;
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string
)
1042 i
= atoi (ix86_regparm_string
);
1043 if (i
< 0 || i
> REGPARM_MAX
)
1044 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1050 ix86_regparm
= REGPARM_MAX
;
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string
)
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops
== 0)
1060 i
= atoi (ix86_align_loops_string
);
1061 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1062 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1064 align_loops
= 1 << i
;
1068 if (ix86_align_jumps_string
)
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps
== 0)
1073 i
= atoi (ix86_align_jumps_string
);
1074 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1075 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1077 align_jumps
= 1 << i
;
1081 if (ix86_align_funcs_string
)
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions
== 0)
1086 i
= atoi (ix86_align_funcs_string
);
1087 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1088 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1090 align_functions
= 1 << i
;
1094 /* Default align_* from the processor table. */
1095 if (align_loops
== 0)
1097 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1098 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1100 if (align_jumps
== 0)
1102 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1103 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1105 if (align_functions
== 0)
1107 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary
= (optimize_size
1115 ? TARGET_64BIT
? 64 : 32
1117 if (ix86_preferred_stack_boundary_string
)
1119 i
= atoi (ix86_preferred_stack_boundary_string
);
1120 if (i
< (TARGET_64BIT
? 3 : 2) || i
> 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1122 TARGET_64BIT
? 3 : 2);
1124 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1129 if (ix86_branch_cost_string
)
1131 i
= atoi (ix86_branch_cost_string
);
1133 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1135 ix86_branch_cost
= i
;
1138 if (ix86_tls_dialect_string
)
1140 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1141 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1142 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1143 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string
);
1149 /* Keep nonleaf frame pointers. */
1150 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1151 flag_omit_frame_pointer
= 1;
1153 /* If we're doing fast math, we don't care about comparison order
1154 wrt NaNs. This lets us use a shorter comparison sequence. */
1155 if (flag_unsafe_math_optimizations
)
1156 target_flags
&= ~MASK_IEEE_FP
;
1158 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1159 since the insns won't need emulation. */
1160 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1161 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1165 if (TARGET_ALIGN_DOUBLE
)
1166 error ("-malign-double makes no sense in the 64bit mode");
1168 error ("-mrtd calling convention not supported in the 64bit mode");
1169 /* Enable by default the SSE and MMX builtins. */
1170 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1171 ix86_fpmath
= FPMATH_SSE
;
1174 ix86_fpmath
= FPMATH_387
;
1176 if (ix86_fpmath_string
!= 0)
1178 if (! strcmp (ix86_fpmath_string
, "387"))
1179 ix86_fpmath
= FPMATH_387
;
1180 else if (! strcmp (ix86_fpmath_string
, "sse"))
1184 warning ("SSE instruction set disabled, using 387 arithmetics");
1185 ix86_fpmath
= FPMATH_387
;
1188 ix86_fpmath
= FPMATH_SSE
;
1190 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1191 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1195 warning ("SSE instruction set disabled, using 387 arithmetics");
1196 ix86_fpmath
= FPMATH_387
;
1198 else if (!TARGET_80387
)
1200 warning ("387 instruction set disabled, using SSE arithmetics");
1201 ix86_fpmath
= FPMATH_SSE
;
1204 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1207 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1210 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1214 target_flags
|= MASK_MMX
;
1215 x86_prefetch_sse
= true;
1218 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1221 target_flags
|= MASK_MMX
;
1222 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1223 extensions it adds. */
1224 if (x86_3dnow_a
& (1 << ix86_arch
))
1225 target_flags
|= MASK_3DNOW_A
;
1227 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1228 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS_SET
)
1230 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1232 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1235 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1236 p
= strchr (internal_label_prefix
, 'X');
1237 internal_label_prefix_len
= p
- internal_label_prefix
;
1243 optimization_options (level
, size
)
1245 int size ATTRIBUTE_UNUSED
;
1247 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1248 make the problem with not enough registers even worse. */
1249 #ifdef INSN_SCHEDULING
1251 flag_schedule_insns
= 0;
1253 if (TARGET_64BIT
&& optimize
>= 1)
1254 flag_omit_frame_pointer
= 1;
1257 flag_pcc_struct_return
= 0;
1258 flag_asynchronous_unwind_tables
= 1;
1262 /* Table of valid machine attributes. */
1263 const struct attribute_spec ix86_attribute_table
[] =
1265 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1266 /* Stdcall attribute says callee is responsible for popping arguments
1267 if they are not variable. */
1268 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1269 /* Cdecl attribute says the callee is a normal C declaration */
1270 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1271 /* Regparm attribute specifies how many integer arguments are to be
1272 passed in registers. */
1273 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1274 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1275 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1276 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1277 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1279 { NULL
, 0, 0, false, false, false, NULL
}
1282 /* Handle a "cdecl" or "stdcall" attribute;
1283 arguments as in struct attribute_spec.handler. */
1285 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1288 tree args ATTRIBUTE_UNUSED
;
1289 int flags ATTRIBUTE_UNUSED
;
1292 if (TREE_CODE (*node
) != FUNCTION_TYPE
1293 && TREE_CODE (*node
) != METHOD_TYPE
1294 && TREE_CODE (*node
) != FIELD_DECL
1295 && TREE_CODE (*node
) != TYPE_DECL
)
1297 warning ("`%s' attribute only applies to functions",
1298 IDENTIFIER_POINTER (name
));
1299 *no_add_attrs
= true;
1304 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1305 *no_add_attrs
= true;
1311 /* Handle a "regparm" attribute;
1312 arguments as in struct attribute_spec.handler. */
1314 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1318 int flags ATTRIBUTE_UNUSED
;
1321 if (TREE_CODE (*node
) != FUNCTION_TYPE
1322 && TREE_CODE (*node
) != METHOD_TYPE
1323 && TREE_CODE (*node
) != FIELD_DECL
1324 && TREE_CODE (*node
) != TYPE_DECL
)
1326 warning ("`%s' attribute only applies to functions",
1327 IDENTIFIER_POINTER (name
));
1328 *no_add_attrs
= true;
1334 cst
= TREE_VALUE (args
);
1335 if (TREE_CODE (cst
) != INTEGER_CST
)
1337 warning ("`%s' attribute requires an integer constant argument",
1338 IDENTIFIER_POINTER (name
));
1339 *no_add_attrs
= true;
1341 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1343 warning ("argument to `%s' attribute larger than %d",
1344 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1345 *no_add_attrs
= true;
1352 /* Return 0 if the attributes for two types are incompatible, 1 if they
1353 are compatible, and 2 if they are nearly compatible (which causes a
1354 warning to be generated). */
1357 ix86_comp_type_attributes (type1
, type2
)
1361 /* Check for mismatch of non-default calling convention. */
1362 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1364 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1367 /* Check for mismatched return types (cdecl vs stdcall). */
1368 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1369 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1374 /* Value is the number of bytes of arguments automatically
1375 popped when returning from a subroutine call.
1376 FUNDECL is the declaration node of the function (as a tree),
1377 FUNTYPE is the data type of the function (as a tree),
1378 or for a library call it is an identifier node for the subroutine name.
1379 SIZE is the number of bytes of arguments passed on the stack.
1381 On the 80386, the RTD insn may be used to pop them if the number
1382 of args is fixed, but if the number is variable then the caller
1383 must pop them all. RTD can't be used for library calls now
1384 because the library is compiled with the Unix compiler.
1385 Use of RTD is a selectable option, since it is incompatible with
1386 standard Unix calling sequences. If the option is not selected,
1387 the caller must always pop the args.
1389 The attribute stdcall is equivalent to RTD on a per module basis. */
1392 ix86_return_pops_args (fundecl
, funtype
, size
)
1397 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1399 /* Cdecl functions override -mrtd, and never pop the stack. */
1400 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1402 /* Stdcall functions will pop the stack if not variable args. */
1403 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1407 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1408 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1409 == void_type_node
)))
1413 /* Lose any fake structure return argument if it is passed on the stack. */
1414 if (aggregate_value_p (TREE_TYPE (funtype
))
1417 int nregs
= ix86_regparm
;
1421 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype
));
1424 nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1428 return GET_MODE_SIZE (Pmode
);
1434 /* Argument support functions. */
1436 /* Return true when register may be used to pass function parameters. */
1438 ix86_function_arg_regno_p (regno
)
1443 return (regno
< REGPARM_MAX
1444 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1445 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1447 /* RAX is used as hidden argument to va_arg functions. */
1450 for (i
= 0; i
< REGPARM_MAX
; i
++)
1451 if (regno
== x86_64_int_parameter_registers
[i
])
1456 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1457 for a call to a function whose data type is FNTYPE.
1458 For a library call, FNTYPE is 0. */
1461 init_cumulative_args (cum
, fntype
, libname
)
1462 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1463 tree fntype
; /* tree ptr for function decl */
1464 rtx libname
; /* SYMBOL_REF of library name or 0 */
1466 static CUMULATIVE_ARGS zero_cum
;
1467 tree param
, next_param
;
1469 if (TARGET_DEBUG_ARG
)
1471 fprintf (stderr
, "\ninit_cumulative_args (");
1473 fprintf (stderr
, "fntype code = %s, ret code = %s",
1474 tree_code_name
[(int) TREE_CODE (fntype
)],
1475 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1477 fprintf (stderr
, "no fntype");
1480 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1485 /* Set up the number of registers to use for passing arguments. */
1486 cum
->nregs
= ix86_regparm
;
1487 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1488 if (fntype
&& !TARGET_64BIT
)
1490 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1493 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1495 cum
->maybe_vaarg
= false;
1497 /* Determine if this function has variable arguments. This is
1498 indicated by the last argument being 'void_type_mode' if there
1499 are no variable arguments. If there are variable arguments, then
1500 we won't pass anything in registers */
1504 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1505 param
!= 0; param
= next_param
)
1507 next_param
= TREE_CHAIN (param
);
1508 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1512 cum
->maybe_vaarg
= true;
1516 if ((!fntype
&& !libname
)
1517 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1518 cum
->maybe_vaarg
= 1;
1520 if (TARGET_DEBUG_ARG
)
1521 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1526 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1527 of this code is to classify each 8bytes of incoming argument by the register
1528 class and assign registers accordingly. */
1530 /* Return the union class of CLASS1 and CLASS2.
1531 See the x86-64 PS ABI for details. */
1533 static enum x86_64_reg_class
1534 merge_classes (class1
, class2
)
1535 enum x86_64_reg_class class1
, class2
;
1537 /* Rule #1: If both classes are equal, this is the resulting class. */
1538 if (class1
== class2
)
1541 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1543 if (class1
== X86_64_NO_CLASS
)
1545 if (class2
== X86_64_NO_CLASS
)
1548 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1549 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1550 return X86_64_MEMORY_CLASS
;
1552 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1553 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1554 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1555 return X86_64_INTEGERSI_CLASS
;
1556 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1557 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1558 return X86_64_INTEGER_CLASS
;
1560 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1561 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1562 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1563 return X86_64_MEMORY_CLASS
;
1565 /* Rule #6: Otherwise class SSE is used. */
1566 return X86_64_SSE_CLASS
;
1569 /* Classify the argument of type TYPE and mode MODE.
1570 CLASSES will be filled by the register class used to pass each word
1571 of the operand. The number of words is returned. In case the parameter
1572 should be passed in memory, 0 is returned. As a special case for zero
1573 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1575 BIT_OFFSET is used internally for handling records and specifies offset
1576 of the offset in bits modulo 256 to avoid overflow cases.
1578 See the x86-64 PS ABI for details.
1582 classify_argument (mode
, type
, classes
, bit_offset
)
1583 enum machine_mode mode
;
1585 enum x86_64_reg_class classes
[MAX_CLASSES
];
1589 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1590 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1592 if (type
&& AGGREGATE_TYPE_P (type
))
1596 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1598 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1602 for (i
= 0; i
< words
; i
++)
1603 classes
[i
] = X86_64_NO_CLASS
;
1605 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1606 signalize memory class, so handle it as special case. */
1609 classes
[0] = X86_64_NO_CLASS
;
1613 /* Classify each field of record and merge classes. */
1614 if (TREE_CODE (type
) == RECORD_TYPE
)
1616 /* For classes first merge in the field of the subclasses. */
1617 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1619 tree bases
= TYPE_BINFO_BASETYPES (type
);
1620 int n_bases
= TREE_VEC_LENGTH (bases
);
1623 for (i
= 0; i
< n_bases
; ++i
)
1625 tree binfo
= TREE_VEC_ELT (bases
, i
);
1627 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1628 tree type
= BINFO_TYPE (binfo
);
1630 num
= classify_argument (TYPE_MODE (type
),
1632 (offset
+ bit_offset
) % 256);
1635 for (i
= 0; i
< num
; i
++)
1637 int pos
= (offset
+ bit_offset
) / 8 / 8;
1639 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1643 /* And now merge the fields of structure. */
1644 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1646 if (TREE_CODE (field
) == FIELD_DECL
)
1650 /* Bitfields are always classified as integer. Handle them
1651 early, since later code would consider them to be
1652 misaligned integers. */
1653 if (DECL_BIT_FIELD (field
))
1655 for (i
= int_bit_position (field
) / 8 / 8;
1656 i
< (int_bit_position (field
)
1657 + tree_low_cst (DECL_SIZE (field
), 0)
1660 merge_classes (X86_64_INTEGER_CLASS
,
1665 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1666 TREE_TYPE (field
), subclasses
,
1667 (int_bit_position (field
)
1668 + bit_offset
) % 256);
1671 for (i
= 0; i
< num
; i
++)
1674 (int_bit_position (field
) + bit_offset
) / 8 / 8;
1676 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1682 /* Arrays are handled as small records. */
1683 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1686 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1687 TREE_TYPE (type
), subclasses
, bit_offset
);
1691 /* The partial classes are now full classes. */
1692 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1693 subclasses
[0] = X86_64_SSE_CLASS
;
1694 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1695 subclasses
[0] = X86_64_INTEGER_CLASS
;
1697 for (i
= 0; i
< words
; i
++)
1698 classes
[i
] = subclasses
[i
% num
];
1700 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1701 else if (TREE_CODE (type
) == UNION_TYPE
1702 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1704 /* For classes first merge in the field of the subclasses. */
1705 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1707 tree bases
= TYPE_BINFO_BASETYPES (type
);
1708 int n_bases
= TREE_VEC_LENGTH (bases
);
1711 for (i
= 0; i
< n_bases
; ++i
)
1713 tree binfo
= TREE_VEC_ELT (bases
, i
);
1715 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1716 tree type
= BINFO_TYPE (binfo
);
1718 num
= classify_argument (TYPE_MODE (type
),
1720 (offset
+ bit_offset
) % 256);
1723 for (i
= 0; i
< num
; i
++)
1725 int pos
= (offset
+ bit_offset
) / 8 / 8;
1727 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1731 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1733 if (TREE_CODE (field
) == FIELD_DECL
)
1736 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1737 TREE_TYPE (field
), subclasses
,
1741 for (i
= 0; i
< num
; i
++)
1742 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1749 /* Final merger cleanup. */
1750 for (i
= 0; i
< words
; i
++)
1752 /* If one class is MEMORY, everything should be passed in
1754 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1757 /* The X86_64_SSEUP_CLASS should be always preceded by
1758 X86_64_SSE_CLASS. */
1759 if (classes
[i
] == X86_64_SSEUP_CLASS
1760 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1761 classes
[i
] = X86_64_SSE_CLASS
;
1763 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1764 if (classes
[i
] == X86_64_X87UP_CLASS
1765 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1766 classes
[i
] = X86_64_SSE_CLASS
;
1771 /* Compute alignment needed. We align all types to natural boundaries with
1772 exception of XFmode that is aligned to 64bits. */
1773 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1775 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1778 mode_alignment
= 128;
1779 else if (mode
== XCmode
)
1780 mode_alignment
= 256;
1781 /* Misaligned fields are always returned in memory. */
1782 if (bit_offset
% mode_alignment
)
1786 /* Classification of atomic types. */
1796 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1797 classes
[0] = X86_64_INTEGERSI_CLASS
;
1799 classes
[0] = X86_64_INTEGER_CLASS
;
1803 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1806 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1807 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1810 if (!(bit_offset
% 64))
1811 classes
[0] = X86_64_SSESF_CLASS
;
1813 classes
[0] = X86_64_SSE_CLASS
;
1816 classes
[0] = X86_64_SSEDF_CLASS
;
1819 classes
[0] = X86_64_X87_CLASS
;
1820 classes
[1] = X86_64_X87UP_CLASS
;
1823 classes
[0] = X86_64_X87_CLASS
;
1824 classes
[1] = X86_64_X87UP_CLASS
;
1825 classes
[2] = X86_64_X87_CLASS
;
1826 classes
[3] = X86_64_X87UP_CLASS
;
1829 classes
[0] = X86_64_SSEDF_CLASS
;
1830 classes
[1] = X86_64_SSEDF_CLASS
;
1833 classes
[0] = X86_64_SSE_CLASS
;
1837 classes
[0] = X86_64_SSE_CLASS
;
1838 classes
[1] = X86_64_SSEUP_CLASS
;
1844 classes
[0] = X86_64_SSE_CLASS
;
1854 /* Examine the argument and return set number of register required in each
1855 class. Return 0 iff parameter should be passed in memory. */
1857 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1858 enum machine_mode mode
;
1860 int *int_nregs
, *sse_nregs
;
1863 enum x86_64_reg_class
class[MAX_CLASSES
];
1864 int n
= classify_argument (mode
, type
, class, 0);
1870 for (n
--; n
>= 0; n
--)
1873 case X86_64_INTEGER_CLASS
:
1874 case X86_64_INTEGERSI_CLASS
:
1877 case X86_64_SSE_CLASS
:
1878 case X86_64_SSESF_CLASS
:
1879 case X86_64_SSEDF_CLASS
:
1882 case X86_64_NO_CLASS
:
1883 case X86_64_SSEUP_CLASS
:
1885 case X86_64_X87_CLASS
:
1886 case X86_64_X87UP_CLASS
:
1890 case X86_64_MEMORY_CLASS
:
1895 /* Construct container for the argument used by GCC interface. See
1896 FUNCTION_ARG for the detailed description. */
1898 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
1899 enum machine_mode mode
;
1902 int nintregs
, nsseregs
;
1906 enum machine_mode tmpmode
;
1908 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1909 enum x86_64_reg_class
class[MAX_CLASSES
];
1913 int needed_sseregs
, needed_intregs
;
1914 rtx exp
[MAX_CLASSES
];
1917 n
= classify_argument (mode
, type
, class, 0);
1918 if (TARGET_DEBUG_ARG
)
1921 fprintf (stderr
, "Memory class\n");
1924 fprintf (stderr
, "Classes:");
1925 for (i
= 0; i
< n
; i
++)
1927 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
1929 fprintf (stderr
, "\n");
1934 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
1936 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
1939 /* First construct simple cases. Avoid SCmode, since we want to use
1940 single register to pass this type. */
1941 if (n
== 1 && mode
!= SCmode
)
1944 case X86_64_INTEGER_CLASS
:
1945 case X86_64_INTEGERSI_CLASS
:
1946 return gen_rtx_REG (mode
, intreg
[0]);
1947 case X86_64_SSE_CLASS
:
1948 case X86_64_SSESF_CLASS
:
1949 case X86_64_SSEDF_CLASS
:
1950 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1951 case X86_64_X87_CLASS
:
1952 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
1953 case X86_64_NO_CLASS
:
1954 /* Zero sized array, struct or class. */
1959 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
1960 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
1962 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
1963 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
1964 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
1965 && class[1] == X86_64_INTEGER_CLASS
1966 && (mode
== CDImode
|| mode
== TImode
)
1967 && intreg
[0] + 1 == intreg
[1])
1968 return gen_rtx_REG (mode
, intreg
[0]);
1970 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
1971 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
1972 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
1974 /* Otherwise figure out the entries of the PARALLEL. */
1975 for (i
= 0; i
< n
; i
++)
1979 case X86_64_NO_CLASS
:
1981 case X86_64_INTEGER_CLASS
:
1982 case X86_64_INTEGERSI_CLASS
:
1983 /* Merge TImodes on aligned occassions here too. */
1984 if (i
* 8 + 8 > bytes
)
1985 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
1986 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
1990 /* We've requested 24 bytes we don't have mode for. Use DImode. */
1991 if (tmpmode
== BLKmode
)
1993 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
1994 gen_rtx_REG (tmpmode
, *intreg
),
1998 case X86_64_SSESF_CLASS
:
1999 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2000 gen_rtx_REG (SFmode
,
2001 SSE_REGNO (sse_regno
)),
2005 case X86_64_SSEDF_CLASS
:
2006 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2007 gen_rtx_REG (DFmode
,
2008 SSE_REGNO (sse_regno
)),
2012 case X86_64_SSE_CLASS
:
2013 if (i
< n
&& class[i
+ 1] == X86_64_SSEUP_CLASS
)
2014 tmpmode
= TImode
, i
++;
2017 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2018 gen_rtx_REG (tmpmode
,
2019 SSE_REGNO (sse_regno
)),
2027 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2028 for (i
= 0; i
< nexps
; i
++)
2029 XVECEXP (ret
, 0, i
) = exp
[i
];
2033 /* Update the data in CUM to advance over an argument
2034 of mode MODE and data type TYPE.
2035 (TYPE is null for libcalls where that information may not be available.) */
2038 function_arg_advance (cum
, mode
, type
, named
)
2039 CUMULATIVE_ARGS
*cum
; /* current arg information */
2040 enum machine_mode mode
; /* current arg mode */
2041 tree type
; /* type of the argument or 0 if lib support */
2042 int named
; /* whether or not the argument was named */
2045 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2046 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2048 if (TARGET_DEBUG_ARG
)
2050 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2051 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2054 int int_nregs
, sse_nregs
;
2055 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2056 cum
->words
+= words
;
2057 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2059 cum
->nregs
-= int_nregs
;
2060 cum
->sse_nregs
-= sse_nregs
;
2061 cum
->regno
+= int_nregs
;
2062 cum
->sse_regno
+= sse_nregs
;
2065 cum
->words
+= words
;
2069 if (TARGET_SSE
&& mode
== TImode
)
2071 cum
->sse_words
+= words
;
2072 cum
->sse_nregs
-= 1;
2073 cum
->sse_regno
+= 1;
2074 if (cum
->sse_nregs
<= 0)
2082 cum
->words
+= words
;
2083 cum
->nregs
-= words
;
2084 cum
->regno
+= words
;
2086 if (cum
->nregs
<= 0)
2096 /* Define where to put the arguments to a function.
2097 Value is zero to push the argument on the stack,
2098 or a hard register in which to store the argument.
2100 MODE is the argument's machine mode.
2101 TYPE is the data type of the argument (as a tree).
2102 This is null for libcalls where that information may
2104 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2105 the preceding args and about the function being called.
2106 NAMED is nonzero if this argument is a named parameter
2107 (otherwise it is an extra parameter matching an ellipsis). */
2110 function_arg (cum
, mode
, type
, named
)
2111 CUMULATIVE_ARGS
*cum
; /* current arg information */
2112 enum machine_mode mode
; /* current arg mode */
2113 tree type
; /* type of the argument or 0 if lib support */
2114 int named
; /* != 0 for normal args, == 0 for ... args */
2118 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2119 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2121 /* Handle an hidden AL argument containing number of registers for varargs
2122 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2124 if (mode
== VOIDmode
)
2127 return GEN_INT (cum
->maybe_vaarg
2128 ? (cum
->sse_nregs
< 0
2136 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2137 &x86_64_int_parameter_registers
[cum
->regno
],
2142 /* For now, pass fp/complex values on the stack. */
2151 if (words
<= cum
->nregs
)
2152 ret
= gen_rtx_REG (mode
, cum
->regno
);
2156 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2160 if (TARGET_DEBUG_ARG
)
2163 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2164 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2167 print_simple_rtl (stderr
, ret
);
2169 fprintf (stderr
, ", stack");
2171 fprintf (stderr
, " )\n");
2177 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2181 ix86_function_arg_boundary (mode
, type
)
2182 enum machine_mode mode
;
2187 return PARM_BOUNDARY
;
2189 align
= TYPE_ALIGN (type
);
2191 align
= GET_MODE_ALIGNMENT (mode
);
2192 if (align
< PARM_BOUNDARY
)
2193 align
= PARM_BOUNDARY
;
2199 /* Return true if N is a possible register number of function value. */
2201 ix86_function_value_regno_p (regno
)
2206 return ((regno
) == 0
2207 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2208 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2210 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2211 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2212 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2215 /* Define how to find the value returned by a function.
2216 VALTYPE is the data type of the value (as a tree).
2217 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2218 otherwise, FUNC is 0. */
2220 ix86_function_value (valtype
)
2225 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2226 REGPARM_MAX
, SSE_REGPARM_MAX
,
2227 x86_64_int_return_registers
, 0);
2228 /* For zero sized structures, construct_continer return NULL, but we need
2229 to keep rest of compiler happy by returning meaningfull value. */
2231 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2235 return gen_rtx_REG (TYPE_MODE (valtype
), VALUE_REGNO (TYPE_MODE (valtype
)));
2238 /* Return false iff type is returned in memory. */
2240 ix86_return_in_memory (type
)
2243 int needed_intregs
, needed_sseregs
;
2246 return !examine_argument (TYPE_MODE (type
), type
, 1,
2247 &needed_intregs
, &needed_sseregs
);
2251 if (TYPE_MODE (type
) == BLKmode
2252 || (VECTOR_MODE_P (TYPE_MODE (type
))
2253 && int_size_in_bytes (type
) == 8)
2254 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2255 && TYPE_MODE (type
) != TFmode
2256 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2262 /* Define how to find the value returned by a library function
2263 assuming the value has mode MODE. */
2265 ix86_libcall_value (mode
)
2266 enum machine_mode mode
;
2276 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2279 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2281 return gen_rtx_REG (mode
, 0);
2285 return gen_rtx_REG (mode
, VALUE_REGNO (mode
));
2288 /* Create the va_list data type. */
2291 ix86_build_va_list ()
2293 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2295 /* For i386 we use plain pointer to argument area. */
2297 return build_pointer_type (char_type_node
);
2299 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2300 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2302 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2303 unsigned_type_node
);
2304 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2305 unsigned_type_node
);
2306 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2308 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2311 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2312 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2313 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2314 DECL_FIELD_CONTEXT (f_sav
) = record
;
2316 TREE_CHAIN (record
) = type_decl
;
2317 TYPE_NAME (record
) = type_decl
;
2318 TYPE_FIELDS (record
) = f_gpr
;
2319 TREE_CHAIN (f_gpr
) = f_fpr
;
2320 TREE_CHAIN (f_fpr
) = f_ovf
;
2321 TREE_CHAIN (f_ovf
) = f_sav
;
2323 layout_type (record
);
2325 /* The correct type is an array type of one element. */
2326 return build_array_type (record
, build_index_type (size_zero_node
));
2329 /* Perform any needed actions needed for a function that is receiving a
2330 variable number of arguments.
2334 MODE and TYPE are the mode and type of the current parameter.
2336 PRETEND_SIZE is a variable that should be set to the amount of stack
2337 that must be pushed by the prolog to pretend that our caller pushed
2340 Normally, this macro will push all remaining incoming registers on the
2341 stack and set PRETEND_SIZE to the length of the registers pushed. */
2344 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2345 CUMULATIVE_ARGS
*cum
;
2346 enum machine_mode mode
;
2348 int *pretend_size ATTRIBUTE_UNUSED
;
2352 CUMULATIVE_ARGS next_cum
;
2353 rtx save_area
= NULL_RTX
, mem
;
2366 /* Indicate to allocate space on the stack for varargs save area. */
2367 ix86_save_varrargs_registers
= 1;
2369 fntype
= TREE_TYPE (current_function_decl
);
2370 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2371 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2372 != void_type_node
));
2374 /* For varargs, we do not want to skip the dummy va_dcl argument.
2375 For stdargs, we do want to skip the last named argument. */
2378 function_arg_advance (&next_cum
, mode
, type
, 1);
2381 save_area
= frame_pointer_rtx
;
2383 set
= get_varargs_alias_set ();
2385 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2387 mem
= gen_rtx_MEM (Pmode
,
2388 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2389 set_mem_alias_set (mem
, set
);
2390 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2391 x86_64_int_parameter_registers
[i
]));
2394 if (next_cum
.sse_nregs
)
2396 /* Now emit code to save SSE registers. The AX parameter contains number
2397 of SSE parameter registers used to call this function. We use
2398 sse_prologue_save insn template that produces computed jump across
2399 SSE saves. We need some preparation work to get this working. */
2401 label
= gen_label_rtx ();
2402 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2404 /* Compute address to jump to :
2405 label - 5*eax + nnamed_sse_arguments*5 */
2406 tmp_reg
= gen_reg_rtx (Pmode
);
2407 nsse_reg
= gen_reg_rtx (Pmode
);
2408 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2409 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2410 gen_rtx_MULT (Pmode
, nsse_reg
,
2412 if (next_cum
.sse_regno
)
2415 gen_rtx_CONST (DImode
,
2416 gen_rtx_PLUS (DImode
,
2418 GEN_INT (next_cum
.sse_regno
* 4))));
2420 emit_move_insn (nsse_reg
, label_ref
);
2421 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2423 /* Compute address of memory block we save into. We always use pointer
2424 pointing 127 bytes after first byte to store - this is needed to keep
2425 instruction size limited by 4 bytes. */
2426 tmp_reg
= gen_reg_rtx (Pmode
);
2427 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2428 plus_constant (save_area
,
2429 8 * REGPARM_MAX
+ 127)));
2430 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2431 set_mem_alias_set (mem
, set
);
2432 set_mem_align (mem
, BITS_PER_WORD
);
2434 /* And finally do the dirty job! */
2435 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2436 GEN_INT (next_cum
.sse_regno
), label
));
2441 /* Implement va_start. */
2444 ix86_va_start (stdarg_p
, valist
, nextarg
)
2449 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2450 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2451 tree gpr
, fpr
, ovf
, sav
, t
;
2453 /* Only 64bit target needs something special. */
2456 std_expand_builtin_va_start (stdarg_p
, valist
, nextarg
);
2460 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2461 f_fpr
= TREE_CHAIN (f_gpr
);
2462 f_ovf
= TREE_CHAIN (f_fpr
);
2463 f_sav
= TREE_CHAIN (f_ovf
);
2465 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2466 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2467 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2468 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2469 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2471 /* Count number of gp and fp argument registers used. */
2472 words
= current_function_args_info
.words
;
2473 n_gpr
= current_function_args_info
.regno
;
2474 n_fpr
= current_function_args_info
.sse_regno
;
2476 if (TARGET_DEBUG_ARG
)
2477 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2478 (int) words
, (int) n_gpr
, (int) n_fpr
);
2480 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2481 build_int_2 (n_gpr
* 8, 0));
2482 TREE_SIDE_EFFECTS (t
) = 1;
2483 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2485 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2486 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2487 TREE_SIDE_EFFECTS (t
) = 1;
2488 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2490 /* Find the overflow area. */
2491 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2493 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2494 build_int_2 (words
* UNITS_PER_WORD
, 0));
2495 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2496 TREE_SIDE_EFFECTS (t
) = 1;
2497 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2499 /* Find the register save area.
2500 Prologue of the function save it right above stack frame. */
2501 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2502 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2503 TREE_SIDE_EFFECTS (t
) = 1;
2504 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2507 /* Implement va_arg. */
2509 ix86_va_arg (valist
, type
)
2512 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2513 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2514 tree gpr
, fpr
, ovf
, sav
, t
;
2516 rtx lab_false
, lab_over
= NULL_RTX
;
2520 /* Only 64bit target needs something special. */
2523 return std_expand_builtin_va_arg (valist
, type
);
2526 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2527 f_fpr
= TREE_CHAIN (f_gpr
);
2528 f_ovf
= TREE_CHAIN (f_fpr
);
2529 f_sav
= TREE_CHAIN (f_ovf
);
2531 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2532 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2533 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2534 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2535 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2537 size
= int_size_in_bytes (type
);
2538 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2540 container
= construct_container (TYPE_MODE (type
), type
, 0,
2541 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2543 * Pull the value out of the saved registers ...
2546 addr_rtx
= gen_reg_rtx (Pmode
);
2550 rtx int_addr_rtx
, sse_addr_rtx
;
2551 int needed_intregs
, needed_sseregs
;
2554 lab_over
= gen_label_rtx ();
2555 lab_false
= gen_label_rtx ();
2557 examine_argument (TYPE_MODE (type
), type
, 0,
2558 &needed_intregs
, &needed_sseregs
);
2561 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2562 || TYPE_ALIGN (type
) > 128);
2564 /* In case we are passing a structure, verify that it is a consecutive block
2565 on the register save area. If not we need to do moves. */
2566 if (!need_temp
&& !REG_P (container
))
2568 /* Verify that all registers are strictly consecutive */
2569 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2573 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2575 rtx slot
= XVECEXP (container
, 0, i
);
2576 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2577 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2585 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2587 rtx slot
= XVECEXP (container
, 0, i
);
2588 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2589 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2596 int_addr_rtx
= addr_rtx
;
2597 sse_addr_rtx
= addr_rtx
;
2601 int_addr_rtx
= gen_reg_rtx (Pmode
);
2602 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2604 /* First ensure that we fit completely in registers. */
2607 emit_cmp_and_jump_insns (expand_expr
2608 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2609 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2610 1) * 8), GE
, const1_rtx
, SImode
,
2615 emit_cmp_and_jump_insns (expand_expr
2616 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2617 GEN_INT ((SSE_REGPARM_MAX
-
2618 needed_sseregs
+ 1) * 16 +
2619 REGPARM_MAX
* 8), GE
, const1_rtx
,
2620 SImode
, 1, lab_false
);
2623 /* Compute index to start of area used for integer regs. */
2626 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2627 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2628 if (r
!= int_addr_rtx
)
2629 emit_move_insn (int_addr_rtx
, r
);
2633 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2634 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2635 if (r
!= sse_addr_rtx
)
2636 emit_move_insn (sse_addr_rtx
, r
);
2643 /* Never use the memory itself, as it has the alias set. */
2644 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2645 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2646 set_mem_alias_set (mem
, get_varargs_alias_set ());
2647 set_mem_align (mem
, BITS_PER_UNIT
);
2649 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2651 rtx slot
= XVECEXP (container
, 0, i
);
2652 rtx reg
= XEXP (slot
, 0);
2653 enum machine_mode mode
= GET_MODE (reg
);
2659 if (SSE_REGNO_P (REGNO (reg
)))
2661 src_addr
= sse_addr_rtx
;
2662 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2666 src_addr
= int_addr_rtx
;
2667 src_offset
= REGNO (reg
) * 8;
2669 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2670 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2671 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2672 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2673 emit_move_insn (dest_mem
, src_mem
);
2680 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2681 build_int_2 (needed_intregs
* 8, 0));
2682 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2683 TREE_SIDE_EFFECTS (t
) = 1;
2684 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2689 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2690 build_int_2 (needed_sseregs
* 16, 0));
2691 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2692 TREE_SIDE_EFFECTS (t
) = 1;
2693 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2696 emit_jump_insn (gen_jump (lab_over
));
2698 emit_label (lab_false
);
2701 /* ... otherwise out of the overflow area. */
2703 /* Care for on-stack alignment if needed. */
2704 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2708 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2709 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2710 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2714 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2716 emit_move_insn (addr_rtx
, r
);
2719 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2720 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2721 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2722 TREE_SIDE_EFFECTS (t
) = 1;
2723 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2726 emit_label (lab_over
);
2731 /* Return nonzero if OP is general operand representable on x86_64. */
2734 x86_64_general_operand (op
, mode
)
2736 enum machine_mode mode
;
2739 return general_operand (op
, mode
);
2740 if (nonimmediate_operand (op
, mode
))
2742 return x86_64_sign_extended_value (op
);
2745 /* Return nonzero if OP is general operand representable on x86_64
2746 as either sign extended or zero extended constant. */
2749 x86_64_szext_general_operand (op
, mode
)
2751 enum machine_mode mode
;
2754 return general_operand (op
, mode
);
2755 if (nonimmediate_operand (op
, mode
))
2757 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2760 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2763 x86_64_nonmemory_operand (op
, mode
)
2765 enum machine_mode mode
;
2768 return nonmemory_operand (op
, mode
);
2769 if (register_operand (op
, mode
))
2771 return x86_64_sign_extended_value (op
);
2774 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2777 x86_64_movabs_operand (op
, mode
)
2779 enum machine_mode mode
;
2781 if (!TARGET_64BIT
|| !flag_pic
)
2782 return nonmemory_operand (op
, mode
);
2783 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2785 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2790 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2793 x86_64_szext_nonmemory_operand (op
, mode
)
2795 enum machine_mode mode
;
2798 return nonmemory_operand (op
, mode
);
2799 if (register_operand (op
, mode
))
2801 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2804 /* Return nonzero if OP is immediate operand representable on x86_64. */
2807 x86_64_immediate_operand (op
, mode
)
2809 enum machine_mode mode
;
2812 return immediate_operand (op
, mode
);
2813 return x86_64_sign_extended_value (op
);
2816 /* Return nonzero if OP is immediate operand representable on x86_64. */
2819 x86_64_zext_immediate_operand (op
, mode
)
2821 enum machine_mode mode ATTRIBUTE_UNUSED
;
2823 return x86_64_zero_extended_value (op
);
2826 /* Return nonzero if OP is (const_int 1), else return zero. */
2829 const_int_1_operand (op
, mode
)
2831 enum machine_mode mode ATTRIBUTE_UNUSED
;
2833 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2836 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2837 reference and a constant. */
2840 symbolic_operand (op
, mode
)
2842 enum machine_mode mode ATTRIBUTE_UNUSED
;
2844 switch (GET_CODE (op
))
2852 if (GET_CODE (op
) == SYMBOL_REF
2853 || GET_CODE (op
) == LABEL_REF
2854 || (GET_CODE (op
) == UNSPEC
2855 && (XINT (op
, 1) == UNSPEC_GOT
2856 || XINT (op
, 1) == UNSPEC_GOTOFF
2857 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
2859 if (GET_CODE (op
) != PLUS
2860 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2864 if (GET_CODE (op
) == SYMBOL_REF
2865 || GET_CODE (op
) == LABEL_REF
)
2867 /* Only @GOTOFF gets offsets. */
2868 if (GET_CODE (op
) != UNSPEC
2869 || XINT (op
, 1) != UNSPEC_GOTOFF
)
2872 op
= XVECEXP (op
, 0, 0);
2873 if (GET_CODE (op
) == SYMBOL_REF
2874 || GET_CODE (op
) == LABEL_REF
)
2883 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2886 pic_symbolic_operand (op
, mode
)
2888 enum machine_mode mode ATTRIBUTE_UNUSED
;
2890 if (GET_CODE (op
) != CONST
)
2895 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
2900 if (GET_CODE (op
) == UNSPEC
)
2902 if (GET_CODE (op
) != PLUS
2903 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
2906 if (GET_CODE (op
) == UNSPEC
)
2912 /* Return true if OP is a symbolic operand that resolves locally. */
2915 local_symbolic_operand (op
, mode
)
2917 enum machine_mode mode ATTRIBUTE_UNUSED
;
2919 if (GET_CODE (op
) == LABEL_REF
)
2922 if (GET_CODE (op
) == CONST
2923 && GET_CODE (XEXP (op
, 0)) == PLUS
2924 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
2925 op
= XEXP (XEXP (op
, 0), 0);
2927 if (GET_CODE (op
) != SYMBOL_REF
)
2930 /* These we've been told are local by varasm and encode_section_info
2932 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
2935 /* There is, however, a not insubstantial body of code in the rest of
2936 the compiler that assumes it can just stick the results of
2937 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2938 /* ??? This is a hack. Should update the body of the compiler to
2939 always create a DECL an invoke targetm.encode_section_info. */
2940 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
2941 internal_label_prefix_len
) == 0)
2947 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2950 tls_symbolic_operand (op
, mode
)
2952 enum machine_mode mode ATTRIBUTE_UNUSED
;
2954 const char *symbol_str
;
2956 if (GET_CODE (op
) != SYMBOL_REF
)
2958 symbol_str
= XSTR (op
, 0);
2960 if (symbol_str
[0] != '%')
2962 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
2966 tls_symbolic_operand_1 (op
, kind
)
2968 enum tls_model kind
;
2970 const char *symbol_str
;
2972 if (GET_CODE (op
) != SYMBOL_REF
)
2974 symbol_str
= XSTR (op
, 0);
2976 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
2980 global_dynamic_symbolic_operand (op
, mode
)
2982 enum machine_mode mode ATTRIBUTE_UNUSED
;
2984 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
2988 local_dynamic_symbolic_operand (op
, mode
)
2990 enum machine_mode mode ATTRIBUTE_UNUSED
;
2992 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
2996 initial_exec_symbolic_operand (op
, mode
)
2998 enum machine_mode mode ATTRIBUTE_UNUSED
;
3000 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3004 local_exec_symbolic_operand (op
, mode
)
3006 enum machine_mode mode ATTRIBUTE_UNUSED
;
3008 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3011 /* Test for a valid operand for a call instruction. Don't allow the
3012 arg pointer register or virtual regs since they may decay into
3013 reg + const, which the patterns can't handle. */
3016 call_insn_operand (op
, mode
)
3018 enum machine_mode mode ATTRIBUTE_UNUSED
;
3020 /* Disallow indirect through a virtual register. This leads to
3021 compiler aborts when trying to eliminate them. */
3022 if (GET_CODE (op
) == REG
3023 && (op
== arg_pointer_rtx
3024 || op
== frame_pointer_rtx
3025 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3026 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3029 /* Disallow `call 1234'. Due to varying assembler lameness this
3030 gets either rejected or translated to `call .+1234'. */
3031 if (GET_CODE (op
) == CONST_INT
)
3034 /* Explicitly allow SYMBOL_REF even if pic. */
3035 if (GET_CODE (op
) == SYMBOL_REF
)
3038 /* Otherwise we can allow any general_operand in the address. */
3039 return general_operand (op
, Pmode
);
3043 constant_call_address_operand (op
, mode
)
3045 enum machine_mode mode ATTRIBUTE_UNUSED
;
3047 if (GET_CODE (op
) == CONST
3048 && GET_CODE (XEXP (op
, 0)) == PLUS
3049 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3050 op
= XEXP (XEXP (op
, 0), 0);
3051 return GET_CODE (op
) == SYMBOL_REF
;
3054 /* Match exactly zero and one. */
3057 const0_operand (op
, mode
)
3059 enum machine_mode mode
;
3061 return op
== CONST0_RTX (mode
);
3065 const1_operand (op
, mode
)
3067 enum machine_mode mode ATTRIBUTE_UNUSED
;
3069 return op
== const1_rtx
;
3072 /* Match 2, 4, or 8. Used for leal multiplicands. */
3075 const248_operand (op
, mode
)
3077 enum machine_mode mode ATTRIBUTE_UNUSED
;
3079 return (GET_CODE (op
) == CONST_INT
3080 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3083 /* True if this is a constant appropriate for an increment or decremenmt. */
3086 incdec_operand (op
, mode
)
3088 enum machine_mode mode ATTRIBUTE_UNUSED
;
3090 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3091 registers, since carry flag is not set. */
3092 if (TARGET_PENTIUM4
&& !optimize_size
)
3094 return op
== const1_rtx
|| op
== constm1_rtx
;
3097 /* Return nonzero if OP is acceptable as operand of DImode shift
3101 shiftdi_operand (op
, mode
)
3103 enum machine_mode mode ATTRIBUTE_UNUSED
;
3106 return nonimmediate_operand (op
, mode
);
3108 return register_operand (op
, mode
);
3111 /* Return false if this is the stack pointer, or any other fake
3112 register eliminable to the stack pointer. Otherwise, this is
3115 This is used to prevent esp from being used as an index reg.
3116 Which would only happen in pathological cases. */
3119 reg_no_sp_operand (op
, mode
)
3121 enum machine_mode mode
;
3124 if (GET_CODE (t
) == SUBREG
)
3126 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3129 return register_operand (op
, mode
);
3133 mmx_reg_operand (op
, mode
)
3135 enum machine_mode mode ATTRIBUTE_UNUSED
;
3137 return MMX_REG_P (op
);
3140 /* Return false if this is any eliminable register. Otherwise
3144 general_no_elim_operand (op
, mode
)
3146 enum machine_mode mode
;
3149 if (GET_CODE (t
) == SUBREG
)
3151 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3152 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3153 || t
== virtual_stack_dynamic_rtx
)
3156 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3157 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3160 return general_operand (op
, mode
);
3163 /* Return false if this is any eliminable register. Otherwise
3164 register_operand or const_int. */
3167 nonmemory_no_elim_operand (op
, mode
)
3169 enum machine_mode mode
;
3172 if (GET_CODE (t
) == SUBREG
)
3174 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3175 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3176 || t
== virtual_stack_dynamic_rtx
)
3179 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3182 /* Return true if op is a Q_REGS class register. */
3185 q_regs_operand (op
, mode
)
3187 enum machine_mode mode
;
3189 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3191 if (GET_CODE (op
) == SUBREG
)
3192 op
= SUBREG_REG (op
);
3193 return ANY_QI_REG_P (op
);
3196 /* Return true if op is a NON_Q_REGS class register. */
3199 non_q_regs_operand (op
, mode
)
3201 enum machine_mode mode
;
3203 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3205 if (GET_CODE (op
) == SUBREG
)
3206 op
= SUBREG_REG (op
);
3207 return NON_QI_REG_P (op
);
3210 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3213 sse_comparison_operator (op
, mode
)
3215 enum machine_mode mode ATTRIBUTE_UNUSED
;
3217 enum rtx_code code
= GET_CODE (op
);
3220 /* Operations supported directly. */
3230 /* These are equivalent to ones above in non-IEEE comparisons. */
3237 return !TARGET_IEEE_FP
;
3242 /* Return 1 if OP is a valid comparison operator in valid mode. */
3244 ix86_comparison_operator (op
, mode
)
3246 enum machine_mode mode
;
3248 enum machine_mode inmode
;
3249 enum rtx_code code
= GET_CODE (op
);
3250 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3252 if (GET_RTX_CLASS (code
) != '<')
3254 inmode
= GET_MODE (XEXP (op
, 0));
3256 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3258 enum rtx_code second_code
, bypass_code
;
3259 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3260 return (bypass_code
== NIL
&& second_code
== NIL
);
3267 if (inmode
== CCmode
|| inmode
== CCGCmode
3268 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3271 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3272 if (inmode
== CCmode
)
3276 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3284 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3287 fcmov_comparison_operator (op
, mode
)
3289 enum machine_mode mode
;
3291 enum machine_mode inmode
;
3292 enum rtx_code code
= GET_CODE (op
);
3293 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3295 if (GET_RTX_CLASS (code
) != '<')
3297 inmode
= GET_MODE (XEXP (op
, 0));
3298 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3300 enum rtx_code second_code
, bypass_code
;
3301 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3302 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3304 code
= ix86_fp_compare_code_to_integer (code
);
3306 /* i387 supports just limited amount of conditional codes. */
3309 case LTU
: case GTU
: case LEU
: case GEU
:
3310 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3313 case ORDERED
: case UNORDERED
:
3321 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3324 promotable_binary_operator (op
, mode
)
3326 enum machine_mode mode ATTRIBUTE_UNUSED
;
3328 switch (GET_CODE (op
))
3331 /* Modern CPUs have same latency for HImode and SImode multiply,
3332 but 386 and 486 do HImode multiply faster. */
3333 return ix86_cpu
> PROCESSOR_I486
;
3345 /* Nearly general operand, but accept any const_double, since we wish
3346 to be able to drop them into memory rather than have them get pulled
3350 cmp_fp_expander_operand (op
, mode
)
3352 enum machine_mode mode
;
3354 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3356 if (GET_CODE (op
) == CONST_DOUBLE
)
3358 return general_operand (op
, mode
);
3361 /* Match an SI or HImode register for a zero_extract. */
3364 ext_register_operand (op
, mode
)
3366 enum machine_mode mode ATTRIBUTE_UNUSED
;
3369 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3370 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3373 if (!register_operand (op
, VOIDmode
))
3376 /* Be curefull to accept only registers having upper parts. */
3377 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3378 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3381 /* Return 1 if this is a valid binary floating-point operation.
3382 OP is the expression matched, and MODE is its mode. */
3385 binary_fp_operator (op
, mode
)
3387 enum machine_mode mode
;
3389 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3392 switch (GET_CODE (op
))
3398 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3406 mult_operator (op
, mode
)
3408 enum machine_mode mode ATTRIBUTE_UNUSED
;
3410 return GET_CODE (op
) == MULT
;
3414 div_operator (op
, mode
)
3416 enum machine_mode mode ATTRIBUTE_UNUSED
;
3418 return GET_CODE (op
) == DIV
;
3422 arith_or_logical_operator (op
, mode
)
3424 enum machine_mode mode
;
3426 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3427 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3428 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3431 /* Returns 1 if OP is memory operand with a displacement. */
3434 memory_displacement_operand (op
, mode
)
3436 enum machine_mode mode
;
3438 struct ix86_address parts
;
3440 if (! memory_operand (op
, mode
))
3443 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3446 return parts
.disp
!= NULL_RTX
;
3449 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3450 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3452 ??? It seems likely that this will only work because cmpsi is an
3453 expander, and no actual insns use this. */
3456 cmpsi_operand (op
, mode
)
3458 enum machine_mode mode
;
3460 if (nonimmediate_operand (op
, mode
))
3463 if (GET_CODE (op
) == AND
3464 && GET_MODE (op
) == SImode
3465 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3466 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3467 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3468 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3469 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3470 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3476 /* Returns 1 if OP is memory operand that can not be represented by the
3480 long_memory_operand (op
, mode
)
3482 enum machine_mode mode
;
3484 if (! memory_operand (op
, mode
))
3487 return memory_address_length (op
) != 0;
3490 /* Return nonzero if the rtx is known aligned. */
3493 aligned_operand (op
, mode
)
3495 enum machine_mode mode
;
3497 struct ix86_address parts
;
3499 if (!general_operand (op
, mode
))
3502 /* Registers and immediate operands are always "aligned". */
3503 if (GET_CODE (op
) != MEM
)
3506 /* Don't even try to do any aligned optimizations with volatiles. */
3507 if (MEM_VOLATILE_P (op
))
3512 /* Pushes and pops are only valid on the stack pointer. */
3513 if (GET_CODE (op
) == PRE_DEC
3514 || GET_CODE (op
) == POST_INC
)
3517 /* Decode the address. */
3518 if (! ix86_decompose_address (op
, &parts
))
3521 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3522 parts
.base
= SUBREG_REG (parts
.base
);
3523 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3524 parts
.index
= SUBREG_REG (parts
.index
);
3526 /* Look for some component that isn't known to be aligned. */
3530 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3535 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3540 if (GET_CODE (parts
.disp
) != CONST_INT
3541 || (INTVAL (parts
.disp
) & 3) != 0)
3545 /* Didn't find one -- this must be an aligned address. */
3549 /* Return true if the constant is something that can be loaded with
3550 a special instruction. Only handle 0.0 and 1.0; others are less
3554 standard_80387_constant_p (x
)
3557 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3559 /* Note that on the 80387, other constants, such as pi, that we should support
3560 too. On some machines, these are much slower to load as standard constant,
3561 than to load from doubles in memory. */
3562 if (x
== CONST0_RTX (GET_MODE (x
)))
3564 if (x
== CONST1_RTX (GET_MODE (x
)))
3569 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3572 standard_sse_constant_p (x
)
3575 if (GET_CODE (x
) != CONST_DOUBLE
)
3577 return (x
== CONST0_RTX (GET_MODE (x
)));
3580 /* Returns 1 if OP contains a symbol reference */
3583 symbolic_reference_mentioned_p (op
)
3586 register const char *fmt
;
3589 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3592 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3593 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3599 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3600 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3604 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3611 /* Return 1 if it is appropriate to emit `ret' instructions in the
3612 body of a function. Do this only if the epilogue is simple, needing a
3613 couple of insns. Prior to reloading, we can't tell how many registers
3614 must be saved, so return 0 then. Return 0 if there is no frame
3615 marker to de-allocate.
3617 If NON_SAVING_SETJMP is defined and true, then it is not possible
3618 for the epilogue to be simple, so return 0. This is a special case
3619 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3620 until final, but jump_optimize may need to know sooner if a
3624 ix86_can_use_return_insn_p ()
3626 struct ix86_frame frame
;
3628 #ifdef NON_SAVING_SETJMP
3629 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3633 if (! reload_completed
|| frame_pointer_needed
)
3636 /* Don't allow more than 32 pop, since that's all we can do
3637 with one instruction. */
3638 if (current_function_pops_args
3639 && current_function_args_size
>= 32768)
3642 ix86_compute_frame_layout (&frame
);
3643 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3646 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3648 x86_64_sign_extended_value (value
)
3651 switch (GET_CODE (value
))
3653 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3654 to be at least 32 and this all acceptable constants are
3655 represented as CONST_INT. */
3657 if (HOST_BITS_PER_WIDE_INT
== 32)
3661 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3662 return trunc_int_for_mode (val
, SImode
) == val
;
3666 /* For certain code models, the symbolic references are known to fit. */
3668 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
;
3670 /* For certain code models, the code is near as well. */
3672 return ix86_cmodel
!= CM_LARGE
&& ix86_cmodel
!= CM_SMALL_PIC
;
3674 /* We also may accept the offsetted memory references in certain special
3677 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
3678 && XINT (XEXP (value
, 0), 1) == UNSPEC_GOTPCREL
)
3680 else if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3682 rtx op1
= XEXP (XEXP (value
, 0), 0);
3683 rtx op2
= XEXP (XEXP (value
, 0), 1);
3684 HOST_WIDE_INT offset
;
3686 if (ix86_cmodel
== CM_LARGE
)
3688 if (GET_CODE (op2
) != CONST_INT
)
3690 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3691 switch (GET_CODE (op1
))
3694 /* For CM_SMALL assume that latest object is 1MB before
3695 end of 31bits boundary. We may also accept pretty
3696 large negative constants knowing that all objects are
3697 in the positive half of address space. */
3698 if (ix86_cmodel
== CM_SMALL
3699 && offset
< 1024*1024*1024
3700 && trunc_int_for_mode (offset
, SImode
) == offset
)
3702 /* For CM_KERNEL we know that all object resist in the
3703 negative half of 32bits address space. We may not
3704 accept negative offsets, since they may be just off
3705 and we may accept pretty large positive ones. */
3706 if (ix86_cmodel
== CM_KERNEL
3708 && trunc_int_for_mode (offset
, SImode
) == offset
)
3712 /* These conditions are similar to SYMBOL_REF ones, just the
3713 constraints for code models differ. */
3714 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3715 && offset
< 1024*1024*1024
3716 && trunc_int_for_mode (offset
, SImode
) == offset
)
3718 if (ix86_cmodel
== CM_KERNEL
3720 && trunc_int_for_mode (offset
, SImode
) == offset
)
3733 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3735 x86_64_zero_extended_value (value
)
3738 switch (GET_CODE (value
))
3741 if (HOST_BITS_PER_WIDE_INT
== 32)
3742 return (GET_MODE (value
) == VOIDmode
3743 && !CONST_DOUBLE_HIGH (value
));
3747 if (HOST_BITS_PER_WIDE_INT
== 32)
3748 return INTVAL (value
) >= 0;
3750 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3753 /* For certain code models, the symbolic references are known to fit. */
3755 return ix86_cmodel
== CM_SMALL
;
3757 /* For certain code models, the code is near as well. */
3759 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3761 /* We also may accept the offsetted memory references in certain special
3764 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3766 rtx op1
= XEXP (XEXP (value
, 0), 0);
3767 rtx op2
= XEXP (XEXP (value
, 0), 1);
3769 if (ix86_cmodel
== CM_LARGE
)
3771 switch (GET_CODE (op1
))
3775 /* For small code model we may accept pretty large positive
3776 offsets, since one bit is available for free. Negative
3777 offsets are limited by the size of NULL pointer area
3778 specified by the ABI. */
3779 if (ix86_cmodel
== CM_SMALL
3780 && GET_CODE (op2
) == CONST_INT
3781 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3782 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3785 /* ??? For the kernel, we may accept adjustment of
3786 -0x10000000, since we know that it will just convert
3787 negative address space to positive, but perhaps this
3788 is not worthwhile. */
3791 /* These conditions are similar to SYMBOL_REF ones, just the
3792 constraints for code models differ. */
3793 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3794 && GET_CODE (op2
) == CONST_INT
3795 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3796 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3810 /* Value should be nonzero if functions must have frame pointers.
3811 Zero means the frame pointer need not be set up (and parms may
3812 be accessed via the stack pointer) in functions that seem suitable. */
3815 ix86_frame_pointer_required ()
3817 /* If we accessed previous frames, then the generated code expects
3818 to be able to access the saved ebp value in our frame. */
3819 if (cfun
->machine
->accesses_prev_frame
)
3822 /* Several x86 os'es need a frame pointer for other reasons,
3823 usually pertaining to setjmp. */
3824 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
3827 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3828 the frame pointer by default. Turn it back on now if we've not
3829 got a leaf function. */
3830 if (TARGET_OMIT_LEAF_FRAME_POINTER
3831 && (!current_function_is_leaf
|| current_function_profile
))
3837 /* Record that the current function accesses previous call frames. */
3840 ix86_setup_frame_addresses ()
3842 cfun
->machine
->accesses_prev_frame
= 1;
3845 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3846 # define USE_HIDDEN_LINKONCE 1
3848 # define USE_HIDDEN_LINKONCE 0
3851 static int pic_labels_used
;
3853 /* Fills in the label name that should be used for a pc thunk for
3854 the given register. */
3857 get_pc_thunk_name (name
, regno
)
3861 if (USE_HIDDEN_LINKONCE
)
3862 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
3864 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
3868 /* This function generates code for -fpic that loads %ebx with
3869 the return address of the caller and then returns. */
3872 ix86_asm_file_end (file
)
3878 for (regno
= 0; regno
< 8; ++regno
)
3882 if (! ((pic_labels_used
>> regno
) & 1))
3885 get_pc_thunk_name (name
, regno
);
3887 if (USE_HIDDEN_LINKONCE
)
3891 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
3893 TREE_PUBLIC (decl
) = 1;
3894 TREE_STATIC (decl
) = 1;
3895 DECL_ONE_ONLY (decl
) = 1;
3897 (*targetm
.asm_out
.unique_section
) (decl
, 0);
3898 named_section (decl
, NULL
, 0);
3900 ASM_GLOBALIZE_LABEL (file
, name
);
3901 fputs ("\t.hidden\t", file
);
3902 assemble_name (file
, name
);
3904 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
3909 ASM_OUTPUT_LABEL (file
, name
);
3912 xops
[0] = gen_rtx_REG (SImode
, regno
);
3913 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
3914 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
3915 output_asm_insn ("ret", xops
);
3919 /* Emit code for the SET_GOT patterns. */
3922 output_set_got (dest
)
3928 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3930 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
3932 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
3935 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
3937 output_asm_insn ("call\t%a2", xops
);
3939 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
3940 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
3943 output_asm_insn ("pop{l}\t%0", xops
);
3948 get_pc_thunk_name (name
, REGNO (dest
));
3949 pic_labels_used
|= 1 << REGNO (dest
);
3951 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
3952 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
3953 output_asm_insn ("call\t%X2", xops
);
3956 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
3957 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
3959 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
3964 /* Generate an "push" pattern for input ARG. */
3970 return gen_rtx_SET (VOIDmode
,
3972 gen_rtx_PRE_DEC (Pmode
,
3973 stack_pointer_rtx
)),
3977 /* Return >= 0 if there is an unused call-clobbered register available
3978 for the entire function. */
3981 ix86_select_alt_pic_regnum ()
3983 if (current_function_is_leaf
&& !current_function_profile
)
3986 for (i
= 2; i
>= 0; --i
)
3987 if (!regs_ever_live
[i
])
3991 return INVALID_REGNUM
;
3994 /* Return 1 if we need to save REGNO. */
3996 ix86_save_reg (regno
, maybe_eh_return
)
3998 int maybe_eh_return
;
4000 if (pic_offset_table_rtx
4001 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4002 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4003 || current_function_profile
4004 || current_function_calls_eh_return
))
4006 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4011 if (current_function_calls_eh_return
&& maybe_eh_return
)
4016 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4017 if (test
== INVALID_REGNUM
)
4024 return (regs_ever_live
[regno
]
4025 && !call_used_regs
[regno
]
4026 && !fixed_regs
[regno
]
4027 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4030 /* Return number of registers to be saved on the stack. */
4038 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4039 if (ix86_save_reg (regno
, true))
4044 /* Return the offset between two registers, one to be eliminated, and the other
4045 its replacement, at the start of a routine. */
4048 ix86_initial_elimination_offset (from
, to
)
4052 struct ix86_frame frame
;
4053 ix86_compute_frame_layout (&frame
);
4055 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4056 return frame
.hard_frame_pointer_offset
;
4057 else if (from
== FRAME_POINTER_REGNUM
4058 && to
== HARD_FRAME_POINTER_REGNUM
)
4059 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4062 if (to
!= STACK_POINTER_REGNUM
)
4064 else if (from
== ARG_POINTER_REGNUM
)
4065 return frame
.stack_pointer_offset
;
4066 else if (from
!= FRAME_POINTER_REGNUM
)
4069 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4073 /* Fill structure ix86_frame about frame of currently computed function. */
4076 ix86_compute_frame_layout (frame
)
4077 struct ix86_frame
*frame
;
4079 HOST_WIDE_INT total_size
;
4080 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4082 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4083 HOST_WIDE_INT size
= get_frame_size ();
4085 frame
->nregs
= ix86_nsaved_regs ();
4088 /* Skip return address and saved base pointer. */
4089 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4091 frame
->hard_frame_pointer_offset
= offset
;
4093 /* Do some sanity checking of stack_alignment_needed and
4094 preferred_alignment, since i386 port is the only using those features
4095 that may break easily. */
4097 if (size
&& !stack_alignment_needed
)
4099 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4101 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4103 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4106 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4107 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4109 /* Register save area */
4110 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4113 if (ix86_save_varrargs_registers
)
4115 offset
+= X86_64_VARARGS_SIZE
;
4116 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4119 frame
->va_arg_size
= 0;
4121 /* Align start of frame for local function. */
4122 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4123 & -stack_alignment_needed
) - offset
;
4125 offset
+= frame
->padding1
;
4127 /* Frame pointer points here. */
4128 frame
->frame_pointer_offset
= offset
;
4132 /* Add outgoing arguments area. Can be skipped if we eliminated
4133 all the function calls as dead code. */
4134 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4136 offset
+= current_function_outgoing_args_size
;
4137 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4140 frame
->outgoing_arguments_size
= 0;
4142 /* Align stack boundary. Only needed if we're calling another function
4144 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4145 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4146 & -preferred_alignment
) - offset
;
4148 frame
->padding2
= 0;
4150 offset
+= frame
->padding2
;
4152 /* We've reached end of stack frame. */
4153 frame
->stack_pointer_offset
= offset
;
4155 /* Size prologue needs to allocate. */
4156 frame
->to_allocate
=
4157 (size
+ frame
->padding1
+ frame
->padding2
4158 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4160 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4161 && current_function_is_leaf
)
4163 frame
->red_zone_size
= frame
->to_allocate
;
4164 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4165 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4168 frame
->red_zone_size
= 0;
4169 frame
->to_allocate
-= frame
->red_zone_size
;
4170 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4172 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4173 fprintf (stderr
, "size: %i\n", size
);
4174 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4175 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4176 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4177 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4178 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4179 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4180 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4181 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4182 frame
->hard_frame_pointer_offset
);
4183 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4187 /* Emit code to save registers in the prologue. */
4190 ix86_emit_save_regs ()
4195 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4196 if (ix86_save_reg (regno
, true))
4198 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4199 RTX_FRAME_RELATED_P (insn
) = 1;
4203 /* Emit code to save registers using MOV insns. First register
4204 is restored from POINTER + OFFSET. */
4206 ix86_emit_save_regs_using_mov (pointer
, offset
)
4208 HOST_WIDE_INT offset
;
4213 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4214 if (ix86_save_reg (regno
, true))
4216 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4218 gen_rtx_REG (Pmode
, regno
));
4219 RTX_FRAME_RELATED_P (insn
) = 1;
4220 offset
+= UNITS_PER_WORD
;
4224 /* Expand the prologue into a bunch of separate insns. */
4227 ix86_expand_prologue ()
4231 struct ix86_frame frame
;
4233 HOST_WIDE_INT allocate
;
4237 use_fast_prologue_epilogue
4238 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4239 if (TARGET_PROLOGUE_USING_MOVE
)
4240 use_mov
= use_fast_prologue_epilogue
;
4242 ix86_compute_frame_layout (&frame
);
4244 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4245 slower on all targets. Also sdb doesn't like it. */
4247 if (frame_pointer_needed
)
4249 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4250 RTX_FRAME_RELATED_P (insn
) = 1;
4252 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4253 RTX_FRAME_RELATED_P (insn
) = 1;
4256 allocate
= frame
.to_allocate
;
4257 /* In case we are dealing only with single register and empty frame,
4258 push is equivalent of the mov+add sequence. */
4259 if (allocate
== 0 && frame
.nregs
<= 1)
4263 ix86_emit_save_regs ();
4265 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4269 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4271 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4272 (stack_pointer_rtx
, stack_pointer_rtx
,
4273 GEN_INT (-allocate
)));
4274 RTX_FRAME_RELATED_P (insn
) = 1;
4278 /* ??? Is this only valid for Win32? */
4285 arg0
= gen_rtx_REG (SImode
, 0);
4286 emit_move_insn (arg0
, GEN_INT (allocate
));
4288 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4289 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4290 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4292 CALL_INSN_FUNCTION_USAGE (insn
)
4293 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4294 CALL_INSN_FUNCTION_USAGE (insn
));
4298 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4299 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4301 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4302 -frame
.nregs
* UNITS_PER_WORD
);
4305 #ifdef SUBTARGET_PROLOGUE
4309 pic_reg_used
= false;
4310 if (pic_offset_table_rtx
4311 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4312 || current_function_profile
))
4314 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4316 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4317 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4319 pic_reg_used
= true;
4324 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4326 /* Even with accurate pre-reload life analysis, we can wind up
4327 deleting all references to the pic register after reload.
4328 Consider if cross-jumping unifies two sides of a branch
4329 controled by a comparison vs the only read from a global.
4330 In which case, allow the set_got to be deleted, though we're
4331 too late to do anything about the ebx save in the prologue. */
4332 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4335 /* Prevent function calls from be scheduled before the call to mcount.
4336 In the pic_reg_used case, make sure that the got load isn't deleted. */
4337 if (current_function_profile
)
4338 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4341 /* Emit code to restore saved registers using MOV insns. First register
4342 is restored from POINTER + OFFSET. */
4344 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4347 int maybe_eh_return
;
4351 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4352 if (ix86_save_reg (regno
, maybe_eh_return
))
4354 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4355 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4357 offset
+= UNITS_PER_WORD
;
4361 /* Restore function stack, frame, and registers. */
4364 ix86_expand_epilogue (style
)
4368 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4369 struct ix86_frame frame
;
4370 HOST_WIDE_INT offset
;
4372 ix86_compute_frame_layout (&frame
);
4374 /* Calculate start of saved registers relative to ebp. Special care
4375 must be taken for the normal return case of a function using
4376 eh_return: the eax and edx registers are marked as saved, but not
4377 restored along this path. */
4378 offset
= frame
.nregs
;
4379 if (current_function_calls_eh_return
&& style
!= 2)
4381 offset
*= -UNITS_PER_WORD
;
4383 /* If we're only restoring one register and sp is not valid then
4384 using a move instruction to restore the register since it's
4385 less work than reloading sp and popping the register.
4387 The default code result in stack adjustment using add/lea instruction,
4388 while this code results in LEAVE instruction (or discrete equivalent),
4389 so it is profitable in some other cases as well. Especially when there
4390 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4391 and there is exactly one register to pop. This heruistic may need some
4392 tuning in future. */
4393 if ((!sp_valid
&& frame
.nregs
<= 1)
4394 || (TARGET_EPILOGUE_USING_MOVE
4395 && use_fast_prologue_epilogue
4396 && (frame
.nregs
> 1 || frame
.to_allocate
))
4397 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4398 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4399 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4400 || current_function_calls_eh_return
)
4402 /* Restore registers. We can use ebp or esp to address the memory
4403 locations. If both are available, default to ebp, since offsets
4404 are known to be small. Only exception is esp pointing directly to the
4405 end of block of saved registers, where we may simplify addressing
4408 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4409 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4410 frame
.to_allocate
, style
== 2);
4412 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4413 offset
, style
== 2);
4415 /* eh_return epilogues need %ecx added to the stack pointer. */
4418 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4420 if (frame_pointer_needed
)
4422 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4423 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4424 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4426 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4427 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4429 emit_insn (gen_pro_epilogue_adjust_stack
4430 (stack_pointer_rtx
, sa
, const0_rtx
));
4434 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4435 tmp
= plus_constant (tmp
, (frame
.to_allocate
4436 + frame
.nregs
* UNITS_PER_WORD
));
4437 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4440 else if (!frame_pointer_needed
)
4441 emit_insn (gen_pro_epilogue_adjust_stack
4442 (stack_pointer_rtx
, stack_pointer_rtx
,
4443 GEN_INT (frame
.to_allocate
4444 + frame
.nregs
* UNITS_PER_WORD
)));
4445 /* If not an i386, mov & pop is faster than "leave". */
4446 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4447 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4450 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4451 hard_frame_pointer_rtx
,
4454 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4456 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4461 /* First step is to deallocate the stack frame so that we can
4462 pop the registers. */
4465 if (!frame_pointer_needed
)
4467 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4468 hard_frame_pointer_rtx
,
4471 else if (frame
.to_allocate
)
4472 emit_insn (gen_pro_epilogue_adjust_stack
4473 (stack_pointer_rtx
, stack_pointer_rtx
,
4474 GEN_INT (frame
.to_allocate
)));
4476 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4477 if (ix86_save_reg (regno
, false))
4480 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4482 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4484 if (frame_pointer_needed
)
4486 /* Leave results in shorter dependency chains on CPUs that are
4487 able to grok it fast. */
4488 if (TARGET_USE_LEAVE
)
4489 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4490 else if (TARGET_64BIT
)
4491 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4493 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4497 /* Sibcall epilogues don't want a return instruction. */
4501 if (current_function_pops_args
&& current_function_args_size
)
4503 rtx popc
= GEN_INT (current_function_pops_args
);
4505 /* i386 can only pop 64K bytes. If asked to pop more, pop
4506 return address, do explicit add, and jump indirectly to the
4509 if (current_function_pops_args
>= 65536)
4511 rtx ecx
= gen_rtx_REG (SImode
, 2);
4513 /* There are is no "pascal" calling convention in 64bit ABI. */
4517 emit_insn (gen_popsi1 (ecx
));
4518 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4519 emit_jump_insn (gen_return_indirect_internal (ecx
));
4522 emit_jump_insn (gen_return_pop_internal (popc
));
4525 emit_jump_insn (gen_return_internal ());
4528 /* Reset from the function's potential modifications. */
4531 ix86_output_function_epilogue (file
, size
)
4532 FILE *file ATTRIBUTE_UNUSED
;
4533 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
4535 if (pic_offset_table_rtx
)
4536 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4539 /* Extract the parts of an RTL expression that is a valid memory address
4540 for an instruction. Return 0 if the structure of the address is
4541 grossly off. Return -1 if the address contains ASHIFT, so it is not
4542 strictly valid, but still used for computing length of lea instruction.
4546 ix86_decompose_address (addr
, out
)
4548 struct ix86_address
*out
;
4550 rtx base
= NULL_RTX
;
4551 rtx index
= NULL_RTX
;
4552 rtx disp
= NULL_RTX
;
4553 HOST_WIDE_INT scale
= 1;
4554 rtx scale_rtx
= NULL_RTX
;
4557 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4559 else if (GET_CODE (addr
) == PLUS
)
4561 rtx op0
= XEXP (addr
, 0);
4562 rtx op1
= XEXP (addr
, 1);
4563 enum rtx_code code0
= GET_CODE (op0
);
4564 enum rtx_code code1
= GET_CODE (op1
);
4566 if (code0
== REG
|| code0
== SUBREG
)
4568 if (code1
== REG
|| code1
== SUBREG
)
4569 index
= op0
, base
= op1
; /* index + base */
4571 base
= op0
, disp
= op1
; /* base + displacement */
4573 else if (code0
== MULT
)
4575 index
= XEXP (op0
, 0);
4576 scale_rtx
= XEXP (op0
, 1);
4577 if (code1
== REG
|| code1
== SUBREG
)
4578 base
= op1
; /* index*scale + base */
4580 disp
= op1
; /* index*scale + disp */
4582 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4584 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4585 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4586 base
= XEXP (op0
, 1);
4589 else if (code0
== PLUS
)
4591 index
= XEXP (op0
, 0); /* index + base + disp */
4592 base
= XEXP (op0
, 1);
4598 else if (GET_CODE (addr
) == MULT
)
4600 index
= XEXP (addr
, 0); /* index*scale */
4601 scale_rtx
= XEXP (addr
, 1);
4603 else if (GET_CODE (addr
) == ASHIFT
)
4607 /* We're called for lea too, which implements ashift on occasion. */
4608 index
= XEXP (addr
, 0);
4609 tmp
= XEXP (addr
, 1);
4610 if (GET_CODE (tmp
) != CONST_INT
)
4612 scale
= INTVAL (tmp
);
4613 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4619 disp
= addr
; /* displacement */
4621 /* Extract the integral value of scale. */
4624 if (GET_CODE (scale_rtx
) != CONST_INT
)
4626 scale
= INTVAL (scale_rtx
);
4629 /* Allow arg pointer and stack pointer as index if there is not scaling */
4630 if (base
&& index
&& scale
== 1
4631 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4632 || index
== stack_pointer_rtx
))
4639 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4640 if ((base
== hard_frame_pointer_rtx
4641 || base
== frame_pointer_rtx
4642 || base
== arg_pointer_rtx
) && !disp
)
4645 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4646 Avoid this by transforming to [%esi+0]. */
4647 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4648 && base
&& !index
&& !disp
4650 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4653 /* Special case: encode reg+reg instead of reg*2. */
4654 if (!base
&& index
&& scale
&& scale
== 2)
4655 base
= index
, scale
= 1;
4657 /* Special case: scaling cannot be encoded without base or displacement. */
4658 if (!base
&& !disp
&& index
&& scale
!= 1)
/* NOTE(review): this block is a garbled extraction — logical lines are split
   and interior lines (braces, cost accumulation, final return) are missing.
   From what is visible: decomposes address X via ix86_decompose_address,
   strips SUBREGs from base/index, and adjusts a cost based on displacement,
   pseudo-register use, and an AMD-K6 decode penalty.  Do not edit without
   restoring the full original text.  */
4669 /* Return cost of the memory address x.
4670 For i386, it is better to use a complex address than let gcc copy
4671 the address into a reg and make a new pseudo. But not if the address
4672 requires to two regs - that would mean more pseudos with longer
4675 ix86_address_cost (x
)
4678 struct ix86_address parts
;
4681 if (!ix86_decompose_address (x
, &parts
))
/* Look through SUBREGs so the register checks below see the inner reg.  */
4684 if (parts
.base
 && GET_CODE (parts
.base
) == SUBREG
)
4685 parts
.base
 = SUBREG_REG (parts
.base
);
4686 if (parts
.index
 && GET_CODE (parts
.index
) == SUBREG
)
4687 parts
.index
 = SUBREG_REG (parts
.index
);
4689 /* More complex memory references are better. */
4690 if (parts
.disp
 && parts
.disp
 != const0_rtx
)
4693 /* Attempt to minimize number of registers in the address. */
4695 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4697 && (!REG_P (parts
.index
)
4698 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4702 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4704 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4705 && parts
.base
 != parts
.index
)
4708 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4709 since it's predecode logic can't detect the length of instructions
4710 and it degenerates to vector decoded. Increase cost of such
4711 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4712 to split such addresses or even refuse such addresses at all.
4714 Following addressing modes are affected:
4719 The first and last case may be avoidable by explicitly coding the zero in
4720 memory address, but I don't have AMD-K6 machine handy to check this
4724 && ((!parts
.disp
 && parts
.base
 && parts
.index
 && parts
.scale
 != 1)
4725 || (parts
.disp
 && !parts
.base
 && parts
.index
 && parts
.scale
 != 1)
4726 || (!parts
.disp
 && parts
.base
 && parts
.index
 && parts
.scale
 == 1)))
/* NOTE(review): garbled extraction — line breaks are spurious and some
   original lines (returns, braces, the TARGET_64BIT split) are missing.
   Visible logic: peel a CONST wrapper, skip a PLUS with an integer/double
   offset, and recognize UNSPEC_GOTPCREL / UNSPEC_GOTOFF wrappers, returning
   the underlying SYMBOL_REF or LABEL_REF term.  */
4732 /* If X is a machine specific address (i.e. a symbol or label being
4733 referenced as a displacement from the GOT implemented using an
4734 UNSPEC), then return the base term. Otherwise return X. */
4737 ix86_find_base_term (x
)
4744 if (GET_CODE (x
) != CONST
)
/* Skip an outer (plus term const) so we look at the UNSPEC itself.  */
4747 if (GET_CODE (term
) == PLUS
4748 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4749 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4750 term
= XEXP (term
, 0);
4751 if (GET_CODE (term
) != UNSPEC
4752 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4755 term
= XVECEXP (term
, 0, 0);
4757 if (GET_CODE (term
) != SYMBOL_REF
4758 && GET_CODE (term
) != LABEL_REF
)
/* Non-64-bit path: expect (plus pic_offset_table_rtx (const ...)).  */
4764 if (GET_CODE (x
) != PLUS
4765 || XEXP (x
, 0) != pic_offset_table_rtx
4766 || GET_CODE (XEXP (x
, 1)) != CONST
)
4769 term
= XEXP (XEXP (x
, 1), 0);
4771 if (GET_CODE (term
) == PLUS
 && GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4772 term
= XEXP (term
, 0);
4774 if (GET_CODE (term
) != UNSPEC
4775 || XINT (term
, 1) != UNSPEC_GOTOFF
)
4778 term
= XVECEXP (term
, 0, 0);
4780 if (GET_CODE (term
) != SYMBOL_REF
4781 && GET_CODE (term
) != LABEL_REF
)
/* NOTE(review): garbled extraction with missing lines (case labels,
   returns).  Visible logic: rejects TLS symbols and PLUS offsets of TLS
   symbols as constants; inside a CONST wrapper only certain UNSPECs are
   accepted (local-exec TLS ones, per the visible check).  */
4787 /* Determine if a given RTX is a valid constant. We already know this
4788 satisfies CONSTANT_P. */
4791 legitimate_constant_p (x
)
4796 switch (GET_CODE (x
))
4799 /* TLS symbols are not constant. */
4800 if (tls_symbolic_operand (x
, Pmode
))
4805 inner
= XEXP (x
, 0);
4807 /* Offsets of TLS symbols are never valid.
4808 Discourage CSE from creating them. */
4809 if (GET_CODE (inner
) == PLUS
4810 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
4813 /* Only some unspecs are valid as "constants". */
4814 if (GET_CODE (inner
) == UNSPEC
)
4815 switch (XINT (inner
, 1))
4818 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4830 /* Otherwise we handle everything else in the move patterns. */
/* NOTE(review): garbled extraction; case labels are missing.  Visible
   logic: some constant codes are valid addresses only on TARGET_64BIT,
   otherwise validity requires !flag_pic plus legitimate_constant_p.  */
4834 /* Determine if a given RTX is a valid constant address. */
4837 constant_address_p (x
)
4840 switch (GET_CODE (x
))
4847 return TARGET_64BIT
;
4851 return !flag_pic
 && legitimate_constant_p (x
);
/* NOTE(review): garbled extraction with missing case labels/returns.
   Visible logic: inside a CONST only certain UNSPECs (local-exec TLS per
   the visible check) are legitimate; symbolic operands defer to
   legitimate_pic_address_disp_p.  */
4858 /* Nonzero if the constant value X is a legitimate general operand
4859 when generating PIC code. It is given that flag_pic is on and
4860 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4863 legitimate_pic_operand_p (x
)
4868 switch (GET_CODE (x
))
4871 inner
= XEXP (x
, 0);
4873 /* Only some unspecs are valid as "constants". */
4874 if (GET_CODE (inner
) == UNSPEC
)
4875 switch (XINT (inner
, 1))
4878 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
4888 return legitimate_pic_address_disp_p (x
);
/* NOTE(review): garbled extraction — many original lines (braces, returns,
   several case labels) are absent.  Visible logic: on 64-bit, allow direct
   symbol/label addresses (with a small-PIC-model +/-1GB offset window) and
   otherwise require a GOTPCREL UNSPEC over a SYMBOL_REF/LABEL_REF; on
   32-bit, strip CONST/PLUS-const wrappers and classify the UNSPEC
   (GOT/GOTOFF/GOTTPOFF/TPOFF-style per visible operand predicates).  */
4895 /* Determine if a given CONST RTX is a valid memory displacement
4899 legitimate_pic_address_disp_p (disp
)
4904 /* In 64bit mode we can allow direct addresses of symbols and labels
4905 when they are not dynamic symbols. */
4909 if (GET_CODE (disp
) == CONST
)
4911 /* ??? Handle PIC code models */
4912 if (GET_CODE (x
) == PLUS
4913 && (GET_CODE (XEXP (x
, 1)) == CONST_INT
4914 && ix86_cmodel
 == CM_SMALL_PIC
4915 && INTVAL (XEXP (x
, 1)) < 1024*1024*1024
4916 && INTVAL (XEXP (x
, 1)) > -1024*1024*1024))
4918 if (local_symbolic_operand (x
, Pmode
))
4921 if (GET_CODE (disp
) != CONST
)
4923 disp
= XEXP (disp
, 0);
4927 /* We are unsafe to allow PLUS expressions. This limit allowed distance
4928 of GOT tables. We should not need these anyway. */
4929 if (GET_CODE (disp
) != UNSPEC
4930 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
4933 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
4934 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
4940 if (GET_CODE (disp
) == PLUS
)
4942 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
4944 disp
= XEXP (disp
, 0);
4948 if (GET_CODE (disp
) != UNSPEC
)
4951 switch (XINT (disp
, 1))
4956 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
4958 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4959 case UNSPEC_GOTTPOFF
:
4962 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4964 /* ??? Could support offset here. */
4967 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
4969 /* ??? Could support offset here. */
4972 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
/* NOTE(review): garbled extraction — braces, labels (report_error,
   is_legitimate_pic), and the final returns are missing.  Visible logic:
   decompose ADDR, then validate base register, index register, scale
   factor and displacement in turn, recording a human-readable REASON and
   REASON_RTX for the TARGET_DEBUG_ADDR diagnostics at the end.  */
4978 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4979 memory address for an instruction. The MODE argument is the machine mode
4980 for the MEM expression that wants to use this address.
4982 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
4983 convert common non-canonical forms to canonical form so that they will
4987 legitimate_address_p (mode
, addr
, strict
)
4988 enum machine_mode mode
;
4992 struct ix86_address parts
;
4993 rtx base
, index
, disp
;
4994 HOST_WIDE_INT scale
;
4995 const char *reason
 = NULL
;
4996 rtx reason_rtx
 = NULL_RTX
;
4998 if (TARGET_DEBUG_ADDR
)
5001 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5002 GET_MODE_NAME (mode
), strict
);
5006 if (ix86_decompose_address (addr
, &parts
) <= 0)
5008 reason
 = "decomposition failed";
5013 index
 = parts
.index
;
5015 scale
 = parts
.scale
;
5017 /* Validate base register.
5019 Don't allow SUBREG's here, it can lead to spill failures when the base
5020 is one word out of a two word structure, which is represented internally
5028 if (GET_CODE (base
) == SUBREG
)
5029 reg
 = SUBREG_REG (base
);
5033 if (GET_CODE (reg
) != REG
)
5035 reason
 = "base is not a register";
5039 if (GET_MODE (base
) != Pmode
)
5041 reason
 = "base is not in Pmode";
5045 if ((strict
 && ! REG_OK_FOR_BASE_STRICT_P (reg
))
5046 || (! strict
 && ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5048 reason
 = "base is not valid";
5053 /* Validate index register.
5055 Don't allow SUBREG's here, it can lead to spill failures when the index
5056 is one word out of a two word structure, which is represented internally
5064 if (GET_CODE (index
) == SUBREG
)
5065 reg
 = SUBREG_REG (index
);
5069 if (GET_CODE (reg
) != REG
)
5071 reason
 = "index is not a register";
5075 if (GET_MODE (index
) != Pmode
)
5077 reason
 = "index is not in Pmode";
5081 if ((strict
 && ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5082 || (! strict
 && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5084 reason
 = "index is not valid";
5089 /* Validate scale factor. */
5092 reason_rtx
 = GEN_INT (scale
);
5095 reason
 = "scale without index";
5099 if (scale
 != 2 && scale
 != 4 && scale
 != 8)
5101 reason
 = "scale is not a valid multiplier";
5106 /* Validate displacement. */
5113 if (!x86_64_sign_extended_value (disp
))
5115 reason
 = "displacement is out of range";
5121 if (GET_CODE (disp
) == CONST_DOUBLE
)
5123 reason
 = "displacement is a const_double";
5128 if (GET_CODE (disp
) == CONST
5129 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5130 switch (XINT (XEXP (disp
, 0), 1))
5134 case UNSPEC_GOTPCREL
:
5137 goto is_legitimate_pic
;
5139 case UNSPEC_GOTTPOFF
:
5145 reason
 = "invalid address unspec";
5149 else if (flag_pic
 && SYMBOLIC_CONST (disp
))
5152 if (TARGET_64BIT
 && (index
 || base
))
5154 reason
 = "non-constant pic memory reference";
5157 if (! legitimate_pic_address_disp_p (disp
))
5159 reason
 = "displacement is an invalid pic construct";
5163 /* This code used to verify that a symbolic pic displacement
5164 includes the pic_offset_table_rtx register.
5166 While this is good idea, unfortunately these constructs may
5167 be created by "adds using lea" optimization for incorrect
5176 This code is nonsensical, but results in addressing
5177 GOT table with pic_offset_table_rtx base. We can't
5178 just refuse it easily, since it gets matched by
5179 "addsi3" pattern, that later gets split to lea in the
5180 case output register differs from input. While this
5181 can be handled by separate addsi pattern for this case
5182 that never results in lea, this seems to be easier and
5183 correct fix for crash to disable this test. */
5185 else if (!CONSTANT_ADDRESS_P (disp
))
5187 reason
 = "displacement is not constant";
5192 /* Everything looks valid. */
5193 if (TARGET_DEBUG_ADDR
)
5194 fprintf (stderr
, "Success.\n");
5198 if (TARGET_DEBUG_ADDR
)
5200 fprintf (stderr
, "Error: %s\n", reason
);
5201 debug_rtx (reason_rtx
);
5206 /* Return an unique alias set for the GOT. */
5208 static HOST_WIDE_INT
5209 ix86_GOT_alias_set ()
5211 static HOST_WIDE_INT set
= -1;
5213 set
= new_alias_set ();
/* NOTE(review): garbled extraction — braces, several returns, and some
   alternative branches are missing.  Visible logic: local symbols become
   pic_reg + @GOTOFF (direct on 64-bit); global SYMBOL_REFs load through a
   @GOTPCREL (64-bit) or pic_reg + @GOT memory reference with the GOT alias
   set and RTX_UNCHANGING_P; CONST/PLUS forms are handled recursively,
   folding constant offsets with plus_constant.  */
5217 /* Return a legitimate reference for ORIG (an address) using the
5218 register REG. If REG is 0, a new pseudo is generated.
5220 There are two types of references that must be handled:
5222 1. Global data references must load the address from the GOT, via
5223 the PIC reg. An insn is emitted to do this load, and the reg is
5226 2. Static data references, constant pool addresses, and code labels
5227 compute the address as an offset from the GOT, whose base is in
5228 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5229 differentiate them from global data objects. The returned
5230 address is the PIC reg + an unspec constant.
5232 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5233 reg also appears in the address. */
5236 legitimize_pic_address (orig
, reg
)
5244 if (local_symbolic_operand (addr
, Pmode
))
5246 /* In 64bit mode we can address such objects directly. */
5251 /* This symbol may be referenced via a displacement from the PIC
5252 base address (@GOTOFF). */
5254 if (reload_in_progress
)
5255 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5256 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5257 new = gen_rtx_CONST (Pmode
, new);
5258 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5262 emit_move_insn (reg
, new);
5267 else if (GET_CODE (addr
) == SYMBOL_REF
)
5271 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5272 new = gen_rtx_CONST (Pmode
, new);
5273 new = gen_rtx_MEM (Pmode
, new);
5274 RTX_UNCHANGING_P (new) = 1;
5275 set_mem_alias_set (new, ix86_GOT_alias_set ());
5278 reg
 = gen_reg_rtx (Pmode
);
5279 /* Use directly gen_movsi, otherwise the address is loaded
5280 into register for CSE. We don't want to CSE this addresses,
5281 instead we CSE addresses from the GOT table, so skip this. */
5282 emit_insn (gen_movsi (reg
, new));
5287 /* This symbol must be referenced via a load from the
5288 Global Offset Table (@GOT). */
5290 if (reload_in_progress
)
5291 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5292 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5293 new = gen_rtx_CONST (Pmode
, new);
5294 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5295 new = gen_rtx_MEM (Pmode
, new);
5296 RTX_UNCHANGING_P (new) = 1;
5297 set_mem_alias_set (new, ix86_GOT_alias_set ());
5300 reg
 = gen_reg_rtx (Pmode
);
5301 emit_move_insn (reg
, new);
5307 if (GET_CODE (addr
) == CONST
)
5309 addr
 = XEXP (addr
, 0);
5311 /* We must match stuff we generate before. Assume the only
5312 unspecs that can get here are ours. Not that we could do
5313 anything with them anyway... */
5314 if (GET_CODE (addr
) == UNSPEC
5315 || (GET_CODE (addr
) == PLUS
5316 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5318 if (GET_CODE (addr
) != PLUS
)
5321 if (GET_CODE (addr
) == PLUS
)
5323 rtx op0
 = XEXP (addr
, 0), op1
 = XEXP (addr
, 1);
5325 /* Check first to see if this is a constant offset from a @GOTOFF
5326 symbol reference. */
5327 if (local_symbolic_operand (op0
, Pmode
)
5328 && GET_CODE (op1
) == CONST_INT
)
5332 if (reload_in_progress
)
5333 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5334 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5336 new = gen_rtx_PLUS (Pmode
, new, op1
);
5337 new = gen_rtx_CONST (Pmode
, new);
5338 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5342 emit_move_insn (reg
, new);
5348 /* ??? We need to limit offsets here. */
/* General PLUS: legitimize both halves recursively, reusing REG for the
   first half only if it was not consumed.  */
5353 base
 = legitimize_pic_address (XEXP (addr
, 0), reg
);
5354 new = legitimize_pic_address (XEXP (addr
, 1),
5355 base
 == reg
 ? NULL_RTX
 : reg
);
5357 if (GET_CODE (new) == CONST_INT
)
5358 new = plus_constant (base
, INTVAL (new));
5361 if (GET_CODE (new) == PLUS
 && CONSTANT_P (XEXP (new, 1)))
5363 base
 = gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5364 new = XEXP (new, 1);
5366 new = gen_rtx_PLUS (Pmode
, base
, new);
/* NOTE(review): garbled extraction — declarations, braces and some
   conditions are missing.  Visible logic: fetch the decl's RTL, mark local
   SYMBOL_REFs via SYMBOL_REF_FLAG for direct PIC access, and for
   thread-local VAR_DECLs choose a TLS model and encode it as a '%'+model
   character prefix on the symbol name (allocated via ggc_alloc_string).  */
5375 ix86_encode_section_info (decl
, first
)
5377 int first ATTRIBUTE_UNUSED
;
5379 bool local_p
 = (*targetm
.binds_local_p
) (decl
);
5382 rtl
 = DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5383 if (GET_CODE (rtl
) != MEM
)
5385 symbol
 = XEXP (rtl
, 0);
5386 if (GET_CODE (symbol
) != SYMBOL_REF
)
5389 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5390 symbol so that we may access it directly in the GOT. */
5393 SYMBOL_REF_FLAG (symbol
) = local_p
;
5395 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5396 "local dynamic", "initial exec" or "local exec" TLS models
5399 if (TREE_CODE (decl
) == VAR_DECL
 && DECL_THREAD_LOCAL (decl
))
5401 const char *symbol_str
;
5404 enum tls_model kind
;
5409 kind
 = TLS_MODEL_LOCAL_EXEC
;
5411 kind
 = TLS_MODEL_INITIAL_EXEC
;
5413 /* Local dynamic is inefficient when we're not combining the
5414 parts of the address. */
5415 else if (optimize
 && local_p
)
5416 kind
 = TLS_MODEL_LOCAL_DYNAMIC
;
5418 kind
 = TLS_MODEL_GLOBAL_DYNAMIC
;
5419 if (kind
 < flag_tls_default
)
5420 kind
 = flag_tls_default
;
5422 symbol_str
 = XSTR (symbol
, 0);
5424 if (symbol_str
[0] == '%')
5426 if (symbol_str
[1] == tls_model_chars
[kind
])
5430 len
 = strlen (symbol_str
) + 1;
5431 newstr
 = alloca (len
 + 2);
5434 newstr
[1] = tls_model_chars
[kind
];
5435 memcpy (newstr
 + 2, symbol_str
, len
);
5437 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
 + 2 - 1);
/* NOTE(review): only the signature fragment survives in this extraction —
   the body of ix86_strip_name_encoding is missing entirely.  Per the
   comment it reverses the '%'+TLS-model-char prefix encoding applied by
   ix86_encode_section_info above when printing symbol names.  */
5441 /* Undo the above when printing symbol names. */
5444 ix86_strip_name_encoding (str
)
5454 /* Load the thread pointer into a register. */
5457 get_thread_pointer ()
5461 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5462 tp
= gen_rtx_CONST (Pmode
, tp
);
5463 tp
= force_reg (Pmode
, tp
);
/* NOTE(review): garbled extraction — braces, the TLS-model switch header,
   the `changed` bookkeeping and the final returns are missing.  Visible
   logic: handle TLS symbols per model (global/local dynamic, initial/local
   exec, building unspec offsets against the thread pointer); hand symbolic
   PIC constants to legitimize_pic_address; then canonicalize address
   arithmetic (shift→multiply, reassociating PLUS/MULT trees) and force
   remaining non-address operands into registers.  */
5468 /* Try machine-dependent ways of modifying an illegitimate address
5469 to be legitimate. If we find one, return the new, valid address.
5470 This macro is used in only one place: `memory_address' in explow.c.
5472 OLDX is the address as it was before break_out_memory_refs was called.
5473 In some cases it is useful to look at this to decide what needs to be done.
5475 MODE and WIN are passed so that this macro can use
5476 GO_IF_LEGITIMATE_ADDRESS.
5478 It is always safe for this macro to do nothing. It exists to recognize
5479 opportunities to optimize the output.
5481 For the 80386, we handle X+REG by loading X into a register R and
5482 using R+REG. R will go in a general reg and indexing will be used.
5483 However, if REG is a broken-out memory address or multiplication,
5484 nothing needs to be done because REG can certainly go in a general reg.
5486 When -fpic is used, special handling is needed for symbolic references.
5487 See comments by legitimize_pic_address in i386.c for details. */
5490 legitimize_address (x
, oldx
, mode
)
5492 register rtx oldx ATTRIBUTE_UNUSED
;
5493 enum machine_mode mode
;
5498 if (TARGET_DEBUG_ADDR
)
5500 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5501 GET_MODE_NAME (mode
));
5505 log
 = tls_symbolic_operand (x
, mode
);
5508 rtx dest
, base
, off
, pic
;
5512 case TLS_MODEL_GLOBAL_DYNAMIC
:
5513 dest
 = gen_reg_rtx (Pmode
);
5514 emit_insn (gen_tls_global_dynamic (dest
, x
));
5517 case TLS_MODEL_LOCAL_DYNAMIC
:
5518 base
 = gen_reg_rtx (Pmode
);
5519 emit_insn (gen_tls_local_dynamic_base (base
));
5521 off
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5522 off
 = gen_rtx_CONST (Pmode
, off
);
5524 return gen_rtx_PLUS (Pmode
, base
, off
);
5526 case TLS_MODEL_INITIAL_EXEC
:
5529 if (reload_in_progress
)
5530 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5531 pic
 = pic_offset_table_rtx
;
5535 pic
 = gen_reg_rtx (Pmode
);
5536 emit_insn (gen_set_got (pic
));
5539 base
 = get_thread_pointer ();
5541 off
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_GOTTPOFF
);
5542 off
 = gen_rtx_CONST (Pmode
, off
);
5543 off
 = gen_rtx_PLUS (Pmode
, pic
, off
);
5544 off
 = gen_rtx_MEM (Pmode
, off
);
5545 RTX_UNCHANGING_P (off
) = 1;
5546 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5548 /* Damn Sun for specifing a set of dynamic relocations without
5549 considering the two-operand nature of the architecture!
5550 We'd be much better off with a "GOTNTPOFF" relocation that
5551 already contained the negated constant. */
5552 /* ??? Using negl and reg+reg addressing appears to be a lose
5553 size-wise. The negl is two bytes, just like the extra movl
5554 incurred by the two-operand subl, but reg+reg addressing
5555 uses the two-byte modrm form, unlike plain reg. */
5557 dest
 = gen_reg_rtx (Pmode
);
5558 emit_insn (gen_subsi3 (dest
, base
, off
));
5561 case TLS_MODEL_LOCAL_EXEC
:
5562 base
 = get_thread_pointer ();
5564 off
 = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5565 TARGET_GNU_TLS
 ? UNSPEC_NTPOFF
 : UNSPEC_TPOFF
);
5566 off
 = gen_rtx_CONST (Pmode
, off
);
5569 return gen_rtx_PLUS (Pmode
, base
, off
);
5572 dest
 = gen_reg_rtx (Pmode
);
5573 emit_insn (gen_subsi3 (dest
, base
, off
));
5584 if (flag_pic
 && SYMBOLIC_CONST (x
))
5585 return legitimize_pic_address (x
, 0);
5587 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5588 if (GET_CODE (x
) == ASHIFT
5589 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5590 && (log
 = (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5593 x
 = gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5594 GEN_INT (1 << log
));
5597 if (GET_CODE (x
) == PLUS
)
5599 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5601 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5602 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5603 && (log
 = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5606 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5607 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5608 GEN_INT (1 << log
));
5611 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5612 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5613 && (log
 = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5616 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5617 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5618 GEN_INT (1 << log
));
5621 /* Put multiply first if it isn't already. */
5622 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5624 rtx tmp
 = XEXP (x
, 0);
5625 XEXP (x
, 0) = XEXP (x
, 1);
5630 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5631 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5632 created by virtual register instantiation, register elimination, and
5633 similar optimizations. */
5634 if (GET_CODE (XEXP (x
, 0)) == MULT
 && GET_CODE (XEXP (x
, 1)) == PLUS
)
5637 x
 = gen_rtx_PLUS (Pmode
,
5638 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5639 XEXP (XEXP (x
, 1), 0)),
5640 XEXP (XEXP (x
, 1), 1));
5644 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5645 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5646 else if (GET_CODE (x
) == PLUS
 && GET_CODE (XEXP (x
, 0)) == PLUS
5647 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5648 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5649 && CONSTANT_P (XEXP (x
, 1)))
5652 rtx other
 = NULL_RTX
;
5654 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5656 constant
 = XEXP (x
, 1);
5657 other
 = XEXP (XEXP (XEXP (x
, 0), 1), 1);
5659 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5661 constant
 = XEXP (XEXP (XEXP (x
, 0), 1), 1);
5662 other
 = XEXP (x
, 1);
5670 x
 = gen_rtx_PLUS (Pmode
,
5671 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5672 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5673 plus_constant (other
, INTVAL (constant
)));
5677 if (changed
 && legitimate_address_p (mode
, x
, FALSE
))
5680 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5683 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5686 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5689 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5693 && GET_CODE (XEXP (x
, 1)) == REG
5694 && GET_CODE (XEXP (x
, 0)) == REG
)
5697 if (flag_pic
 && SYMBOLIC_CONST (XEXP (x
, 1)))
5700 x
 = legitimize_pic_address (x
, 0);
5703 if (changed
 && legitimate_address_p (mode
, x
, FALSE
))
5706 if (GET_CODE (XEXP (x
, 0)) == REG
)
5708 register rtx temp
 = gen_reg_rtx (Pmode
);
5709 register rtx val
 = force_operand (XEXP (x
, 1), temp
);
5711 emit_move_insn (temp
, val
);
5717 else if (GET_CODE (XEXP (x
, 1)) == REG
)
5719 register rtx temp
 = gen_reg_rtx (Pmode
);
5720 register rtx val
 = force_operand (XEXP (x
, 0), temp
);
5722 emit_move_insn (temp
, val
);
/* NOTE(review): garbled extraction — case labels, breaks and some
   operators (the '+'/'-' output between PLUS/MINUS operands) are missing.
   Visible logic: recursively print symbols, labels, integers, CONST,
   CONST_DOUBLE, PLUS/MINUS, and UNSPEC relocation suffixes
   (@GOT/@GOTOFF/@GOTPCREL/@GOTTPOFF/@TPOFF/@NTPOFF/@DTPOFF).  */
5732 /* Print an integer constant expression in assembler syntax. Addition
5733 and subtraction are the only arithmetic that may appear in these
5734 expressions. FILE is the stdio stream to write to, X is the rtx, and
5735 CODE is the operand print code from the output string. */
5738 output_pic_addr_const (file
, x
, code
)
5745 switch (GET_CODE (x
))
5755 assemble_name (file
, XSTR (x
, 0));
5756 if (code
 == 'P' && ! SYMBOL_REF_FLAG (x
))
5757 fputs ("@PLT", file
);
5764 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
5765 assemble_name (asm_out_file
, buf
);
5769 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5773 /* This used to output parentheses around the expression,
5774 but that does not work on the 386 (either ATT or BSD assembler). */
5775 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5779 if (GET_MODE (x
) == VOIDmode
)
5781 /* We can use %d if the number is <32 bits and positive. */
5782 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
5783 fprintf (file
, "0x%lx%08lx",
5784 (unsigned long) CONST_DOUBLE_HIGH (x
),
5785 (unsigned long) CONST_DOUBLE_LOW (x
));
5787 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
5790 /* We can't handle floating point constants;
5791 PRINT_OPERAND must handle them. */
5792 output_operand_lossage ("floating constant misused");
5796 /* Some assemblers need integer constants to appear first. */
5797 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5799 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5801 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5803 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5805 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5807 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5814 putc (ASSEMBLER_DIALECT
 == ASM_INTEL
 ? '(' : '[', file
);
5815 output_pic_addr_const (file
, XEXP (x
, 0), code
);
5817 output_pic_addr_const (file
, XEXP (x
, 1), code
);
5818 putc (ASSEMBLER_DIALECT
 == ASM_INTEL
 ? ')' : ']', file
);
5822 if (XVECLEN (x
, 0) != 1)
5824 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
5825 switch (XINT (x
, 1))
5828 fputs ("@GOT", file
);
5831 fputs ("@GOTOFF", file
);
5833 case UNSPEC_GOTPCREL
:
5834 fputs ("@GOTPCREL(%rip)", file
);
5836 case UNSPEC_GOTTPOFF
:
5837 fputs ("@GOTTPOFF", file
);
5840 fputs ("@TPOFF", file
);
5843 fputs ("@NTPOFF", file
);
5846 fputs ("@DTPOFF", file
);
5849 output_operand_lossage ("invalid UNSPEC as operand");
5855 output_operand_lossage ("invalid expression as operand");
/* NOTE(review): garbled extraction — the surrounding conditionals and
   braces are missing.  Visible logic: emit ASM_QUAD (64-bit) or ASM_LONG
   directive, then print X via output_pic_addr_const (PIC relocations) or
   plain output_addr_const.  */
5859 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5860 We need to handle our special PIC relocations. */
5863 i386_dwarf_output_addr_const (file
, x
)
5868 fprintf (file
, "%s", TARGET_64BIT
 ? ASM_QUAD
 : ASM_LONG
);
5872 fprintf (file
, "%s", ASM_LONG
);
5875 output_pic_addr_const (file
, x
, '\0');
5877 output_addr_const (file
, x
);
/* NOTE(review): garbled extraction — initializations of x/y, braces and
   several returns are missing.  Visible logic: recognize GOTPCREL
   memory references (64-bit) and pic_reg+GOT/GOTOFF forms (optionally with
   an index term y and a constant offset) and rewrite them back into direct
   symbol references for debug output.  */
5881 /* In the name of slightly smaller debug output, and to cater to
5882 general assembler losage, recognize PIC+GOTOFF and turn it back
5883 into a direct symbol reference. */
5886 i386_simplify_dwarf_addr (orig_x
)
5891 if (GET_CODE (x
) == MEM
)
5896 if (GET_CODE (x
) != CONST
5897 || GET_CODE (XEXP (x
, 0)) != UNSPEC
5898 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
5899 || GET_CODE (orig_x
) != MEM
)
5901 return XVECEXP (XEXP (x
, 0), 0, 0);
5904 if (GET_CODE (x
) != PLUS
5905 || GET_CODE (XEXP (x
, 1)) != CONST
)
5908 if (GET_CODE (XEXP (x
, 0)) == REG
5909 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5910 /* %ebx + GOT/GOTOFF */
5912 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
5914 /* %ebx + %reg * scale + GOT/GOTOFF */
5916 if (GET_CODE (XEXP (y
, 0)) == REG
5917 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
5919 else if (GET_CODE (XEXP (y
, 1)) == REG
5920 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
5924 if (GET_CODE (y
) != REG
5925 && GET_CODE (y
) != MULT
5926 && GET_CODE (y
) != ASHIFT
)
5932 x
 = XEXP (XEXP (x
, 1), 0);
5933 if (GET_CODE (x
) == UNSPEC
5934 && ((XINT (x
, 1) == UNSPEC_GOT
 && GET_CODE (orig_x
) == MEM
)
5935 || (XINT (x
, 1) == UNSPEC_GOTOFF
 && GET_CODE (orig_x
) != MEM
)))
5938 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
5939 return XVECEXP (x
, 0, 0);
5942 if (GET_CODE (x
) == PLUS
5943 && GET_CODE (XEXP (x
, 0)) == UNSPEC
5944 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5945 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
 && GET_CODE (orig_x
) == MEM
)
5946 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
5947 && GET_CODE (orig_x
) != MEM
)))
5949 x
 = gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
5951 return gen_rtx_PLUS (Pmode
, y
, x
);
/* NOTE(review): garbled extraction — the switch over CODE, most case
   labels and many suffix assignments are missing.  Visible logic: map an
   rtx comparison code (possibly an FP comparison converted via
   ix86_fp_comparison_codes/ix86_fp_compare_code_to_integer, possibly
   reversed) plus a CC mode to an x86 condition suffix string which is
   written to FILE.  */
5959 put_condition_code (code
, mode
, reverse
, fp
, file
)
5961 enum machine_mode mode
;
5967 if (mode
 == CCFPmode
 || mode
 == CCFPUmode
)
5969 enum rtx_code second_code
, bypass_code
;
5970 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
5971 if (bypass_code
 != NIL
 || second_code
 != NIL
)
5973 code
 = ix86_fp_compare_code_to_integer (code
);
5977 code
 = reverse_condition (code
);
5988 if (mode
 != CCmode
 && mode
 != CCNOmode
 && mode
 != CCGCmode
)
5993 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
5994 Those same assemblers have the same but opposite losage on cmov. */
5997 suffix
 = fp
 ? "nbe" : "a";
6000 if (mode
 == CCNOmode
 || mode
 == CCGOCmode
)
6002 else if (mode
 == CCmode
 || mode
 == CCGCmode
)
6013 if (mode
 == CCNOmode
 || mode
 == CCGOCmode
)
6015 else if (mode
 == CCmode
 || mode
 == CCGCmode
)
6024 suffix
 = fp
 ? "nb" : "ae";
6027 if (mode
 != CCmode
 && mode
 != CCGCmode
 && mode
 != CCNOmode
)
6037 suffix
 = fp
 ? "u" : "p";
6040 suffix
 = fp
 ? "nu" : "np";
6045 fputs (suffix
, file
);
/* NOTE(review): garbled extraction — the abort calls, several case labels
   and the size switch structure are missing.  Visible logic: print the
   assembler name of register X for operand-size code CODE: '%' prefix in
   AT&T dialect, special r8..r15 b/w/d spellings for REX registers,
   st(0) for the FP stack top, and the hi/qi/qi_high name tables
   otherwise.  */
6049 print_reg (x
, code
, file
)
6054 if (REGNO (x
) == ARG_POINTER_REGNUM
6055 || REGNO (x
) == FRAME_POINTER_REGNUM
6056 || REGNO (x
) == FLAGS_REG
6057 || REGNO (x
) == FPSR_REG
)
6060 if (ASSEMBLER_DIALECT
 == ASM_ATT
 || USER_LABEL_PREFIX
[0] == 0)
6063 if (code
 == 'w' || MMX_REG_P (x
))
6065 else if (code
 == 'b')
6067 else if (code
 == 'k')
6069 else if (code
 == 'q')
6071 else if (code
 == 'y')
6073 else if (code
 == 'h')
6076 code
 = GET_MODE_SIZE (GET_MODE (x
));
6078 /* Irritatingly, AMD extended registers use different naming convention
6079 from the normal registers. */
6080 if (REX_INT_REG_P (x
))
6087 error ("extended registers have no high halves");
6090 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
6093 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
6096 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
6099 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
 + 8);
6102 error ("unsupported operand size for extended register");
6110 if (STACK_TOP_P (x
))
6112 fputs ("st(0)", file
);
6119 if (! ANY_FP_REG_P (x
))
6120 putc (code
 == 8 && TARGET_64BIT
 ? 'r' : 'e', file
);
6124 fputs (hi_reg_name
[REGNO (x
)], file
);
6127 fputs (qi_reg_name
[REGNO (x
)], file
);
6130 fputs (qi_high_reg_name
[REGNO (x
)], file
);
/* NOTE(review): garbled extraction — the INSN_P condition, braces and the
   final abort/return are missing.  Visible logic: return (and cache in
   cfun->machine->some_ld_name) the name of some local-dynamic TLS symbol
   referenced by the current function, found by walking the insn stream
   with for_each_rtx.  */
6137 /* Locate some local-dynamic symbol still in use by this function
6138 so that we can print its name in some tls_local_dynamic_base
6142 get_some_local_dynamic_name ()
6146 if (cfun
->machine
->some_ld_name
)
6147 return cfun
->machine
->some_ld_name
;
6149 for (insn
 = get_insns (); insn
; insn
 = NEXT_INSN (insn
))
6151 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6152 return cfun
->machine
->some_ld_name
;
/* NOTE(review): garbled extraction — the dereference of PX into X and the
   return values (nonzero on match, 0 otherwise, per for_each_rtx
   convention) are missing.  Visible logic: callback that records the name
   of a local-dynamic TLS SYMBOL_REF into cfun->machine->some_ld_name.  */
6158 get_some_local_dynamic_name_1 (px
, data
)
6160 void *data ATTRIBUTE_UNUSED
;
6164 if (GET_CODE (x
) == SYMBOL_REF
6165 && local_dynamic_symbolic_operand (x
, Pmode
))
6167 cfun
->machine
->some_ld_name
 = XSTR (x
, 0);
6175 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6176 C -- print opcode suffix for set/cmov insn.
6177 c -- like C, but print reversed condition
6178 F,f -- likewise, but for floating-point.
6179 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6181 R -- print the prefix for register names.
6182 z -- print the opcode suffix for the size of the current operand.
6183 * -- print a star (in certain assembler syntax)
6184 A -- print an absolute memory reference.
6185 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6186 s -- print a shift double count, followed by the assemblers argument
6188 b -- print the QImode name of the register for the indicated operand.
6189 %b0 would print %al if operands[0] is reg 0.
6190 w -- likewise, print the HImode name of the register.
6191 k -- likewise, print the SImode name of the register.
6192 q -- likewise, print the DImode name of the register.
6193 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6194 y -- print "st(0)" instead of "st" as a register.
6195 D -- print condition for SSE cmp instruction.
6196 P -- if PIC, print an @PLT suffix.
6197 X -- don't print any sort of PIC '@' suffix for a symbol.
6198 & -- print some in-use local-dynamic symbol name.
6202 print_operand (file
, x
, code
)
6212 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6217 assemble_name (file
, get_some_local_dynamic_name ());
6221 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6223 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6225 /* Intel syntax. For absolute addresses, registers should not
6226 be surrounded by braces. */
6227 if (GET_CODE (x
) != REG
)
6230 PRINT_OPERAND (file
, x
, 0);
6238 PRINT_OPERAND (file
, x
, 0);
6243 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6248 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6253 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6258 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6263 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6268 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6273 /* 387 opcodes don't get size suffixes if the operands are
6275 if (STACK_REG_P (x
))
6278 /* Likewise if using Intel opcodes. */
6279 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6282 /* This is the size of op from size of operand. */
6283 switch (GET_MODE_SIZE (GET_MODE (x
)))
6286 #ifdef HAVE_GAS_FILDS_FISTS
6292 if (GET_MODE (x
) == SFmode
)
6307 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6309 #ifdef GAS_MNEMONICS
6335 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6337 PRINT_OPERAND (file
, x
, 0);
6343 /* Little bit of braindamage here. The SSE compare instructions
6344 does use completely different names for the comparisons that the
6345 fp conditional moves. */
6346 switch (GET_CODE (x
))
6361 fputs ("unord", file
);
6365 fputs ("neq", file
);
6369 fputs ("nlt", file
);
6373 fputs ("nle", file
);
6376 fputs ("ord", file
);
6384 #ifdef CMOV_SUN_AS_SYNTAX
6385 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6387 switch (GET_MODE (x
))
6389 case HImode
: putc ('w', file
); break;
6391 case SFmode
: putc ('l', file
); break;
6393 case DFmode
: putc ('q', file
); break;
6401 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6404 #ifdef CMOV_SUN_AS_SYNTAX
6405 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6408 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6411 /* Like above, but reverse condition */
6413 /* Check to see if argument to %c is really a constant
6414 and not a condition code which needs to be reversed. */
6415 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
6417 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6420 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6423 #ifdef CMOV_SUN_AS_SYNTAX
6424 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6427 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6433 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6436 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6439 int pred_val
= INTVAL (XEXP (x
, 0));
6441 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6442 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6444 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6445 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6447 /* Emit hints only in the case default branch prediction
6448 heruistics would fail. */
6449 if (taken
!= cputaken
)
6451 /* We use 3e (DS) prefix for taken branches and
6452 2e (CS) prefix for not taken branches. */
6454 fputs ("ds ; ", file
);
6456 fputs ("cs ; ", file
);
6463 output_operand_lossage ("invalid operand code `%c'", code
);
6467 if (GET_CODE (x
) == REG
)
6469 PRINT_REG (x
, code
, file
);
6472 else if (GET_CODE (x
) == MEM
)
6474 /* No `byte ptr' prefix for call instructions. */
6475 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6478 switch (GET_MODE_SIZE (GET_MODE (x
)))
6480 case 1: size
= "BYTE"; break;
6481 case 2: size
= "WORD"; break;
6482 case 4: size
= "DWORD"; break;
6483 case 8: size
= "QWORD"; break;
6484 case 12: size
= "XWORD"; break;
6485 case 16: size
= "XMMWORD"; break;
6490 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6493 else if (code
== 'w')
6495 else if (code
== 'k')
6499 fputs (" PTR ", file
);
6503 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6504 output_pic_addr_const (file
, x
, code
);
6505 /* Avoid (%rip) for call operands. */
6506 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6507 && GET_CODE (x
) != CONST_INT
)
6508 output_addr_const (file
, x
);
6509 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6510 output_operand_lossage ("invalid constraints for operand");
6515 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6520 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6521 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6523 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6525 fprintf (file
, "0x%lx", l
);
6528 /* These float cases don't actually occur as immediate operands. */
6529 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6534 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6535 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
6536 fprintf (file
, "%s", dstr
);
6539 else if (GET_CODE (x
) == CONST_DOUBLE
6540 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6545 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6546 REAL_VALUE_TO_DECIMAL (r
, "%.22e", dstr
);
6547 fprintf (file
, "%s", dstr
);
6550 else if (GET_CODE (x
) == CONST
6551 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6552 && XINT (XEXP (x
, 0), 1) == UNSPEC_TP
)
6554 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6555 fputs ("DWORD PTR ", file
);
6556 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6558 fputs ("gs:0", file
);
6565 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6567 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6570 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6571 || GET_CODE (x
) == LABEL_REF
)
6573 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6576 fputs ("OFFSET FLAT:", file
);
6579 if (GET_CODE (x
) == CONST_INT
)
6580 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6582 output_pic_addr_const (file
, x
, code
);
6584 output_addr_const (file
, x
);
6588 /* Print a memory operand whose address is ADDR. */
6591 print_operand_address (file
, addr
)
6595 struct ix86_address parts
;
6596 rtx base
, index
, disp
;
6599 if (! ix86_decompose_address (addr
, &parts
))
6603 index
= parts
.index
;
6605 scale
= parts
.scale
;
6607 if (!base
&& !index
)
6609 /* Displacement only requires special attention. */
6611 if (GET_CODE (disp
) == CONST_INT
)
6613 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6615 if (USER_LABEL_PREFIX
[0] == 0)
6617 fputs ("ds:", file
);
6619 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6622 output_pic_addr_const (file
, addr
, 0);
6624 output_addr_const (file
, addr
);
6626 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6628 && (GET_CODE (addr
) == SYMBOL_REF
6629 || GET_CODE (addr
) == LABEL_REF
6630 || (GET_CODE (addr
) == CONST
6631 && GET_CODE (XEXP (addr
, 0)) == PLUS
6632 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
6633 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
6634 fputs ("(%rip)", file
);
6638 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6643 output_pic_addr_const (file
, disp
, 0);
6644 else if (GET_CODE (disp
) == LABEL_REF
)
6645 output_asm_label (disp
);
6647 output_addr_const (file
, disp
);
6652 PRINT_REG (base
, 0, file
);
6656 PRINT_REG (index
, 0, file
);
6658 fprintf (file
, ",%d", scale
);
6664 rtx offset
= NULL_RTX
;
6668 /* Pull out the offset of a symbol; print any symbol itself. */
6669 if (GET_CODE (disp
) == CONST
6670 && GET_CODE (XEXP (disp
, 0)) == PLUS
6671 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
6673 offset
= XEXP (XEXP (disp
, 0), 1);
6674 disp
= gen_rtx_CONST (VOIDmode
,
6675 XEXP (XEXP (disp
, 0), 0));
6679 output_pic_addr_const (file
, disp
, 0);
6680 else if (GET_CODE (disp
) == LABEL_REF
)
6681 output_asm_label (disp
);
6682 else if (GET_CODE (disp
) == CONST_INT
)
6685 output_addr_const (file
, disp
);
6691 PRINT_REG (base
, 0, file
);
6694 if (INTVAL (offset
) >= 0)
6696 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6700 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
6707 PRINT_REG (index
, 0, file
);
6709 fprintf (file
, "*%d", scale
);
6717 output_addr_const_extra (file
, x
)
6723 if (GET_CODE (x
) != UNSPEC
)
6726 op
= XVECEXP (x
, 0, 0);
6727 switch (XINT (x
, 1))
6729 case UNSPEC_GOTTPOFF
:
6730 output_addr_const (file
, op
);
6731 fputs ("@GOTTPOFF", file
);
6734 output_addr_const (file
, op
);
6735 fputs ("@TPOFF", file
);
6738 output_addr_const (file
, op
);
6739 fputs ("@NTPOFF", file
);
6742 output_addr_const (file
, op
);
6743 fputs ("@DTPOFF", file
);
6753 /* Split one or more DImode RTL references into pairs of SImode
6754 references. The RTL can be REG, offsettable MEM, integer constant, or
6755 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6756 split and "num" is its length. lo_half and hi_half are output arrays
6757 that parallel "operands". */
6760 split_di (operands
, num
, lo_half
, hi_half
)
6763 rtx lo_half
[], hi_half
[];
6767 rtx op
= operands
[num
];
6769 /* simplify_subreg refuse to split volatile memory addresses,
6770 but we still have to handle it. */
6771 if (GET_CODE (op
) == MEM
)
6773 lo_half
[num
] = adjust_address (op
, SImode
, 0);
6774 hi_half
[num
] = adjust_address (op
, SImode
, 4);
6778 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
6779 GET_MODE (op
) == VOIDmode
6780 ? DImode
: GET_MODE (op
), 0);
6781 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
6782 GET_MODE (op
) == VOIDmode
6783 ? DImode
: GET_MODE (op
), 4);
6787 /* Split one or more TImode RTL references into pairs of SImode
6788 references. The RTL can be REG, offsettable MEM, integer constant, or
6789 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6790 split and "num" is its length. lo_half and hi_half are output arrays
6791 that parallel "operands". */
6794 split_ti (operands
, num
, lo_half
, hi_half
)
6797 rtx lo_half
[], hi_half
[];
6801 rtx op
= operands
[num
];
6803 /* simplify_subreg refuse to split volatile memory addresses, but we
6804 still have to handle it. */
6805 if (GET_CODE (op
) == MEM
)
6807 lo_half
[num
] = adjust_address (op
, DImode
, 0);
6808 hi_half
[num
] = adjust_address (op
, DImode
, 8);
6812 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
6813 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
6818 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6819 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6820 is the expression of the binary operation. The output may either be
6821 emitted here, or returned to the caller, like all output_* functions.
6823 There is no guarantee that the operands are the same mode, as they
6824 might be within FLOAT or FLOAT_EXTEND expressions. */
6826 #ifndef SYSV386_COMPAT
6827 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6828 wants to fix the assemblers because that causes incompatibility
6829 with gcc. No-one wants to fix gcc because that causes
6830 incompatibility with assemblers... You can use the option of
6831 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6832 #define SYSV386_COMPAT 1
6836 output_387_binary_op (insn
, operands
)
6840 static char buf
[30];
6843 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
6845 #ifdef ENABLE_CHECKING
6846 /* Even if we do not want to check the inputs, this documents input
6847 constraints. Which helps in understanding the following code. */
6848 if (STACK_REG_P (operands
[0])
6849 && ((REG_P (operands
[1])
6850 && REGNO (operands
[0]) == REGNO (operands
[1])
6851 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
6852 || (REG_P (operands
[2])
6853 && REGNO (operands
[0]) == REGNO (operands
[2])
6854 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
6855 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
6861 switch (GET_CODE (operands
[3]))
6864 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6865 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6873 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6874 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6882 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6883 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6891 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
6892 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
6906 if (GET_MODE (operands
[0]) == SFmode
)
6907 strcat (buf
, "ss\t{%2, %0|%0, %2}");
6909 strcat (buf
, "sd\t{%2, %0|%0, %2}");
6914 switch (GET_CODE (operands
[3]))
6918 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
6920 rtx temp
= operands
[2];
6921 operands
[2] = operands
[1];
6925 /* know operands[0] == operands[1]. */
6927 if (GET_CODE (operands
[2]) == MEM
)
6933 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6935 if (STACK_TOP_P (operands
[0]))
6936 /* How is it that we are storing to a dead operand[2]?
6937 Well, presumably operands[1] is dead too. We can't
6938 store the result to st(0) as st(0) gets popped on this
6939 instruction. Instead store to operands[2] (which I
6940 think has to be st(1)). st(1) will be popped later.
6941 gcc <= 2.8.1 didn't have this check and generated
6942 assembly code that the Unixware assembler rejected. */
6943 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6945 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6949 if (STACK_TOP_P (operands
[0]))
6950 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6952 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6957 if (GET_CODE (operands
[1]) == MEM
)
6963 if (GET_CODE (operands
[2]) == MEM
)
6969 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
6972 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6973 derived assemblers, confusingly reverse the direction of
6974 the operation for fsub{r} and fdiv{r} when the
6975 destination register is not st(0). The Intel assembler
6976 doesn't have this brain damage. Read !SYSV386_COMPAT to
6977 figure out what the hardware really does. */
6978 if (STACK_TOP_P (operands
[0]))
6979 p
= "{p\t%0, %2|rp\t%2, %0}";
6981 p
= "{rp\t%2, %0|p\t%0, %2}";
6983 if (STACK_TOP_P (operands
[0]))
6984 /* As above for fmul/fadd, we can't store to st(0). */
6985 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6987 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6992 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
6995 if (STACK_TOP_P (operands
[0]))
6996 p
= "{rp\t%0, %1|p\t%1, %0}";
6998 p
= "{p\t%1, %0|rp\t%0, %1}";
7000 if (STACK_TOP_P (operands
[0]))
7001 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7003 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7008 if (STACK_TOP_P (operands
[0]))
7010 if (STACK_TOP_P (operands
[1]))
7011 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7013 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7016 else if (STACK_TOP_P (operands
[1]))
7019 p
= "{\t%1, %0|r\t%0, %1}";
7021 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7027 p
= "{r\t%2, %0|\t%0, %2}";
7029 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7042 /* Output code to initialize control word copies used by
7043 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7044 is set to control word rounding downwards. */
7046 emit_i387_cw_initialization (normal
, round_down
)
7047 rtx normal
, round_down
;
7049 rtx reg
= gen_reg_rtx (HImode
);
7051 emit_insn (gen_x86_fnstcw_1 (normal
));
7052 emit_move_insn (reg
, normal
);
7053 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7055 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7057 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7058 emit_move_insn (round_down
, reg
);
7061 /* Output code for INSN to convert a float to a signed int. OPERANDS
7062 are the insn operands. The output may be [HSD]Imode and the input
7063 operand may be [SDX]Fmode. */
7066 output_fix_trunc (insn
, operands
)
7070 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7071 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7073 /* Jump through a hoop or two for DImode, since the hardware has no
7074 non-popping instruction. We used to do this a different way, but
7075 that was somewhat fragile and broke with post-reload splitters. */
7076 if (dimode_p
&& !stack_top_dies
)
7077 output_asm_insn ("fld\t%y1", operands
);
7079 if (!STACK_TOP_P (operands
[1]))
7082 if (GET_CODE (operands
[0]) != MEM
)
7085 output_asm_insn ("fldcw\t%3", operands
);
7086 if (stack_top_dies
|| dimode_p
)
7087 output_asm_insn ("fistp%z0\t%0", operands
);
7089 output_asm_insn ("fist%z0\t%0", operands
);
7090 output_asm_insn ("fldcw\t%2", operands
);
7095 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7096 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7097 when fucom should be used. */
7100 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7103 int eflags_p
, unordered_p
;
7106 rtx cmp_op0
= operands
[0];
7107 rtx cmp_op1
= operands
[1];
7108 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7113 cmp_op1
= operands
[2];
7117 if (GET_MODE (operands
[0]) == SFmode
)
7119 return "ucomiss\t{%1, %0|%0, %1}";
7121 return "comiss\t{%1, %0|%0, %y}";
7124 return "ucomisd\t{%1, %0|%0, %1}";
7126 return "comisd\t{%1, %0|%0, %y}";
7129 if (! STACK_TOP_P (cmp_op0
))
7132 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7134 if (STACK_REG_P (cmp_op1
)
7136 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7137 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7139 /* If both the top of the 387 stack dies, and the other operand
7140 is also a stack register that dies, then this must be a
7141 `fcompp' float compare */
7145 /* There is no double popping fcomi variant. Fortunately,
7146 eflags is immune from the fstp's cc clobbering. */
7148 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7150 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7158 return "fucompp\n\tfnstsw\t%0";
7160 return "fcompp\n\tfnstsw\t%0";
7173 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7175 static const char * const alt
[24] =
7187 "fcomi\t{%y1, %0|%0, %y1}",
7188 "fcomip\t{%y1, %0|%0, %y1}",
7189 "fucomi\t{%y1, %0|%0, %y1}",
7190 "fucomip\t{%y1, %0|%0, %y1}",
7197 "fcom%z2\t%y2\n\tfnstsw\t%0",
7198 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7199 "fucom%z2\t%y2\n\tfnstsw\t%0",
7200 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7202 "ficom%z2\t%y2\n\tfnstsw\t%0",
7203 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7211 mask
= eflags_p
<< 3;
7212 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7213 mask
|= unordered_p
<< 1;
7214 mask
|= stack_top_dies
;
7227 ix86_output_addr_vec_elt (file
, value
)
7231 const char *directive
= ASM_LONG
;
7236 directive
= ASM_QUAD
;
7242 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7246 ix86_output_addr_diff_elt (file
, value
, rel
)
7251 fprintf (file
, "%s%s%d-%s%d\n",
7252 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7253 else if (HAVE_AS_GOTOFF_IN_DATA
)
7254 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7256 asm_fprintf (file
, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7257 ASM_LONG
, LPREFIX
, value
);
7260 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7264 ix86_expand_clear (dest
)
7269 /* We play register width games, which are only valid after reload. */
7270 if (!reload_completed
)
7273 /* Avoid HImode and its attendant prefix byte. */
7274 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7275 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7277 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7279 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7280 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7282 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7283 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7289 /* X is an unchanging MEM. If it is a constant pool reference, return
7290 the constant pool rtx, else NULL. */
7293 maybe_get_pool_constant (x
)
7300 if (GET_CODE (x
) != PLUS
)
7302 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7305 if (GET_CODE (x
) != CONST
)
7308 if (GET_CODE (x
) != UNSPEC
)
7310 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7312 x
= XVECEXP (x
, 0, 0);
7315 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7316 return get_pool_constant (x
);
7322 ix86_expand_move (mode
, operands
)
7323 enum machine_mode mode
;
7326 int strict
= (reload_in_progress
|| reload_completed
);
7327 rtx insn
, op0
, op1
, tmp
;
7332 /* ??? We have a slight problem. We need to say that tls symbols are
7333 not legitimate constants so that reload does not helpfully reload
7334 these constants from a REG_EQUIV, which we cannot handle. (Recall
7335 that general- and local-dynamic address resolution requires a
7338 However, if we say that tls symbols are not legitimate constants,
7339 then emit_move_insn helpfully drop them into the constant pool.
7341 It is far easier to work around emit_move_insn than reload. Recognize
7342 the MEM that we would have created and extract the symbol_ref. */
7345 && GET_CODE (op1
) == MEM
7346 && RTX_UNCHANGING_P (op1
))
7348 tmp
= maybe_get_pool_constant (op1
);
7349 /* Note that we only care about symbolic constants here, which
7350 unlike CONST_INT will always have a proper mode. */
7351 if (tmp
&& GET_MODE (tmp
) == Pmode
)
7355 if (tls_symbolic_operand (op1
, Pmode
))
7357 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7358 if (GET_CODE (op0
) == MEM
)
7360 tmp
= gen_reg_rtx (mode
);
7361 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7365 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7367 if (GET_CODE (op0
) == MEM
)
7368 op1
= force_reg (Pmode
, op1
);
7372 if (GET_CODE (temp
) != REG
)
7373 temp
= gen_reg_rtx (Pmode
);
7374 temp
= legitimize_pic_address (op1
, temp
);
7382 if (GET_CODE (op0
) == MEM
7383 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7384 || !push_operand (op0
, mode
))
7385 && GET_CODE (op1
) == MEM
)
7386 op1
= force_reg (mode
, op1
);
7388 if (push_operand (op0
, mode
)
7389 && ! general_no_elim_operand (op1
, mode
))
7390 op1
= copy_to_mode_reg (mode
, op1
);
7392 /* Force large constants in 64bit compilation into register
7393 to get them CSEed. */
7394 if (TARGET_64BIT
&& mode
== DImode
7395 && immediate_operand (op1
, mode
)
7396 && !x86_64_zero_extended_value (op1
)
7397 && !register_operand (op0
, mode
)
7398 && optimize
&& !reload_completed
&& !reload_in_progress
)
7399 op1
= copy_to_mode_reg (mode
, op1
);
7401 if (FLOAT_MODE_P (mode
))
7403 /* If we are loading a floating point constant to a register,
7404 force the value to memory now, since we'll get better code
7405 out the back end. */
7409 else if (GET_CODE (op1
) == CONST_DOUBLE
7410 && register_operand (op0
, mode
))
7411 op1
= validize_mem (force_const_mem (mode
, op1
));
7415 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7421 ix86_expand_vector_move (mode
, operands
)
7422 enum machine_mode mode
;
7425 /* Force constants other than zero into memory. We do not know how
7426 the instructions used to build constants modify the upper 64 bits
7427 of the register, once we have that information we may be able
7428 to handle some of them more efficiently. */
7429 if ((reload_in_progress
| reload_completed
) == 0
7430 && register_operand (operands
[0], mode
)
7431 && CONSTANT_P (operands
[1]))
7433 rtx addr
= gen_reg_rtx (Pmode
);
7434 emit_move_insn (addr
, XEXP (force_const_mem (mode
, operands
[1]), 0));
7435 operands
[1] = gen_rtx_MEM (mode
, addr
);
7438 /* Make operand1 a register if it isn't already. */
7439 if ((reload_in_progress
| reload_completed
) == 0
7440 && !register_operand (operands
[0], mode
)
7441 && !register_operand (operands
[1], mode
)
7442 && operands
[1] != CONST0_RTX (mode
))
7444 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7445 emit_move_insn (operands
[0], temp
);
7449 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7452 /* Attempt to expand a binary operator. Make the expansion closer to the
7453 actual machine, then just general_operand, which will allow 3 separate
7454 memory references (one output, two input) in a single insn. */
7457 ix86_expand_binary_operator (code
, mode
, operands
)
7459 enum machine_mode mode
;
7462 int matching_memory
;
7463 rtx src1
, src2
, dst
, op
, clob
;
7469 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7470 if (GET_RTX_CLASS (code
) == 'c'
7471 && (rtx_equal_p (dst
, src2
)
7472 || immediate_operand (src1
, mode
)))
7479 /* If the destination is memory, and we do not have matching source
7480 operands, do things in registers. */
7481 matching_memory
= 0;
7482 if (GET_CODE (dst
) == MEM
)
7484 if (rtx_equal_p (dst
, src1
))
7485 matching_memory
= 1;
7486 else if (GET_RTX_CLASS (code
) == 'c'
7487 && rtx_equal_p (dst
, src2
))
7488 matching_memory
= 2;
7490 dst
= gen_reg_rtx (mode
);
7493 /* Both source operands cannot be in memory. */
7494 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7496 if (matching_memory
!= 2)
7497 src2
= force_reg (mode
, src2
);
7499 src1
= force_reg (mode
, src1
);
7502 /* If the operation is not commutable, source 1 cannot be a constant
7503 or non-matching memory. */
7504 if ((CONSTANT_P (src1
)
7505 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7506 && GET_RTX_CLASS (code
) != 'c')
7507 src1
= force_reg (mode
, src1
);
7509 /* If optimizing, copy to regs to improve CSE */
7510 if (optimize
&& ! no_new_pseudos
)
7512 if (GET_CODE (dst
) == MEM
)
7513 dst
= gen_reg_rtx (mode
);
7514 if (GET_CODE (src1
) == MEM
)
7515 src1
= force_reg (mode
, src1
);
7516 if (GET_CODE (src2
) == MEM
)
7517 src2
= force_reg (mode
, src2
);
7520 /* Emit the instruction. */
7522 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7523 if (reload_in_progress
)
7525 /* Reload doesn't know about the flags register, and doesn't know that
7526 it doesn't want to clobber it. We can only do this with PLUS. */
7533 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7534 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7537 /* Fix up the destination if needed. */
7538 if (dst
!= operands
[0])
7539 emit_move_insn (operands
[0], dst
);
7542 /* Return TRUE or FALSE depending on whether the binary operator meets the
7543 appropriate constraints. */
7546 ix86_binary_operator_ok (code
, mode
, operands
)
7548 enum machine_mode mode ATTRIBUTE_UNUSED
;
7551 /* Both source operands cannot be in memory. */
7552 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7554 /* If the operation is not commutable, source 1 cannot be a constant. */
7555 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
7557 /* If the destination is memory, we must have a matching source operand. */
7558 if (GET_CODE (operands
[0]) == MEM
7559 && ! (rtx_equal_p (operands
[0], operands
[1])
7560 || (GET_RTX_CLASS (code
) == 'c'
7561 && rtx_equal_p (operands
[0], operands
[2]))))
7563 /* If the operation is not commutable and the source 1 is memory, we must
7564 have a matching destination. */
7565 if (GET_CODE (operands
[1]) == MEM
7566 && GET_RTX_CLASS (code
) != 'c'
7567 && ! rtx_equal_p (operands
[0], operands
[1]))
7572 /* Attempt to expand a unary operator. Make the expansion closer to the
7573 actual machine, then just general_operand, which will allow 2 separate
7574 memory references (one output, one input) in a single insn. */
7577 ix86_expand_unary_operator (code
, mode
, operands
)
7579 enum machine_mode mode
;
7582 int matching_memory
;
7583 rtx src
, dst
, op
, clob
;
7588 /* If the destination is memory, and we do not have matching source
7589 operands, do things in registers. */
7590 matching_memory
= 0;
7591 if (GET_CODE (dst
) == MEM
)
7593 if (rtx_equal_p (dst
, src
))
7594 matching_memory
= 1;
7596 dst
= gen_reg_rtx (mode
);
7599 /* When source operand is memory, destination must match. */
7600 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7601 src
= force_reg (mode
, src
);
7603 /* If optimizing, copy to regs to improve CSE */
7604 if (optimize
&& ! no_new_pseudos
)
7606 if (GET_CODE (dst
) == MEM
)
7607 dst
= gen_reg_rtx (mode
);
7608 if (GET_CODE (src
) == MEM
)
7609 src
= force_reg (mode
, src
);
7612 /* Emit the instruction. */
7614 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7615 if (reload_in_progress
|| code
== NOT
)
7617 /* Reload doesn't know about the flags register, and doesn't know that
7618 it doesn't want to clobber it. */
7625 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7626 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7629 /* Fix up the destination if needed. */
7630 if (dst
!= operands
[0])
7631 emit_move_insn (operands
[0], dst
);
7634 /* Return TRUE or FALSE depending on whether the unary operator meets the
7635 appropriate constraints. */
7638 ix86_unary_operator_ok (code
, mode
, operands
)
7639 enum rtx_code code ATTRIBUTE_UNUSED
;
7640 enum machine_mode mode ATTRIBUTE_UNUSED
;
7641 rtx operands
[2] ATTRIBUTE_UNUSED
;
7643 /* If one of operands is memory, source and destination must match. */
7644 if ((GET_CODE (operands
[0]) == MEM
7645 || GET_CODE (operands
[1]) == MEM
)
7646 && ! rtx_equal_p (operands
[0], operands
[1]))
7651 /* Return TRUE or FALSE depending on whether the first SET in INSN
7652 has source and destination with matching CC modes, and that the
7653 CC mode is at least as constrained as REQ_MODE. */
7656 ix86_match_ccmode (insn
, req_mode
)
7658 enum machine_mode req_mode
;
7661 enum machine_mode set_mode
;
7663 set
= PATTERN (insn
);
7664 if (GET_CODE (set
) == PARALLEL
)
7665 set
= XVECEXP (set
, 0, 0);
7666 if (GET_CODE (set
) != SET
)
7668 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
7671 set_mode
= GET_MODE (SET_DEST (set
));
7675 if (req_mode
!= CCNOmode
7676 && (req_mode
!= CCmode
7677 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
7681 if (req_mode
== CCGCmode
)
7685 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
7689 if (req_mode
== CCZmode
)
7699 return (GET_MODE (SET_SRC (set
)) == set_mode
);
7702 /* Generate insn patterns to do an integer compare of OPERANDS. */
7705 ix86_expand_int_compare (code
, op0
, op1
)
7709 enum machine_mode cmpmode
;
7712 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
7713 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
7715 /* This is very simple, but making the interface the same as in the
7716 FP case makes the rest of the code easier. */
7717 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
7718 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
7720 /* Return the test that should be put into the flags user, i.e.
7721 the bcc, scc, or cmov instruction. */
7722 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
7725 /* Figure out whether to use ordered or unordered fp comparisons.
7726 Return the appropriate mode to use. */
7729 ix86_fp_compare_mode (code
)
7730 enum rtx_code code ATTRIBUTE_UNUSED
;
7732 /* ??? In order to make all comparisons reversible, we do all comparisons
7733 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7734 all forms trapping and nontrapping comparisons, we can make inequality
7735 comparisons trapping again, since it results in better code when using
7736 FCOM based compares. */
7737 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
7741 ix86_cc_mode (code
, op0
, op1
)
7745 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7746 return ix86_fp_compare_mode (code
);
7749 /* Only zero flag is needed. */
7751 case NE
: /* ZF!=0 */
7753 /* Codes needing carry flag. */
7754 case GEU
: /* CF=0 */
7755 case GTU
: /* CF=0 & ZF=0 */
7756 case LTU
: /* CF=1 */
7757 case LEU
: /* CF=1 | ZF=1 */
7759 /* Codes possibly doable only with sign flag when
7760 comparing against zero. */
7761 case GE
: /* SF=OF or SF=0 */
7762 case LT
: /* SF<>OF or SF=1 */
7763 if (op1
== const0_rtx
)
7766 /* For other cases Carry flag is not required. */
7768 /* Codes doable only with sign flag when comparing
7769 against zero, but we miss jump instruction for it
7770 so we need to use relational tests agains overflow
7771 that thus needs to be zero. */
7772 case GT
: /* ZF=0 & SF=OF */
7773 case LE
: /* ZF=1 | SF<>OF */
7774 if (op1
== const0_rtx
)
7778 /* strcmp pattern do (use flags) and combine may ask us for proper
7787 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7790 ix86_use_fcomi_compare (code
)
7791 enum rtx_code code ATTRIBUTE_UNUSED
;
7793 enum rtx_code swapped_code
= swap_condition (code
);
7794 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
7795 || (ix86_fp_comparison_cost (swapped_code
)
7796 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
7799 /* Swap, force into registers, or otherwise massage the two operands
7800 to a fp comparison. The operands are updated in place; the new
7801 comparsion code is returned. */
7803 static enum rtx_code
7804 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
7808 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
7809 rtx op0
= *pop0
, op1
= *pop1
;
7810 enum machine_mode op_mode
= GET_MODE (op0
);
7811 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
7813 /* All of the unordered compare instructions only work on registers.
7814 The same is true of the XFmode compare instructions. The same is
7815 true of the fcomi compare instructions. */
7818 && (fpcmp_mode
== CCFPUmode
7819 || op_mode
== XFmode
7820 || op_mode
== TFmode
7821 || ix86_use_fcomi_compare (code
)))
7823 op0
= force_reg (op_mode
, op0
);
7824 op1
= force_reg (op_mode
, op1
);
7828 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7829 things around if they appear profitable, otherwise force op0
7832 if (standard_80387_constant_p (op0
) == 0
7833 || (GET_CODE (op0
) == MEM
7834 && ! (standard_80387_constant_p (op1
) == 0
7835 || GET_CODE (op1
) == MEM
)))
7838 tmp
= op0
, op0
= op1
, op1
= tmp
;
7839 code
= swap_condition (code
);
7842 if (GET_CODE (op0
) != REG
)
7843 op0
= force_reg (op_mode
, op0
);
7845 if (CONSTANT_P (op1
))
7847 if (standard_80387_constant_p (op1
))
7848 op1
= force_reg (op_mode
, op1
);
7850 op1
= validize_mem (force_const_mem (op_mode
, op1
));
7854 /* Try to rearrange the comparison to make it cheaper. */
7855 if (ix86_fp_comparison_cost (code
)
7856 > ix86_fp_comparison_cost (swap_condition (code
))
7857 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
7860 tmp
= op0
, op0
= op1
, op1
= tmp
;
7861 code
= swap_condition (code
);
7862 if (GET_CODE (op0
) != REG
)
7863 op0
= force_reg (op_mode
, op0
);
7871 /* Convert comparison codes we use to represent FP comparison to integer
7872 code that will result in proper branch. Return UNKNOWN if no such code
7874 static enum rtx_code
7875 ix86_fp_compare_code_to_integer (code
)
7905 /* Split comparison code CODE into comparisons we can do using branch
7906 instructions. BYPASS_CODE is comparison code for branch that will
7907 branch around FIRST_CODE and SECOND_CODE. If some of branches
7908 is not required, set value to NIL.
7909 We never require more than two branches. */
7911 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
7912 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
7918 /* The fcomi comparison sets flags as follows:
7928 case GT
: /* GTU - CF=0 & ZF=0 */
7929 case GE
: /* GEU - CF=0 */
7930 case ORDERED
: /* PF=0 */
7931 case UNORDERED
: /* PF=1 */
7932 case UNEQ
: /* EQ - ZF=1 */
7933 case UNLT
: /* LTU - CF=1 */
7934 case UNLE
: /* LEU - CF=1 | ZF=1 */
7935 case LTGT
: /* EQ - ZF=0 */
7937 case LT
: /* LTU - CF=1 - fails on unordered */
7939 *bypass_code
= UNORDERED
;
7941 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
7943 *bypass_code
= UNORDERED
;
7945 case EQ
: /* EQ - ZF=1 - fails on unordered */
7947 *bypass_code
= UNORDERED
;
7949 case NE
: /* NE - ZF=0 - fails on unordered */
7951 *second_code
= UNORDERED
;
7953 case UNGE
: /* GEU - CF=0 - fails on unordered */
7955 *second_code
= UNORDERED
;
7957 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
7959 *second_code
= UNORDERED
;
7964 if (!TARGET_IEEE_FP
)
7971 /* Return cost of comparison done fcom + arithmetics operations on AX.
7972 All following functions do use number of instructions as an cost metrics.
7973 In future this should be tweaked to compute bytes for optimize_size and
7974 take into account performance of various instructions on various CPUs. */
7976 ix86_fp_comparison_arithmetics_cost (code
)
7979 if (!TARGET_IEEE_FP
)
7981 /* The cost of code output by ix86_expand_fp_compare. */
8009 /* Return cost of comparison done using fcomi operation.
8010 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8012 ix86_fp_comparison_fcomi_cost (code
)
8015 enum rtx_code bypass_code
, first_code
, second_code
;
8016 /* Return arbitarily high cost when instruction is not supported - this
8017 prevents gcc from using it. */
8020 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8021 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8024 /* Return cost of comparison done using sahf operation.
8025 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8027 ix86_fp_comparison_sahf_cost (code
)
8030 enum rtx_code bypass_code
, first_code
, second_code
;
8031 /* Return arbitarily high cost when instruction is not preferred - this
8032 avoids gcc from using it. */
8033 if (!TARGET_USE_SAHF
&& !optimize_size
)
8035 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8036 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8039 /* Compute cost of the comparison done using any method.
8040 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8042 ix86_fp_comparison_cost (code
)
8045 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8048 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8049 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8051 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8052 if (min
> sahf_cost
)
8054 if (min
> fcomi_cost
)
8059 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8062 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8064 rtx op0
, op1
, scratch
;
8068 enum machine_mode fpcmp_mode
, intcmp_mode
;
8070 int cost
= ix86_fp_comparison_cost (code
);
8071 enum rtx_code bypass_code
, first_code
, second_code
;
8073 fpcmp_mode
= ix86_fp_compare_mode (code
);
8074 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8077 *second_test
= NULL_RTX
;
8079 *bypass_test
= NULL_RTX
;
8081 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8083 /* Do fcomi/sahf based test when profitable. */
8084 if ((bypass_code
== NIL
|| bypass_test
)
8085 && (second_code
== NIL
|| second_test
)
8086 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8090 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8091 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8097 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8098 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8100 scratch
= gen_reg_rtx (HImode
);
8101 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8102 emit_insn (gen_x86_sahf_1 (scratch
));
8105 /* The FP codes work out to act like unsigned. */
8106 intcmp_mode
= fpcmp_mode
;
8108 if (bypass_code
!= NIL
)
8109 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8110 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8112 if (second_code
!= NIL
)
8113 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8114 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8119 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8120 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8121 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8123 scratch
= gen_reg_rtx (HImode
);
8124 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8126 /* In the unordered case, we have to check C2 for NaN's, which
8127 doesn't happen to work out to anything nice combination-wise.
8128 So do some bit twiddling on the value we've got in AH to come
8129 up with an appropriate set of condition codes. */
8131 intcmp_mode
= CCNOmode
;
8136 if (code
== GT
|| !TARGET_IEEE_FP
)
8138 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8143 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8144 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8145 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8146 intcmp_mode
= CCmode
;
8152 if (code
== LT
&& TARGET_IEEE_FP
)
8154 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8155 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8156 intcmp_mode
= CCmode
;
8161 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8167 if (code
== GE
|| !TARGET_IEEE_FP
)
8169 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8174 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8175 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8182 if (code
== LE
&& TARGET_IEEE_FP
)
8184 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8185 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8186 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8187 intcmp_mode
= CCmode
;
8192 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8198 if (code
== EQ
&& TARGET_IEEE_FP
)
8200 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8201 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8202 intcmp_mode
= CCmode
;
8207 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8214 if (code
== NE
&& TARGET_IEEE_FP
)
8216 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8217 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8223 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8229 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8233 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8242 /* Return the test that should be put into the flags user, i.e.
8243 the bcc, scc, or cmov instruction. */
8244 return gen_rtx_fmt_ee (code
, VOIDmode
,
8245 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8250 ix86_expand_compare (code
, second_test
, bypass_test
)
8252 rtx
*second_test
, *bypass_test
;
8255 op0
= ix86_compare_op0
;
8256 op1
= ix86_compare_op1
;
8259 *second_test
= NULL_RTX
;
8261 *bypass_test
= NULL_RTX
;
8263 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8264 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8265 second_test
, bypass_test
);
8267 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8272 /* Return true if the CODE will result in nontrivial jump sequence. */
8274 ix86_fp_jump_nontrivial_p (code
)
8277 enum rtx_code bypass_code
, first_code
, second_code
;
8280 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8281 return bypass_code
!= NIL
|| second_code
!= NIL
;
8285 ix86_expand_branch (code
, label
)
8291 switch (GET_MODE (ix86_compare_op0
))
8297 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8298 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8299 gen_rtx_LABEL_REF (VOIDmode
, label
),
8301 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8311 enum rtx_code bypass_code
, first_code
, second_code
;
8313 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8316 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8318 /* Check whether we will use the natural sequence with one jump. If
8319 so, we can expand jump early. Otherwise delay expansion by
8320 creating compound insn to not confuse optimizers. */
8321 if (bypass_code
== NIL
&& second_code
== NIL
8324 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8325 gen_rtx_LABEL_REF (VOIDmode
, label
),
8330 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8331 ix86_compare_op0
, ix86_compare_op1
);
8332 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8333 gen_rtx_LABEL_REF (VOIDmode
, label
),
8335 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8337 use_fcomi
= ix86_use_fcomi_compare (code
);
8338 vec
= rtvec_alloc (3 + !use_fcomi
);
8339 RTVEC_ELT (vec
, 0) = tmp
;
8341 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8343 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8346 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8348 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8356 /* Expand DImode branch into multiple compare+branch. */
8358 rtx lo
[2], hi
[2], label2
;
8359 enum rtx_code code1
, code2
, code3
;
8361 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8363 tmp
= ix86_compare_op0
;
8364 ix86_compare_op0
= ix86_compare_op1
;
8365 ix86_compare_op1
= tmp
;
8366 code
= swap_condition (code
);
8368 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8369 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8371 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8372 avoid two branches. This costs one extra insn, so disable when
8373 optimizing for size. */
8375 if ((code
== EQ
|| code
== NE
)
8377 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8382 if (hi
[1] != const0_rtx
)
8383 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8384 NULL_RTX
, 0, OPTAB_WIDEN
);
8387 if (lo
[1] != const0_rtx
)
8388 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8389 NULL_RTX
, 0, OPTAB_WIDEN
);
8391 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8392 NULL_RTX
, 0, OPTAB_WIDEN
);
8394 ix86_compare_op0
= tmp
;
8395 ix86_compare_op1
= const0_rtx
;
8396 ix86_expand_branch (code
, label
);
8400 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8401 op1 is a constant and the low word is zero, then we can just
8402 examine the high word. */
8404 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8407 case LT
: case LTU
: case GE
: case GEU
:
8408 ix86_compare_op0
= hi
[0];
8409 ix86_compare_op1
= hi
[1];
8410 ix86_expand_branch (code
, label
);
8416 /* Otherwise, we need two or three jumps. */
8418 label2
= gen_label_rtx ();
8421 code2
= swap_condition (code
);
8422 code3
= unsigned_condition (code
);
8426 case LT
: case GT
: case LTU
: case GTU
:
8429 case LE
: code1
= LT
; code2
= GT
; break;
8430 case GE
: code1
= GT
; code2
= LT
; break;
8431 case LEU
: code1
= LTU
; code2
= GTU
; break;
8432 case GEU
: code1
= GTU
; code2
= LTU
; break;
8434 case EQ
: code1
= NIL
; code2
= NE
; break;
8435 case NE
: code2
= NIL
; break;
8443 * if (hi(a) < hi(b)) goto true;
8444 * if (hi(a) > hi(b)) goto false;
8445 * if (lo(a) < lo(b)) goto true;
8449 ix86_compare_op0
= hi
[0];
8450 ix86_compare_op1
= hi
[1];
8453 ix86_expand_branch (code1
, label
);
8455 ix86_expand_branch (code2
, label2
);
8457 ix86_compare_op0
= lo
[0];
8458 ix86_compare_op1
= lo
[1];
8459 ix86_expand_branch (code3
, label
);
8462 emit_label (label2
);
8471 /* Split branch based on floating point condition. */
8473 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
8475 rtx op1
, op2
, target1
, target2
, tmp
;
8478 rtx label
= NULL_RTX
;
8480 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8483 if (target2
!= pc_rtx
)
8486 code
= reverse_condition_maybe_unordered (code
);
8491 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8492 tmp
, &second
, &bypass
);
8494 if (split_branch_probability
>= 0)
8496 /* Distribute the probabilities across the jumps.
8497 Assume the BYPASS and SECOND to be always test
8499 probability
= split_branch_probability
;
8501 /* Value of 1 is low enough to make no need for probability
8502 to be updated. Later we may run some experiments and see
8503 if unordered values are more frequent in practice. */
8505 bypass_probability
= 1;
8507 second_probability
= 1;
8509 if (bypass
!= NULL_RTX
)
8511 label
= gen_label_rtx ();
8512 i
= emit_jump_insn (gen_rtx_SET
8514 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8516 gen_rtx_LABEL_REF (VOIDmode
,
8519 if (bypass_probability
>= 0)
8521 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8522 GEN_INT (bypass_probability
),
8525 i
= emit_jump_insn (gen_rtx_SET
8527 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8528 condition
, target1
, target2
)));
8529 if (probability
>= 0)
8531 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8532 GEN_INT (probability
),
8534 if (second
!= NULL_RTX
)
8536 i
= emit_jump_insn (gen_rtx_SET
8538 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8540 if (second_probability
>= 0)
8542 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8543 GEN_INT (second_probability
),
8546 if (label
!= NULL_RTX
)
8551 ix86_expand_setcc (code
, dest
)
8555 rtx ret
, tmp
, tmpreg
;
8556 rtx second_test
, bypass_test
;
8558 if (GET_MODE (ix86_compare_op0
) == DImode
8560 return 0; /* FAIL */
8562 if (GET_MODE (dest
) != QImode
)
8565 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8566 PUT_MODE (ret
, QImode
);
8571 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8572 if (bypass_test
|| second_test
)
8574 rtx test
= second_test
;
8576 rtx tmp2
= gen_reg_rtx (QImode
);
8583 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8585 PUT_MODE (test
, QImode
);
8586 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8589 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8591 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8594 return 1; /* DONE */
8598 ix86_expand_int_movcc (operands
)
8601 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8602 rtx compare_seq
, compare_op
;
8603 rtx second_test
, bypass_test
;
8604 enum machine_mode mode
= GET_MODE (operands
[0]);
8606 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8607 In case comparsion is done with immediate, we can convert it to LTU or
8608 GEU by altering the integer. */
8610 if ((code
== LEU
|| code
== GTU
)
8611 && GET_CODE (ix86_compare_op1
) == CONST_INT
8613 && INTVAL (ix86_compare_op1
) != -1
8614 /* For x86-64, the immediate field in the instruction is 32-bit
8615 signed, so we can't increment a DImode value above 0x7fffffff. */
8617 || GET_MODE (ix86_compare_op0
) != DImode
8618 || INTVAL (ix86_compare_op1
) != 0x7fffffff)
8619 && GET_CODE (operands
[2]) == CONST_INT
8620 && GET_CODE (operands
[3]) == CONST_INT
)
8626 ix86_compare_op1
= gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
8627 GET_MODE (ix86_compare_op0
));
8631 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8632 compare_seq
= get_insns ();
8635 compare_code
= GET_CODE (compare_op
);
8637 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8638 HImode insns, we'd be swallowed in word prefix ops. */
8641 && (mode
!= DImode
|| TARGET_64BIT
)
8642 && GET_CODE (operands
[2]) == CONST_INT
8643 && GET_CODE (operands
[3]) == CONST_INT
)
8645 rtx out
= operands
[0];
8646 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
8647 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
8650 if ((compare_code
== LTU
|| compare_code
== GEU
)
8651 && !second_test
&& !bypass_test
)
8654 /* Detect overlap between destination and compare sources. */
8657 /* To simplify rest of code, restrict to the GEU case. */
8658 if (compare_code
== LTU
)
8663 compare_code
= reverse_condition (compare_code
);
8664 code
= reverse_condition (code
);
8668 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
8669 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
8670 tmp
= gen_reg_rtx (mode
);
8672 emit_insn (compare_seq
);
8674 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
8676 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
8688 tmp
= expand_simple_binop (mode
, PLUS
,
8690 tmp
, 1, OPTAB_DIRECT
);
8701 tmp
= expand_simple_binop (mode
, IOR
,
8703 tmp
, 1, OPTAB_DIRECT
);
8705 else if (diff
== -1 && ct
)
8715 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
8717 tmp
= expand_simple_binop (mode
, PLUS
,
8719 tmp
, 1, OPTAB_DIRECT
);
8726 * andl cf - ct, dest
8731 tmp
= expand_simple_binop (mode
, AND
,
8733 gen_int_mode (cf
- ct
, mode
),
8734 tmp
, 1, OPTAB_DIRECT
);
8736 tmp
= expand_simple_binop (mode
, PLUS
,
8738 tmp
, 1, OPTAB_DIRECT
);
8742 emit_move_insn (out
, tmp
);
8744 return 1; /* DONE */
8751 tmp
= ct
, ct
= cf
, cf
= tmp
;
8753 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8755 /* We may be reversing unordered compare to normal compare, that
8756 is not valid in general (we may convert non-trapping condition
8757 to trapping one), however on i386 we currently emit all
8758 comparisons unordered. */
8759 compare_code
= reverse_condition_maybe_unordered (compare_code
);
8760 code
= reverse_condition_maybe_unordered (code
);
8764 compare_code
= reverse_condition (compare_code
);
8765 code
= reverse_condition (code
);
8770 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
8771 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
8773 if (ix86_compare_op1
== const0_rtx
8774 && (code
== LT
|| code
== GE
))
8775 compare_code
= code
;
8776 else if (ix86_compare_op1
== constm1_rtx
)
8780 else if (code
== GT
)
8785 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8786 if (compare_code
!= NIL
8787 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
8788 && (cf
== -1 || ct
== -1))
8790 /* If lea code below could be used, only optimize
8791 if it results in a 2 insn sequence. */
8793 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8794 || diff
== 3 || diff
== 5 || diff
== 9)
8795 || (compare_code
== LT
&& ct
== -1)
8796 || (compare_code
== GE
&& cf
== -1))
8799 * notl op1 (if necessary)
8807 code
= reverse_condition (code
);
8810 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8811 ix86_compare_op1
, VOIDmode
, 0, -1);
8813 out
= expand_simple_binop (mode
, IOR
,
8815 out
, 1, OPTAB_DIRECT
);
8816 if (out
!= operands
[0])
8817 emit_move_insn (operands
[0], out
);
8819 return 1; /* DONE */
8823 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
8824 || diff
== 3 || diff
== 5 || diff
== 9)
8825 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
8831 * lea cf(dest*(ct-cf)),dest
8835 * This also catches the degenerate setcc-only case.
8841 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8842 ix86_compare_op1
, VOIDmode
, 0, 1);
8845 /* On x86_64 the lea instruction operates on Pmode, so we need to get arithmetics
8846 done in proper mode to match. */
8853 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
8857 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
8863 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
8867 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
8873 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
8874 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
8876 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
8877 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
8881 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
8883 if (out
!= operands
[0])
8884 emit_move_insn (operands
[0], out
);
8886 return 1; /* DONE */
8890 * General case: Jumpful:
8891 * xorl dest,dest cmpl op1, op2
8892 * cmpl op1, op2 movl ct, dest
8894 * decl dest movl cf, dest
8895 * andl (cf-ct),dest 1:
8900 * This is reasonably steep, but branch mispredict costs are
8901 * high on modern cpus, so consider failing only if optimizing
8904 * %%% Parameterize branch_cost on the tuning architecture, then
8905 * use that. The 80386 couldn't care less about mispredicts.
8908 if (!optimize_size
&& !TARGET_CMOVE
)
8914 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
8915 /* We may be reversing unordered compare to normal compare,
8916 that is not valid in general (we may convert non-trapping
8917 condition to trapping one), however on i386 we currently
8918 emit all comparisons unordered. */
8919 code
= reverse_condition_maybe_unordered (code
);
8922 code
= reverse_condition (code
);
8923 if (compare_code
!= NIL
)
8924 compare_code
= reverse_condition (compare_code
);
8928 if (compare_code
!= NIL
)
8930 /* notl op1 (if needed)
8935 For x < 0 (resp. x <= -1) there will be no notl,
8936 so if possible swap the constants to get rid of the
8938 True/false will be -1/0 while code below (store flag
8939 followed by decrement) is 0/-1, so the constants need
8940 to be exchanged once more. */
8942 if (compare_code
== GE
|| !cf
)
8944 code
= reverse_condition (code
);
8949 HOST_WIDE_INT tmp
= cf
;
8954 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8955 ix86_compare_op1
, VOIDmode
, 0, -1);
8959 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
8960 ix86_compare_op1
, VOIDmode
, 0, 1);
8962 out
= expand_simple_binop (mode
, PLUS
,
8964 out
, 1, OPTAB_DIRECT
);
8967 out
= expand_simple_binop (mode
, AND
,
8969 gen_int_mode (cf
- ct
, mode
),
8970 out
, 1, OPTAB_DIRECT
);
8971 out
= expand_simple_binop (mode
, PLUS
,
8973 out
, 1, OPTAB_DIRECT
);
8974 if (out
!= operands
[0])
8975 emit_move_insn (operands
[0], out
);
8977 return 1; /* DONE */
8983 /* Try a few things more with specific constants and a variable. */
8986 rtx var
, orig_out
, out
, tmp
;
8989 return 0; /* FAIL */
8991 /* If one of the two operands is an interesting constant, load a
8992 constant with the above and mask it in with a logical operation. */
8994 if (GET_CODE (operands
[2]) == CONST_INT
)
8997 if (INTVAL (operands
[2]) == 0)
8998 operands
[3] = constm1_rtx
, op
= and_optab
;
8999 else if (INTVAL (operands
[2]) == -1)
9000 operands
[3] = const0_rtx
, op
= ior_optab
;
9002 return 0; /* FAIL */
9004 else if (GET_CODE (operands
[3]) == CONST_INT
)
9007 if (INTVAL (operands
[3]) == 0)
9008 operands
[2] = constm1_rtx
, op
= and_optab
;
9009 else if (INTVAL (operands
[3]) == -1)
9010 operands
[2] = const0_rtx
, op
= ior_optab
;
9012 return 0; /* FAIL */
9015 return 0; /* FAIL */
9017 orig_out
= operands
[0];
9018 tmp
= gen_reg_rtx (mode
);
9021 /* Recurse to get the constant loaded. */
9022 if (ix86_expand_int_movcc (operands
) == 0)
9023 return 0; /* FAIL */
9025 /* Mask in the interesting variable. */
9026 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9028 if (out
!= orig_out
)
9029 emit_move_insn (orig_out
, out
);
9031 return 1; /* DONE */
9035 * For comparison with above,
9045 if (! nonimmediate_operand (operands
[2], mode
))
9046 operands
[2] = force_reg (mode
, operands
[2]);
9047 if (! nonimmediate_operand (operands
[3], mode
))
9048 operands
[3] = force_reg (mode
, operands
[3]);
9050 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9052 rtx tmp
= gen_reg_rtx (mode
);
9053 emit_move_insn (tmp
, operands
[3]);
9056 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9058 rtx tmp
= gen_reg_rtx (mode
);
9059 emit_move_insn (tmp
, operands
[2]);
9062 if (! register_operand (operands
[2], VOIDmode
)
9063 && ! register_operand (operands
[3], VOIDmode
))
9064 operands
[2] = force_reg (mode
, operands
[2]);
9066 emit_insn (compare_seq
);
9067 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9068 gen_rtx_IF_THEN_ELSE (mode
,
9069 compare_op
, operands
[2],
9072 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9073 gen_rtx_IF_THEN_ELSE (mode
,
9078 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9079 gen_rtx_IF_THEN_ELSE (mode
,
9084 return 1; /* DONE */
9088 ix86_expand_fp_movcc (operands
)
9093 rtx compare_op
, second_test
, bypass_test
;
9095 /* For SF/DFmode conditional moves based on comparisons
9096 in same mode, we may want to use SSE min/max instructions. */
9097 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9098 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9099 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9100 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9102 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9103 /* We may be called from the post-reload splitter. */
9104 && (!REG_P (operands
[0])
9105 || SSE_REG_P (operands
[0])
9106 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9108 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9109 code
= GET_CODE (operands
[1]);
9111 /* See if we have (cross) match between comparison operands and
9112 conditional move operands. */
9113 if (rtx_equal_p (operands
[2], op1
))
9118 code
= reverse_condition_maybe_unordered (code
);
9120 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9122 /* Check for min operation. */
9125 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9126 if (memory_operand (op0
, VOIDmode
))
9127 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9128 if (GET_MODE (operands
[0]) == SFmode
)
9129 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9131 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9134 /* Check for max operation. */
9137 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9138 if (memory_operand (op0
, VOIDmode
))
9139 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9140 if (GET_MODE (operands
[0]) == SFmode
)
9141 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9143 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9147 /* Manage condition to be sse_comparison_operator. In case we are
9148 in non-ieee mode, try to canonicalize the destination operand
9149 to be first in the comparison - this helps reload to avoid extra
9151 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9152 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9154 rtx tmp
= ix86_compare_op0
;
9155 ix86_compare_op0
= ix86_compare_op1
;
9156 ix86_compare_op1
= tmp
;
9157 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9158 VOIDmode
, ix86_compare_op0
,
9161 /* Similary try to manage result to be first operand of conditional
9162 move. We also don't support the NE comparison on SSE, so try to
9164 if ((rtx_equal_p (operands
[0], operands
[3])
9165 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9166 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9168 rtx tmp
= operands
[2];
9169 operands
[2] = operands
[3];
9171 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9172 (GET_CODE (operands
[1])),
9173 VOIDmode
, ix86_compare_op0
,
9176 if (GET_MODE (operands
[0]) == SFmode
)
9177 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9178 operands
[2], operands
[3],
9179 ix86_compare_op0
, ix86_compare_op1
));
9181 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9182 operands
[2], operands
[3],
9183 ix86_compare_op0
, ix86_compare_op1
));
9187 /* The floating point conditional move instructions don't directly
9188 support conditions resulting from a signed integer comparison. */
9190 code
= GET_CODE (operands
[1]);
9191 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9193 /* The floating point conditional move instructions don't directly
9194 support signed integer comparisons. */
9196 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9198 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9200 tmp
= gen_reg_rtx (QImode
);
9201 ix86_expand_setcc (code
, tmp
);
9203 ix86_compare_op0
= tmp
;
9204 ix86_compare_op1
= const0_rtx
;
9205 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9207 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9209 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9210 emit_move_insn (tmp
, operands
[3]);
9213 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9215 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9216 emit_move_insn (tmp
, operands
[2]);
9220 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9221 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9226 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9227 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9232 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9233 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9241 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9242 works for floating pointer parameters and nonoffsetable memories.
9243 For pushes, it returns just stack offsets; the values will be saved
9244 in the right order. Maximally three parts are generated. */
9247 ix86_split_to_parts (operand
, parts
, mode
)
9250 enum machine_mode mode
;
9255 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9257 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9259 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9261 if (size
< 2 || size
> 3)
9264 /* Optimize constant pool reference to immediates. This is used by fp
9265 moves, that force all constants to memory to allow combining. */
9266 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9268 rtx tmp
= maybe_get_pool_constant (operand
);
9273 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9275 /* The only non-offsetable memories we handle are pushes. */
9276 if (! push_operand (operand
, VOIDmode
))
9279 operand
= copy_rtx (operand
);
9280 PUT_MODE (operand
, Pmode
);
9281 parts
[0] = parts
[1] = parts
[2] = operand
;
9283 else if (!TARGET_64BIT
)
9286 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9289 if (REG_P (operand
))
9291 if (!reload_completed
)
9293 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9294 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9296 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9298 else if (offsettable_memref_p (operand
))
9300 operand
= adjust_address (operand
, SImode
, 0);
9302 parts
[1] = adjust_address (operand
, SImode
, 4);
9304 parts
[2] = adjust_address (operand
, SImode
, 8);
9306 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9311 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9316 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9317 parts
[2] = gen_int_mode (l
[2], SImode
);
9320 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9325 parts
[1] = gen_int_mode (l
[1], SImode
);
9326 parts
[0] = gen_int_mode (l
[0], SImode
);
9335 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9336 if (mode
== XFmode
|| mode
== TFmode
)
9338 if (REG_P (operand
))
9340 if (!reload_completed
)
9342 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9343 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9345 else if (offsettable_memref_p (operand
))
9347 operand
= adjust_address (operand
, DImode
, 0);
9349 parts
[1] = adjust_address (operand
, SImode
, 8);
9351 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9356 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9357 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9358 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9359 if (HOST_BITS_PER_WIDE_INT
>= 64)
9362 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9363 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9366 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9367 parts
[1] = gen_int_mode (l
[2], SImode
);
9377 /* Emit insns to perform a move or push of DI, DF, and XF values.
9378 Return false when normal moves are needed; true when all required
9379 insns have been emitted. Operands 2-4 contain the input values
9380 int the correct order; operands 5-7 contain the output values. */
9383 ix86_split_long_move (operands
)
9390 enum machine_mode mode
= GET_MODE (operands
[0]);
9392 /* The DFmode expanders may ask us to move double.
9393 For 64bit target this is single move. By hiding the fact
9394 here we simplify i386.md splitters. */
9395 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9397 /* Optimize constant pool reference to immediates. This is used by
9398 fp moves, that force all constants to memory to allow combining. */
9400 if (GET_CODE (operands
[1]) == MEM
9401 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9402 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9403 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9404 if (push_operand (operands
[0], VOIDmode
))
9406 operands
[0] = copy_rtx (operands
[0]);
9407 PUT_MODE (operands
[0], Pmode
);
9410 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9411 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9412 emit_move_insn (operands
[0], operands
[1]);
9416 /* The only non-offsettable memory we handle is push. */
9417 if (push_operand (operands
[0], VOIDmode
))
9419 else if (GET_CODE (operands
[0]) == MEM
9420 && ! offsettable_memref_p (operands
[0]))
9423 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9424 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9426 /* When emitting push, take care for source operands on the stack. */
9427 if (push
&& GET_CODE (operands
[1]) == MEM
9428 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9431 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9432 XEXP (part
[1][2], 0));
9433 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9434 XEXP (part
[1][1], 0));
9437 /* We need to do copy in the right order in case an address register
9438 of the source overlaps the destination. */
9439 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9441 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9443 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9446 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9449 /* Collision in the middle part can be handled by reordering. */
9450 if (collisions
== 1 && nparts
== 3
9451 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9454 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9455 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9458 /* If there are more collisions, we can't handle it by reordering.
9459 Do an lea to the last part and use only one colliding move. */
9460 else if (collisions
> 1)
9463 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
9464 XEXP (part
[1][0], 0)));
9465 part
[1][0] = change_address (part
[1][0],
9466 TARGET_64BIT
? DImode
: SImode
,
9467 part
[0][nparts
- 1]);
9468 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
9470 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
9480 /* We use only first 12 bytes of TFmode value, but for pushing we
9481 are required to adjust stack as if we were pushing real 16byte
9483 if (mode
== TFmode
&& !TARGET_64BIT
)
9484 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
9486 emit_move_insn (part
[0][2], part
[1][2]);
9491 /* In 64bit mode we don't have 32bit push available. In case this is
9492 register, it is OK - we will just use larger counterpart. We also
9493 retype memory - these comes from attempt to avoid REX prefix on
9494 moving of second half of TFmode value. */
9495 if (GET_MODE (part
[1][1]) == SImode
)
9497 if (GET_CODE (part
[1][1]) == MEM
)
9498 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9499 else if (REG_P (part
[1][1]))
9500 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9503 if (GET_MODE (part
[1][0]) == SImode
)
9504 part
[1][0] = part
[1][1];
9507 emit_move_insn (part
[0][1], part
[1][1]);
9508 emit_move_insn (part
[0][0], part
[1][0]);
9512 /* Choose correct order to not overwrite the source before it is copied. */
9513 if ((REG_P (part
[0][0])
9514 && REG_P (part
[1][1])
9515 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9517 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9519 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9523 operands
[2] = part
[0][2];
9524 operands
[3] = part
[0][1];
9525 operands
[4] = part
[0][0];
9526 operands
[5] = part
[1][2];
9527 operands
[6] = part
[1][1];
9528 operands
[7] = part
[1][0];
9532 operands
[2] = part
[0][1];
9533 operands
[3] = part
[0][0];
9534 operands
[5] = part
[1][1];
9535 operands
[6] = part
[1][0];
9542 operands
[2] = part
[0][0];
9543 operands
[3] = part
[0][1];
9544 operands
[4] = part
[0][2];
9545 operands
[5] = part
[1][0];
9546 operands
[6] = part
[1][1];
9547 operands
[7] = part
[1][2];
9551 operands
[2] = part
[0][0];
9552 operands
[3] = part
[0][1];
9553 operands
[5] = part
[1][0];
9554 operands
[6] = part
[1][1];
9557 emit_move_insn (operands
[2], operands
[5]);
9558 emit_move_insn (operands
[3], operands
[6]);
9560 emit_move_insn (operands
[4], operands
[7]);
9566 ix86_split_ashldi (operands
, scratch
)
9567 rtx
*operands
, scratch
;
9569 rtx low
[2], high
[2];
9572 if (GET_CODE (operands
[2]) == CONST_INT
)
9574 split_di (operands
, 2, low
, high
);
9575 count
= INTVAL (operands
[2]) & 63;
9579 emit_move_insn (high
[0], low
[1]);
9580 emit_move_insn (low
[0], const0_rtx
);
9583 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9587 if (!rtx_equal_p (operands
[0], operands
[1]))
9588 emit_move_insn (operands
[0], operands
[1]);
9589 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9590 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9595 if (!rtx_equal_p (operands
[0], operands
[1]))
9596 emit_move_insn (operands
[0], operands
[1]);
9598 split_di (operands
, 1, low
, high
);
9600 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9601 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9603 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9605 if (! no_new_pseudos
)
9606 scratch
= force_reg (SImode
, const0_rtx
);
9608 emit_move_insn (scratch
, const0_rtx
);
9610 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9614 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
9619 ix86_split_ashrdi (operands
, scratch
)
9620 rtx
*operands
, scratch
;
9622 rtx low
[2], high
[2];
9625 if (GET_CODE (operands
[2]) == CONST_INT
)
9627 split_di (operands
, 2, low
, high
);
9628 count
= INTVAL (operands
[2]) & 63;
9632 emit_move_insn (low
[0], high
[1]);
9634 if (! reload_completed
)
9635 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
9638 emit_move_insn (high
[0], low
[0]);
9639 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
9643 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9647 if (!rtx_equal_p (operands
[0], operands
[1]))
9648 emit_move_insn (operands
[0], operands
[1]);
9649 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9650 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
9655 if (!rtx_equal_p (operands
[0], operands
[1]))
9656 emit_move_insn (operands
[0], operands
[1]);
9658 split_di (operands
, 1, low
, high
);
9660 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9661 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
9663 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9665 if (! no_new_pseudos
)
9666 scratch
= gen_reg_rtx (SImode
);
9667 emit_move_insn (scratch
, high
[0]);
9668 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
9669 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9673 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
9678 ix86_split_lshrdi (operands
, scratch
)
9679 rtx
*operands
, scratch
;
9681 rtx low
[2], high
[2];
9684 if (GET_CODE (operands
[2]) == CONST_INT
)
9686 split_di (operands
, 2, low
, high
);
9687 count
= INTVAL (operands
[2]) & 63;
9691 emit_move_insn (low
[0], high
[1]);
9692 emit_move_insn (high
[0], const0_rtx
);
9695 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
9699 if (!rtx_equal_p (operands
[0], operands
[1]))
9700 emit_move_insn (operands
[0], operands
[1]);
9701 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
9702 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
9707 if (!rtx_equal_p (operands
[0], operands
[1]))
9708 emit_move_insn (operands
[0], operands
[1]);
9710 split_di (operands
, 1, low
, high
);
9712 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
9713 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
9715 /* Heh. By reversing the arguments, we can reuse this pattern. */
9716 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9718 if (! no_new_pseudos
)
9719 scratch
= force_reg (SImode
, const0_rtx
);
9721 emit_move_insn (scratch
, const0_rtx
);
9723 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
9727 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
9731 /* Helper function for the string operations below. Dest VARIABLE whether
9732 it is aligned to VALUE bytes. If true, jump to the label. */
9734 ix86_expand_aligntest (variable
, value
)
9738 rtx label
= gen_label_rtx ();
9739 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
9740 if (GET_MODE (variable
) == DImode
)
9741 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
9743 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
9744 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
9749 /* Adjust COUNTER by the VALUE. */
9751 ix86_adjust_counter (countreg
, value
)
9753 HOST_WIDE_INT value
;
9755 if (GET_MODE (countreg
) == DImode
)
9756 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
9758 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
9761 /* Zero extend possibly SImode EXP to Pmode register. */
9763 ix86_zero_extend_to_Pmode (exp
)
9767 if (GET_MODE (exp
) == VOIDmode
)
9768 return force_reg (Pmode
, exp
);
9769 if (GET_MODE (exp
) == Pmode
)
9770 return copy_to_mode_reg (Pmode
, exp
);
9771 r
= gen_reg_rtx (Pmode
);
9772 emit_insn (gen_zero_extendsidi2 (r
, exp
));
9776 /* Expand string move (memcpy) operation. Use i386 string operations when
9777 profitable. expand_clrstr contains similar code. */
9779 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
9780 rtx dst
, src
, count_exp
, align_exp
;
9782 rtx srcreg
, destreg
, countreg
;
9783 enum machine_mode counter_mode
;
9784 HOST_WIDE_INT align
= 0;
9785 unsigned HOST_WIDE_INT count
= 0;
9790 if (GET_CODE (align_exp
) == CONST_INT
)
9791 align
= INTVAL (align_exp
);
9793 /* This simple hack avoids all inlining code and simplifies code below. */
9794 if (!TARGET_ALIGN_STRINGOPS
)
9797 if (GET_CODE (count_exp
) == CONST_INT
)
9798 count
= INTVAL (count_exp
);
9800 /* Figure out proper mode for counter. For 32bits it is always SImode,
9801 for 64bits use SImode when possible, otherwise DImode.
9802 Set count to number of bytes copied when known at compile time. */
9803 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
9804 || x86_64_zero_extended_value (count_exp
))
9805 counter_mode
= SImode
;
9807 counter_mode
= DImode
;
9809 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
9812 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
9813 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9815 emit_insn (gen_cld ());
9817 /* When optimizing for size emit simple rep ; movsb instruction for
9818 counts not divisible by 4. */
9820 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
9822 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
9824 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
9825 destreg
, srcreg
, countreg
));
9827 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
9828 destreg
, srcreg
, countreg
));
9831 /* For constant aligned (or small unaligned) copies use rep movsl
9832 followed by code copying the rest. For PentiumPro ensure 8 byte
9833 alignment to allow rep movsl acceleration. */
9837 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
9838 || optimize_size
|| count
< (unsigned int) 64))
9840 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
9841 if (count
& ~(size
- 1))
9843 countreg
= copy_to_mode_reg (counter_mode
,
9844 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
9845 & (TARGET_64BIT
? -1 : 0x3fffffff)));
9846 countreg
= ix86_zero_extend_to_Pmode (countreg
);
9850 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
9851 destreg
, srcreg
, countreg
));
9853 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
9854 destreg
, srcreg
, countreg
));
9857 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
9858 destreg
, srcreg
, countreg
));
9860 if (size
== 8 && (count
& 0x04))
9861 emit_insn (gen_strmovsi (destreg
, srcreg
));
9863 emit_insn (gen_strmovhi (destreg
, srcreg
));
9865 emit_insn (gen_strmovqi (destreg
, srcreg
));
9867 /* The generic code based on the glibc implementation:
9868 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9869 allowing accelerated copying there)
9870 - copy the data using rep movsl
9876 int desired_alignment
= (TARGET_PENTIUMPRO
9877 && (count
== 0 || count
>= (unsigned int) 260)
9878 ? 8 : UNITS_PER_WORD
);
9880 /* In case we don't know anything about the alignment, default to
9881 library version, since it is usually equally fast and result in
9883 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
9889 if (TARGET_SINGLE_STRINGOP
)
9890 emit_insn (gen_cld ());
9892 countreg2
= gen_reg_rtx (Pmode
);
9893 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
9895 /* We don't use loops to align destination and to copy parts smaller
9896 than 4 bytes, because gcc is able to optimize such code better (in
9897 the case the destination or the count really is aligned, gcc is often
9898 able to predict the branches) and also it is friendlier to the
9899 hardware branch prediction.
9901 Using loops is benefical for generic case, because we can
9902 handle small counts using the loops. Many CPUs (such as Athlon)
9903 have large REP prefix setup costs.
9905 This is quite costy. Maybe we can revisit this decision later or
9906 add some customizability to this code. */
9908 if (count
== 0 && align
< desired_alignment
)
9910 label
= gen_label_rtx ();
9911 emit_cmp_and_jump_insns (countreg
, GEN_INT (UNITS_PER_WORD
- 1),
9912 LEU
, 0, counter_mode
, 1, label
);
9916 rtx label
= ix86_expand_aligntest (destreg
, 1);
9917 emit_insn (gen_strmovqi (destreg
, srcreg
));
9918 ix86_adjust_counter (countreg
, 1);
9920 LABEL_NUSES (label
) = 1;
9924 rtx label
= ix86_expand_aligntest (destreg
, 2);
9925 emit_insn (gen_strmovhi (destreg
, srcreg
));
9926 ix86_adjust_counter (countreg
, 2);
9928 LABEL_NUSES (label
) = 1;
9930 if (align
<= 4 && desired_alignment
> 4)
9932 rtx label
= ix86_expand_aligntest (destreg
, 4);
9933 emit_insn (gen_strmovsi (destreg
, srcreg
));
9934 ix86_adjust_counter (countreg
, 4);
9936 LABEL_NUSES (label
) = 1;
9939 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
9942 LABEL_NUSES (label
) = 1;
9945 if (!TARGET_SINGLE_STRINGOP
)
9946 emit_insn (gen_cld ());
9949 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
9951 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
9952 destreg
, srcreg
, countreg2
));
9956 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
9957 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
9958 destreg
, srcreg
, countreg2
));
9964 LABEL_NUSES (label
) = 1;
9966 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
9967 emit_insn (gen_strmovsi (destreg
, srcreg
));
9968 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
9970 rtx label
= ix86_expand_aligntest (countreg
, 4);
9971 emit_insn (gen_strmovsi (destreg
, srcreg
));
9973 LABEL_NUSES (label
) = 1;
9975 if (align
> 2 && count
!= 0 && (count
& 2))
9976 emit_insn (gen_strmovhi (destreg
, srcreg
));
9977 if (align
<= 2 || count
== 0)
9979 rtx label
= ix86_expand_aligntest (countreg
, 2);
9980 emit_insn (gen_strmovhi (destreg
, srcreg
));
9982 LABEL_NUSES (label
) = 1;
9984 if (align
> 1 && count
!= 0 && (count
& 1))
9985 emit_insn (gen_strmovqi (destreg
, srcreg
));
9986 if (align
<= 1 || count
== 0)
9988 rtx label
= ix86_expand_aligntest (countreg
, 1);
9989 emit_insn (gen_strmovqi (destreg
, srcreg
));
9991 LABEL_NUSES (label
) = 1;
9995 insns
= get_insns ();
9998 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
10003 /* Expand string clear operation (bzero). Use i386 string operations when
10004 profitable. expand_movstr contains similar code. */
10006 ix86_expand_clrstr (src
, count_exp
, align_exp
)
10007 rtx src
, count_exp
, align_exp
;
10009 rtx destreg
, zeroreg
, countreg
;
10010 enum machine_mode counter_mode
;
10011 HOST_WIDE_INT align
= 0;
10012 unsigned HOST_WIDE_INT count
= 0;
10014 if (GET_CODE (align_exp
) == CONST_INT
)
10015 align
= INTVAL (align_exp
);
10017 /* This simple hack avoids all inlining code and simplifies code below. */
10018 if (!TARGET_ALIGN_STRINGOPS
)
10021 if (GET_CODE (count_exp
) == CONST_INT
)
10022 count
= INTVAL (count_exp
);
10023 /* Figure out proper mode for counter. For 32bits it is always SImode,
10024 for 64bits use SImode when possible, otherwise DImode.
10025 Set count to number of bytes copied when known at compile time. */
10026 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10027 || x86_64_zero_extended_value (count_exp
))
10028 counter_mode
= SImode
;
10030 counter_mode
= DImode
;
10032 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10034 emit_insn (gen_cld ());
10036 /* When optimizing for size emit simple rep ; movsb instruction for
10037 counts not divisible by 4. */
10039 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10041 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10042 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10044 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10045 destreg
, countreg
));
10047 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10048 destreg
, countreg
));
10050 else if (count
!= 0
10052 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10053 || optimize_size
|| count
< (unsigned int) 64))
10055 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10056 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10057 if (count
& ~(size
- 1))
10059 countreg
= copy_to_mode_reg (counter_mode
,
10060 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10061 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10062 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10066 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10067 destreg
, countreg
));
10069 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10070 destreg
, countreg
));
10073 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10074 destreg
, countreg
));
10076 if (size
== 8 && (count
& 0x04))
10077 emit_insn (gen_strsetsi (destreg
,
10078 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10080 emit_insn (gen_strsethi (destreg
,
10081 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10083 emit_insn (gen_strsetqi (destreg
,
10084 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10090 /* Compute desired alignment of the string operation. */
10091 int desired_alignment
= (TARGET_PENTIUMPRO
10092 && (count
== 0 || count
>= (unsigned int) 260)
10093 ? 8 : UNITS_PER_WORD
);
10095 /* In case we don't know anything about the alignment, default to
10096 library version, since it is usually equally fast and result in
10098 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10101 if (TARGET_SINGLE_STRINGOP
)
10102 emit_insn (gen_cld ());
10104 countreg2
= gen_reg_rtx (Pmode
);
10105 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10106 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10108 if (count
== 0 && align
< desired_alignment
)
10110 label
= gen_label_rtx ();
10111 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10112 LEU
, 0, counter_mode
, 1, label
);
10116 rtx label
= ix86_expand_aligntest (destreg
, 1);
10117 emit_insn (gen_strsetqi (destreg
,
10118 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10119 ix86_adjust_counter (countreg
, 1);
10120 emit_label (label
);
10121 LABEL_NUSES (label
) = 1;
10125 rtx label
= ix86_expand_aligntest (destreg
, 2);
10126 emit_insn (gen_strsethi (destreg
,
10127 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10128 ix86_adjust_counter (countreg
, 2);
10129 emit_label (label
);
10130 LABEL_NUSES (label
) = 1;
10132 if (align
<= 4 && desired_alignment
> 4)
10134 rtx label
= ix86_expand_aligntest (destreg
, 4);
10135 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10136 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10138 ix86_adjust_counter (countreg
, 4);
10139 emit_label (label
);
10140 LABEL_NUSES (label
) = 1;
10143 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10145 emit_label (label
);
10146 LABEL_NUSES (label
) = 1;
10150 if (!TARGET_SINGLE_STRINGOP
)
10151 emit_insn (gen_cld ());
10154 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10156 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10157 destreg
, countreg2
));
10161 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10162 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10163 destreg
, countreg2
));
10167 emit_label (label
);
10168 LABEL_NUSES (label
) = 1;
10171 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10172 emit_insn (gen_strsetsi (destreg
,
10173 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10174 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10176 rtx label
= ix86_expand_aligntest (countreg
, 2);
10177 emit_insn (gen_strsetsi (destreg
,
10178 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10179 emit_label (label
);
10180 LABEL_NUSES (label
) = 1;
10182 if (align
> 2 && count
!= 0 && (count
& 2))
10183 emit_insn (gen_strsethi (destreg
,
10184 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10185 if (align
<= 2 || count
== 0)
10187 rtx label
= ix86_expand_aligntest (countreg
, 2);
10188 emit_insn (gen_strsethi (destreg
,
10189 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10190 emit_label (label
);
10191 LABEL_NUSES (label
) = 1;
10193 if (align
> 1 && count
!= 0 && (count
& 1))
10194 emit_insn (gen_strsetqi (destreg
,
10195 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10196 if (align
<= 1 || count
== 0)
10198 rtx label
= ix86_expand_aligntest (countreg
, 1);
10199 emit_insn (gen_strsetqi (destreg
,
10200 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10201 emit_label (label
);
10202 LABEL_NUSES (label
) = 1;
10207 /* Expand strlen. */
10209 ix86_expand_strlen (out
, src
, eoschar
, align
)
10210 rtx out
, src
, eoschar
, align
;
10212 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10214 /* The generic case of strlen expander is long. Avoid it's
10215 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10217 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10218 && !TARGET_INLINE_ALL_STRINGOPS
10220 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10223 addr
= force_reg (Pmode
, XEXP (src
, 0));
10224 scratch1
= gen_reg_rtx (Pmode
);
10226 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10229 /* Well it seems that some optimizer does not combine a call like
10230 foo(strlen(bar), strlen(bar));
10231 when the move and the subtraction is done here. It does calculate
10232 the length just once when these instructions are done inside of
10233 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10234 often used and I use one fewer register for the lifetime of
10235 output_strlen_unroll() this is better. */
10237 emit_move_insn (out
, addr
);
10239 ix86_expand_strlensi_unroll_1 (out
, align
);
10241 /* strlensi_unroll_1 returns the address of the zero at the end of
10242 the string, like memchr(), so compute the length by subtracting
10243 the start address. */
10245 emit_insn (gen_subdi3 (out
, out
, addr
));
10247 emit_insn (gen_subsi3 (out
, out
, addr
));
10251 scratch2
= gen_reg_rtx (Pmode
);
10252 scratch3
= gen_reg_rtx (Pmode
);
10253 scratch4
= force_reg (Pmode
, constm1_rtx
);
10255 emit_move_insn (scratch3
, addr
);
10256 eoschar
= force_reg (QImode
, eoschar
);
10258 emit_insn (gen_cld ());
10261 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
10262 align
, scratch4
, scratch3
));
10263 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10264 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10268 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
10269 align
, scratch4
, scratch3
));
10270 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10271 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10277 /* Expand the appropriate insns for doing strlen if not just doing
10280 out = result, initialized with the start address
10281 align_rtx = alignment of the address.
10282 scratch = scratch register, initialized with the startaddress when
10283 not aligned, otherwise undefined
10285 This is just the body. It needs the initialisations mentioned above and
10286 some address computing at the end. These things are done in i386.md. */
10289 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
10290 rtx out
, align_rtx
;
10294 rtx align_2_label
= NULL_RTX
;
10295 rtx align_3_label
= NULL_RTX
;
10296 rtx align_4_label
= gen_label_rtx ();
10297 rtx end_0_label
= gen_label_rtx ();
10299 rtx tmpreg
= gen_reg_rtx (SImode
);
10300 rtx scratch
= gen_reg_rtx (SImode
);
10303 if (GET_CODE (align_rtx
) == CONST_INT
)
10304 align
= INTVAL (align_rtx
);
10306 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10308 /* Is there a known alignment and is it less than 4? */
10311 rtx scratch1
= gen_reg_rtx (Pmode
);
10312 emit_move_insn (scratch1
, out
);
10313 /* Is there a known alignment and is it not 2? */
10316 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10317 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10319 /* Leave just the 3 lower bits. */
10320 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10321 NULL_RTX
, 0, OPTAB_WIDEN
);
10323 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10324 Pmode
, 1, align_4_label
);
10325 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
10326 Pmode
, 1, align_2_label
);
10327 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
10328 Pmode
, 1, align_3_label
);
10332 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10333 check if is aligned to 4 - byte. */
10335 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
10336 NULL_RTX
, 0, OPTAB_WIDEN
);
10338 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10339 Pmode
, 1, align_4_label
);
10342 mem
= gen_rtx_MEM (QImode
, out
);
10344 /* Now compare the bytes. */
10346 /* Compare the first n unaligned byte on a byte per byte basis. */
10347 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10348 QImode
, 1, end_0_label
);
10350 /* Increment the address. */
10352 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10354 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10356 /* Not needed with an alignment of 2 */
10359 emit_label (align_2_label
);
10361 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10365 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10367 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10369 emit_label (align_3_label
);
10372 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10376 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10378 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10381 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10382 align this loop. It gives only huge programs, but does not help to
10384 emit_label (align_4_label
);
10386 mem
= gen_rtx_MEM (SImode
, out
);
10387 emit_move_insn (scratch
, mem
);
10389 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10391 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10393 /* This formula yields a nonzero result iff one of the bytes is zero.
10394 This saves three branches inside loop and many cycles. */
10396 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10397 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10398 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10399 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10400 gen_int_mode (0x80808080, SImode
)));
10401 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10406 rtx reg
= gen_reg_rtx (SImode
);
10407 rtx reg2
= gen_reg_rtx (Pmode
);
10408 emit_move_insn (reg
, tmpreg
);
10409 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10411 /* If zero is not in the first two bytes, move two bytes forward. */
10412 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10413 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10414 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10415 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10416 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10419 /* Emit lea manually to avoid clobbering of flags. */
10420 emit_insn (gen_rtx_SET (SImode
, reg2
,
10421 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
10423 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10424 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10425 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10426 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10433 rtx end_2_label
= gen_label_rtx ();
10434 /* Is zero in the first two bytes? */
10436 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10437 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10438 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10439 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10440 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10442 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10443 JUMP_LABEL (tmp
) = end_2_label
;
10445 /* Not in the first two. Move two bytes forward. */
10446 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10448 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
10450 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
10452 emit_label (end_2_label
);
10456 /* Avoid branch in fixing the byte. */
10457 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10458 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10460 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
10462 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
10464 emit_label (end_0_label
);
10468 ix86_expand_call (retval
, fnaddr
, callarg1
, callarg2
, pop
)
10469 rtx retval
, fnaddr
, callarg1
, callarg2
, pop
;
10471 rtx use
= NULL
, call
;
10473 if (pop
== const0_rtx
)
10475 if (TARGET_64BIT
&& pop
)
10478 /* Static functions and indirect calls don't need the pic register. */
10479 if (! TARGET_64BIT
&& flag_pic
10480 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10481 && ! SYMBOL_REF_FLAG (XEXP (fnaddr
, 0)))
10482 use_reg (&use
, pic_offset_table_rtx
);
10484 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10486 rtx al
= gen_rtx_REG (QImode
, 0);
10487 emit_move_insn (al
, callarg2
);
10488 use_reg (&use
, al
);
10491 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10493 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10494 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10497 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10499 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10502 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10503 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10504 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10507 call
= emit_call_insn (call
);
10509 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10513 /* Clear stack slot assignments remembered from previous functions.
10514 This is called from INIT_EXPANDERS once before RTL is emitted for each
10517 static struct machine_function
*
10518 ix86_init_machine_status ()
10520 return ggc_alloc_cleared (sizeof (struct machine_function
));
10523 /* Return a MEM corresponding to a stack slot with mode MODE.
10524 Allocate a new slot if necessary.
10526 The RTL for a function can have several slots available: N is
10527 which slot to use. */
10530 assign_386_stack_local (mode
, n
)
10531 enum machine_mode mode
;
10534 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
10537 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
10538 ix86_stack_locals
[(int) mode
][n
]
10539 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
10541 return ix86_stack_locals
[(int) mode
][n
];
10544 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10546 static GTY(()) rtx ix86_tls_symbol
;
10548 ix86_tls_get_addr ()
10551 if (!ix86_tls_symbol
)
10553 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, (TARGET_GNU_TLS
10554 ? "___tls_get_addr"
10555 : "__tls_get_addr"));
10558 return ix86_tls_symbol
;
10561 /* Calculate the length of the memory address in the instruction
10562 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10565 memory_address_length (addr
)
10568 struct ix86_address parts
;
10569 rtx base
, index
, disp
;
10572 if (GET_CODE (addr
) == PRE_DEC
10573 || GET_CODE (addr
) == POST_INC
10574 || GET_CODE (addr
) == PRE_MODIFY
10575 || GET_CODE (addr
) == POST_MODIFY
)
10578 if (! ix86_decompose_address (addr
, &parts
))
10582 index
= parts
.index
;
10586 /* Register Indirect. */
10587 if (base
&& !index
&& !disp
)
10589 /* Special cases: ebp and esp need the two-byte modrm form. */
10590 if (addr
== stack_pointer_rtx
10591 || addr
== arg_pointer_rtx
10592 || addr
== frame_pointer_rtx
10593 || addr
== hard_frame_pointer_rtx
)
10597 /* Direct Addressing. */
10598 else if (disp
&& !base
&& !index
)
10603 /* Find the length of the displacement constant. */
10606 if (GET_CODE (disp
) == CONST_INT
10607 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
10613 /* An index requires the two-byte modrm form. */
10621 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10622 is set, expect that insn have 8bit immediate alternative. */
10624 ix86_attr_length_immediate_default (insn
, shortform
)
10630 extract_insn_cached (insn
);
10631 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10632 if (CONSTANT_P (recog_data
.operand
[i
]))
10637 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
10638 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
10642 switch (get_attr_mode (insn
))
10653 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
10658 fatal_insn ("unknown insn mode", insn
);
10664 /* Compute default value for "length_address" attribute. */
10666 ix86_attr_length_address_default (insn
)
10670 extract_insn_cached (insn
);
10671 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10672 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10674 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
10680 /* Return the maximum number of instructions a cpu can issue. */
10687 case PROCESSOR_PENTIUM
:
10691 case PROCESSOR_PENTIUMPRO
:
10692 case PROCESSOR_PENTIUM4
:
10693 case PROCESSOR_ATHLON
:
10701 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10702 by DEP_INSN and nothing set by DEP_INSN. */
10705 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
10706 rtx insn
, dep_insn
;
10707 enum attr_type insn_type
;
10711 /* Simplify the test for uninteresting insns. */
10712 if (insn_type
!= TYPE_SETCC
10713 && insn_type
!= TYPE_ICMOV
10714 && insn_type
!= TYPE_FCMOV
10715 && insn_type
!= TYPE_IBR
)
10718 if ((set
= single_set (dep_insn
)) != 0)
10720 set
= SET_DEST (set
);
10723 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
10724 && XVECLEN (PATTERN (dep_insn
), 0) == 2
10725 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
10726 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
10728 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10729 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
10734 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
10737 /* This test is true if the dependent insn reads the flags but
10738 not any other potentially set register. */
10739 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
10742 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
10748 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10749 address with operands set by DEP_INSN. */
10752 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
10753 rtx insn
, dep_insn
;
10754 enum attr_type insn_type
;
10758 if (insn_type
== TYPE_LEA
10761 addr
= PATTERN (insn
);
10762 if (GET_CODE (addr
) == SET
)
10764 else if (GET_CODE (addr
) == PARALLEL
10765 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
10766 addr
= XVECEXP (addr
, 0, 0);
10769 addr
= SET_SRC (addr
);
10774 extract_insn_cached (insn
);
10775 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
10776 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
10778 addr
= XEXP (recog_data
.operand
[i
], 0);
10785 return modified_in_p (addr
, dep_insn
);
10789 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
10790 rtx insn
, link
, dep_insn
;
10793 enum attr_type insn_type
, dep_insn_type
;
10794 enum attr_memory memory
, dep_memory
;
10796 int dep_insn_code_number
;
10798 /* Anti and output depenancies have zero cost on all CPUs. */
10799 if (REG_NOTE_KIND (link
) != 0)
10802 dep_insn_code_number
= recog_memoized (dep_insn
);
10804 /* If we can't recognize the insns, we can't really do anything. */
10805 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
10808 insn_type
= get_attr_type (insn
);
10809 dep_insn_type
= get_attr_type (dep_insn
);
10813 case PROCESSOR_PENTIUM
:
10814 /* Address Generation Interlock adds a cycle of latency. */
10815 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10818 /* ??? Compares pair with jump/setcc. */
10819 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
10822 /* Floating point stores require value to be ready one cycle ealier. */
10823 if (insn_type
== TYPE_FMOV
10824 && get_attr_memory (insn
) == MEMORY_STORE
10825 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10829 case PROCESSOR_PENTIUMPRO
:
10830 memory
= get_attr_memory (insn
);
10831 dep_memory
= get_attr_memory (dep_insn
);
10833 /* Since we can't represent delayed latencies of load+operation,
10834 increase the cost here for non-imov insns. */
10835 if (dep_insn_type
!= TYPE_IMOV
10836 && dep_insn_type
!= TYPE_FMOV
10837 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
10840 /* INT->FP conversion is expensive. */
10841 if (get_attr_fp_int_src (dep_insn
))
10844 /* There is one cycle extra latency between an FP op and a store. */
10845 if (insn_type
== TYPE_FMOV
10846 && (set
= single_set (dep_insn
)) != NULL_RTX
10847 && (set2
= single_set (insn
)) != NULL_RTX
10848 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
10849 && GET_CODE (SET_DEST (set2
)) == MEM
)
10852 /* Show ability of reorder buffer to hide latency of load by executing
10853 in parallel with previous instruction in case
10854 previous instruction is not needed to compute the address. */
10855 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10856 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10858 /* Claim moves to take one cycle, as core can issue one load
10859 at time and the next load can start cycle later. */
10860 if (dep_insn_type
== TYPE_IMOV
10861 || dep_insn_type
== TYPE_FMOV
)
10869 memory
= get_attr_memory (insn
);
10870 dep_memory
= get_attr_memory (dep_insn
);
10871 /* The esp dependency is resolved before the instruction is really
10873 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
10874 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
10877 /* Since we can't represent delayed latencies of load+operation,
10878 increase the cost here for non-imov insns. */
10879 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10880 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
10882 /* INT->FP conversion is expensive. */
10883 if (get_attr_fp_int_src (dep_insn
))
10886 /* Show ability of reorder buffer to hide latency of load by executing
10887 in parallel with previous instruction in case
10888 previous instruction is not needed to compute the address. */
10889 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10890 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10892 /* Claim moves to take one cycle, as core can issue one load
10893 at time and the next load can start cycle later. */
10894 if (dep_insn_type
== TYPE_IMOV
10895 || dep_insn_type
== TYPE_FMOV
)
10904 case PROCESSOR_ATHLON
:
10905 memory
= get_attr_memory (insn
);
10906 dep_memory
= get_attr_memory (dep_insn
);
10908 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
10910 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
10915 /* Show ability of reorder buffer to hide latency of load by executing
10916 in parallel with previous instruction in case
10917 previous instruction is not needed to compute the address. */
10918 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
10919 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
10921 /* Claim moves to take one cycle, as core can issue one load
10922 at time and the next load can start cycle later. */
10923 if (dep_insn_type
== TYPE_IMOV
10924 || dep_insn_type
== TYPE_FMOV
)
10926 else if (cost
>= 3)
10941 struct ppro_sched_data
10944 int issued_this_cycle
;
10948 static enum attr_ppro_uops
10949 ix86_safe_ppro_uops (insn
)
10952 if (recog_memoized (insn
) >= 0)
10953 return get_attr_ppro_uops (insn
);
10955 return PPRO_UOPS_MANY
;
10959 ix86_dump_ppro_packet (dump
)
10962 if (ix86_sched_data
.ppro
.decode
[0])
10964 fprintf (dump
, "PPRO packet: %d",
10965 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
10966 if (ix86_sched_data
.ppro
.decode
[1])
10967 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
10968 if (ix86_sched_data
.ppro
.decode
[2])
10969 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
10970 fputc ('\n', dump
);
10974 /* We're beginning a new block. Initialize data structures as necessary. */
10977 ix86_sched_init (dump
, sched_verbose
, veclen
)
10978 FILE *dump ATTRIBUTE_UNUSED
;
10979 int sched_verbose ATTRIBUTE_UNUSED
;
10980 int veclen ATTRIBUTE_UNUSED
;
10982 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
10985 /* Shift INSN to SLOT, and shift everything else down. */
10988 ix86_reorder_insn (insnp
, slot
)
10995 insnp
[0] = insnp
[1];
10996 while (++insnp
!= slot
);
11002 ix86_sched_reorder_ppro (ready
, e_ready
)
11007 enum attr_ppro_uops cur_uops
;
11008 int issued_this_cycle
;
11012 /* At this point .ppro.decode contains the state of the three
11013 decoders from last "cycle". That is, those insns that were
11014 actually independent. But here we're scheduling for the
11015 decoder, and we may find things that are decodable in the
11018 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11019 issued_this_cycle
= 0;
11022 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11024 /* If the decoders are empty, and we've a complex insn at the
11025 head of the priority queue, let it issue without complaint. */
11026 if (decode
[0] == NULL
)
11028 if (cur_uops
== PPRO_UOPS_MANY
)
11030 decode
[0] = *insnp
;
11034 /* Otherwise, search for a 2-4 uop unsn to issue. */
11035 while (cur_uops
!= PPRO_UOPS_FEW
)
11037 if (insnp
== ready
)
11039 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11042 /* If so, move it to the head of the line. */
11043 if (cur_uops
== PPRO_UOPS_FEW
)
11044 ix86_reorder_insn (insnp
, e_ready
);
11046 /* Issue the head of the queue. */
11047 issued_this_cycle
= 1;
11048 decode
[0] = *e_ready
--;
11051 /* Look for simple insns to fill in the other two slots. */
11052 for (i
= 1; i
< 3; ++i
)
11053 if (decode
[i
] == NULL
)
11055 if (ready
> e_ready
)
11059 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11060 while (cur_uops
!= PPRO_UOPS_ONE
)
11062 if (insnp
== ready
)
11064 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11067 /* Found one. Move it to the head of the queue and issue it. */
11068 if (cur_uops
== PPRO_UOPS_ONE
)
11070 ix86_reorder_insn (insnp
, e_ready
);
11071 decode
[i
] = *e_ready
--;
11072 issued_this_cycle
++;
11076 /* ??? Didn't find one. Ideally, here we would do a lazy split
11077 of 2-uop insns, issue one and queue the other. */
11081 if (issued_this_cycle
== 0)
11082 issued_this_cycle
= 1;
11083 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11086 /* We are about to being issuing insns for this clock cycle.
11087 Override the default sort algorithm to better slot instructions. */
11089 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11090 FILE *dump ATTRIBUTE_UNUSED
;
11091 int sched_verbose ATTRIBUTE_UNUSED
;
11094 int clock_var ATTRIBUTE_UNUSED
;
11096 int n_ready
= *n_readyp
;
11097 rtx
*e_ready
= ready
+ n_ready
- 1;
11099 /* Make sure to go ahead and initialize key items in
11100 ix86_sched_data if we are not going to bother trying to
11101 reorder the ready queue. */
11104 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11113 case PROCESSOR_PENTIUMPRO
:
11114 ix86_sched_reorder_ppro (ready
, e_ready
);
11119 return ix86_issue_rate ();
11122 /* We are about to issue INSN. Return the number of insns left on the
11123 ready queue that can be issued this cycle. */
11126 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11130 int can_issue_more
;
11136 return can_issue_more
- 1;
11138 case PROCESSOR_PENTIUMPRO
:
11140 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11142 if (uops
== PPRO_UOPS_MANY
)
11145 ix86_dump_ppro_packet (dump
);
11146 ix86_sched_data
.ppro
.decode
[0] = insn
;
11147 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11148 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11150 ix86_dump_ppro_packet (dump
);
11151 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11153 else if (uops
== PPRO_UOPS_FEW
)
11156 ix86_dump_ppro_packet (dump
);
11157 ix86_sched_data
.ppro
.decode
[0] = insn
;
11158 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11159 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11163 for (i
= 0; i
< 3; ++i
)
11164 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11166 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11174 ix86_dump_ppro_packet (dump
);
11175 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11176 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11177 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11181 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11186 ia32_use_dfa_pipeline_interface ()
11188 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11193 /* How many alternative schedules to try. This should be as wide as the
11194 scheduling freedom in the DFA, but no wider. Making this value too
11195 large results extra work for the scheduler. */
11198 ia32_multipass_dfa_lookahead ()
11200 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11207 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11208 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11212 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
11214 rtx dstref
, srcref
, dstreg
, srcreg
;
11218 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
11220 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
11224 /* Subroutine of above to actually do the updating by recursively walking
11228 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
11230 rtx dstref
, srcref
, dstreg
, srcreg
;
11232 enum rtx_code code
= GET_CODE (x
);
11233 const char *format_ptr
= GET_RTX_FORMAT (code
);
11236 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
11237 MEM_COPY_ATTRIBUTES (x
, dstref
);
11238 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
11239 MEM_COPY_ATTRIBUTES (x
, srcref
);
11241 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
11243 if (*format_ptr
== 'e')
11244 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
11246 else if (*format_ptr
== 'E')
11247 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
11248 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
11253 /* Compute the alignment given to a constant that is being placed in memory.
11254 EXP is the constant and ALIGN is the alignment that the object would
11256 The value of this function is used instead of that alignment to align
11260 ix86_constant_alignment (exp
, align
)
11264 if (TREE_CODE (exp
) == REAL_CST
)
11266 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11268 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11271 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11278 /* Compute the alignment for a static variable.
11279 TYPE is the data type, and ALIGN is the alignment that
11280 the object would ordinarily have. The value of this function is used
11281 instead of that alignment to align the object. */
11284 ix86_data_alignment (type
, align
)
11288 if (AGGREGATE_TYPE_P (type
)
11289 && TYPE_SIZE (type
)
11290 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11291 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11292 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11295 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11296 to 16byte boundary. */
11299 if (AGGREGATE_TYPE_P (type
)
11300 && TYPE_SIZE (type
)
11301 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11302 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11303 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11307 if (TREE_CODE (type
) == ARRAY_TYPE
)
11309 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11311 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11314 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11317 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11319 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11322 else if ((TREE_CODE (type
) == RECORD_TYPE
11323 || TREE_CODE (type
) == UNION_TYPE
11324 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11325 && TYPE_FIELDS (type
))
11327 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11329 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11332 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11333 || TREE_CODE (type
) == INTEGER_TYPE
)
11335 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11337 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11344 /* Compute the alignment for a local variable.
11345 TYPE is the data type, and ALIGN is the alignment that
11346 the object would ordinarily have. The value of this macro is used
11347 instead of that alignment to align the object. */
11350 ix86_local_alignment (type
, align
)
11354 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11355 to 16byte boundary. */
11358 if (AGGREGATE_TYPE_P (type
)
11359 && TYPE_SIZE (type
)
11360 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11361 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11362 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11365 if (TREE_CODE (type
) == ARRAY_TYPE
)
11367 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11369 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11372 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11374 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11376 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11379 else if ((TREE_CODE (type
) == RECORD_TYPE
11380 || TREE_CODE (type
) == UNION_TYPE
11381 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11382 && TYPE_FIELDS (type
))
11384 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11386 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11389 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11390 || TREE_CODE (type
) == INTEGER_TYPE
)
11393 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11395 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11401 /* Emit RTL insns to initialize the variable parts of a trampoline.
11402 FNADDR is an RTX for the address of the function's pure code.
11403 CXT is an RTX for the static chain value for the function. */
11405 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
11406 rtx tramp
, fnaddr
, cxt
;
11410 /* Compute offset from the end of the jmp to the target function. */
11411 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11412 plus_constant (tramp
, 10),
11413 NULL_RTX
, 1, OPTAB_DIRECT
);
11414 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11415 gen_int_mode (0xb9, QImode
));
11416 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11417 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11418 gen_int_mode (0xe9, QImode
));
11419 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11424 /* Try to load address using shorter movl instead of movabs.
11425 We may want to support movq for kernel mode, but kernel does not use
11426 trampolines at the moment. */
11427 if (x86_64_zero_extended_value (fnaddr
))
11429 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11430 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11431 gen_int_mode (0xbb41, HImode
));
11432 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11433 gen_lowpart (SImode
, fnaddr
));
11438 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11439 gen_int_mode (0xbb49, HImode
));
11440 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11444 /* Load static chain using movabs to r10. */
11445 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11446 gen_int_mode (0xba49, HImode
));
11447 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11450 /* Jump to the r11 */
11451 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11452 gen_int_mode (0xff49, HImode
));
11453 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11454 gen_int_mode (0xe3, QImode
));
11456 if (offset
> TRAMPOLINE_SIZE
)
/* Register the MD builtin NAME with type TYPE and code CODE, but only
   when the ISA feature bits in MASK are enabled in target_flags.  */
#define def_builtin(MASK, NAME, TYPE, CODE)				\
do {									\
  if ((MASK) & target_flags)						\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL);	\
} while (0)
11467 struct builtin_description
11469 const unsigned int mask
;
11470 const enum insn_code icode
;
11471 const char *const name
;
11472 const enum ix86_builtins code
;
11473 const enum rtx_code comparison
;
11474 const unsigned int flag
;
11477 /* Used for builtins that are enabled both by -msse and -msse2. */
11478 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11480 static const struct builtin_description bdesc_comi
[] =
11482 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, EQ
, 0 },
11483 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, LT
, 0 },
11484 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, LE
, 0 },
11485 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, LT
, 1 },
11486 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, LE
, 1 },
11487 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, NE
, 0 },
11488 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, EQ
, 0 },
11489 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, LT
, 0 },
11490 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, LE
, 0 },
11491 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, LT
, 1 },
11492 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, LE
, 1 },
11493 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, NE
, 0 },
11494 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, EQ
, 0 },
11495 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, LT
, 0 },
11496 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, LE
, 0 },
11497 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, LT
, 1 },
11498 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, LE
, 1 },
11499 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, NE
, 0 },
11500 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, EQ
, 0 },
11501 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, LT
, 0 },
11502 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, LE
, 0 },
11503 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, LT
, 1 },
11504 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, LE
, 1 },
11505 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, NE
, 0 },
11508 static const struct builtin_description bdesc_2arg
[] =
11511 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11512 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11513 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11514 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11515 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11516 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11517 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11518 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11520 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11521 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11522 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11523 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11524 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11525 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11526 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11527 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11528 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11529 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11530 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11531 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11532 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11533 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11534 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11535 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS
, LT
, 1 },
11536 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS
, LE
, 1 },
11537 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11538 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11539 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11540 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11541 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, LT
, 1 },
11542 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, LE
, 1 },
11543 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11545 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11546 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11547 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11548 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11550 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11551 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11552 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11553 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11554 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11557 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11558 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11559 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11560 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11561 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11562 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11564 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11565 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11566 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11567 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11568 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11569 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11570 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11571 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11573 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11574 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11575 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11577 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11578 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11579 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11580 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11582 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11583 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11585 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11586 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11587 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11588 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11589 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11590 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11592 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11593 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11594 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11595 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11597 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11598 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11599 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11600 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11601 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11602 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11605 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
11606 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
11607 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
11609 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
11610 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
11612 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
11613 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
11614 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
11615 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
11616 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
11617 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
11619 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
11620 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
11621 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
11622 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
11623 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
11624 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
11626 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
11627 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
11628 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
11629 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
11631 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
11632 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
11635 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
11636 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
11637 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
11638 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
11639 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
11640 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
11641 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
11642 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
11644 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
11645 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
11646 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
11647 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
11648 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
11649 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
11650 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
11651 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
11652 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
11653 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
11654 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
11655 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
11656 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
11657 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
11658 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
11659 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD
, LT
, 1 },
11660 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD
, LE
, 1 },
11661 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
11662 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
11663 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
11664 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
11665 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD
, LT
, 1 },
11666 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD
, LE
, 1 },
11667 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
11669 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
11670 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
11671 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
11672 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
11674 { MASK_SSE2
, CODE_FOR_sse2_anddf3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
11675 { MASK_SSE2
, CODE_FOR_sse2_nanddf3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
11676 { MASK_SSE2
, CODE_FOR_sse2_iordf3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
11677 { MASK_SSE2
, CODE_FOR_sse2_xordf3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
11679 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
11680 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
11681 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
11684 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
11685 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
11686 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
11687 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
11688 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
11689 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
11690 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
11691 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
11693 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
11694 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
11695 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
11696 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
11697 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
11698 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
11699 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
11700 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
11702 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
11703 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
11704 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
11705 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
11707 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
11708 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
11709 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
11710 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
11712 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
11713 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
11715 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
11716 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
11717 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
11718 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
11719 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
11720 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
11722 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
11723 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
11724 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
11725 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
11727 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
11728 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
11729 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
11730 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
11731 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
11732 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
11734 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
11735 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
11736 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
11738 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
11739 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
11741 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
11742 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
11743 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
11744 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
11745 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
11746 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
11748 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
11749 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
11750 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
11751 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
11752 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
11753 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
11755 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
11756 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
11757 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
11758 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
11760 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
11762 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
11763 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
11764 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
11767 static const struct builtin_description bdesc_1arg
[] =
11769 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
11770 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
11772 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
11773 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
11774 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
11776 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
11777 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
11778 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
11779 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
11781 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
11782 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
11783 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
11785 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
11787 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
11788 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
11790 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
11791 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
11792 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
11793 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
11794 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
11796 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
11798 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
11799 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
11801 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
11802 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
11803 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 }
11807 ix86_init_builtins ()
11810 ix86_init_mmx_sse_builtins ();
11813 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11814 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11817 ix86_init_mmx_sse_builtins ()
11819 const struct builtin_description
* d
;
11821 tree endlink
= void_list_node
;
11823 tree pchar_type_node
= build_pointer_type (char_type_node
);
11824 tree pfloat_type_node
= build_pointer_type (float_type_node
);
11825 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
11826 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
11827 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
11830 tree int_ftype_v4sf_v4sf
11831 = build_function_type (integer_type_node
,
11832 tree_cons (NULL_TREE
, V4SF_type_node
,
11833 tree_cons (NULL_TREE
,
11836 tree v4si_ftype_v4sf_v4sf
11837 = build_function_type (V4SI_type_node
,
11838 tree_cons (NULL_TREE
, V4SF_type_node
,
11839 tree_cons (NULL_TREE
,
11842 /* MMX/SSE/integer conversions. */
11843 tree int_ftype_v4sf
11844 = build_function_type (integer_type_node
,
11845 tree_cons (NULL_TREE
, V4SF_type_node
,
11847 tree int_ftype_v8qi
11848 = build_function_type (integer_type_node
,
11849 tree_cons (NULL_TREE
, V8QI_type_node
,
11851 tree v4sf_ftype_v4sf_int
11852 = build_function_type (V4SF_type_node
,
11853 tree_cons (NULL_TREE
, V4SF_type_node
,
11854 tree_cons (NULL_TREE
, integer_type_node
,
11856 tree v4sf_ftype_v4sf_v2si
11857 = build_function_type (V4SF_type_node
,
11858 tree_cons (NULL_TREE
, V4SF_type_node
,
11859 tree_cons (NULL_TREE
, V2SI_type_node
,
11861 tree int_ftype_v4hi_int
11862 = build_function_type (integer_type_node
,
11863 tree_cons (NULL_TREE
, V4HI_type_node
,
11864 tree_cons (NULL_TREE
, integer_type_node
,
11866 tree v4hi_ftype_v4hi_int_int
11867 = build_function_type (V4HI_type_node
,
11868 tree_cons (NULL_TREE
, V4HI_type_node
,
11869 tree_cons (NULL_TREE
, integer_type_node
,
11870 tree_cons (NULL_TREE
,
11873 /* Miscellaneous. */
11874 tree v8qi_ftype_v4hi_v4hi
11875 = build_function_type (V8QI_type_node
,
11876 tree_cons (NULL_TREE
, V4HI_type_node
,
11877 tree_cons (NULL_TREE
, V4HI_type_node
,
11879 tree v4hi_ftype_v2si_v2si
11880 = build_function_type (V4HI_type_node
,
11881 tree_cons (NULL_TREE
, V2SI_type_node
,
11882 tree_cons (NULL_TREE
, V2SI_type_node
,
11884 tree v4sf_ftype_v4sf_v4sf_int
11885 = build_function_type (V4SF_type_node
,
11886 tree_cons (NULL_TREE
, V4SF_type_node
,
11887 tree_cons (NULL_TREE
, V4SF_type_node
,
11888 tree_cons (NULL_TREE
,
11891 tree v2si_ftype_v4hi_v4hi
11892 = build_function_type (V2SI_type_node
,
11893 tree_cons (NULL_TREE
, V4HI_type_node
,
11894 tree_cons (NULL_TREE
, V4HI_type_node
,
11896 tree v4hi_ftype_v4hi_int
11897 = build_function_type (V4HI_type_node
,
11898 tree_cons (NULL_TREE
, V4HI_type_node
,
11899 tree_cons (NULL_TREE
, integer_type_node
,
11901 tree v4hi_ftype_v4hi_di
11902 = build_function_type (V4HI_type_node
,
11903 tree_cons (NULL_TREE
, V4HI_type_node
,
11904 tree_cons (NULL_TREE
,
11905 long_long_integer_type_node
,
11907 tree v2si_ftype_v2si_di
11908 = build_function_type (V2SI_type_node
,
11909 tree_cons (NULL_TREE
, V2SI_type_node
,
11910 tree_cons (NULL_TREE
,
11911 long_long_integer_type_node
,
11913 tree void_ftype_void
11914 = build_function_type (void_type_node
, endlink
);
11915 tree void_ftype_unsigned
11916 = build_function_type (void_type_node
,
11917 tree_cons (NULL_TREE
, unsigned_type_node
,
11919 tree unsigned_ftype_void
11920 = build_function_type (unsigned_type_node
, endlink
);
11922 = build_function_type (long_long_unsigned_type_node
, endlink
);
11923 tree v4sf_ftype_void
11924 = build_function_type (V4SF_type_node
, endlink
);
11925 tree v2si_ftype_v4sf
11926 = build_function_type (V2SI_type_node
,
11927 tree_cons (NULL_TREE
, V4SF_type_node
,
11929 /* Loads/stores. */
11930 tree maskmovq_args
= tree_cons (NULL_TREE
, V8QI_type_node
,
11931 tree_cons (NULL_TREE
, V8QI_type_node
,
11932 tree_cons (NULL_TREE
,
11935 tree void_ftype_v8qi_v8qi_pchar
11936 = build_function_type (void_type_node
, maskmovq_args
);
11937 tree v4sf_ftype_pfloat
11938 = build_function_type (V4SF_type_node
,
11939 tree_cons (NULL_TREE
, pfloat_type_node
,
11941 /* @@@ the type is bogus */
11942 tree v4sf_ftype_v4sf_pv2si
11943 = build_function_type (V4SF_type_node
,
11944 tree_cons (NULL_TREE
, V4SF_type_node
,
11945 tree_cons (NULL_TREE
, pv2si_type_node
,
11947 tree void_ftype_pv2si_v4sf
11948 = build_function_type (void_type_node
,
11949 tree_cons (NULL_TREE
, pv2si_type_node
,
11950 tree_cons (NULL_TREE
, V4SF_type_node
,
11952 tree void_ftype_pfloat_v4sf
11953 = build_function_type (void_type_node
,
11954 tree_cons (NULL_TREE
, pfloat_type_node
,
11955 tree_cons (NULL_TREE
, V4SF_type_node
,
11957 tree void_ftype_pdi_di
11958 = build_function_type (void_type_node
,
11959 tree_cons (NULL_TREE
, pdi_type_node
,
11960 tree_cons (NULL_TREE
,
11961 long_long_unsigned_type_node
,
11963 tree void_ftype_pv2di_v2di
11964 = build_function_type (void_type_node
,
11965 tree_cons (NULL_TREE
, pv2di_type_node
,
11966 tree_cons (NULL_TREE
,
11969 /* Normal vector unops. */
11970 tree v4sf_ftype_v4sf
11971 = build_function_type (V4SF_type_node
,
11972 tree_cons (NULL_TREE
, V4SF_type_node
,
11975 /* Normal vector binops. */
11976 tree v4sf_ftype_v4sf_v4sf
11977 = build_function_type (V4SF_type_node
,
11978 tree_cons (NULL_TREE
, V4SF_type_node
,
11979 tree_cons (NULL_TREE
, V4SF_type_node
,
11981 tree v8qi_ftype_v8qi_v8qi
11982 = build_function_type (V8QI_type_node
,
11983 tree_cons (NULL_TREE
, V8QI_type_node
,
11984 tree_cons (NULL_TREE
, V8QI_type_node
,
11986 tree v4hi_ftype_v4hi_v4hi
11987 = build_function_type (V4HI_type_node
,
11988 tree_cons (NULL_TREE
, V4HI_type_node
,
11989 tree_cons (NULL_TREE
, V4HI_type_node
,
11991 tree v2si_ftype_v2si_v2si
11992 = build_function_type (V2SI_type_node
,
11993 tree_cons (NULL_TREE
, V2SI_type_node
,
11994 tree_cons (NULL_TREE
, V2SI_type_node
,
11996 tree di_ftype_di_di
11997 = build_function_type (long_long_unsigned_type_node
,
11998 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
11999 tree_cons (NULL_TREE
,
12000 long_long_unsigned_type_node
,
12003 tree v2si_ftype_v2sf
12004 = build_function_type (V2SI_type_node
,
12005 tree_cons (NULL_TREE
, V2SF_type_node
,
12007 tree v2sf_ftype_v2si
12008 = build_function_type (V2SF_type_node
,
12009 tree_cons (NULL_TREE
, V2SI_type_node
,
12011 tree v2si_ftype_v2si
12012 = build_function_type (V2SI_type_node
,
12013 tree_cons (NULL_TREE
, V2SI_type_node
,
12015 tree v2sf_ftype_v2sf
12016 = build_function_type (V2SF_type_node
,
12017 tree_cons (NULL_TREE
, V2SF_type_node
,
12019 tree v2sf_ftype_v2sf_v2sf
12020 = build_function_type (V2SF_type_node
,
12021 tree_cons (NULL_TREE
, V2SF_type_node
,
12022 tree_cons (NULL_TREE
,
12025 tree v2si_ftype_v2sf_v2sf
12026 = build_function_type (V2SI_type_node
,
12027 tree_cons (NULL_TREE
, V2SF_type_node
,
12028 tree_cons (NULL_TREE
,
12031 tree pint_type_node
= build_pointer_type (integer_type_node
);
12032 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12033 tree int_ftype_v2df_v2df
12034 = build_function_type (integer_type_node
,
12035 tree_cons (NULL_TREE
, V2DF_type_node
,
12036 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
)));
12039 = build_function_type (intTI_type_node
, endlink
);
12040 tree ti_ftype_ti_ti
12041 = build_function_type (intTI_type_node
,
12042 tree_cons (NULL_TREE
, intTI_type_node
,
12043 tree_cons (NULL_TREE
, intTI_type_node
,
12045 tree void_ftype_pvoid
12046 = build_function_type (void_type_node
,
12047 tree_cons (NULL_TREE
, ptr_type_node
, endlink
));
12049 = build_function_type (V2DI_type_node
,
12050 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
12052 tree v4sf_ftype_v4si
12053 = build_function_type (V4SF_type_node
,
12054 tree_cons (NULL_TREE
, V4SI_type_node
, endlink
));
12055 tree v4si_ftype_v4sf
12056 = build_function_type (V4SI_type_node
,
12057 tree_cons (NULL_TREE
, V4SF_type_node
, endlink
));
12058 tree v2df_ftype_v4si
12059 = build_function_type (V2DF_type_node
,
12060 tree_cons (NULL_TREE
, V4SI_type_node
, endlink
));
12061 tree v4si_ftype_v2df
12062 = build_function_type (V4SI_type_node
,
12063 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12064 tree v2si_ftype_v2df
12065 = build_function_type (V2SI_type_node
,
12066 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12067 tree v4sf_ftype_v2df
12068 = build_function_type (V4SF_type_node
,
12069 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12070 tree v2df_ftype_v2si
12071 = build_function_type (V2DF_type_node
,
12072 tree_cons (NULL_TREE
, V2SI_type_node
, endlink
));
12073 tree v2df_ftype_v4sf
12074 = build_function_type (V2DF_type_node
,
12075 tree_cons (NULL_TREE
, V4SF_type_node
, endlink
));
12076 tree int_ftype_v2df
12077 = build_function_type (integer_type_node
,
12078 tree_cons (NULL_TREE
, V2DF_type_node
, endlink
));
12079 tree v2df_ftype_v2df_int
12080 = build_function_type (V2DF_type_node
,
12081 tree_cons (NULL_TREE
, V2DF_type_node
,
12082 tree_cons (NULL_TREE
, integer_type_node
,
12084 tree v4sf_ftype_v4sf_v2df
12085 = build_function_type (V4SF_type_node
,
12086 tree_cons (NULL_TREE
, V4SF_type_node
,
12087 tree_cons (NULL_TREE
, V2DF_type_node
,
12089 tree v2df_ftype_v2df_v4sf
12090 = build_function_type (V2DF_type_node
,
12091 tree_cons (NULL_TREE
, V2DF_type_node
,
12092 tree_cons (NULL_TREE
, V4SF_type_node
,
12094 tree v2df_ftype_v2df_v2df_int
12095 = build_function_type (V2DF_type_node
,
12096 tree_cons (NULL_TREE
, V2DF_type_node
,
12097 tree_cons (NULL_TREE
, V2DF_type_node
,
12098 tree_cons (NULL_TREE
,
12101 tree v2df_ftype_v2df_pv2si
12102 = build_function_type (V2DF_type_node
,
12103 tree_cons (NULL_TREE
, V2DF_type_node
,
12104 tree_cons (NULL_TREE
, pv2si_type_node
,
12106 tree void_ftype_pv2si_v2df
12107 = build_function_type (void_type_node
,
12108 tree_cons (NULL_TREE
, pv2si_type_node
,
12109 tree_cons (NULL_TREE
, V2DF_type_node
,
12111 tree void_ftype_pdouble_v2df
12112 = build_function_type (void_type_node
,
12113 tree_cons (NULL_TREE
, pdouble_type_node
,
12114 tree_cons (NULL_TREE
, V2DF_type_node
,
12116 tree void_ftype_pint_int
12117 = build_function_type (void_type_node
,
12118 tree_cons (NULL_TREE
, pint_type_node
,
12119 tree_cons (NULL_TREE
, integer_type_node
,
12121 tree maskmovdqu_args
= tree_cons (NULL_TREE
, V16QI_type_node
,
12122 tree_cons (NULL_TREE
, V16QI_type_node
,
12123 tree_cons (NULL_TREE
,
12126 tree void_ftype_v16qi_v16qi_pchar
12127 = build_function_type (void_type_node
, maskmovdqu_args
);
12128 tree v2df_ftype_pdouble
12129 = build_function_type (V2DF_type_node
,
12130 tree_cons (NULL_TREE
, pdouble_type_node
,
12132 tree v2df_ftype_v2df_v2df
12133 = build_function_type (V2DF_type_node
,
12134 tree_cons (NULL_TREE
, V2DF_type_node
,
12135 tree_cons (NULL_TREE
, V2DF_type_node
,
12137 tree v16qi_ftype_v16qi_v16qi
12138 = build_function_type (V16QI_type_node
,
12139 tree_cons (NULL_TREE
, V16QI_type_node
,
12140 tree_cons (NULL_TREE
, V16QI_type_node
,
12142 tree v8hi_ftype_v8hi_v8hi
12143 = build_function_type (V8HI_type_node
,
12144 tree_cons (NULL_TREE
, V8HI_type_node
,
12145 tree_cons (NULL_TREE
, V8HI_type_node
,
12147 tree v4si_ftype_v4si_v4si
12148 = build_function_type (V4SI_type_node
,
12149 tree_cons (NULL_TREE
, V4SI_type_node
,
12150 tree_cons (NULL_TREE
, V4SI_type_node
,
12152 tree v2di_ftype_v2di_v2di
12153 = build_function_type (V2DI_type_node
,
12154 tree_cons (NULL_TREE
, V2DI_type_node
,
12155 tree_cons (NULL_TREE
, V2DI_type_node
,
12157 tree v2di_ftype_v2df_v2df
12158 = build_function_type (V2DI_type_node
,
12159 tree_cons (NULL_TREE
, V2DF_type_node
,
12160 tree_cons (NULL_TREE
, V2DF_type_node
,
12162 tree v2df_ftype_v2df
12163 = build_function_type (V2DF_type_node
,
12164 tree_cons (NULL_TREE
, V2DF_type_node
,
12166 tree v2df_ftype_double
12167 = build_function_type (V2DF_type_node
,
12168 tree_cons (NULL_TREE
, double_type_node
,
12170 tree v2df_ftype_double_double
12171 = build_function_type (V2DF_type_node
,
12172 tree_cons (NULL_TREE
, double_type_node
,
12173 tree_cons (NULL_TREE
, double_type_node
,
12175 tree int_ftype_v8hi_int
12176 = build_function_type (integer_type_node
,
12177 tree_cons (NULL_TREE
, V8HI_type_node
,
12178 tree_cons (NULL_TREE
, integer_type_node
,
12180 tree v8hi_ftype_v8hi_int_int
12181 = build_function_type (V8HI_type_node
,
12182 tree_cons (NULL_TREE
, V8HI_type_node
,
12183 tree_cons (NULL_TREE
, integer_type_node
,
12184 tree_cons (NULL_TREE
,
12187 tree v2di_ftype_v2di_int
12188 = build_function_type (V2DI_type_node
,
12189 tree_cons (NULL_TREE
, V2DI_type_node
,
12190 tree_cons (NULL_TREE
, integer_type_node
,
12192 tree v4si_ftype_v4si_int
12193 = build_function_type (V4SI_type_node
,
12194 tree_cons (NULL_TREE
, V4SI_type_node
,
12195 tree_cons (NULL_TREE
, integer_type_node
,
12197 tree v8hi_ftype_v8hi_int
12198 = build_function_type (V8HI_type_node
,
12199 tree_cons (NULL_TREE
, V8HI_type_node
,
12200 tree_cons (NULL_TREE
, integer_type_node
,
12202 tree v8hi_ftype_v8hi_v2di
12203 = build_function_type (V8HI_type_node
,
12204 tree_cons (NULL_TREE
, V8HI_type_node
,
12205 tree_cons (NULL_TREE
, V2DI_type_node
,
12207 tree v4si_ftype_v4si_v2di
12208 = build_function_type (V4SI_type_node
,
12209 tree_cons (NULL_TREE
, V4SI_type_node
,
12210 tree_cons (NULL_TREE
, V2DI_type_node
,
12212 tree v4si_ftype_v8hi_v8hi
12213 = build_function_type (V4SI_type_node
,
12214 tree_cons (NULL_TREE
, V8HI_type_node
,
12215 tree_cons (NULL_TREE
, V8HI_type_node
,
12217 tree di_ftype_v8qi_v8qi
12218 = build_function_type (long_long_unsigned_type_node
,
12219 tree_cons (NULL_TREE
, V8QI_type_node
,
12220 tree_cons (NULL_TREE
, V8QI_type_node
,
12222 tree v2di_ftype_v16qi_v16qi
12223 = build_function_type (V2DI_type_node
,
12224 tree_cons (NULL_TREE
, V16QI_type_node
,
12225 tree_cons (NULL_TREE
, V16QI_type_node
,
12227 tree int_ftype_v16qi
12228 = build_function_type (integer_type_node
,
12229 tree_cons (NULL_TREE
, V16QI_type_node
, endlink
));
12231 /* Add all builtins that are more or less simple operations on two
12233 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12235 /* Use one of the operands; the target can have a different mode for
12236 mask-generating compares. */
12237 enum machine_mode mode
;
12242 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12247 type
= v16qi_ftype_v16qi_v16qi
;
12250 type
= v8hi_ftype_v8hi_v8hi
;
12253 type
= v4si_ftype_v4si_v4si
;
12256 type
= v2di_ftype_v2di_v2di
;
12259 type
= v2df_ftype_v2df_v2df
;
12262 type
= ti_ftype_ti_ti
;
12265 type
= v4sf_ftype_v4sf_v4sf
;
12268 type
= v8qi_ftype_v8qi_v8qi
;
12271 type
= v4hi_ftype_v4hi_v4hi
;
12274 type
= v2si_ftype_v2si_v2si
;
12277 type
= di_ftype_di_di
;
12284 /* Override for comparisons. */
12285 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12286 || d
->icode
== CODE_FOR_maskncmpv4sf3
12287 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12288 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12289 type
= v4si_ftype_v4sf_v4sf
;
12291 if (d
->icode
== CODE_FOR_maskcmpv2df3
12292 || d
->icode
== CODE_FOR_maskncmpv2df3
12293 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12294 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12295 type
= v2di_ftype_v2df_v2df
;
12297 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12300 /* Add the remaining MMX insns with somewhat more complicated types. */
12301 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12302 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12303 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12304 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12305 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12306 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12307 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12309 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12310 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12311 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12313 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12314 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12316 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12317 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12319 /* comi/ucomi insns. */
12320 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12321 if (d
->mask
== MASK_SSE2
)
12322 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12324 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12326 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12327 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12328 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12330 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12331 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12332 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12333 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12334 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12335 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12337 def_builtin (MASK_SSE1
, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDPS
);
12338 def_builtin (MASK_SSE1
, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ANDNPS
);
12339 def_builtin (MASK_SSE1
, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_ORPS
);
12340 def_builtin (MASK_SSE1
, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf
, IX86_BUILTIN_XORPS
);
12342 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12343 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12345 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12347 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
12348 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
12349 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
12350 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12351 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12352 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12354 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12355 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12356 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12357 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12359 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12360 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12361 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12362 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12364 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12366 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12368 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12369 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12370 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12371 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12372 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12373 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12375 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12377 /* Original 3DNow! */
12378 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12379 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12380 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12381 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12382 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12383 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12384 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12385 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12386 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12387 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12388 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12389 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12390 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12391 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12392 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12393 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12394 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12395 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12396 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12397 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12399 /* 3DNow! extension as used in the Athlon CPU. */
12400 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12401 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12402 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12403 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12404 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12405 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12407 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12410 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12411 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12413 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12414 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12416 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
12417 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
12418 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
12419 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12420 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12421 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12423 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12424 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12425 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12426 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12428 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12429 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12430 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12431 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12432 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12434 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12435 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12436 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12437 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12439 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12440 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12442 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12444 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12445 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12447 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12448 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12449 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12450 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12451 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12453 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12455 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12456 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12458 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12459 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12460 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12462 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12463 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12464 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12466 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12467 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12468 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12469 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
12470 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
12471 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12472 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12474 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
12475 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12476 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12478 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12479 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12480 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12482 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12483 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12484 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12486 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12487 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12489 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12490 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12491 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12493 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12494 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12495 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12497 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12498 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12500 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12503 /* Errors in the source file can cause expand_expr to return const0_rtx
12504 where we expect a vector. To avoid crashing, use one of the vector
12505 clear instructions. */
12507 safe_vector_operand (x
, mode
)
12509 enum machine_mode mode
;
12511 if (x
!= const0_rtx
)
12513 x
= gen_reg_rtx (mode
);
12515 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12516 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12517 : gen_rtx_SUBREG (DImode
, x
, 0)));
12519 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12520 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
12524 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12527 ix86_expand_binop_builtin (icode
, arglist
, target
)
12528 enum insn_code icode
;
12533 tree arg0
= TREE_VALUE (arglist
);
12534 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12535 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12536 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12537 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12538 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12539 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
12541 if (VECTOR_MODE_P (mode0
))
12542 op0
= safe_vector_operand (op0
, mode0
);
12543 if (VECTOR_MODE_P (mode1
))
12544 op1
= safe_vector_operand (op1
, mode1
);
12547 || GET_MODE (target
) != tmode
12548 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12549 target
= gen_reg_rtx (tmode
);
12551 /* In case the insn wants input operands in modes different from
12552 the result, abort. */
12553 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
12556 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12557 op0
= copy_to_mode_reg (mode0
, op0
);
12558 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12559 op1
= copy_to_mode_reg (mode1
, op1
);
12561 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12562 yet one of the two must not be a memory. This is normally enforced
12563 by expanders, but we didn't bother to create one here. */
12564 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12565 op0
= copy_to_mode_reg (mode0
, op0
);
12567 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12574 /* In type_for_mode we restrict the ability to create TImode types
12575 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12576 to have a V4SFmode signature. Convert them in-place to TImode. */
12579 ix86_expand_timode_binop_builtin (icode
, arglist
, target
)
12580 enum insn_code icode
;
12585 tree arg0
= TREE_VALUE (arglist
);
12586 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12587 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12588 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12590 op0
= gen_lowpart (TImode
, op0
);
12591 op1
= gen_lowpart (TImode
, op1
);
12592 target
= gen_reg_rtx (TImode
);
12594 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, TImode
))
12595 op0
= copy_to_mode_reg (TImode
, op0
);
12596 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, TImode
))
12597 op1
= copy_to_mode_reg (TImode
, op1
);
12599 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12600 yet one of the two must not be a memory. This is normally enforced
12601 by expanders, but we didn't bother to create one here. */
12602 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12603 op0
= copy_to_mode_reg (TImode
, op0
);
12605 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12610 return gen_lowpart (V4SFmode
, target
);
12613 /* Subroutine of ix86_expand_builtin to take care of stores. */
12616 ix86_expand_store_builtin (icode
, arglist
)
12617 enum insn_code icode
;
12621 tree arg0
= TREE_VALUE (arglist
);
12622 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12623 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12624 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12625 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12626 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12628 if (VECTOR_MODE_P (mode1
))
12629 op1
= safe_vector_operand (op1
, mode1
);
12631 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12633 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12634 op1
= copy_to_mode_reg (mode1
, op1
);
12636 pat
= GEN_FCN (icode
) (op0
, op1
);
12642 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12645 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
12646 enum insn_code icode
;
12652 tree arg0
= TREE_VALUE (arglist
);
12653 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12654 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12655 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12658 || GET_MODE (target
) != tmode
12659 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12660 target
= gen_reg_rtx (tmode
);
12662 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12665 if (VECTOR_MODE_P (mode0
))
12666 op0
= safe_vector_operand (op0
, mode0
);
12668 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12669 op0
= copy_to_mode_reg (mode0
, op0
);
12672 pat
= GEN_FCN (icode
) (target
, op0
);
12679 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12680 sqrtss, rsqrtss, rcpss. */
12683 ix86_expand_unop1_builtin (icode
, arglist
, target
)
12684 enum insn_code icode
;
12689 tree arg0
= TREE_VALUE (arglist
);
12690 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12691 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12692 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12695 || GET_MODE (target
) != tmode
12696 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12697 target
= gen_reg_rtx (tmode
);
12699 if (VECTOR_MODE_P (mode0
))
12700 op0
= safe_vector_operand (op0
, mode0
);
12702 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12703 op0
= copy_to_mode_reg (mode0
, op0
);
12706 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12707 op1
= copy_to_mode_reg (mode0
, op1
);
12709 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12716 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12719 ix86_expand_sse_compare (d
, arglist
, target
)
12720 const struct builtin_description
*d
;
12725 tree arg0
= TREE_VALUE (arglist
);
12726 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12727 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12728 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12730 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12731 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12732 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12733 enum rtx_code comparison
= d
->comparison
;
12735 if (VECTOR_MODE_P (mode0
))
12736 op0
= safe_vector_operand (op0
, mode0
);
12737 if (VECTOR_MODE_P (mode1
))
12738 op1
= safe_vector_operand (op1
, mode1
);
12740 /* Swap operands if we have a comparison that isn't available in
12744 rtx tmp
= gen_reg_rtx (mode1
);
12745 emit_move_insn (tmp
, op1
);
12751 || GET_MODE (target
) != tmode
12752 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12753 target
= gen_reg_rtx (tmode
);
12755 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12756 op0
= copy_to_mode_reg (mode0
, op0
);
12757 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12758 op1
= copy_to_mode_reg (mode1
, op1
);
12760 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12761 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12768 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12771 ix86_expand_sse_comi (d
, arglist
, target
)
12772 const struct builtin_description
*d
;
12777 tree arg0
= TREE_VALUE (arglist
);
12778 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12779 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12780 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12782 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
12783 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
12784 enum rtx_code comparison
= d
->comparison
;
12786 if (VECTOR_MODE_P (mode0
))
12787 op0
= safe_vector_operand (op0
, mode0
);
12788 if (VECTOR_MODE_P (mode1
))
12789 op1
= safe_vector_operand (op1
, mode1
);
12791 /* Swap operands if we have a comparison that isn't available in
12800 target
= gen_reg_rtx (SImode
);
12801 emit_move_insn (target
, const0_rtx
);
12802 target
= gen_rtx_SUBREG (QImode
, target
, 0);
12804 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
12805 op0
= copy_to_mode_reg (mode0
, op0
);
12806 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
12807 op1
= copy_to_mode_reg (mode1
, op1
);
12809 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12810 pat
= GEN_FCN (d
->icode
) (op0
, op1
, op2
);
12814 emit_insn (gen_rtx_SET (VOIDmode
,
12815 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
12816 gen_rtx_fmt_ee (comparison
, QImode
,
12817 gen_rtx_REG (CCmode
, FLAGS_REG
),
12820 return SUBREG_REG (target
);
12823 /* Expand an expression EXP that calls a built-in function,
12824 with result going to TARGET if that's convenient
12825 (and in mode MODE if that's convenient).
12826 SUBTARGET may be used as the target for computing one of EXP's operands.
12827 IGNORE is nonzero if the value is to be ignored. */
12830 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
12833 rtx subtarget ATTRIBUTE_UNUSED
;
12834 enum machine_mode mode ATTRIBUTE_UNUSED
;
12835 int ignore ATTRIBUTE_UNUSED
;
12837 const struct builtin_description
*d
;
12839 enum insn_code icode
;
12840 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
12841 tree arglist
= TREE_OPERAND (exp
, 1);
12842 tree arg0
, arg1
, arg2
;
12843 rtx op0
, op1
, op2
, pat
;
12844 enum machine_mode tmode
, mode0
, mode1
, mode2
;
12845 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
12849 case IX86_BUILTIN_EMMS
:
12850 emit_insn (gen_emms ());
12853 case IX86_BUILTIN_SFENCE
:
12854 emit_insn (gen_sfence ());
12857 case IX86_BUILTIN_PEXTRW
:
12858 case IX86_BUILTIN_PEXTRW128
:
12859 icode
= (fcode
== IX86_BUILTIN_PEXTRW
12860 ? CODE_FOR_mmx_pextrw
12861 : CODE_FOR_sse2_pextrw
);
12862 arg0
= TREE_VALUE (arglist
);
12863 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12864 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12865 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12866 tmode
= insn_data
[icode
].operand
[0].mode
;
12867 mode0
= insn_data
[icode
].operand
[1].mode
;
12868 mode1
= insn_data
[icode
].operand
[2].mode
;
12870 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12871 op0
= copy_to_mode_reg (mode0
, op0
);
12872 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12874 /* @@@ better error message */
12875 error ("selector must be an immediate");
12876 return gen_reg_rtx (tmode
);
12879 || GET_MODE (target
) != tmode
12880 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12881 target
= gen_reg_rtx (tmode
);
12882 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12888 case IX86_BUILTIN_PINSRW
:
12889 case IX86_BUILTIN_PINSRW128
:
12890 icode
= (fcode
== IX86_BUILTIN_PINSRW
12891 ? CODE_FOR_mmx_pinsrw
12892 : CODE_FOR_sse2_pinsrw
);
12893 arg0
= TREE_VALUE (arglist
);
12894 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12895 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12896 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12897 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12898 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
12899 tmode
= insn_data
[icode
].operand
[0].mode
;
12900 mode0
= insn_data
[icode
].operand
[1].mode
;
12901 mode1
= insn_data
[icode
].operand
[2].mode
;
12902 mode2
= insn_data
[icode
].operand
[3].mode
;
12904 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12905 op0
= copy_to_mode_reg (mode0
, op0
);
12906 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12907 op1
= copy_to_mode_reg (mode1
, op1
);
12908 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
12910 /* @@@ better error message */
12911 error ("selector must be an immediate");
12915 || GET_MODE (target
) != tmode
12916 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12917 target
= gen_reg_rtx (tmode
);
12918 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
12924 case IX86_BUILTIN_MASKMOVQ
:
12925 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
12926 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
12927 : CODE_FOR_sse2_maskmovdqu
);
12928 /* Note the arg order is different from the operand order. */
12929 arg1
= TREE_VALUE (arglist
);
12930 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
12931 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
12932 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12933 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12934 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
12935 mode0
= insn_data
[icode
].operand
[0].mode
;
12936 mode1
= insn_data
[icode
].operand
[1].mode
;
12937 mode2
= insn_data
[icode
].operand
[2].mode
;
12939 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
12940 op0
= copy_to_mode_reg (mode0
, op0
);
12941 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12942 op1
= copy_to_mode_reg (mode1
, op1
);
12943 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
12944 op2
= copy_to_mode_reg (mode2
, op2
);
12945 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
12951 case IX86_BUILTIN_SQRTSS
:
12952 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
12953 case IX86_BUILTIN_RSQRTSS
:
12954 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
12955 case IX86_BUILTIN_RCPSS
:
12956 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
12958 case IX86_BUILTIN_ANDPS
:
12959 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3
,
12961 case IX86_BUILTIN_ANDNPS
:
12962 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3
,
12964 case IX86_BUILTIN_ORPS
:
12965 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3
,
12967 case IX86_BUILTIN_XORPS
:
12968 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3
,
12971 case IX86_BUILTIN_LOADAPS
:
12972 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
12974 case IX86_BUILTIN_LOADUPS
:
12975 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
12977 case IX86_BUILTIN_STOREAPS
:
12978 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
12979 case IX86_BUILTIN_STOREUPS
:
12980 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
12982 case IX86_BUILTIN_LOADSS
:
12983 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
12985 case IX86_BUILTIN_STORESS
:
12986 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
12988 case IX86_BUILTIN_LOADHPS
:
12989 case IX86_BUILTIN_LOADLPS
:
12990 case IX86_BUILTIN_LOADHPD
:
12991 case IX86_BUILTIN_LOADLPD
:
12992 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
12993 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
12994 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
12995 : CODE_FOR_sse2_movlpd
);
12996 arg0
= TREE_VALUE (arglist
);
12997 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12998 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12999 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13000 tmode
= insn_data
[icode
].operand
[0].mode
;
13001 mode0
= insn_data
[icode
].operand
[1].mode
;
13002 mode1
= insn_data
[icode
].operand
[2].mode
;
13004 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13005 op0
= copy_to_mode_reg (mode0
, op0
);
13006 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13008 || GET_MODE (target
) != tmode
13009 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13010 target
= gen_reg_rtx (tmode
);
13011 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13017 case IX86_BUILTIN_STOREHPS
:
13018 case IX86_BUILTIN_STORELPS
:
13019 case IX86_BUILTIN_STOREHPD
:
13020 case IX86_BUILTIN_STORELPD
:
13021 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13022 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13023 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13024 : CODE_FOR_sse2_movlpd
);
13025 arg0
= TREE_VALUE (arglist
);
13026 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13027 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13028 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13029 mode0
= insn_data
[icode
].operand
[1].mode
;
13030 mode1
= insn_data
[icode
].operand
[2].mode
;
13032 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13033 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13034 op1
= copy_to_mode_reg (mode1
, op1
);
13036 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13042 case IX86_BUILTIN_MOVNTPS
:
13043 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13044 case IX86_BUILTIN_MOVNTQ
:
13045 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13047 case IX86_BUILTIN_LDMXCSR
:
13048 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13049 target
= assign_386_stack_local (SImode
, 0);
13050 emit_move_insn (target
, op0
);
13051 emit_insn (gen_ldmxcsr (target
));
13054 case IX86_BUILTIN_STMXCSR
:
13055 target
= assign_386_stack_local (SImode
, 0);
13056 emit_insn (gen_stmxcsr (target
));
13057 return copy_to_mode_reg (SImode
, target
);
13059 case IX86_BUILTIN_SHUFPS
:
13060 case IX86_BUILTIN_SHUFPD
:
13061 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13062 ? CODE_FOR_sse_shufps
13063 : CODE_FOR_sse2_shufpd
);
13064 arg0
= TREE_VALUE (arglist
);
13065 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13066 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13067 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13068 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13069 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13070 tmode
= insn_data
[icode
].operand
[0].mode
;
13071 mode0
= insn_data
[icode
].operand
[1].mode
;
13072 mode1
= insn_data
[icode
].operand
[2].mode
;
13073 mode2
= insn_data
[icode
].operand
[3].mode
;
13075 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13076 op0
= copy_to_mode_reg (mode0
, op0
);
13077 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13078 op1
= copy_to_mode_reg (mode1
, op1
);
13079 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13081 /* @@@ better error message */
13082 error ("mask must be an immediate");
13083 return gen_reg_rtx (tmode
);
13086 || GET_MODE (target
) != tmode
13087 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13088 target
= gen_reg_rtx (tmode
);
13089 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13095 case IX86_BUILTIN_PSHUFW
:
13096 case IX86_BUILTIN_PSHUFD
:
13097 case IX86_BUILTIN_PSHUFHW
:
13098 case IX86_BUILTIN_PSHUFLW
:
13099 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13100 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13101 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13102 : CODE_FOR_mmx_pshufw
);
13103 arg0
= TREE_VALUE (arglist
);
13104 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13105 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13106 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13107 tmode
= insn_data
[icode
].operand
[0].mode
;
13108 mode1
= insn_data
[icode
].operand
[1].mode
;
13109 mode2
= insn_data
[icode
].operand
[2].mode
;
13111 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13112 op0
= copy_to_mode_reg (mode1
, op0
);
13113 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13115 /* @@@ better error message */
13116 error ("mask must be an immediate");
13120 || GET_MODE (target
) != tmode
13121 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13122 target
= gen_reg_rtx (tmode
);
13123 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13129 case IX86_BUILTIN_FEMMS
:
13130 emit_insn (gen_femms ());
13133 case IX86_BUILTIN_PAVGUSB
:
13134 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13136 case IX86_BUILTIN_PF2ID
:
13137 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13139 case IX86_BUILTIN_PFACC
:
13140 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13142 case IX86_BUILTIN_PFADD
:
13143 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13145 case IX86_BUILTIN_PFCMPEQ
:
13146 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13148 case IX86_BUILTIN_PFCMPGE
:
13149 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13151 case IX86_BUILTIN_PFCMPGT
:
13152 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13154 case IX86_BUILTIN_PFMAX
:
13155 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13157 case IX86_BUILTIN_PFMIN
:
13158 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13160 case IX86_BUILTIN_PFMUL
:
13161 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13163 case IX86_BUILTIN_PFRCP
:
13164 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13166 case IX86_BUILTIN_PFRCPIT1
:
13167 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13169 case IX86_BUILTIN_PFRCPIT2
:
13170 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13172 case IX86_BUILTIN_PFRSQIT1
:
13173 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13175 case IX86_BUILTIN_PFRSQRT
:
13176 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13178 case IX86_BUILTIN_PFSUB
:
13179 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13181 case IX86_BUILTIN_PFSUBR
:
13182 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13184 case IX86_BUILTIN_PI2FD
:
13185 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13187 case IX86_BUILTIN_PMULHRW
:
13188 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13190 case IX86_BUILTIN_PF2IW
:
13191 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13193 case IX86_BUILTIN_PFNACC
:
13194 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13196 case IX86_BUILTIN_PFPNACC
:
13197 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13199 case IX86_BUILTIN_PI2FW
:
13200 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13202 case IX86_BUILTIN_PSWAPDSI
:
13203 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13205 case IX86_BUILTIN_PSWAPDSF
:
13206 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13208 case IX86_BUILTIN_SSE_ZERO
:
13209 target
= gen_reg_rtx (V4SFmode
);
13210 emit_insn (gen_sse_clrv4sf (target
));
13213 case IX86_BUILTIN_MMX_ZERO
:
13214 target
= gen_reg_rtx (DImode
);
13215 emit_insn (gen_mmx_clrdi (target
));
13218 case IX86_BUILTIN_SQRTSD
:
13219 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13220 case IX86_BUILTIN_LOADAPD
:
13221 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13222 case IX86_BUILTIN_LOADUPD
:
13223 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13225 case IX86_BUILTIN_STOREAPD
:
13226 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13227 case IX86_BUILTIN_STOREUPD
:
13228 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13230 case IX86_BUILTIN_LOADSD
:
13231 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13233 case IX86_BUILTIN_STORESD
:
13234 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13236 case IX86_BUILTIN_SETPD1
:
13237 target
= assign_386_stack_local (DFmode
, 0);
13238 arg0
= TREE_VALUE (arglist
);
13239 emit_move_insn (adjust_address (target
, DFmode
, 0),
13240 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13241 op0
= gen_reg_rtx (V2DFmode
);
13242 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13243 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
13246 case IX86_BUILTIN_SETPD
:
13247 target
= assign_386_stack_local (V2DFmode
, 0);
13248 arg0
= TREE_VALUE (arglist
);
13249 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13250 emit_move_insn (adjust_address (target
, DFmode
, 0),
13251 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13252 emit_move_insn (adjust_address (target
, DFmode
, 8),
13253 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13254 op0
= gen_reg_rtx (V2DFmode
);
13255 emit_insn (gen_sse2_movapd (op0
, target
));
13258 case IX86_BUILTIN_LOADRPD
:
13259 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13260 gen_reg_rtx (V2DFmode
), 1);
13261 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
13264 case IX86_BUILTIN_LOADPD1
:
13265 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13266 gen_reg_rtx (V2DFmode
), 1);
13267 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13270 case IX86_BUILTIN_STOREPD1
:
13271 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13272 case IX86_BUILTIN_STORERPD
:
13273 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13275 case IX86_BUILTIN_MFENCE
:
13276 emit_insn (gen_sse2_mfence ());
13278 case IX86_BUILTIN_LFENCE
:
13279 emit_insn (gen_sse2_lfence ());
13282 case IX86_BUILTIN_CLFLUSH
:
13283 arg0
= TREE_VALUE (arglist
);
13284 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13285 icode
= CODE_FOR_sse2_clflush
;
13286 mode0
= insn_data
[icode
].operand
[0].mode
;
13287 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13288 op0
= copy_to_mode_reg (mode0
, op0
);
13290 emit_insn (gen_sse2_clflush (op0
));
13293 case IX86_BUILTIN_MOVNTPD
:
13294 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13295 case IX86_BUILTIN_MOVNTDQ
:
13296 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13297 case IX86_BUILTIN_MOVNTI
:
13298 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13304 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13305 if (d
->code
== fcode
)
13307 /* Compares are treated specially. */
13308 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13309 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13310 || d
->icode
== CODE_FOR_maskncmpv4sf3
13311 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13312 || d
->icode
== CODE_FOR_maskcmpv2df3
13313 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13314 || d
->icode
== CODE_FOR_maskncmpv2df3
13315 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13316 return ix86_expand_sse_compare (d
, arglist
, target
);
13318 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13321 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13322 if (d
->code
== fcode
)
13323 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13325 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13326 if (d
->code
== fcode
)
13327 return ix86_expand_sse_comi (d
, arglist
, target
);
13329 /* @@@ Should really do something sensible here. */
13333 /* Store OPERAND to the memory after reload is completed. This means
13334 that we can't easily use assign_stack_local. */
/* NOTE(review): this chunk is a garbled extraction -- several original
   lines (return type, braces, push-insn wrappers, the final return) are
   missing here, so only whole-line comments are added; the code text is
   left byte-identical.  */
13336 ix86_force_to_memory (mode
, operand
)
13337 enum machine_mode mode
;
/* Only valid after reload: assign_stack_local can no longer be used.  */
13341 if (!reload_completed
)
/* 64-bit with a red zone: store below the stack pointer instead of
   pushing -- no stack-pointer adjustment is needed.  */
13343 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
13345 result
= gen_rtx_MEM (mode
,
13346 gen_rtx_PLUS (Pmode
,
13348 GEN_INT (-RED_ZONE_SIZE
)));
13349 emit_move_insn (result
, operand
);
/* 64-bit without a red zone: push the value as a DImode quantity
   (pre-decrement of the stack pointer).  */
13351 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
13357 operand
= gen_lowpart (DImode
, operand
);
13361 gen_rtx_SET (VOIDmode
,
13362 gen_rtx_MEM (DImode
,
13363 gen_rtx_PRE_DEC (DImode
,
13364 stack_pointer_rtx
)),
13370 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
/* 32-bit DImode case: split into two SImode halves and push each one
   separately (high half first, judging by the two PRE_DEC pushes).  */
13379 split_di (&operand
, 1, operands
, operands
+ 1);
13381 gen_rtx_SET (VOIDmode
,
13382 gen_rtx_MEM (SImode
,
13383 gen_rtx_PRE_DEC (Pmode
,
13384 stack_pointer_rtx
)),
13387 gen_rtx_SET (VOIDmode
,
13388 gen_rtx_MEM (SImode
,
13389 gen_rtx_PRE_DEC (Pmode
,
13390 stack_pointer_rtx
)),
13395 /* It is better to store HImodes as SImodes. */
13396 if (!TARGET_PARTIAL_REG_STALL
)
13397 operand
= gen_lowpart (SImode
, operand
);
/* Push OPERAND in its (possibly widened) mode.  */
13401 gen_rtx_SET (VOIDmode
,
13402 gen_rtx_MEM (GET_MODE (operand
),
13403 gen_rtx_PRE_DEC (SImode
,
13404 stack_pointer_rtx
)),
/* The freshly pushed slot at the stack pointer is the result.  */
13410 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13415 /* Free operand from the memory. */
/* Undo the stack allocation made by ix86_force_to_memory by bumping the
   stack pointer back up.  NOTE(review): garbled extraction -- the size
   selection bodies and the closing of the LEA expression are missing
   from this view.  */
13417 ix86_free_from_memory (mode
)
13418 enum machine_mode mode
;
/* Nothing to free when the 64-bit red zone was used (nothing was
   pushed); only adjust the stack otherwise.  */
13420 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
/* Pick the deallocation size from MODE (DImode/64-bit versus the
   HImode-stored-as-SImode case below).  */
13424 if (mode
== DImode
|| TARGET_64BIT
)
13426 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13430 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13431 to pop or add instruction if registers are available. */
13432 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13433 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
13438 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13439 QImode must go into class Q_REGS.
13440 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13441 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X and requested CLASS,
   return the class reload should actually use.  NOTE(review): garbled
   extraction -- the return statements for several branches are missing
   from this view; code text below is unmodified.  */
13443 ix86_preferred_reload_class (x
, class)
13445 enum reg_class
class;
/* Non-integral CONST_DOUBLE: decide which unit may load the constant.  */
13447 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13449 /* SSE can't load any constant directly yet. */
13450 if (SSE_CLASS_P (class))
13452 /* Floats can load 0 and 1. */
13453 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13455 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13456 if (MAYBE_SSE_CLASS_P (class))
13457 return (reg_class_subset_p (class, GENERAL_REGS
)
13458 ? GENERAL_REGS
: FLOAT_REGS
);
13462 /* General regs can load everything. */
13463 if (reg_class_subset_p (class, GENERAL_REGS
))
13464 return GENERAL_REGS
;
13465 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13466 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot load constants directly either.  */
13469 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
/* QImode values must live in Q_REGS (the byte-addressable regs).  */
13471 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
13476 /* If we are copying between general and FP registers, we need a memory
13477 location. The same is true for SSE and MMX registers.
13479 The macro can't work reliably when one of the CLASSES is class containing
13480 registers from multiple units (SSE, MMX, integer). We avoid this by never
13481 combining those units in single alternative in the machine description.
13482 Ensure that this constraint holds to avoid unexpected surprises.
13484 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13485 enforce these sanity checks. */
13487 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
13488 enum reg_class class1
, class2
;
13489 enum machine_mode mode
;
/* Sanity check: reject any class that mixes register units (the
   MAYBE_* predicate disagreeing with the exact predicate means the
   class spans more than one unit).  NOTE(review): the action taken on
   failure is missing from this garbled extraction.  */
13492 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13493 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13494 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13495 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13496 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13497 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
/* Memory is needed when crossing between the FP unit and anything
   else, or between SSE/MMX and the integer unit -- except that SImode
   moves between SSE/MMX and integer registers can go direct.  */
13504 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13505 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13506 && (mode
) != SImode
)
13507 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13508 && (mode
) != SImode
));
13510 /* Return the cost of moving data from a register in class CLASS1 to
13511 one in class CLASS2.
13513 It is not required that the cost always equal 2 when FROM is the same as TO;
13514 on some machines it is expensive to move between registers if they are not
13515 general registers. */
13517 ix86_register_move_cost (mode
, class1
, class2
)
13518 enum machine_mode mode
;
13519 enum reg_class class1
, class2
;
13521 /* In case we require secondary memory, compute cost of the store followed
13522 by load. In case of copying from general_purpose_register we may emit
13523 multiple stores followed by single load causing memory size mismatch
13524 stall. Count this as arbitrarily high cost of 20. */
13525 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
/* More registers on the source side than the destination implies the
   multi-store/single-load mismatch described above.  NOTE(review): the
   "cost 20" branch body is missing from this garbled extraction.  */
13528 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
/* Otherwise cost of the store plus the load plus one add.  */
13530 return (MEMORY_MOVE_COST (mode
, class1
, 0)
13531 + MEMORY_MOVE_COST (mode
, class2
, 1) + add_cost
);
13533 /* Moves between SSE/MMX and integer unit are expensive. */
13534 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13535 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13536 return ix86_cost
->mmxsse_to_integer
;
/* Intra-unit moves use the per-processor cost table.  */
13537 if (MAYBE_FLOAT_CLASS_P (class1
))
13538 return ix86_cost
->fp_move
;
13539 if (MAYBE_SSE_CLASS_P (class1
))
13540 return ix86_cost
->sse_move
;
13541 if (MAYBE_MMX_CLASS_P (class1
))
13542 return ix86_cost
->mmx_move
;
13546 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13548 ix86_hard_regno_mode_ok (regno
, mode
)
13550 enum machine_mode mode
;
13552 /* Flags and only flags can only hold CCmode values. */
13553 if (CC_REGNO_P (regno
))
13554 return GET_MODE_CLASS (mode
) == MODE_CC
;
/* Conversely, CC-like modes fit in no other register.  */
13555 if (GET_MODE_CLASS (mode
) == MODE_CC
13556 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13557 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
/* Each special-purpose register file accepts only its own modes.  */
13559 if (FP_REGNO_P (regno
))
13560 return VALID_FP_MODE_P (mode
);
13561 if (SSE_REGNO_P (regno
))
13562 return VALID_SSE_REG_MODE (mode
);
13563 if (MMX_REGNO_P (regno
))
13564 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
13565 /* We handle both integer and floats in the general purpose registers.
13566 In future we should be able to handle vector modes as well. */
13567 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13569 /* Take care for QImode values - they can be in non-QI regs, but then
13570 they do cause partial register stalls. */
13571 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
/* Allow QImode in non-Q regs during/after reload, or when the target
   does not suffer partial register stalls.  */
13573 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
13576 /* Return the cost of moving data of mode M between a
13577 register and memory. A value of 2 is the default; this cost is
13578 relative to those in `REGISTER_MOVE_COST'.
13580 If moving between registers and memory is more expensive than
13581 between two registers, you should define this macro to express the
13584 Model also increased moving costs of QImode registers in non
/* NOTE(review): garbled extraction -- the index computations for each
   cost table (and several switch cases) are missing from this view;
   `index' is presumably derived from GET_MODE_SIZE (mode).  */
13588 ix86_memory_move_cost (mode
, class, in
)
13589 enum machine_mode mode
;
13590 enum reg_class
class;
/* x87 floating-point classes: load/store cost by size index.  */
13593 if (FLOAT_CLASS_P (class))
13611 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE classes: indexed by mode size.  */
13613 if (SSE_CLASS_P (class))
13616 switch (GET_MODE_SIZE (mode
))
13630 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX classes: indexed by mode size.  */
13632 if (MMX_CLASS_P (class))
13635 switch (GET_MODE_SIZE (mode
))
13646 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* Integer classes: by mode size; QImode outside Q_REGS is costlier
   (needs movzbl on load, and store is penalized by 4).  */
13648 switch (GET_MODE_SIZE (mode
))
13652 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13653 : ix86_cost
->movzbl_load
);
13655 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13656 : ix86_cost
->int_store
[0] + 4);
13659 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13661 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13662 if (mode
== TFmode
)
13664 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13665 * (int) GET_MODE_SIZE (mode
) / 4);
13669 #ifdef DO_GLOBAL_CTORS_BODY
/* SVR3-style constructor output: emit "pushl $<symbol>" into the asm
   file so DO_GLOBAL_CTORS_BODY can pop and call each constructor.
   PRIORITY is accepted for the target-hook signature but unused.  */
13671 ix86_svr3_asm_out_constructor (symbol
, priority
)
13673 int priority ATTRIBUTE_UNUSED
;
13676 fputs ("\tpushl $", asm_out_file
);
13677 assemble_name (asm_out_file
, XSTR (symbol
, 0));
13678 fputc ('\n', asm_out_file
);
13682 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved general regs first, then
   callee-saved general regs, then FP/SSE/MMX units ordered by whether
   x87 or SSE does the FP math.  */
13685 x86_order_regs_for_local_alloc ()
13690 /* First allocate the local general purpose registers. */
13691 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
13692 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
13693 reg_alloc_order
[pos
++] = i
;
13695 /* Global general purpose registers. */
13696 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
13697 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
13698 reg_alloc_order
[pos
++] = i
;
13700 /* x87 registers come first in case we are doing FP math
13702 if (!TARGET_SSE_MATH
)
13703 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
13704 reg_alloc_order
[pos
++] = i
;
13706 /* SSE registers. */
13707 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
13708 reg_alloc_order
[pos
++] = i
;
13709 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
13710 reg_alloc_order
[pos
++] = i
;
13712 /* x87 registers (placed after SSE when SSE does the FP math). */
13713 if (TARGET_SSE_MATH
)
13714 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
13715 reg_alloc_order
[pos
++] = i
;
/* MMX registers go last.  */
13717 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
13718 reg_alloc_order
[pos
++] = i
;
13720 /* Initialize the rest of array as we do not allocate some registers
13722 while (pos
< FIRST_PSEUDO_REGISTER
)
13723 reg_alloc_order
[pos
++] = 0;
/* Emit assembly for a "covariant this" MI thunk: add DELTA to the
   incoming `this' pointer (in a register or on the stack, depending on
   ABI and regparm) and tail-jump to FUNCTION.  NOTE(review): garbled
   extraction -- the TARGET_64BIT/PIC branch conditions, variable
   declarations, braces and the closing of the function are missing
   from this view; code text below is unmodified.  */
13727 x86_output_mi_thunk (file
, delta
, function
)
/* With -mregparm, scan the prototype to find where `this' arrives.  */
13735 if (ix86_regparm
> 0)
13736 parm
= TYPE_ARG_TYPES (TREE_TYPE (function
));
13739 for (; parm
; parm
= TREE_CHAIN (parm
))
13740 if (TREE_VALUE (parm
) == void_type_node
)
13743 xops
[0] = GEN_INT (delta
);
/* 64-bit: `this' is in the first or second integer-parameter register,
   shifted by one when the return value is an aggregate (hidden
   return-slot pointer occupies slot 0).  */
13746 int n
= aggregate_value_p (TREE_TYPE (TREE_TYPE (function
))) != 0;
13747 xops
[1] = gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
13748 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops
);
/* PIC tail call through the GOT on 64-bit.  */
13751 fprintf (file
, "\tjmp *");
13752 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13753 fprintf (file
, "@GOTPCREL(%%rip)\n");
/* Non-PIC: direct tail jump.  */
13757 fprintf (file
, "\tjmp ");
13758 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13759 fprintf (file
, "\n");
/* 32-bit: `this' is in %eax (regparm) or on the stack, at +8 when an
   aggregate-return pointer sits below it, else at +4.  */
13765 xops
[1] = gen_rtx_REG (SImode
, 0);
13766 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
))))
13767 xops
[1] = gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
13769 xops
[1] = gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
13770 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops
);
/* 32-bit PIC: materialize the GOT pointer (call/pop/add sequence),
   load FUNCTION's address from the GOT into %ecx, and jump through it.  */
13774 xops
[0] = pic_offset_table_rtx
;
13775 xops
[1] = gen_label_rtx ();
13776 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
/* With regparm > 2 the GOT register may hold an argument -- NOTE(review):
   the action taken in that case is missing from this extraction.  */
13778 if (ix86_regparm
> 2)
13780 output_asm_insn ("push{l}\t%0", xops
);
13781 output_asm_insn ("call\t%P1", xops
);
13782 ASM_OUTPUT_INTERNAL_LABEL (file
, "L", CODE_LABEL_NUMBER (xops
[1]));
13783 output_asm_insn ("pop{l}\t%0", xops
);
13785 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops
);
13786 xops
[0] = gen_rtx_MEM (SImode
, XEXP (DECL_RTL (function
), 0));
13788 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops
);
13789 asm_fprintf (file
, "\tpop{l\t%%ebx|\t%%ebx}\n");
13790 asm_fprintf (file
, "\tjmp\t{*%%ecx|%%ecx}\n");
/* 32-bit non-PIC: direct tail jump.  */
13794 fprintf (file
, "\tjmp ");
13795 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
13796 fprintf (file
, "\n");
13801 #include "gt-i386.h"