/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
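/* Illustrative use (a sketch, assuming the mult_init/divide field names
   declared for struct processor_costs in i386.h): the macro picks the
   per-mode slot of the five-element cost arrays below, e.g.

       int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   Index 4 is the catch-all slot used for any other mode.  */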
61 /* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
struct processor_costs i486_cost = {	/* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
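/* Illustrative sketch (an assumption about how i386.h consumes the masks
   below, not code from this file): each tuning flag is tested against the
   bit of the processor currently being tuned for, along the lines of

       #define CPUMASK (1 << ix86_cpu)
       #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   so a flag reads "true" exactly for the CPUs whose m_* bits are set.  */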
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just lower part of
   scalar values in proper format leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
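/* Illustrative sketch (an assumption about the i386.h side, not code in
   this file): REGNO_REG_CLASS simply indexes the map above,

       #define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO])

   so, e.g., hard register 0 (%eax) is reported as class AREG.  */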
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
601 /* Define the register numbers to be used in Dwarf debugging information.
602 The SVR4 reference port C compiler uses the following register numbers
603 in its Dwarf output code:
604 0 for %eax (gcc regno = 0)
605 1 for %ecx (gcc regno = 2)
606 2 for %edx (gcc regno = 1)
607 3 for %ebx (gcc regno = 3)
608 4 for %esp (gcc regno = 7)
609 5 for %ebp (gcc regno = 6)
610 6 for %esi (gcc regno = 4)
611 7 for %edi (gcc regno = 5)
612 The following three DWARF register numbers are never generated by
613 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
614 believes these numbers have these meanings.
615 8 for %eip (no gcc equivalent)
616 9 for %eflags (gcc regno = 17)
617 10 for %trapno (no gcc equivalent)
618 It is not at all clear how we should number the FP stack registers
619 for the x86 architecture. If the version of SDB on x86/svr4 were
620 a bit less brain dead with respect to floating-point then we would
621 have a precedent to follow with respect to DWARF register numbers
622 for x86 FP registers, but the SDB on x86/svr4 is so completely
623 broken with respect to FP registers that it is hardly worth thinking
624 of it as something to strive for compatibility with.
625 The version of x86/svr4 SDB I have at the moment does (partially)
626 seem to believe that DWARF register number 11 is associated with
627 the x86 register %st(0), but that's about all. Higher DWARF
628 register numbers don't seem to be associated with anything in
629 particular, and even for DWARF regno 11, SDB only seems to under-
630 stand that it should say that a variable lives in %st(0) (when
631 asked via an `=' command) if we said it was in DWARF regno 11,
632 but SDB still prints garbage when asked for the value of the
633 variable in question (via a `/' command).
634 (Also note that the labels SDB prints for various FP stack regs
635 when doing an `x' command are all wrong.)
636 Note that these problems generally don't affect the native SVR4
637 C compiler because it doesn't allow the use of -O with -g and
638 because when it is *not* optimizing, it allocates a memory
639 location for each floating-point variable, and the memory
640 location is what gets described in the DWARF AT_location
641 attribute for the variable in question.
642 Regardless of the severe mental illness of the x86/svr4 SDB, we
643 do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
646 11 for %st(0) (gcc regno = 8)
647 12 for %st(1) (gcc regno = 9)
648 13 for %st(2) (gcc regno = 10)
649 14 for %st(3) (gcc regno = 11)
650 15 for %st(4) (gcc regno = 12)
651 16 for %st(5) (gcc regno = 13)
652 17 for %st(6) (gcc regno = 14)
653 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */
static char const tls_model_chars[] = " GLil";
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
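/* Illustrative arithmetic (an aside, not code from this file): with the
   usual 64-bit ABI values REGPARM_MAX = 6, UNITS_PER_WORD = 8 and
   SSE_REGPARM_MAX = 8, the varargs register save area comes out to
   6*8 + 8*16 = 176 bytes.  */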
/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:
   ...
   saved frame pointer if frame_pointer_needed	<- HARD_FRAME_POINTER
   ...
   > to_allocate				<- FRAME_POINTER
   ...  */

struct ix86_frame
{
  int outgoing_arguments_size;
  HOST_WIDE_INT to_allocate;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;

/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;

const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;

const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
788 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
789 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
790 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
791 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
793 static const char *get_some_local_dynamic_name
PARAMS ((void));
794 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
795 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
796 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
797 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
799 static rtx get_thread_pointer
PARAMS ((void));
800 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
801 static rtx gen_push
PARAMS ((rtx
));
802 static int memory_address_length
PARAMS ((rtx addr
));
803 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
804 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
805 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
806 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
807 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
808 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
809 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
810 static int ix86_nsaved_regs
PARAMS ((void));
811 static void ix86_emit_save_regs
PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
813 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
814 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
815 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
816 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
817 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
818 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
819 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
820 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
821 static int ix86_issue_rate
PARAMS ((void));
822 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
823 static void ix86_sched_init
PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
825 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
826 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
829 static rtx x86_this_parameter
PARAMS ((tree
));
830 static void x86_output_mi_thunk
PARAMS ((FILE *, tree
, HOST_WIDE_INT
,
831 HOST_WIDE_INT
, tree
));
832 static bool x86_can_output_mi_thunk
PARAMS ((tree
, HOST_WIDE_INT
,
833 HOST_WIDE_INT
, tree
));
834 bool ix86_expand_carry_flag_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
*));
838 rtx base
, index
, disp
;
842 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
843 static int ix86_address_cost
PARAMS ((rtx
));
844 static bool ix86_cannot_force_const_mem
PARAMS ((rtx
));
846 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
847 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
850 struct builtin_description
;
851 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
853 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
855 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
856 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
857 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
858 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
859 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
860 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
861 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
865 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
867 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
868 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
869 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
870 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
871 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
872 static int ix86_save_reg
PARAMS ((unsigned int, int));
873 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
874 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
875 static int ix86_fntype_regparm
PARAMS ((tree
));
876 const struct attribute_spec ix86_attribute_table
[];
877 static bool ix86_function_ok_for_sibcall
PARAMS ((tree
, tree
));
878 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
879 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
880 static int ix86_value_regno
PARAMS ((enum machine_mode
));
881 static bool ix86_ms_bitfield_layout_p
PARAMS ((tree
));
882 static tree ix86_handle_struct_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
883 static int extended_reg_mentioned_1
PARAMS ((rtx
*, void *));
884 static bool ix86_rtx_costs
PARAMS ((rtx
, int, int, int *));
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, which are basically the SSE class; gcc just
   uses SF or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
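/* Illustrative sketch of what the classification below computes (my own
   example, not code from this file): under the x86-64 psABI a structure
   such as

       struct s { long l; double d; };

   occupies two eightbytes classified X86_64_INTEGER_CLASS and
   X86_64_SSE_CLASS, so it is passed partly in a general-purpose register
   and partly in an SSE register.  */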
898 enum x86_64_reg_class
901 X86_64_INTEGER_CLASS
,
902 X86_64_INTEGERSI_CLASS
,
911 static const char * const x86_64_reg_class_name
[] =
912 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
914 #define MAX_CLASSES 4
915 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
916 enum x86_64_reg_class
[MAX_CLASSES
],
918 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
920 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
922 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
923 enum x86_64_reg_class
));
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_MS_BITFIELD_LAYOUT_P
994 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
996 #undef TARGET_ASM_OUTPUT_MI_THUNK
997 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
998 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
999 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1001 #undef TARGET_RTX_COSTS
1002 #define TARGET_RTX_COSTS ix86_rtx_costs
1003 #undef TARGET_ADDRESS_COST
1004 #define TARGET_ADDRESS_COST ix86_address_cost
struct gcc_target targetm = TARGET_INITIALIZER;
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

/* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  const struct processor_costs *cost;	/* Processor costs */
  const int target_enable;		/* Target flags to enable.  */
  const int target_disable;		/* Target flags to disable.  */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;

const processor_target_table[PROCESSOR_max] =
  {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
  {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
  {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
  {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
  {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
  {&k8_cost, 0, 0, 16, 7, 16, 7, 16}

static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1050 const char *const name
; /* processor name or nickname. */
1051 const enum processor_type processor
;
1052 const enum pta_flags
1057 PTA_PREFETCH_SSE
= 8,
const processor_alias_table[] =
  {"i386", PROCESSOR_I386, 0},
  {"i486", PROCESSOR_I486, 0},
  {"i586", PROCESSOR_PENTIUM, 0},
  {"pentium", PROCESSOR_PENTIUM, 0},
  {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
  {"winchip-c6", PROCESSOR_I486, PTA_MMX},
  {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
  {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
  {"i686", PROCESSOR_PENTIUMPRO, 0},
  {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
  {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
  {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
  {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				   | PTA_MMX | PTA_PREFETCH_SSE},
  {"k6", PROCESSOR_K6, PTA_MMX},
  {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
  {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
  {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
			       | PTA_3DNOW_A},
  {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
				     | PTA_3DNOW | PTA_3DNOW_A},
  {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				 | PTA_3DNOW_A | PTA_SSE},
  {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				  | PTA_3DNOW_A | PTA_SSE},
  {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				  | PTA_3DNOW_A | PTA_SSE},
  {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
		       | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},

int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1103 /* Set the default values for switches whose default depends on TARGET_64BIT
1104 in case they weren't overwritten by command line options. */
1107 if (flag_omit_frame_pointer
== 2)
1108 flag_omit_frame_pointer
= 1;
1109 if (flag_asynchronous_unwind_tables
== 2)
1110 flag_asynchronous_unwind_tables
= 1;
1111 if (flag_pcc_struct_return
== 2)
1112 flag_pcc_struct_return
= 0;
1116 if (flag_omit_frame_pointer
== 2)
1117 flag_omit_frame_pointer
= 0;
1118 if (flag_asynchronous_unwind_tables
== 2)
1119 flag_asynchronous_unwind_tables
= 0;
1120 if (flag_pcc_struct_return
== 2)
1121 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
1124 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1125 SUBTARGET_OVERRIDE_OPTIONS
;
1128 if (!ix86_cpu_string
&& ix86_arch_string
)
1129 ix86_cpu_string
= ix86_arch_string
;
1130 if (!ix86_cpu_string
)
1131 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1132 if (!ix86_arch_string
)
1133 ix86_arch_string
= TARGET_64BIT
? "k8" : "i386";
1135 if (ix86_cmodel_string
!= 0)
1137 if (!strcmp (ix86_cmodel_string
, "small"))
1138 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1140 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1141 else if (!strcmp (ix86_cmodel_string
, "32"))
1142 ix86_cmodel
= CM_32
;
1143 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1144 ix86_cmodel
= CM_KERNEL
;
1145 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1146 ix86_cmodel
= CM_MEDIUM
;
1147 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1148 ix86_cmodel
= CM_LARGE
;
1150 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1154 ix86_cmodel
= CM_32
;
1156 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1158 if (ix86_asm_string
!= 0)
1160 if (!strcmp (ix86_asm_string
, "intel"))
1161 ix86_asm_dialect
= ASM_INTEL
;
1162 else if (!strcmp (ix86_asm_string
, "att"))
1163 ix86_asm_dialect
= ASM_ATT
;
1165 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1167 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1168 error ("code model `%s' not supported in the %s bit mode",
1169 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1170 if (ix86_cmodel
== CM_LARGE
)
1171 sorry ("code model `large' not supported yet");
1172 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1173 sorry ("%i-bit mode not compiled in",
1174 (target_flags
& MASK_64BIT
) ? 64 : 32);
1176 for (i
= 0; i
< pta_size
; i
++)
1177 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1179 ix86_arch
= processor_alias_table
[i
].processor
;
1180 /* Default cpu tuning to the architecture. */
1181 ix86_cpu
= ix86_arch
;
1182 if (processor_alias_table
[i
].flags
& PTA_MMX
1183 && !(target_flags_explicit
& MASK_MMX
))
1184 target_flags
|= MASK_MMX
;
1185 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1186 && !(target_flags_explicit
& MASK_3DNOW
))
1187 target_flags
|= MASK_3DNOW
;
1188 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1189 && !(target_flags_explicit
& MASK_3DNOW_A
))
1190 target_flags
|= MASK_3DNOW_A
;
1191 if (processor_alias_table
[i
].flags
& PTA_SSE
1192 && !(target_flags_explicit
& MASK_SSE
))
1193 target_flags
|= MASK_SSE
;
1194 if (processor_alias_table
[i
].flags
& PTA_SSE2
1195 && !(target_flags_explicit
& MASK_SSE2
))
1196 target_flags
|= MASK_SSE2
;
1197 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1198 x86_prefetch_sse
= true;
1199 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1200 error ("CPU you selected does not support x86-64 instruction set");
1205 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1207 for (i
= 0; i
< pta_size
; i
++)
1208 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1210 ix86_cpu
= processor_alias_table
[i
].processor
;
1211 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
1212 error ("CPU you selected does not support x86-64 instruction set");
1215 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1216 x86_prefetch_sse
= true;
1218 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1221 ix86_cost
= &size_cost
;
1223 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1224 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1225 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1227 /* Arrange to set up i386_stack_locals for all functions. */
1228 init_machine_status
= ix86_init_machine_status
;
1230 /* Validate -mregparm= value. */
1231 if (ix86_regparm_string
)
1233 i
= atoi (ix86_regparm_string
);
1234 if (i
< 0 || i
> REGPARM_MAX
)
1235 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1241 ix86_regparm
= REGPARM_MAX
;
1243 /* If the user has provided any of the -malign-* options,
1244 warn and use that value only if -falign-* is not set.
1245 Remove this code in GCC 3.2 or later. */
1246 if (ix86_align_loops_string
)
1248 warning ("-malign-loops is obsolete, use -falign-loops");
1249 if (align_loops
== 0)
1251 i
= atoi (ix86_align_loops_string
);
1252 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1253 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1255 align_loops
= 1 << i
;
1259 if (ix86_align_jumps_string
)
1261 warning ("-malign-jumps is obsolete, use -falign-jumps");
1262 if (align_jumps
== 0)
1264 i
= atoi (ix86_align_jumps_string
);
1265 if (i
< 0 || i
> MAX_CODE_ALIGN
)
	error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1268 align_jumps
= 1 << i
;
1272 if (ix86_align_funcs_string
)
1274 warning ("-malign-functions is obsolete, use -falign-functions");
1275 if (align_functions
== 0)
1277 i
= atoi (ix86_align_funcs_string
);
1278 if (i
< 0 || i
> MAX_CODE_ALIGN
)
	error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1281 align_functions
= 1 << i
;
1285 /* Default align_* from the processor table. */
1286 if (align_loops
== 0)
1288 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1289 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1291 if (align_jumps
== 0)
1293 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1294 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1296 if (align_functions
== 0)
1298 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1301 /* Validate -mpreferred-stack-boundary= value, or provide default.
1302 The default of 128 bits is for Pentium III's SSE __m128, but we
1303 don't want additional code to keep the stack aligned when
1304 optimizing for code size. */
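  /* For example (an illustration, not code from this file):
     -mpreferred-stack-boundary=4 requests 2**4 = 16 byte alignment,
     i.e. ix86_preferred_stack_boundary ends up as 128 bits.  */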
1305 ix86_preferred_stack_boundary
= (optimize_size
1306 ? TARGET_64BIT
? 128 : 32
1308 if (ix86_preferred_stack_boundary_string
)
1310 i
= atoi (ix86_preferred_stack_boundary_string
);
1311 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1312 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1313 TARGET_64BIT
? 4 : 2);
1315 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1318 /* Validate -mbranch-cost= value, or provide default. */
1319 ix86_branch_cost
= processor_target_table
[ix86_cpu
].cost
->branch_cost
;
1320 if (ix86_branch_cost_string
)
1322 i
= atoi (ix86_branch_cost_string
);
1324 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1326 ix86_branch_cost
= i
;
1329 if (ix86_tls_dialect_string
)
1331 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1332 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1333 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1334 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1336 error ("bad value (%s) for -mtls-dialect= switch",
1337 ix86_tls_dialect_string
);
1340 /* Keep nonleaf frame pointers. */
1341 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1342 flag_omit_frame_pointer
= 1;
1344 /* If we're doing fast math, we don't care about comparison order
1345 wrt NaNs. This lets us use a shorter comparison sequence. */
1346 if (flag_unsafe_math_optimizations
)
1347 target_flags
&= ~MASK_IEEE_FP
;
1349 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1350 since the insns won't need emulation. */
1351 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1352 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1356 if (TARGET_ALIGN_DOUBLE
)
1357 error ("-malign-double makes no sense in the 64bit mode");
1359 error ("-mrtd calling convention not supported in the 64bit mode");
1360 /* Enable by default the SSE and MMX builtins. */
1361 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1362 ix86_fpmath
= FPMATH_SSE
;
1365 ix86_fpmath
= FPMATH_387
;
1367 if (ix86_fpmath_string
!= 0)
1369 if (! strcmp (ix86_fpmath_string
, "387"))
1370 ix86_fpmath
= FPMATH_387
;
1371 else if (! strcmp (ix86_fpmath_string
, "sse"))
1375 warning ("SSE instruction set disabled, using 387 arithmetics");
1376 ix86_fpmath
= FPMATH_387
;
1379 ix86_fpmath
= FPMATH_SSE
;
1381 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1382 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1386 warning ("SSE instruction set disabled, using 387 arithmetics");
1387 ix86_fpmath
= FPMATH_387
;
1389 else if (!TARGET_80387
)
1391 warning ("387 instruction set disabled, using SSE arithmetics");
1392 ix86_fpmath
= FPMATH_SSE
;
1395 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1398 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1401 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1405 target_flags
|= MASK_MMX
;
1406 x86_prefetch_sse
= true;
1409 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1412 target_flags
|= MASK_MMX
;
1413 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1414 extensions it adds. */
1415 if (x86_3dnow_a
& (1 << ix86_arch
))
1416 target_flags
|= MASK_3DNOW_A
;
1418 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1419 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1421 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1423 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1426 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1427 p
= strchr (internal_label_prefix
, 'X');
1428 internal_label_prefix_len
= p
- internal_label_prefix
;
1434 optimization_options (level
, size
)
1436 int size ATTRIBUTE_UNUSED
;
1438 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1439 make the problem with not enough registers even worse. */
1440 #ifdef INSN_SCHEDULING
1442 flag_schedule_insns
= 0;
  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     override_options.  */
1450 flag_omit_frame_pointer
= 2;
1451 flag_pcc_struct_return
= 2;
1452 flag_asynchronous_unwind_tables
= 2;
1455 /* Table of valid machine attributes. */
1456 const struct attribute_spec ix86_attribute_table
[] =
1458 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1459 /* Stdcall attribute says callee is responsible for popping arguments
1460 if they are not variable. */
1461 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1462 /* Fastcall attribute says callee is responsible for popping arguments
1463 if they are not variable. */
1464 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1465 /* Cdecl attribute says the callee is a normal C declaration */
1466 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1467 /* Regparm attribute specifies how many integer arguments are to be
1468 passed in registers. */
1469 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1470 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1471 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1472 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1473 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1475 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1476 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
1477 { NULL
, 0, 0, false, false, false, NULL
}
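/* Illustrative usage (user-level C, not part of this file): the table
   entries above correspond to source-level attributes such as

       int __attribute__ ((regparm (3))) fn (int a, int b, int c);

   which asks for the first three integer arguments of fn to be passed
   in registers rather than on the stack.  */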
1480 /* If PIC, we cannot make sibling calls to global functions
1481 because the PLT requires %ebx live.
1482 If we are returning floats on the register stack, we cannot make
1483 sibling calls to functions that return floats. (The stack adjust
1484 instruction will wind up after the sibcall jump, and not be executed.) */
1487 ix86_function_ok_for_sibcall (decl
, exp
)
1491 /* If we are generating position-independent code, we cannot sibcall
1492 optimize any indirect call, or a direct call to a global function,
1493 as the PLT requires %ebx be live. */
1494 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| TREE_PUBLIC (decl
)))
1497 /* If we are returning floats on the 80387 register stack, we cannot
1498 make a sibcall from a function that doesn't return a float to a
1499 function that does; the necessary stack adjustment will not be
1501 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp
)))
1502 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)))))
1505 /* If this call is indirect, we'll need to be able to use a call-clobbered
1506 register for the address of the target function. Make sure that all
1507 such registers are not used for passing parameters. */
1508 if (!decl
&& !TARGET_64BIT
)
1510 int regparm
= ix86_regparm
;
1513 /* We're looking at the CALL_EXPR, we need the type of the function. */
1514 type
= TREE_OPERAND (exp
, 0); /* pointer expression */
1515 type
= TREE_TYPE (type
); /* pointer type */
1516 type
= TREE_TYPE (type
); /* function type */
1518 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1520 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1524 /* ??? Need to count the actual number of registers to be used,
1525 not the possible number of registers. Fix later. */
1530 /* Otherwise okay. That also includes certain types of indirect calls. */
1534 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1535 arguments as in struct attribute_spec.handler. */
1537 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1540 tree args ATTRIBUTE_UNUSED
;
1541 int flags ATTRIBUTE_UNUSED
;
1544 if (TREE_CODE (*node
) != FUNCTION_TYPE
1545 && TREE_CODE (*node
) != METHOD_TYPE
1546 && TREE_CODE (*node
) != FIELD_DECL
1547 && TREE_CODE (*node
) != TYPE_DECL
)
1549 warning ("`%s' attribute only applies to functions",
1550 IDENTIFIER_POINTER (name
));
1551 *no_add_attrs
= true;
1555 if (is_attribute_p ("fastcall", name
))
1557 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
1559 error ("fastcall and stdcall attributes are not compatible");
1561 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
1563 error ("fastcall and regparm attributes are not compatible");
1566 else if (is_attribute_p ("stdcall", name
))
1568 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1570 error ("fastcall and stdcall attributes are not compatible");
1577 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1578 *no_add_attrs
= true;
1584 /* Handle a "regparm" attribute;
1585 arguments as in struct attribute_spec.handler. */
1587 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1591 int flags ATTRIBUTE_UNUSED
;
1594 if (TREE_CODE (*node
) != FUNCTION_TYPE
1595 && TREE_CODE (*node
) != METHOD_TYPE
1596 && TREE_CODE (*node
) != FIELD_DECL
1597 && TREE_CODE (*node
) != TYPE_DECL
)
1599 warning ("`%s' attribute only applies to functions",
1600 IDENTIFIER_POINTER (name
));
1601 *no_add_attrs
= true;
1607 cst
= TREE_VALUE (args
);
1608 if (TREE_CODE (cst
) != INTEGER_CST
)
1610 warning ("`%s' attribute requires an integer constant argument",
1611 IDENTIFIER_POINTER (name
));
1612 *no_add_attrs
= true;
1614 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1616 warning ("argument to `%s' attribute larger than %d",
1617 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1618 *no_add_attrs
= true;
1621 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
1623 error ("fastcall and regparm attributes are not compatible");
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched fastcall types */
  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
    return 0;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE.  */

static int
ix86_fntype_regparm (type)
     tree type;
{
  tree attr;

  attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
  if (attr)
    return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));

  return ix86_regparm;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall and fastcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
	|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype))
      && !TARGET_64BIT)
    {
      int nregs = ix86_fntype_regparm (funtype);

      if (!nregs)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
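/* Illustration with hypothetical 32-bit declarations:

     void __attribute__((stdcall)) cb (int a, int b);   // callee pops: returns 8
     void plain (int a, int b);                          // cdecl: returns 0
     void __attribute__((stdcall)) v (int a, ...);       // variable args: returns 0

   For cb, ix86_return_pops_args returns 8 and the callee exits with
   `ret $8'; for the other two the caller must adjust the stack itself.  */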
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

int
ix86_function_arg_regno_p (regno)
     int regno;
{
  int i;

  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
  if (SSE_REGNO_P (regno) && TARGET_SSE)
    return true;
  /* RAX is used as hidden argument to va_arg functions.  */
  if (!regno)
    return true;
  for (i = 0; i < REGPARM_MAX; i++)
    if (regno == x86_64_int_parameter_registers[i])
      return true;
  return false;
}
1747 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1748 for a call to a function whose data type is FNTYPE.
1749 For a library call, FNTYPE is 0. */
1752 init_cumulative_args (cum
, fntype
, libname
)
1753 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1754 tree fntype
; /* tree ptr for function decl */
1755 rtx libname
; /* SYMBOL_REF of library name or 0 */
1757 static CUMULATIVE_ARGS zero_cum
;
1758 tree param
, next_param
;
1760 if (TARGET_DEBUG_ARG
)
1762 fprintf (stderr
, "\ninit_cumulative_args (");
1764 fprintf (stderr
, "fntype code = %s, ret code = %s",
1765 tree_code_name
[(int) TREE_CODE (fntype
)],
1766 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1768 fprintf (stderr
, "no fntype");
1771 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1776 /* Set up the number of registers to use for passing arguments. */
1777 cum
->nregs
= ix86_regparm
;
1778 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1779 if (fntype
&& !TARGET_64BIT
)
1781 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1784 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1786 cum
->maybe_vaarg
= false;
1788 /* Use ecx and edx registers if function has fastcall attribute */
1789 if (fntype
&& !TARGET_64BIT
)
1791 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers.  */
1806 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1807 param
!= 0; param
= next_param
)
1809 next_param
= TREE_CHAIN (param
);
1810 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1817 cum
->maybe_vaarg
= true;
1821 if ((!fntype
&& !libname
)
1822 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1823 cum
->maybe_vaarg
= 1;
1825 if (TARGET_DEBUG_ARG
)
1826 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
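  /* Example of the resulting setup, assuming the hypothetical prototypes:

       int __attribute__((regparm (3))) f (int a, int b, int c);
       int __attribute__((fastcall))    g (int a, int b, int c);

     For f, cum->nregs is 3 and a/b/c travel in %eax, %edx and %ecx; for g,
     the fastcall convention uses %ecx and %edx for the first two arguments
     and pushes c on the stack.  A prototype ending in `...' sets
     cum->maybe_vaarg and disables register passing entirely.  */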
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (class1, class2)
     enum x86_64_reg_class class1, class2;
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
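/* Worked example of the rules above, for a hypothetical

     union u { double d; long l; };

   Both members occupy the same eightbyte: the double classifies it as an
   SSE class and the long as INTEGER, and rule #4 merges the pair to
   INTEGER, so the union ends up being passed in a general purpose
   register.  */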
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records; it is the bit offset
   of the argument within the enclosing record, taken modulo 256 to avoid
   overflow cases.

   See the x86-64 PS ABI for details.  */
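/* For instance, a hypothetical

     struct s { double x; int y; };

   occupies two eightbytes: classes[0] becomes an SSE class (from the
   double) and classes[1] an INTEGER class (from the int plus padding),
   and the function returns 2; construct_container then assigns an SSE
   register to the first eightbyte and a general purpose register to the
   second.  */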
1887 classify_argument (mode
, type
, classes
, bit_offset
)
1888 enum machine_mode mode
;
1890 enum x86_64_reg_class classes
[MAX_CLASSES
];
1894 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1895 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1897 /* Variable sized entities are always passed/returned in memory. */
1901 if (type
&& AGGREGATE_TYPE_P (type
))
1905 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1907 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1911 for (i
= 0; i
< words
; i
++)
1912 classes
[i
] = X86_64_NO_CLASS
;
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
1918 classes
[0] = X86_64_NO_CLASS
;
1922 /* Classify each field of record and merge classes. */
1923 if (TREE_CODE (type
) == RECORD_TYPE
)
	  /* For classes, first merge in the fields of the base classes.  */
1926 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1928 tree bases
= TYPE_BINFO_BASETYPES (type
);
1929 int n_bases
= TREE_VEC_LENGTH (bases
);
1932 for (i
= 0; i
< n_bases
; ++i
)
1934 tree binfo
= TREE_VEC_ELT (bases
, i
);
1936 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1937 tree type
= BINFO_TYPE (binfo
);
1939 num
= classify_argument (TYPE_MODE (type
),
1941 (offset
+ bit_offset
) % 256);
1944 for (i
= 0; i
< num
; i
++)
1946 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1948 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1952 /* And now merge the fields of structure. */
1953 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1955 if (TREE_CODE (field
) == FIELD_DECL
)
1959 /* Bitfields are always classified as integer. Handle them
1960 early, since later code would consider them to be
1961 misaligned integers. */
1962 if (DECL_BIT_FIELD (field
))
1964 for (i
= int_bit_position (field
) / 8 / 8;
1965 i
< (int_bit_position (field
)
1966 + tree_low_cst (DECL_SIZE (field
), 0)
1969 merge_classes (X86_64_INTEGER_CLASS
,
1974 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1975 TREE_TYPE (field
), subclasses
,
1976 (int_bit_position (field
)
1977 + bit_offset
) % 256);
1980 for (i
= 0; i
< num
; i
++)
1983 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
1985 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1991 /* Arrays are handled as small records. */
1992 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1995 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1996 TREE_TYPE (type
), subclasses
, bit_offset
);
2000 /* The partial classes are now full classes. */
2001 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2002 subclasses
[0] = X86_64_SSE_CLASS
;
2003 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
2004 subclasses
[0] = X86_64_INTEGER_CLASS
;
2006 for (i
= 0; i
< words
; i
++)
2007 classes
[i
] = subclasses
[i
% num
];
2009 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2010 else if (TREE_CODE (type
) == UNION_TYPE
2011 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
	  /* For classes, first merge in the fields of the base classes.  */
2014 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
2016 tree bases
= TYPE_BINFO_BASETYPES (type
);
2017 int n_bases
= TREE_VEC_LENGTH (bases
);
2020 for (i
= 0; i
< n_bases
; ++i
)
2022 tree binfo
= TREE_VEC_ELT (bases
, i
);
2024 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
2025 tree type
= BINFO_TYPE (binfo
);
2027 num
= classify_argument (TYPE_MODE (type
),
2029 (offset
+ (bit_offset
% 64)) % 256);
2032 for (i
= 0; i
< num
; i
++)
2034 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
2036 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2040 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
2042 if (TREE_CODE (field
) == FIELD_DECL
)
2045 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2046 TREE_TYPE (field
), subclasses
,
2050 for (i
= 0; i
< num
; i
++)
2051 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2058 /* Final merger cleanup. */
2059 for (i
= 0; i
< words
; i
++)
2061 /* If one class is MEMORY, everything should be passed in
2063 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2066 /* The X86_64_SSEUP_CLASS should be always preceded by
2067 X86_64_SSE_CLASS. */
2068 if (classes
[i
] == X86_64_SSEUP_CLASS
2069 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
2070 classes
[i
] = X86_64_SSE_CLASS
;
2072 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2073 if (classes
[i
] == X86_64_X87UP_CLASS
2074 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
2075 classes
[i
] = X86_64_SSE_CLASS
;
2080 /* Compute alignment needed. We align all types to natural boundaries with
2081 exception of XFmode that is aligned to 64bits. */
2082 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2084 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2087 mode_alignment
= 128;
2088 else if (mode
== XCmode
)
2089 mode_alignment
= 256;
2090 /* Misaligned fields are always returned in memory. */
2091 if (bit_offset
% mode_alignment
)
2095 /* Classification of atomic types. */
2105 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2106 classes
[0] = X86_64_INTEGERSI_CLASS
;
2108 classes
[0] = X86_64_INTEGER_CLASS
;
2112 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2115 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2116 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
2119 if (!(bit_offset
% 64))
2120 classes
[0] = X86_64_SSESF_CLASS
;
2122 classes
[0] = X86_64_SSE_CLASS
;
2125 classes
[0] = X86_64_SSEDF_CLASS
;
2128 classes
[0] = X86_64_X87_CLASS
;
2129 classes
[1] = X86_64_X87UP_CLASS
;
2132 classes
[0] = X86_64_X87_CLASS
;
2133 classes
[1] = X86_64_X87UP_CLASS
;
2134 classes
[2] = X86_64_X87_CLASS
;
2135 classes
[3] = X86_64_X87UP_CLASS
;
2138 classes
[0] = X86_64_SSEDF_CLASS
;
2139 classes
[1] = X86_64_SSEDF_CLASS
;
2142 classes
[0] = X86_64_SSE_CLASS
;
2150 classes
[0] = X86_64_SSE_CLASS
;
2151 classes
[1] = X86_64_SSEUP_CLASS
;
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
2169 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
2170 enum machine_mode mode
;
2172 int *int_nregs
, *sse_nregs
;
2175 enum x86_64_reg_class
class[MAX_CLASSES
];
2176 int n
= classify_argument (mode
, type
, class, 0);
2182 for (n
--; n
>= 0; n
--)
2185 case X86_64_INTEGER_CLASS
:
2186 case X86_64_INTEGERSI_CLASS
:
2189 case X86_64_SSE_CLASS
:
2190 case X86_64_SSESF_CLASS
:
2191 case X86_64_SSEDF_CLASS
:
2194 case X86_64_NO_CLASS
:
2195 case X86_64_SSEUP_CLASS
:
2197 case X86_64_X87_CLASS
:
2198 case X86_64_X87UP_CLASS
:
2202 case X86_64_MEMORY_CLASS
:
2207 /* Construct container for the argument used by GCC interface. See
2208 FUNCTION_ARG for the detailed description. */
2210 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
2211 enum machine_mode mode
;
2214 int nintregs
, nsseregs
;
2218 enum machine_mode tmpmode
;
2220 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2221 enum x86_64_reg_class
class[MAX_CLASSES
];
2225 int needed_sseregs
, needed_intregs
;
2226 rtx exp
[MAX_CLASSES
];
2229 n
= classify_argument (mode
, type
, class, 0);
2230 if (TARGET_DEBUG_ARG
)
2233 fprintf (stderr
, "Memory class\n");
2236 fprintf (stderr
, "Classes:");
2237 for (i
= 0; i
< n
; i
++)
2239 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2241 fprintf (stderr
, "\n");
2246 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2248 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
2253 if (n
== 1 && mode
!= SCmode
)
2256 case X86_64_INTEGER_CLASS
:
2257 case X86_64_INTEGERSI_CLASS
:
2258 return gen_rtx_REG (mode
, intreg
[0]);
2259 case X86_64_SSE_CLASS
:
2260 case X86_64_SSESF_CLASS
:
2261 case X86_64_SSEDF_CLASS
:
2262 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2263 case X86_64_X87_CLASS
:
2264 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2265 case X86_64_NO_CLASS
:
2266 /* Zero sized array, struct or class. */
2271 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2272 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2274 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2275 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
2276 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2277 && class[1] == X86_64_INTEGER_CLASS
2278 && (mode
== CDImode
|| mode
== TImode
)
2279 && intreg
[0] + 1 == intreg
[1])
2280 return gen_rtx_REG (mode
, intreg
[0]);
2282 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2283 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2284 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
2286 /* Otherwise figure out the entries of the PARALLEL. */
2287 for (i
= 0; i
< n
; i
++)
2291 case X86_64_NO_CLASS
:
2293 case X86_64_INTEGER_CLASS
:
2294 case X86_64_INTEGERSI_CLASS
:
2295 /* Merge TImodes on aligned occasions here too. */
2296 if (i
* 8 + 8 > bytes
)
2297 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2298 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2302 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2303 if (tmpmode
== BLKmode
)
2305 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2306 gen_rtx_REG (tmpmode
, *intreg
),
2310 case X86_64_SSESF_CLASS
:
2311 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2312 gen_rtx_REG (SFmode
,
2313 SSE_REGNO (sse_regno
)),
2317 case X86_64_SSEDF_CLASS
:
2318 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2319 gen_rtx_REG (DFmode
,
2320 SSE_REGNO (sse_regno
)),
2324 case X86_64_SSE_CLASS
:
2325 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2329 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2330 gen_rtx_REG (tmpmode
,
2331 SSE_REGNO (sse_regno
)),
2333 if (tmpmode
== TImode
)
2341 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2342 for (i
= 0; i
< nexps
; i
++)
2343 XVECEXP (ret
, 0, i
) = exp
[i
];
2347 /* Update the data in CUM to advance over an argument
2348 of mode MODE and data type TYPE.
2349 (TYPE is null for libcalls where that information may not be available.) */
2352 function_arg_advance (cum
, mode
, type
, named
)
2353 CUMULATIVE_ARGS
*cum
; /* current arg information */
2354 enum machine_mode mode
; /* current arg mode */
2355 tree type
; /* type of the argument or 0 if lib support */
2356 int named
; /* whether or not the argument was named */
2359 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2360 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2362 if (TARGET_DEBUG_ARG
)
2364 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2365 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2368 int int_nregs
, sse_nregs
;
2369 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2370 cum
->words
+= words
;
2371 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2373 cum
->nregs
-= int_nregs
;
2374 cum
->sse_nregs
-= sse_nregs
;
2375 cum
->regno
+= int_nregs
;
2376 cum
->sse_regno
+= sse_nregs
;
2379 cum
->words
+= words
;
2383 if (TARGET_SSE
&& mode
== TImode
)
2385 cum
->sse_words
+= words
;
2386 cum
->sse_nregs
-= 1;
2387 cum
->sse_regno
+= 1;
2388 if (cum
->sse_nregs
<= 0)
2396 cum
->words
+= words
;
2397 cum
->nregs
-= words
;
2398 cum
->regno
+= words
;
2400 if (cum
->nregs
<= 0)
2410 /* Define where to put the arguments to a function.
2411 Value is zero to push the argument on the stack,
2412 or a hard register in which to store the argument.
2414 MODE is the argument's machine mode.
2415 TYPE is the data type of the argument (as a tree).
2416 This is null for libcalls where that information may
2418 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2419 the preceding args and about the function being called.
2420 NAMED is nonzero if this argument is a named parameter
2421 (otherwise it is an extra parameter matching an ellipsis). */
2424 function_arg (cum
, mode
, type
, named
)
2425 CUMULATIVE_ARGS
*cum
; /* current arg information */
2426 enum machine_mode mode
; /* current arg mode */
2427 tree type
; /* type of the argument or 0 if lib support */
2428 int named
; /* != 0 for normal args, == 0 for ... args */
2432 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2433 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2435 /* Handle a hidden AL argument containing number of registers for varargs
2436 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2438 if (mode
== VOIDmode
)
2441 return GEN_INT (cum
->maybe_vaarg
2442 ? (cum
->sse_nregs
< 0
2450 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2451 &x86_64_int_parameter_registers
[cum
->regno
],
2456 /* For now, pass fp/complex values on the stack. */
2465 if (words
<= cum
->nregs
)
2467 int regno
= cum
->regno
;
2469 /* Fastcall allocates the first two DWORD (SImode) or
2470 smaller arguments to ECX and EDX. */
2473 if (mode
== BLKmode
|| mode
== DImode
)
2476 /* ECX not EAX is the first allocated register. */
2480 ret
= gen_rtx_REG (mode
, regno
);
2485 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2489 if (TARGET_DEBUG_ARG
)
2492 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2493 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2496 print_simple_rtl (stderr
, ret
);
2498 fprintf (stderr
, ", stack");
2500 fprintf (stderr
, " )\n");
2506 /* A C expression that indicates when an argument must be passed by
2507 reference. If nonzero for an argument, a copy of that argument is
2508 made in memory and a pointer to the argument is passed instead of
2509 the argument itself. The pointer is passed in whatever way is
2510 appropriate for passing a pointer to that type. */
2513 function_arg_pass_by_reference (cum
, mode
, type
, named
)
2514 CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
;
2515 enum machine_mode mode ATTRIBUTE_UNUSED
;
2517 int named ATTRIBUTE_UNUSED
;
2522 if (type
&& int_size_in_bytes (type
) == -1)
2524 if (TARGET_DEBUG_ARG
)
2525 fprintf (stderr
, "function_arg_pass_by_reference\n");
2532 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2536 ix86_function_arg_boundary (mode
, type
)
2537 enum machine_mode mode
;
2542 return PARM_BOUNDARY
;
2544 align
= TYPE_ALIGN (type
);
2546 align
= GET_MODE_ALIGNMENT (mode
);
2547 if (align
< PARM_BOUNDARY
)
2548 align
= PARM_BOUNDARY
;
2554 /* Return true if N is a possible register number of function value. */
2556 ix86_function_value_regno_p (regno
)
2561 return ((regno
) == 0
2562 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2563 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2565 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2566 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2567 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2570 /* Define how to find the value returned by a function.
2571 VALTYPE is the data type of the value (as a tree).
2572 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2573 otherwise, FUNC is 0. */
2575 ix86_function_value (valtype
)
2580 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2581 REGPARM_MAX
, SSE_REGPARM_MAX
,
2582 x86_64_int_return_registers
, 0);
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
2586 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2590 return gen_rtx_REG (TYPE_MODE (valtype
),
2591 ix86_value_regno (TYPE_MODE (valtype
)));
2594 /* Return false iff type is returned in memory. */
2596 ix86_return_in_memory (type
)
2599 int needed_intregs
, needed_sseregs
;
2602 return !examine_argument (TYPE_MODE (type
), type
, 1,
2603 &needed_intregs
, &needed_sseregs
);
2607 if (TYPE_MODE (type
) == BLKmode
2608 || (VECTOR_MODE_P (TYPE_MODE (type
))
2609 && int_size_in_bytes (type
) == 8)
2610 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2611 && TYPE_MODE (type
) != TFmode
2612 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2618 /* Define how to find the value returned by a library function
2619 assuming the value has mode MODE. */
2621 ix86_libcall_value (mode
)
2622 enum machine_mode mode
;
2632 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2635 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2637 return gen_rtx_REG (mode
, 0);
2641 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2644 /* Given a mode, return the register to use for a return value. */
2647 ix86_value_regno (mode
)
2648 enum machine_mode mode
;
2650 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2651 return FIRST_FLOAT_REG
;
2652 if (mode
== TImode
|| VECTOR_MODE_P (mode
))
2653 return FIRST_SSE_REG
;
2657 /* Create the va_list data type. */
2660 ix86_build_va_list ()
2662 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2664 /* For i386 we use plain pointer to argument area. */
2666 return build_pointer_type (char_type_node
);
2668 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2669 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2671 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2672 unsigned_type_node
);
2673 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2674 unsigned_type_node
);
2675 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2677 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2680 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2681 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2682 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2683 DECL_FIELD_CONTEXT (f_sav
) = record
;
2685 TREE_CHAIN (record
) = type_decl
;
2686 TYPE_NAME (record
) = type_decl
;
2687 TYPE_FIELDS (record
) = f_gpr
;
2688 TREE_CHAIN (f_gpr
) = f_fpr
;
2689 TREE_CHAIN (f_fpr
) = f_ovf
;
2690 TREE_CHAIN (f_ovf
) = f_sav
;
2692 layout_type (record
);
2694 /* The correct type is an array type of one element. */
2695 return build_array_type (record
, build_index_type (size_zero_node
));
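/* The record built above corresponds to the C-level type mandated by the
   x86-64 psABI; roughly (sketch only):

     typedef struct __va_list_tag {
       unsigned int gp_offset;     // bytes into reg_save_area for next GP arg
       unsigned int fp_offset;     // bytes into reg_save_area for next FP arg
       void *overflow_arg_area;    // stack-passed arguments
       void *reg_save_area;        // register save block built by the prologue
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   which is why this function returns an array type of one element.  */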
/* Perform any needed actions for a function that is receiving a
   variable number of arguments.
2703 MODE and TYPE are the mode and type of the current parameter.
2705 PRETEND_SIZE is a variable that should be set to the amount of stack
2706 that must be pushed by the prolog to pretend that our caller pushed
2709 Normally, this macro will push all remaining incoming registers on the
2710 stack and set PRETEND_SIZE to the length of the registers pushed. */
2713 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2714 CUMULATIVE_ARGS
*cum
;
2715 enum machine_mode mode
;
2717 int *pretend_size ATTRIBUTE_UNUSED
;
2721 CUMULATIVE_ARGS next_cum
;
2722 rtx save_area
= NULL_RTX
, mem
;
2735 /* Indicate to allocate space on the stack for varargs save area. */
2736 ix86_save_varrargs_registers
= 1;
2738 fntype
= TREE_TYPE (current_function_decl
);
2739 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2740 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2741 != void_type_node
));
2743 /* For varargs, we do not want to skip the dummy va_dcl argument.
2744 For stdargs, we do want to skip the last named argument. */
2747 function_arg_advance (&next_cum
, mode
, type
, 1);
2750 save_area
= frame_pointer_rtx
;
2752 set
= get_varargs_alias_set ();
2754 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2756 mem
= gen_rtx_MEM (Pmode
,
2757 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2758 set_mem_alias_set (mem
, set
);
2759 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2760 x86_64_int_parameter_registers
[i
]));
2763 if (next_cum
.sse_nregs
)
2765 /* Now emit code to save SSE registers. The AX parameter contains number
2766 of SSE parameter registers used to call this function. We use
2767 sse_prologue_save insn template that produces computed jump across
2768 SSE saves. We need some preparation work to get this working. */
2770 label
= gen_label_rtx ();
2771 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2773 /* Compute address to jump to :
2774 label - 5*eax + nnamed_sse_arguments*5 */
2775 tmp_reg
= gen_reg_rtx (Pmode
);
2776 nsse_reg
= gen_reg_rtx (Pmode
);
2777 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2778 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2779 gen_rtx_MULT (Pmode
, nsse_reg
,
2781 if (next_cum
.sse_regno
)
2784 gen_rtx_CONST (DImode
,
2785 gen_rtx_PLUS (DImode
,
2787 GEN_INT (next_cum
.sse_regno
* 4))));
2789 emit_move_insn (nsse_reg
, label_ref
);
2790 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
      /* Compute the address of the memory block we save into.  We always
	 use a pointer pointing 127 bytes after the first byte to store -
	 this is needed to keep the instruction size limited to 4 bytes.  */
2795 tmp_reg
= gen_reg_rtx (Pmode
);
2796 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2797 plus_constant (save_area
,
2798 8 * REGPARM_MAX
+ 127)));
2799 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2800 set_mem_alias_set (mem
, set
);
2801 set_mem_align (mem
, BITS_PER_WORD
);
2803 /* And finally do the dirty job! */
2804 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2805 GEN_INT (next_cum
.sse_regno
), label
));
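  /* Layout of the register save area that the code above fills in,
     relative to reg_save_area (offsets follow from i * UNITS_PER_WORD for
     the integer registers and 8 * REGPARM_MAX for the SSE block):

       offset   0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
       offset  48 .. 175   %xmm0 .. %xmm7                     (16 bytes each)

     The computed jump emitted via sse_prologue_save skips the stores for
     SSE registers that were not used by the call, as counted in %al.  */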
2810 /* Implement va_start. */
2813 ix86_va_start (valist
, nextarg
)
2817 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2818 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2819 tree gpr
, fpr
, ovf
, sav
, t
;
2821 /* Only 64bit target needs something special. */
2824 std_expand_builtin_va_start (valist
, nextarg
);
2828 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2829 f_fpr
= TREE_CHAIN (f_gpr
);
2830 f_ovf
= TREE_CHAIN (f_fpr
);
2831 f_sav
= TREE_CHAIN (f_ovf
);
2833 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2834 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2835 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2836 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2837 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2839 /* Count number of gp and fp argument registers used. */
2840 words
= current_function_args_info
.words
;
2841 n_gpr
= current_function_args_info
.regno
;
2842 n_fpr
= current_function_args_info
.sse_regno
;
2844 if (TARGET_DEBUG_ARG
)
2845 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2846 (int) words
, (int) n_gpr
, (int) n_fpr
);
2848 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2849 build_int_2 (n_gpr
* 8, 0));
2850 TREE_SIDE_EFFECTS (t
) = 1;
2851 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2853 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2854 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2855 TREE_SIDE_EFFECTS (t
) = 1;
2856 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2858 /* Find the overflow area. */
2859 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2861 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2862 build_int_2 (words
* UNITS_PER_WORD
, 0));
2863 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2864 TREE_SIDE_EFFECTS (t
) = 1;
2865 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
  /* Find the register save area.
     The function prologue saves it right above the stack frame.  */
2869 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2870 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2871 TREE_SIDE_EFFECTS (t
) = 1;
2872 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
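  /* Concrete effect, for a hypothetical `int f (int a, ...)' called as
     f (1, 2, 3.0): one named GP argument has been consumed, so va_start
     initializes gp_offset = 8, fp_offset = 8 * REGPARM_MAX = 48 (the start
     of the SSE block), overflow_arg_area to the first stack-passed slot,
     and reg_save_area to the block saved by the prologue above.  */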
2875 /* Implement va_arg. */
2877 ix86_va_arg (valist
, type
)
2880 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2881 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2882 tree gpr
, fpr
, ovf
, sav
, t
;
2884 rtx lab_false
, lab_over
= NULL_RTX
;
2889 /* Only 64bit target needs something special. */
2892 return std_expand_builtin_va_arg (valist
, type
);
2895 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2896 f_fpr
= TREE_CHAIN (f_gpr
);
2897 f_ovf
= TREE_CHAIN (f_fpr
);
2898 f_sav
= TREE_CHAIN (f_ovf
);
2900 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2901 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2902 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2903 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2904 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2906 size
= int_size_in_bytes (type
);
2909 /* Passed by reference. */
2911 type
= build_pointer_type (type
);
2912 size
= int_size_in_bytes (type
);
2914 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2916 container
= construct_container (TYPE_MODE (type
), type
, 0,
2917 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
  /* Pull the value out of the saved registers ...  */
2922 addr_rtx
= gen_reg_rtx (Pmode
);
2926 rtx int_addr_rtx
, sse_addr_rtx
;
2927 int needed_intregs
, needed_sseregs
;
2930 lab_over
= gen_label_rtx ();
2931 lab_false
= gen_label_rtx ();
2933 examine_argument (TYPE_MODE (type
), type
, 0,
2934 &needed_intregs
, &needed_sseregs
);
2937 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2938 || TYPE_ALIGN (type
) > 128);
      /* In case we are passing a structure, verify that it is a consecutive
	 block on the register save area.  If not, we need to do moves.  */
2942 if (!need_temp
&& !REG_P (container
))
2944 /* Verify that all registers are strictly consecutive */
2945 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2949 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2951 rtx slot
= XVECEXP (container
, 0, i
);
2952 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2953 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2961 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2963 rtx slot
= XVECEXP (container
, 0, i
);
2964 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2965 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2972 int_addr_rtx
= addr_rtx
;
2973 sse_addr_rtx
= addr_rtx
;
2977 int_addr_rtx
= gen_reg_rtx (Pmode
);
2978 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2980 /* First ensure that we fit completely in registers. */
2983 emit_cmp_and_jump_insns (expand_expr
2984 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2985 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2986 1) * 8), GE
, const1_rtx
, SImode
,
2991 emit_cmp_and_jump_insns (expand_expr
2992 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2993 GEN_INT ((SSE_REGPARM_MAX
-
2994 needed_sseregs
+ 1) * 16 +
2995 REGPARM_MAX
* 8), GE
, const1_rtx
,
2996 SImode
, 1, lab_false
);
2999 /* Compute index to start of area used for integer regs. */
3002 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
3003 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
3004 if (r
!= int_addr_rtx
)
3005 emit_move_insn (int_addr_rtx
, r
);
3009 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
3010 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
3011 if (r
!= sse_addr_rtx
)
3012 emit_move_insn (sse_addr_rtx
, r
);
3019 /* Never use the memory itself, as it has the alias set. */
3020 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
3021 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
3022 set_mem_alias_set (mem
, get_varargs_alias_set ());
3023 set_mem_align (mem
, BITS_PER_UNIT
);
3025 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
3027 rtx slot
= XVECEXP (container
, 0, i
);
3028 rtx reg
= XEXP (slot
, 0);
3029 enum machine_mode mode
= GET_MODE (reg
);
3035 if (SSE_REGNO_P (REGNO (reg
)))
3037 src_addr
= sse_addr_rtx
;
3038 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
3042 src_addr
= int_addr_rtx
;
3043 src_offset
= REGNO (reg
) * 8;
3045 src_mem
= gen_rtx_MEM (mode
, src_addr
);
3046 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
3047 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
3048 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
3049 emit_move_insn (dest_mem
, src_mem
);
3056 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
3057 build_int_2 (needed_intregs
* 8, 0));
3058 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
3059 TREE_SIDE_EFFECTS (t
) = 1;
3060 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3065 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
3066 build_int_2 (needed_sseregs
* 16, 0));
3067 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
3068 TREE_SIDE_EFFECTS (t
) = 1;
3069 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3072 emit_jump_insn (gen_jump (lab_over
));
3074 emit_label (lab_false
);
3077 /* ... otherwise out of the overflow area. */
3079 /* Care for on-stack alignment if needed. */
3080 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
3084 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
3085 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
3086 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
3090 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
3092 emit_move_insn (addr_rtx
, r
);
3095 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
3096 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
3097 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
3098 TREE_SIDE_EFFECTS (t
) = 1;
3099 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3102 emit_label (lab_over
);
3106 r
= gen_rtx_MEM (Pmode
, addr_rtx
);
3107 set_mem_alias_set (r
, get_varargs_alias_set ());
3108 emit_move_insn (addr_rtx
, r
);
3114 /* Return nonzero if OP is either a i387 or SSE fp register. */
3116 any_fp_register_operand (op
, mode
)
3118 enum machine_mode mode ATTRIBUTE_UNUSED
;
3120 return ANY_FP_REG_P (op
);
3123 /* Return nonzero if OP is an i387 fp register. */
3125 fp_register_operand (op
, mode
)
3127 enum machine_mode mode ATTRIBUTE_UNUSED
;
3129 return FP_REG_P (op
);
3132 /* Return nonzero if OP is a non-fp register_operand. */
3134 register_and_not_any_fp_reg_operand (op
, mode
)
3136 enum machine_mode mode
;
3138 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
3141 /* Return nonzero if OP is a register operand other than an
3142 i387 fp register. */
3144 register_and_not_fp_reg_operand (op
, mode
)
3146 enum machine_mode mode
;
3148 return register_operand (op
, mode
) && !FP_REG_P (op
);
3151 /* Return nonzero if OP is general operand representable on x86_64. */
3154 x86_64_general_operand (op
, mode
)
3156 enum machine_mode mode
;
3159 return general_operand (op
, mode
);
3160 if (nonimmediate_operand (op
, mode
))
3162 return x86_64_sign_extended_value (op
);
3165 /* Return nonzero if OP is general operand representable on x86_64
3166 as either sign extended or zero extended constant. */
3169 x86_64_szext_general_operand (op
, mode
)
3171 enum machine_mode mode
;
3174 return general_operand (op
, mode
);
3175 if (nonimmediate_operand (op
, mode
))
3177 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3180 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3183 x86_64_nonmemory_operand (op
, mode
)
3185 enum machine_mode mode
;
3188 return nonmemory_operand (op
, mode
);
3189 if (register_operand (op
, mode
))
3191 return x86_64_sign_extended_value (op
);
3194 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3197 x86_64_movabs_operand (op
, mode
)
3199 enum machine_mode mode
;
3201 if (!TARGET_64BIT
|| !flag_pic
)
3202 return nonmemory_operand (op
, mode
);
3203 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
3205 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
3210 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3213 x86_64_szext_nonmemory_operand (op
, mode
)
3215 enum machine_mode mode
;
3218 return nonmemory_operand (op
, mode
);
3219 if (register_operand (op
, mode
))
3221 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
3224 /* Return nonzero if OP is immediate operand representable on x86_64. */
3227 x86_64_immediate_operand (op
, mode
)
3229 enum machine_mode mode
;
3232 return immediate_operand (op
, mode
);
3233 return x86_64_sign_extended_value (op
);
3236 /* Return nonzero if OP is immediate operand representable on x86_64. */
3239 x86_64_zext_immediate_operand (op
, mode
)
3241 enum machine_mode mode ATTRIBUTE_UNUSED
;
3243 return x86_64_zero_extended_value (op
);
3246 /* Return nonzero if OP is (const_int 1), else return zero. */
3249 const_int_1_operand (op
, mode
)
3251 enum machine_mode mode ATTRIBUTE_UNUSED
;
3253 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
3256 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3257 for shift & compare patterns, as shifting by 0 does not change flags),
3258 else return zero. */
3261 const_int_1_31_operand (op
, mode
)
3263 enum machine_mode mode ATTRIBUTE_UNUSED
;
3265 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3268 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3269 reference and a constant. */
3272 symbolic_operand (op
, mode
)
3274 enum machine_mode mode ATTRIBUTE_UNUSED
;
3276 switch (GET_CODE (op
))
3284 if (GET_CODE (op
) == SYMBOL_REF
3285 || GET_CODE (op
) == LABEL_REF
3286 || (GET_CODE (op
) == UNSPEC
3287 && (XINT (op
, 1) == UNSPEC_GOT
3288 || XINT (op
, 1) == UNSPEC_GOTOFF
3289 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3291 if (GET_CODE (op
) != PLUS
3292 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3296 if (GET_CODE (op
) == SYMBOL_REF
3297 || GET_CODE (op
) == LABEL_REF
)
3299 /* Only @GOTOFF gets offsets. */
3300 if (GET_CODE (op
) != UNSPEC
3301 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3304 op
= XVECEXP (op
, 0, 0);
3305 if (GET_CODE (op
) == SYMBOL_REF
3306 || GET_CODE (op
) == LABEL_REF
)
3315 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3318 pic_symbolic_operand (op
, mode
)
3320 enum machine_mode mode ATTRIBUTE_UNUSED
;
3322 if (GET_CODE (op
) != CONST
)
3327 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
3332 if (GET_CODE (op
) == UNSPEC
)
3334 if (GET_CODE (op
) != PLUS
3335 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3338 if (GET_CODE (op
) == UNSPEC
)
3344 /* Return true if OP is a symbolic operand that resolves locally. */
3347 local_symbolic_operand (op
, mode
)
3349 enum machine_mode mode ATTRIBUTE_UNUSED
;
3351 if (GET_CODE (op
) == CONST
3352 && GET_CODE (XEXP (op
, 0)) == PLUS
3353 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3354 op
= XEXP (XEXP (op
, 0), 0);
3356 if (GET_CODE (op
) == LABEL_REF
)
3359 if (GET_CODE (op
) != SYMBOL_REF
)
3362 /* These we've been told are local by varasm and encode_section_info
3364 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
3367 /* There is, however, a not insubstantial body of code in the rest of
3368 the compiler that assumes it can just stick the results of
3369 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
  /* ??? This is a hack.  Should update the body of the compiler to
     always create a DECL and invoke targetm.encode_section_info.  */
3372 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3373 internal_label_prefix_len
) == 0)
3379 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3382 tls_symbolic_operand (op
, mode
)
3384 enum machine_mode mode ATTRIBUTE_UNUSED
;
3386 const char *symbol_str
;
3388 if (GET_CODE (op
) != SYMBOL_REF
)
3390 symbol_str
= XSTR (op
, 0);
3392 if (symbol_str
[0] != '%')
3394 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3398 tls_symbolic_operand_1 (op
, kind
)
3400 enum tls_model kind
;
3402 const char *symbol_str
;
3404 if (GET_CODE (op
) != SYMBOL_REF
)
3406 symbol_str
= XSTR (op
, 0);
3408 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3412 global_dynamic_symbolic_operand (op
, mode
)
3414 enum machine_mode mode ATTRIBUTE_UNUSED
;
3416 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3420 local_dynamic_symbolic_operand (op
, mode
)
3422 enum machine_mode mode ATTRIBUTE_UNUSED
;
3424 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3428 initial_exec_symbolic_operand (op
, mode
)
3430 enum machine_mode mode ATTRIBUTE_UNUSED
;
3432 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3436 local_exec_symbolic_operand (op
, mode
)
3438 enum machine_mode mode ATTRIBUTE_UNUSED
;
3440 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3443 /* Test for a valid operand for a call instruction. Don't allow the
3444 arg pointer register or virtual regs since they may decay into
3445 reg + const, which the patterns can't handle. */
3448 call_insn_operand (op
, mode
)
3450 enum machine_mode mode ATTRIBUTE_UNUSED
;
3452 /* Disallow indirect through a virtual register. This leads to
3453 compiler aborts when trying to eliminate them. */
3454 if (GET_CODE (op
) == REG
3455 && (op
== arg_pointer_rtx
3456 || op
== frame_pointer_rtx
3457 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3458 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3461 /* Disallow `call 1234'. Due to varying assembler lameness this
3462 gets either rejected or translated to `call .+1234'. */
3463 if (GET_CODE (op
) == CONST_INT
)
3466 /* Explicitly allow SYMBOL_REF even if pic. */
3467 if (GET_CODE (op
) == SYMBOL_REF
)
3470 /* Otherwise we can allow any general_operand in the address. */
3471 return general_operand (op
, Pmode
);
3474 /* Test for a valid operand for a call instruction. Don't allow the
3475 arg pointer register or virtual regs since they may decay into
3476 reg + const, which the patterns can't handle. */
3479 sibcall_insn_operand (op
, mode
)
3481 enum machine_mode mode ATTRIBUTE_UNUSED
;
3483 /* Disallow indirect through a virtual register. This leads to
3484 compiler aborts when trying to eliminate them. */
3485 if (GET_CODE (op
) == REG
3486 && (op
== arg_pointer_rtx
3487 || op
== frame_pointer_rtx
3488 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3489 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3492 /* Explicitly allow SYMBOL_REF even if pic. */
3493 if (GET_CODE (op
) == SYMBOL_REF
)
3496 /* Otherwise we can only allow register operands. */
3497 return register_operand (op
, Pmode
);
3501 constant_call_address_operand (op
, mode
)
3503 enum machine_mode mode ATTRIBUTE_UNUSED
;
3505 if (GET_CODE (op
) == CONST
3506 && GET_CODE (XEXP (op
, 0)) == PLUS
3507 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3508 op
= XEXP (XEXP (op
, 0), 0);
3509 return GET_CODE (op
) == SYMBOL_REF
;
3512 /* Match exactly zero and one. */
3515 const0_operand (op
, mode
)
3517 enum machine_mode mode
;
3519 return op
== CONST0_RTX (mode
);
3523 const1_operand (op
, mode
)
3525 enum machine_mode mode ATTRIBUTE_UNUSED
;
3527 return op
== const1_rtx
;
3530 /* Match 2, 4, or 8. Used for leal multiplicands. */
3533 const248_operand (op
, mode
)
3535 enum machine_mode mode ATTRIBUTE_UNUSED
;
3537 return (GET_CODE (op
) == CONST_INT
3538 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
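/* These are exactly the scale factors the lea addressing mode accepts
   beyond the implicit 1, e.g. (hypothetical operands):

     leal (%eax,%ebx,4), %ecx     // ecx = eax + ebx * 4

   so the predicate rejects any other constant.  */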
3541 /* True if this is a constant appropriate for an increment or decrement. */
3544 incdec_operand (op
, mode
)
3546 enum machine_mode mode ATTRIBUTE_UNUSED
;
3548 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3549 registers, since carry flag is not set. */
3550 if (TARGET_PENTIUM4
&& !optimize_size
)
3552 return op
== const1_rtx
|| op
== constm1_rtx
;
3555 /* Return nonzero if OP is acceptable as operand of DImode shift
3559 shiftdi_operand (op
, mode
)
3561 enum machine_mode mode ATTRIBUTE_UNUSED
;
3564 return nonimmediate_operand (op
, mode
);
3566 return register_operand (op
, mode
);
3569 /* Return false if this is the stack pointer, or any other fake
3570 register eliminable to the stack pointer. Otherwise, this is
3573 This is used to prevent esp from being used as an index reg.
3574 Which would only happen in pathological cases. */
3577 reg_no_sp_operand (op
, mode
)
3579 enum machine_mode mode
;
3582 if (GET_CODE (t
) == SUBREG
)
3584 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3587 return register_operand (op
, mode
);
3591 mmx_reg_operand (op
, mode
)
3593 enum machine_mode mode ATTRIBUTE_UNUSED
;
3595 return MMX_REG_P (op
);
3598 /* Return false if this is any eliminable register. Otherwise
3602 general_no_elim_operand (op
, mode
)
3604 enum machine_mode mode
;
3607 if (GET_CODE (t
) == SUBREG
)
3609 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3610 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3611 || t
== virtual_stack_dynamic_rtx
)
3614 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3615 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3618 return general_operand (op
, mode
);
3621 /* Return false if this is any eliminable register. Otherwise
3622 register_operand or const_int. */
3625 nonmemory_no_elim_operand (op
, mode
)
3627 enum machine_mode mode
;
3630 if (GET_CODE (t
) == SUBREG
)
3632 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3633 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3634 || t
== virtual_stack_dynamic_rtx
)
3637 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3640 /* Return false if this is any eliminable register or stack register,
3641 otherwise work like register_operand. */
3644 index_register_operand (op
, mode
)
3646 enum machine_mode mode
;
3649 if (GET_CODE (t
) == SUBREG
)
3653 if (t
== arg_pointer_rtx
3654 || t
== frame_pointer_rtx
3655 || t
== virtual_incoming_args_rtx
3656 || t
== virtual_stack_vars_rtx
3657 || t
== virtual_stack_dynamic_rtx
3658 || REGNO (t
) == STACK_POINTER_REGNUM
)
3661 return general_operand (op
, mode
);
3664 /* Return true if op is a Q_REGS class register. */
3667 q_regs_operand (op
, mode
)
3669 enum machine_mode mode
;
3671 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3673 if (GET_CODE (op
) == SUBREG
)
3674 op
= SUBREG_REG (op
);
3675 return ANY_QI_REG_P (op
);
3678 /* Return true if op is an flags register. */
3681 flags_reg_operand (op
, mode
)
3683 enum machine_mode mode
;
3685 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3687 return REG_P (op
) && REGNO (op
) == FLAGS_REG
&& GET_MODE (op
) != VOIDmode
;
3690 /* Return true if op is a NON_Q_REGS class register. */
3693 non_q_regs_operand (op
, mode
)
3695 enum machine_mode mode
;
3697 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3699 if (GET_CODE (op
) == SUBREG
)
3700 op
= SUBREG_REG (op
);
3701 return NON_QI_REG_P (op
);
3705 zero_extended_scalar_load_operand (op
, mode
)
3707 enum machine_mode mode ATTRIBUTE_UNUSED
;
3710 if (GET_CODE (op
) != MEM
)
3712 op
= maybe_get_pool_constant (op
);
3715 if (GET_CODE (op
) != CONST_VECTOR
)
3718 (GET_MODE_SIZE (GET_MODE (op
)) /
3719 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op
))));
3720 for (n_elts
--; n_elts
> 0; n_elts
--)
3722 rtx elt
= CONST_VECTOR_ELT (op
, n_elts
);
3723 if (elt
!= CONST0_RTX (GET_MODE_INNER (GET_MODE (op
))))
3729 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3732 sse_comparison_operator (op
, mode
)
3734 enum machine_mode mode ATTRIBUTE_UNUSED
;
3736 enum rtx_code code
= GET_CODE (op
);
3739 /* Operations supported directly. */
3749 /* These are equivalent to ones above in non-IEEE comparisons. */
3756 return !TARGET_IEEE_FP
;
3761 /* Return 1 if OP is a valid comparison operator in valid mode. */
3763 ix86_comparison_operator (op
, mode
)
3765 enum machine_mode mode
;
3767 enum machine_mode inmode
;
3768 enum rtx_code code
= GET_CODE (op
);
3769 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3771 if (GET_RTX_CLASS (code
) != '<')
3773 inmode
= GET_MODE (XEXP (op
, 0));
3775 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3777 enum rtx_code second_code
, bypass_code
;
3778 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3779 return (bypass_code
== NIL
&& second_code
== NIL
);
3786 if (inmode
== CCmode
|| inmode
== CCGCmode
3787 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3790 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3791 if (inmode
== CCmode
)
3795 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3803 /* Return 1 if OP is a valid comparison operator testing carry flag
3806 ix86_carry_flag_operator (op
, mode
)
3808 enum machine_mode mode
;
3810 enum machine_mode inmode
;
3811 enum rtx_code code
= GET_CODE (op
);
3813 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3815 if (GET_RTX_CLASS (code
) != '<')
3817 inmode
= GET_MODE (XEXP (op
, 0));
3818 if (GET_CODE (XEXP (op
, 0)) != REG
3819 || REGNO (XEXP (op
, 0)) != 17
3820 || XEXP (op
, 1) != const0_rtx
)
3823 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3825 enum rtx_code second_code
, bypass_code
;
3827 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3828 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3830 code
= ix86_fp_compare_code_to_integer (code
);
3832 else if (inmode
!= CCmode
)
3837 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3840 fcmov_comparison_operator (op
, mode
)
3842 enum machine_mode mode
;
3844 enum machine_mode inmode
;
3845 enum rtx_code code
= GET_CODE (op
);
3847 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3849 if (GET_RTX_CLASS (code
) != '<')
3851 inmode
= GET_MODE (XEXP (op
, 0));
3852 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3854 enum rtx_code second_code
, bypass_code
;
3856 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3857 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3859 code
= ix86_fp_compare_code_to_integer (code
);
  /* The i387 supports just a limited number of condition codes.  */
3864 case LTU
: case GTU
: case LEU
: case GEU
:
3865 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3868 case ORDERED
: case UNORDERED
:
3876 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3879 promotable_binary_operator (op
, mode
)
3881 enum machine_mode mode ATTRIBUTE_UNUSED
;
3883 switch (GET_CODE (op
))
3886 /* Modern CPUs have same latency for HImode and SImode multiply,
3887 but 386 and 486 do HImode multiply faster. */
3888 return ix86_cpu
> PROCESSOR_I486
;
3900 /* Nearly general operand, but accept any const_double, since we wish
3901 to be able to drop them into memory rather than have them get pulled
3905 cmp_fp_expander_operand (op
, mode
)
3907 enum machine_mode mode
;
3909 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3911 if (GET_CODE (op
) == CONST_DOUBLE
)
3913 return general_operand (op
, mode
);
3916 /* Match an SI or HImode register for a zero_extract. */
3919 ext_register_operand (op
, mode
)
3921 enum machine_mode mode ATTRIBUTE_UNUSED
;
3924 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3925 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3928 if (!register_operand (op
, VOIDmode
))
3931 /* Be careful to accept only registers having upper parts. */
3932 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3933 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3936 /* Return 1 if this is a valid binary floating-point operation.
3937 OP is the expression matched, and MODE is its mode. */
3940 binary_fp_operator (op
, mode
)
3942 enum machine_mode mode
;
3944 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3947 switch (GET_CODE (op
))
3953 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3961 mult_operator (op
, mode
)
3963 enum machine_mode mode ATTRIBUTE_UNUSED
;
3965 return GET_CODE (op
) == MULT
;
3969 div_operator (op
, mode
)
3971 enum machine_mode mode ATTRIBUTE_UNUSED
;
3973 return GET_CODE (op
) == DIV
;
3977 arith_or_logical_operator (op
, mode
)
3979 enum machine_mode mode
;
3981 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3982 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3983 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3986 /* Returns 1 if OP is memory operand with a displacement. */
3989 memory_displacement_operand (op
, mode
)
3991 enum machine_mode mode
;
3993 struct ix86_address parts
;
3995 if (! memory_operand (op
, mode
))
3998 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
4001 return parts
.disp
!= NULL_RTX
;
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (nonimmediate_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is a memory operand that cannot be represented by
   the shortest address encoding, i.e. one whose address needs extra
   SIB or displacement bytes.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index
      && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
    return 0;
  if (parts.base
      && REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
    return 0;
  if (parts.disp
      && (GET_CODE (parts.disp) != CONST_INT
          || (INTVAL (parts.disp) & 3) != 0))
    return 0;

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
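
/* For instance, assuming the base register itself is known to be at least
   word aligned, an address such as 8(%ebp) passes the checks above because
   8 & 3 == 0, while 6(%ebp) does not: the displacement must be a multiple
   of four for the access to be provably aligned.  */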
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
    return -1;
  /* Note that on the 80387 there are other constants, such as pi, that
     we should support too.  On some machines these are much slower to
     load as a standard constant than to load from a double in memory.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (x == CONST1_RTX (GET_MODE (x)))
    return 2;

  return 0;
}
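
/* The two nonzero return values correspond to the constants the i387 can
   materialize directly: +0.0 (fldz) and +1.0 (fld1).  The chip also has
   fldpi, fldl2e and friends, but as the comment above notes, those can be
   slower than loading the value from memory on some machines.  */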
/* Return 1 if X is an FP constant we can load into an SSE register
   without using memory.  */

int
standard_sse_constant_p (x)
     rtx x;
{
  if (x == const0_rtx)
    return 1;
  return (x == CONST0_RTX (GET_MODE (x)));
}
/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          register int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  struct ix86_frame frame;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32K bytes of pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return frame.to_allocate == 0 && frame.nregs == 0;
}
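
/* Background: `ret $N' encodes N as a 16-bit immediate, so a single return
   insn can pop at most 64K bytes of arguments; the 32K cut-off used above
   appears to be simply a conservative bound below that limit.  */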
4201 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4203 x86_64_sign_extended_value (value
)
4206 switch (GET_CODE (value
))
4208 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4209 to be at least 32 and this all acceptable constants are
4210 represented as CONST_INT. */
4212 if (HOST_BITS_PER_WIDE_INT
== 32)
4216 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
4217 return trunc_int_for_mode (val
, SImode
) == val
;
4221 /* For certain code models, the symbolic references are known to fit.
4222 in CM_SMALL_PIC model we know it fits if it is local to the shared
4223 library. Don't count TLS SYMBOL_REFs here, since they should fit
4224 only if inside of UNSPEC handled below. */
4226 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
4228 /* For certain code models, the code is near as well. */
4230 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
4231 || ix86_cmodel
== CM_KERNEL
);
4233 /* We also may accept the offsetted memory references in certain special
4236 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
4237 switch (XINT (XEXP (value
, 0), 1))
4239 case UNSPEC_GOTPCREL
:
4241 case UNSPEC_GOTNTPOFF
:
4247 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4249 rtx op1
= XEXP (XEXP (value
, 0), 0);
4250 rtx op2
= XEXP (XEXP (value
, 0), 1);
4251 HOST_WIDE_INT offset
;
4253 if (ix86_cmodel
== CM_LARGE
)
4255 if (GET_CODE (op2
) != CONST_INT
)
4257 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
4258 switch (GET_CODE (op1
))
4261 /* For CM_SMALL assume that latest object is 16MB before
4262 end of 31bits boundary. We may also accept pretty
4263 large negative constants knowing that all objects are
4264 in the positive half of address space. */
4265 if (ix86_cmodel
== CM_SMALL
4266 && offset
< 16*1024*1024
4267 && trunc_int_for_mode (offset
, SImode
) == offset
)
4269 /* For CM_KERNEL we know that all object resist in the
4270 negative half of 32bits address space. We may not
4271 accept negative offsets, since they may be just off
4272 and we may accept pretty large positive ones. */
4273 if (ix86_cmodel
== CM_KERNEL
4275 && trunc_int_for_mode (offset
, SImode
) == offset
)
4279 /* These conditions are similar to SYMBOL_REF ones, just the
4280 constraints for code models differ. */
4281 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4282 && offset
< 16*1024*1024
4283 && trunc_int_for_mode (offset
, SImode
) == offset
)
4285 if (ix86_cmodel
== CM_KERNEL
4287 && trunc_int_for_mode (offset
, SImode
) == offset
)
4291 switch (XINT (op1
, 1))
4296 && trunc_int_for_mode (offset
, SImode
) == offset
)
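
/* In other words, a sign-extended x86-64 immediate must fit in 32 bits
   before extension: -1 and 0x7fffffff are representable, while 0x80000000
   is not, since it would sign-extend to 0xffffffff80000000.  */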
4310 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4312 x86_64_zero_extended_value (value
)
4315 switch (GET_CODE (value
))
4318 if (HOST_BITS_PER_WIDE_INT
== 32)
4319 return (GET_MODE (value
) == VOIDmode
4320 && !CONST_DOUBLE_HIGH (value
));
4324 if (HOST_BITS_PER_WIDE_INT
== 32)
4325 return INTVAL (value
) >= 0;
4327 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
4330 /* For certain code models, the symbolic references are known to fit. */
4332 return ix86_cmodel
== CM_SMALL
;
4334 /* For certain code models, the code is near as well. */
4336 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
4338 /* We also may accept the offsetted memory references in certain special
4341 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
4343 rtx op1
= XEXP (XEXP (value
, 0), 0);
4344 rtx op2
= XEXP (XEXP (value
, 0), 1);
4346 if (ix86_cmodel
== CM_LARGE
)
4348 switch (GET_CODE (op1
))
4352 /* For small code model we may accept pretty large positive
4353 offsets, since one bit is available for free. Negative
4354 offsets are limited by the size of NULL pointer area
4355 specified by the ABI. */
4356 if (ix86_cmodel
== CM_SMALL
4357 && GET_CODE (op2
) == CONST_INT
4358 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4359 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4362 /* ??? For the kernel, we may accept adjustment of
4363 -0x10000000, since we know that it will just convert
4364 negative address space to positive, but perhaps this
4365 is not worthwhile. */
4368 /* These conditions are similar to SYMBOL_REF ones, just the
4369 constraints for code models differ. */
4370 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4371 && GET_CODE (op2
) == CONST_INT
4372 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4373 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
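
/* The zero-extended case is the mirror image: the 32-bit immediate is
   padded with zero bits, so 0x80000000 and 0xffffffff are representable
   here while any negative value is not.  */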
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

int
ix86_frame_pointer_required ()
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return 1;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return 1;

  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
     the frame pointer by default.  Turn it back on now if we've not
     got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && !current_function_is_leaf)
    return 1;

  if (current_function_profile)
    return 1;

  return 0;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses ()
{
  cfun->machine->accesses_prev_frame = 1;
}

#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static int pic_labels_used;
/* Fill in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (name, regno)
     char name[32];
     unsigned int regno;
{
  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
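
/* Each pc thunk emitted below is a tiny get-PC helper of roughly this
   shape:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies the return address -- the address of the instruction
   following the call -- into the chosen register.  */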
4448 /* This function generates code for -fpic that loads %ebx with
4449 the return address of the caller and then returns. */
4452 ix86_asm_file_end (file
)
4458 for (regno
= 0; regno
< 8; ++regno
)
4462 if (! ((pic_labels_used
>> regno
) & 1))
4465 get_pc_thunk_name (name
, regno
);
4467 if (USE_HIDDEN_LINKONCE
)
4471 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4473 TREE_PUBLIC (decl
) = 1;
4474 TREE_STATIC (decl
) = 1;
4475 DECL_ONE_ONLY (decl
) = 1;
4477 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4478 named_section (decl
, NULL
, 0);
4480 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4481 fputs ("\t.hidden\t", file
);
4482 assemble_name (file
, name
);
4484 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
4489 ASM_OUTPUT_LABEL (file
, name
);
4492 xops
[0] = gen_rtx_REG (SImode
, regno
);
4493 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4494 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4495 output_asm_insn ("ret", xops
);
4499 /* Emit code for the SET_GOT patterns. */
4502 output_set_got (dest
)
4508 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4510 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4512 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4515 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4517 output_asm_insn ("call\t%a2", xops
);
4520 /* Output the "canonical" label name ("Lxx$pb") here too. This
4521 is what will be referred to by the Mach-O PIC subsystem. */
4522 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4524 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
4525 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4528 output_asm_insn ("pop{l}\t%0", xops
);
4533 get_pc_thunk_name (name
, REGNO (dest
));
4534 pic_labels_used
|= 1 << REGNO (dest
);
4536 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4537 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4538 output_asm_insn ("call\t%X2", xops
);
4541 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4542 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4543 else if (!TARGET_MACHO
)
4544 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4549 /* Generate an "push" pattern for input ARG. */
4555 return gen_rtx_SET (VOIDmode
,
4557 gen_rtx_PRE_DEC (Pmode
,
4558 stack_pointer_rtx
)),
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum ()
{
  if (current_function_is_leaf && !current_function_profile)
    {
      int i;

      for (i = 2; i >= 0; --i)
        if (!regs_ever_live[i])
          return i;
    }

  return INVALID_REGNUM;
}
/* Return 1 if we need to save REGNO.  */

static int
ix86_save_reg (regno, maybe_eh_return)
     unsigned int regno;
     int maybe_eh_return;
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
          || current_function_profile
          || current_function_calls_eh_return))
    {
      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
        return 0;
      return 1;
    }

  if (current_function_calls_eh_return && maybe_eh_return)
    {
      unsigned i;

      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return 1;
        }
    }

  return (regs_ever_live[regno]
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return the number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      if (to != STACK_POINTER_REGNUM)
        abort ();
      else if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;
      else if (from != FRAME_POINTER_REGNUM)
        abort ();
      else
        return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
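
/* For the common IA-32 case -- frame pointer in use, no saved registers and
   no locals -- hard_frame_pointer_offset is 2 * UNITS_PER_WORD = 8: the
   saved return address plus the saved %ebp, as laid out by
   ix86_compute_frame_layout below.  */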
4658 /* Fill structure ix86_frame about frame of currently computed function. */
4661 ix86_compute_frame_layout (frame
)
4662 struct ix86_frame
*frame
;
4664 HOST_WIDE_INT total_size
;
4665 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4667 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4668 HOST_WIDE_INT size
= get_frame_size ();
4670 frame
->nregs
= ix86_nsaved_regs ();
4673 /* Skip return address and saved base pointer. */
4674 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4676 frame
->hard_frame_pointer_offset
= offset
;
4678 /* Do some sanity checking of stack_alignment_needed and
4679 preferred_alignment, since i386 port is the only using those features
4680 that may break easily. */
4682 if (size
&& !stack_alignment_needed
)
4684 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4686 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4688 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4691 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4692 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4694 /* Register save area */
4695 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4698 if (ix86_save_varrargs_registers
)
4700 offset
+= X86_64_VARARGS_SIZE
;
4701 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4704 frame
->va_arg_size
= 0;
4706 /* Align start of frame for local function. */
4707 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4708 & -stack_alignment_needed
) - offset
;
4710 offset
+= frame
->padding1
;
4712 /* Frame pointer points here. */
4713 frame
->frame_pointer_offset
= offset
;
4717 /* Add outgoing arguments area. Can be skipped if we eliminated
4718 all the function calls as dead code. */
4719 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4721 offset
+= current_function_outgoing_args_size
;
4722 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4725 frame
->outgoing_arguments_size
= 0;
4727 /* Align stack boundary. Only needed if we're calling another function
4729 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4730 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4731 & -preferred_alignment
) - offset
;
4733 frame
->padding2
= 0;
4735 offset
+= frame
->padding2
;
4737 /* We've reached end of stack frame. */
4738 frame
->stack_pointer_offset
= offset
;
4740 /* Size prologue needs to allocate. */
4741 frame
->to_allocate
=
4742 (size
+ frame
->padding1
+ frame
->padding2
4743 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4745 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4746 && current_function_is_leaf
)
4748 frame
->red_zone_size
= frame
->to_allocate
;
4749 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4750 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4753 frame
->red_zone_size
= 0;
4754 frame
->to_allocate
-= frame
->red_zone_size
;
4755 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4757 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4758 fprintf (stderr
, "size: %i\n", size
);
4759 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4760 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4761 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4762 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4763 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4764 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4765 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4766 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4767 frame
->hard_frame_pointer_offset
);
4768 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
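
/* Roughly, the layout computed above, going down from the incoming stack
   pointer, is:

	return address
	saved %ebp (if frame_pointer_needed)	<- hard_frame_pointer_offset
	saved registers (nregs words)
	varargs register save area (64-bit only)
	padding1 (stack_alignment_needed)	<- frame_pointer_offset
	local variables (get_frame_size ())
	outgoing argument area
	padding2 (preferred_alignment)		<- stack_pointer_offset

   to_allocate is everything below the saved registers, less whatever part
   of it can live in the x86-64 red zone.  */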
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  register int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */

static void
ix86_emit_save_regs_using_mov (pointer, offset)
     rtx pointer;
     HOST_WIDE_INT offset;
{
  int regno;
  rtx insn;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (ix86_save_reg (regno, true))
      {
        insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
                                               Pmode, offset),
                               gen_rtx_REG (Pmode, regno));
        RTX_FRAME_RELATED_P (insn) = 1;
        offset += UNITS_PER_WORD;
      }
}
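
/* The two helpers above correspond to the two prologue styles: the
   push-based save emits e.g. "pushl %ebx", while the mov-based save
   stores each register into the already-allocated frame with a plain
   "movl %ebx, N(%esp)" (or N(%ebp)), which modern cores can execute
   in parallel.  */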
4809 /* Expand the prologue into a bunch of separate insns. */
4812 ix86_expand_prologue ()
4816 struct ix86_frame frame
;
4818 HOST_WIDE_INT allocate
;
4820 ix86_compute_frame_layout (&frame
);
4823 int count
= frame
.nregs
;
4825 /* The fast prologue uses move instead of push to save registers. This
4826 is significantly longer, but also executes faster as modern hardware
4827 can execute the moves in parallel, but can't do that for push/pop.
4829 Be careful about choosing what prologue to emit: When function takes
4830 many instructions to execute we may use slow version as well as in
4831 case function is known to be outside hot spot (this is known with
4832 feedback only). Weight the size of function by number of registers
4833 to save as it is cheap to use one or two push instructions but very
4834 slow to use many of them. */
4836 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
4837 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
4838 || (flag_branch_probabilities
4839 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
4840 use_fast_prologue_epilogue
= 0;
4842 use_fast_prologue_epilogue
= !expensive_function_p (count
);
4843 if (TARGET_PROLOGUE_USING_MOVE
)
4844 use_mov
= use_fast_prologue_epilogue
;
4847 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4848 slower on all targets. Also sdb doesn't like it. */
4850 if (frame_pointer_needed
)
4852 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4853 RTX_FRAME_RELATED_P (insn
) = 1;
4855 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4856 RTX_FRAME_RELATED_P (insn
) = 1;
4859 allocate
= frame
.to_allocate
;
4860 /* In case we are dealing only with single register and empty frame,
4861 push is equivalent of the mov+add sequence. */
4862 if (allocate
== 0 && frame
.nregs
<= 1)
4866 ix86_emit_save_regs ();
4868 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4872 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4874 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4875 (stack_pointer_rtx
, stack_pointer_rtx
,
4876 GEN_INT (-allocate
)));
4877 RTX_FRAME_RELATED_P (insn
) = 1;
4881 /* ??? Is this only valid for Win32? */
4888 arg0
= gen_rtx_REG (SImode
, 0);
4889 emit_move_insn (arg0
, GEN_INT (allocate
));
4891 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4892 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4893 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4895 CALL_INSN_FUNCTION_USAGE (insn
)
4896 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4897 CALL_INSN_FUNCTION_USAGE (insn
));
4899 /* Don't allow scheduling pass to move insns across __alloca
4901 emit_insn (gen_blockage (const0_rtx
));
4905 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4906 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4908 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4909 -frame
.nregs
* UNITS_PER_WORD
);
4912 #ifdef SUBTARGET_PROLOGUE
4916 pic_reg_used
= false;
4917 if (pic_offset_table_rtx
4918 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4919 || current_function_profile
))
4921 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4923 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4924 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4926 pic_reg_used
= true;
4931 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4933 /* Even with accurate pre-reload life analysis, we can wind up
4934 deleting all references to the pic register after reload.
4935 Consider if cross-jumping unifies two sides of a branch
4936 controlled by a comparison vs the only read from a global.
4937 In which case, allow the set_got to be deleted, though we're
4938 too late to do anything about the ebx save in the prologue. */
4939 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4942 /* Prevent function calls from be scheduled before the call to mcount.
4943 In the pic_reg_used case, make sure that the got load isn't deleted. */
4944 if (current_function_profile
)
4945 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4948 /* Emit code to restore saved registers using MOV insns. First register
4949 is restored from POINTER + OFFSET. */
4951 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4954 int maybe_eh_return
;
4958 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4959 if (ix86_save_reg (regno
, maybe_eh_return
))
4961 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4962 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4964 offset
+= UNITS_PER_WORD
;
4968 /* Restore function stack, frame, and registers. */
4971 ix86_expand_epilogue (style
)
4975 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4976 struct ix86_frame frame
;
4977 HOST_WIDE_INT offset
;
4979 ix86_compute_frame_layout (&frame
);
4981 /* Calculate start of saved registers relative to ebp. Special care
4982 must be taken for the normal return case of a function using
4983 eh_return: the eax and edx registers are marked as saved, but not
4984 restored along this path. */
4985 offset
= frame
.nregs
;
4986 if (current_function_calls_eh_return
&& style
!= 2)
4988 offset
*= -UNITS_PER_WORD
;
4990 /* If we're only restoring one register and sp is not valid then
4991 using a move instruction to restore the register since it's
4992 less work than reloading sp and popping the register.
4994 The default code result in stack adjustment using add/lea instruction,
4995 while this code results in LEAVE instruction (or discrete equivalent),
4996 so it is profitable in some other cases as well. Especially when there
4997 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4998 and there is exactly one register to pop. This heuristic may need some
4999 tuning in future. */
5000 if ((!sp_valid
&& frame
.nregs
<= 1)
5001 || (TARGET_EPILOGUE_USING_MOVE
5002 && use_fast_prologue_epilogue
5003 && (frame
.nregs
> 1 || frame
.to_allocate
))
5004 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
5005 || (frame_pointer_needed
&& TARGET_USE_LEAVE
5006 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
5007 || current_function_calls_eh_return
)
5009 /* Restore registers. We can use ebp or esp to address the memory
5010 locations. If both are available, default to ebp, since offsets
5011 are known to be small. Only exception is esp pointing directly to the
5012 end of block of saved registers, where we may simplify addressing
5015 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
5016 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
5017 frame
.to_allocate
, style
== 2);
5019 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
5020 offset
, style
== 2);
5022 /* eh_return epilogues need %ecx added to the stack pointer. */
5025 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
5027 if (frame_pointer_needed
)
5029 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
5030 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
5031 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
5033 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
5034 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
5036 emit_insn (gen_pro_epilogue_adjust_stack
5037 (stack_pointer_rtx
, sa
, const0_rtx
));
5041 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
5042 tmp
= plus_constant (tmp
, (frame
.to_allocate
5043 + frame
.nregs
* UNITS_PER_WORD
));
5044 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
5047 else if (!frame_pointer_needed
)
5048 emit_insn (gen_pro_epilogue_adjust_stack
5049 (stack_pointer_rtx
, stack_pointer_rtx
,
5050 GEN_INT (frame
.to_allocate
5051 + frame
.nregs
* UNITS_PER_WORD
)));
5052 /* If not an i386, mov & pop is faster than "leave". */
5053 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
5054 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5057 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
5058 hard_frame_pointer_rtx
,
5061 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5063 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5068 /* First step is to deallocate the stack frame so that we can
5069 pop the registers. */
5072 if (!frame_pointer_needed
)
5074 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
5075 hard_frame_pointer_rtx
,
5078 else if (frame
.to_allocate
)
5079 emit_insn (gen_pro_epilogue_adjust_stack
5080 (stack_pointer_rtx
, stack_pointer_rtx
,
5081 GEN_INT (frame
.to_allocate
)));
5083 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5084 if (ix86_save_reg (regno
, false))
5087 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
5089 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
5091 if (frame_pointer_needed
)
5093 /* Leave results in shorter dependency chains on CPUs that are
5094 able to grok it fast. */
5095 if (TARGET_USE_LEAVE
)
5096 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
5097 else if (TARGET_64BIT
)
5098 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
5100 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
5104 /* Sibcall epilogues don't want a return instruction. */
5108 if (current_function_pops_args
&& current_function_args_size
)
5110 rtx popc
= GEN_INT (current_function_pops_args
);
5112 /* i386 can only pop 64K bytes. If asked to pop more, pop
5113 return address, do explicit add, and jump indirectly to the
5116 if (current_function_pops_args
>= 65536)
5118 rtx ecx
= gen_rtx_REG (SImode
, 2);
5120 /* There are is no "pascal" calling convention in 64bit ABI. */
5124 emit_insn (gen_popsi1 (ecx
));
5125 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
5126 emit_jump_insn (gen_return_indirect_internal (ecx
));
5129 emit_jump_insn (gen_return_pop_internal (popc
));
5132 emit_jump_insn (gen_return_internal ());
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
{
  if (pic_offset_table_rtx)
    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
}
5146 /* Extract the parts of an RTL expression that is a valid memory address
5147 for an instruction. Return 0 if the structure of the address is
5148 grossly off. Return -1 if the address contains ASHIFT, so it is not
5149 strictly valid, but still used for computing length of lea instruction.
5153 ix86_decompose_address (addr
, out
)
5155 struct ix86_address
*out
;
5157 rtx base
= NULL_RTX
;
5158 rtx index
= NULL_RTX
;
5159 rtx disp
= NULL_RTX
;
5160 HOST_WIDE_INT scale
= 1;
5161 rtx scale_rtx
= NULL_RTX
;
5164 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
5166 else if (GET_CODE (addr
) == PLUS
)
5168 rtx op0
= XEXP (addr
, 0);
5169 rtx op1
= XEXP (addr
, 1);
5170 enum rtx_code code0
= GET_CODE (op0
);
5171 enum rtx_code code1
= GET_CODE (op1
);
5173 if (code0
== REG
|| code0
== SUBREG
)
5175 if (code1
== REG
|| code1
== SUBREG
)
5176 index
= op0
, base
= op1
; /* index + base */
5178 base
= op0
, disp
= op1
; /* base + displacement */
5180 else if (code0
== MULT
)
5182 index
= XEXP (op0
, 0);
5183 scale_rtx
= XEXP (op0
, 1);
5184 if (code1
== REG
|| code1
== SUBREG
)
5185 base
= op1
; /* index*scale + base */
5187 disp
= op1
; /* index*scale + disp */
5189 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
5191 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
5192 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
5193 base
= XEXP (op0
, 1);
5196 else if (code0
== PLUS
)
5198 index
= XEXP (op0
, 0); /* index + base + disp */
5199 base
= XEXP (op0
, 1);
5205 else if (GET_CODE (addr
) == MULT
)
5207 index
= XEXP (addr
, 0); /* index*scale */
5208 scale_rtx
= XEXP (addr
, 1);
5210 else if (GET_CODE (addr
) == ASHIFT
)
5214 /* We're called for lea too, which implements ashift on occasion. */
5215 index
= XEXP (addr
, 0);
5216 tmp
= XEXP (addr
, 1);
5217 if (GET_CODE (tmp
) != CONST_INT
)
5219 scale
= INTVAL (tmp
);
5220 if ((unsigned HOST_WIDE_INT
) scale
> 3)
5226 disp
= addr
; /* displacement */
5228 /* Extract the integral value of scale. */
5231 if (GET_CODE (scale_rtx
) != CONST_INT
)
5233 scale
= INTVAL (scale_rtx
);
5236 /* Allow arg pointer and stack pointer as index if there is not scaling */
5237 if (base
&& index
&& scale
== 1
5238 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
5239 || index
== stack_pointer_rtx
))
5246 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5247 if ((base
== hard_frame_pointer_rtx
5248 || base
== frame_pointer_rtx
5249 || base
== arg_pointer_rtx
) && !disp
)
5252 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5253 Avoid this by transforming to [%esi+0]. */
5254 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
5255 && base
&& !index
&& !disp
5257 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
5260 /* Special case: encode reg+reg instead of reg*2. */
5261 if (!base
&& index
&& scale
&& scale
== 2)
5262 base
= index
, scale
= 1;
5264 /* Special case: scaling cannot be encoded without base or displacement. */
5265 if (!base
&& !disp
&& index
&& scale
!= 1)
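
/* Example: the address (plus (plus (mult (reg %eax) (const_int 4))
   (reg %ebx)) (const_int 12)), i.e. 12(%ebx,%eax,4), decomposes into
   base = %ebx, index = %eax, scale = 4 and disp = 12.  */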
5276 /* Return cost of the memory address x.
5277 For i386, it is better to use a complex address than let gcc copy
5278 the address into a reg and make a new pseudo. But not if the address
5279 requires to two regs - that would mean more pseudos with longer
5282 ix86_address_cost (x
)
5285 struct ix86_address parts
;
5288 if (!ix86_decompose_address (x
, &parts
))
5291 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
5292 parts
.base
= SUBREG_REG (parts
.base
);
5293 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
5294 parts
.index
= SUBREG_REG (parts
.index
);
5296 /* More complex memory references are better. */
5297 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
5300 /* Attempt to minimize number of registers in the address. */
5302 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
5304 && (!REG_P (parts
.index
)
5305 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
5309 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
5311 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
5312 && parts
.base
!= parts
.index
)
5315 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5316 since it's predecode logic can't detect the length of instructions
5317 and it degenerates to vector decoded. Increase cost of such
5318 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5319 to split such addresses or even refuse such addresses at all.
5321 Following addressing modes are affected:
5326 The first and last case may be avoidable by explicitly coding the zero in
5327 memory address, but I don't have AMD-K6 machine handy to check this
5331 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5332 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
5333 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
5339 /* If X is a machine specific address (i.e. a symbol or label being
5340 referenced as a displacement from the GOT implemented using an
5341 UNSPEC), then return the base term. Otherwise return X. */
5344 ix86_find_base_term (x
)
5351 if (GET_CODE (x
) != CONST
)
5354 if (GET_CODE (term
) == PLUS
5355 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
5356 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
5357 term
= XEXP (term
, 0);
5358 if (GET_CODE (term
) != UNSPEC
5359 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
5362 term
= XVECEXP (term
, 0, 0);
5364 if (GET_CODE (term
) != SYMBOL_REF
5365 && GET_CODE (term
) != LABEL_REF
)
5371 if (GET_CODE (x
) != PLUS
5372 || XEXP (x
, 0) != pic_offset_table_rtx
5373 || GET_CODE (XEXP (x
, 1)) != CONST
)
5376 term
= XEXP (XEXP (x
, 1), 0);
5378 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
5379 term
= XEXP (term
, 0);
5381 if (GET_CODE (term
) != UNSPEC
5382 || XINT (term
, 1) != UNSPEC_GOTOFF
)
5385 term
= XVECEXP (term
, 0, 0);
5387 if (GET_CODE (term
) != SYMBOL_REF
5388 && GET_CODE (term
) != LABEL_REF
)
5394 /* Determine if a given RTX is a valid constant. We already know this
5395 satisfies CONSTANT_P. */
5398 legitimate_constant_p (x
)
5403 switch (GET_CODE (x
))
5406 /* TLS symbols are not constant. */
5407 if (tls_symbolic_operand (x
, Pmode
))
5412 inner
= XEXP (x
, 0);
5414 /* Offsets of TLS symbols are never valid.
5415 Discourage CSE from creating them. */
5416 if (GET_CODE (inner
) == PLUS
5417 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5420 /* Only some unspecs are valid as "constants". */
5421 if (GET_CODE (inner
) == UNSPEC
)
5422 switch (XINT (inner
, 1))
5425 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5435 /* Otherwise we handle everything else in the move patterns. */
5439 /* Determine if it's legal to put X into the constant pool. This
5440 is not possible for the address of thread-local symbols, which
5441 is checked above. */
5444 ix86_cannot_force_const_mem (x
)
5447 return !legitimate_constant_p (x
);
5450 /* Determine if a given RTX is a valid constant address. */
5453 constant_address_p (x
)
5456 switch (GET_CODE (x
))
5463 return TARGET_64BIT
;
5466 /* For Mach-O, really believe the CONST. */
5469 /* Otherwise fall through. */
5471 return !flag_pic
&& legitimate_constant_p (x
);
5478 /* Nonzero if the constant value X is a legitimate general operand
5479 when generating PIC code. It is given that flag_pic is on and
5480 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5483 legitimate_pic_operand_p (x
)
5488 switch (GET_CODE (x
))
5491 inner
= XEXP (x
, 0);
5493 /* Only some unspecs are valid as "constants". */
5494 if (GET_CODE (inner
) == UNSPEC
)
5495 switch (XINT (inner
, 1))
5498 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5506 return legitimate_pic_address_disp_p (x
);
5513 /* Determine if a given CONST RTX is a valid memory displacement
5517 legitimate_pic_address_disp_p (disp
)
5522 /* In 64bit mode we can allow direct addresses of symbols and labels
5523 when they are not dynamic symbols. */
5526 /* TLS references should always be enclosed in UNSPEC. */
5527 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
5529 if (GET_CODE (disp
) == SYMBOL_REF
5530 && ix86_cmodel
== CM_SMALL_PIC
5531 && (CONSTANT_POOL_ADDRESS_P (disp
)
5532 || SYMBOL_REF_FLAG (disp
)))
5534 if (GET_CODE (disp
) == LABEL_REF
)
5536 if (GET_CODE (disp
) == CONST
5537 && GET_CODE (XEXP (disp
, 0)) == PLUS
5538 && ((GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
5539 && ix86_cmodel
== CM_SMALL_PIC
5540 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp
, 0), 0))
5541 || SYMBOL_REF_FLAG (XEXP (XEXP (disp
, 0), 0))))
5542 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
5543 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
5544 && INTVAL (XEXP (XEXP (disp
, 0), 1)) < 16*1024*1024
5545 && INTVAL (XEXP (XEXP (disp
, 0), 1)) >= -16*1024*1024)
5548 if (GET_CODE (disp
) != CONST
)
5550 disp
= XEXP (disp
, 0);
5554 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5555 of GOT tables. We should not need these anyway. */
5556 if (GET_CODE (disp
) != UNSPEC
5557 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5560 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5561 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
5567 if (GET_CODE (disp
) == PLUS
)
5569 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5571 disp
= XEXP (disp
, 0);
5575 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5576 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5578 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5579 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5580 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5582 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5583 if (strstr (sym_name
, "$pb") != 0)
5588 if (GET_CODE (disp
) != UNSPEC
)
5591 switch (XINT (disp
, 1))
5596 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5598 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5599 case UNSPEC_GOTTPOFF
:
5600 case UNSPEC_GOTNTPOFF
:
5601 case UNSPEC_INDNTPOFF
:
5604 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5606 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5608 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5614 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5615 memory address for an instruction. The MODE argument is the machine mode
5616 for the MEM expression that wants to use this address.
5618 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5619 convert common non-canonical forms to canonical form so that they will
5623 legitimate_address_p (mode
, addr
, strict
)
5624 enum machine_mode mode
;
5628 struct ix86_address parts
;
5629 rtx base
, index
, disp
;
5630 HOST_WIDE_INT scale
;
5631 const char *reason
= NULL
;
5632 rtx reason_rtx
= NULL_RTX
;
5634 if (TARGET_DEBUG_ADDR
)
5637 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5638 GET_MODE_NAME (mode
), strict
);
5642 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
5644 if (TARGET_DEBUG_ADDR
)
5645 fprintf (stderr
, "Success.\n");
5649 if (ix86_decompose_address (addr
, &parts
) <= 0)
5651 reason
= "decomposition failed";
5656 index
= parts
.index
;
5658 scale
= parts
.scale
;
5660 /* Validate base register.
5662 Don't allow SUBREG's here, it can lead to spill failures when the base
5663 is one word out of a two word structure, which is represented internally
5671 if (GET_CODE (base
) == SUBREG
)
5672 reg
= SUBREG_REG (base
);
5676 if (GET_CODE (reg
) != REG
)
5678 reason
= "base is not a register";
5682 if (GET_MODE (base
) != Pmode
)
5684 reason
= "base is not in Pmode";
5688 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5689 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5691 reason
= "base is not valid";
5696 /* Validate index register.
5698 Don't allow SUBREG's here, it can lead to spill failures when the index
5699 is one word out of a two word structure, which is represented internally
5707 if (GET_CODE (index
) == SUBREG
)
5708 reg
= SUBREG_REG (index
);
5712 if (GET_CODE (reg
) != REG
)
5714 reason
= "index is not a register";
5718 if (GET_MODE (index
) != Pmode
)
5720 reason
= "index is not in Pmode";
5724 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5725 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5727 reason
= "index is not valid";
5732 /* Validate scale factor. */
5735 reason_rtx
= GEN_INT (scale
);
5738 reason
= "scale without index";
5742 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5744 reason
= "scale is not a valid multiplier";
5749 /* Validate displacement. */
5754 if (GET_CODE (disp
) == CONST
5755 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5756 switch (XINT (XEXP (disp
, 0), 1))
5760 case UNSPEC_GOTPCREL
:
5763 goto is_legitimate_pic
;
5765 case UNSPEC_GOTTPOFF
:
5766 case UNSPEC_GOTNTPOFF
:
5767 case UNSPEC_INDNTPOFF
:
5773 reason
= "invalid address unspec";
5777 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5779 && !machopic_operand_p (disp
)
5784 if (TARGET_64BIT
&& (index
|| base
))
5786 /* foo@dtpoff(%rX) is ok. */
5787 if (GET_CODE (disp
) != CONST
5788 || GET_CODE (XEXP (disp
, 0)) != PLUS
5789 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5790 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5791 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5792 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5794 reason
= "non-constant pic memory reference";
5798 else if (! legitimate_pic_address_disp_p (disp
))
5800 reason
= "displacement is an invalid pic construct";
5804 /* This code used to verify that a symbolic pic displacement
5805 includes the pic_offset_table_rtx register.
5807 While this is good idea, unfortunately these constructs may
5808 be created by "adds using lea" optimization for incorrect
5817 This code is nonsensical, but results in addressing
5818 GOT table with pic_offset_table_rtx base. We can't
5819 just refuse it easily, since it gets matched by
5820 "addsi3" pattern, that later gets split to lea in the
5821 case output register differs from input. While this
5822 can be handled by separate addsi pattern for this case
5823 that never results in lea, this seems to be easier and
5824 correct fix for crash to disable this test. */
5826 else if (!CONSTANT_ADDRESS_P (disp
))
5828 reason
= "displacement is not constant";
5831 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
5833 reason
= "displacement is out of range";
5836 else if (!TARGET_64BIT
&& GET_CODE (disp
) == CONST_DOUBLE
)
5838 reason
= "displacement is a const_double";
5843 /* Everything looks valid. */
5844 if (TARGET_DEBUG_ADDR
)
5845 fprintf (stderr
, "Success.\n");
5849 if (TARGET_DEBUG_ADDR
)
5851 fprintf (stderr
, "Error: %s\n", reason
);
5852 debug_rtx (reason_rtx
);
/* Return a unique alias set for the GOT.  */

static HOST_WIDE_INT
ix86_GOT_alias_set ()
{
  static HOST_WIDE_INT set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
5868 /* Return a legitimate reference for ORIG (an address) using the
5869 register REG. If REG is 0, a new pseudo is generated.
5871 There are two types of references that must be handled:
5873 1. Global data references must load the address from the GOT, via
5874 the PIC reg. An insn is emitted to do this load, and the reg is
5877 2. Static data references, constant pool addresses, and code labels
5878 compute the address as an offset from the GOT, whose base is in
5879 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5880 differentiate them from global data objects. The returned
5881 address is the PIC reg + an unspec constant.
5883 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5884 reg also appears in the address. */
5887 legitimize_pic_address (orig
, reg
)
5897 reg
= gen_reg_rtx (Pmode
);
5898 /* Use the generic Mach-O PIC machinery. */
5899 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5902 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5904 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5906 /* This symbol may be referenced via a displacement from the PIC
5907 base address (@GOTOFF). */
5909 if (reload_in_progress
)
5910 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5911 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5912 new = gen_rtx_CONST (Pmode
, new);
5913 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5917 emit_move_insn (reg
, new);
5921 else if (GET_CODE (addr
) == SYMBOL_REF
)
5925 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5926 new = gen_rtx_CONST (Pmode
, new);
5927 new = gen_rtx_MEM (Pmode
, new);
5928 RTX_UNCHANGING_P (new) = 1;
5929 set_mem_alias_set (new, ix86_GOT_alias_set ());
5932 reg
= gen_reg_rtx (Pmode
);
5933 /* Use directly gen_movsi, otherwise the address is loaded
5934 into register for CSE. We don't want to CSE this addresses,
5935 instead we CSE addresses from the GOT table, so skip this. */
5936 emit_insn (gen_movsi (reg
, new));
5941 /* This symbol must be referenced via a load from the
5942 Global Offset Table (@GOT). */
5944 if (reload_in_progress
)
5945 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5946 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5947 new = gen_rtx_CONST (Pmode
, new);
5948 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5949 new = gen_rtx_MEM (Pmode
, new);
5950 RTX_UNCHANGING_P (new) = 1;
5951 set_mem_alias_set (new, ix86_GOT_alias_set ());
5954 reg
= gen_reg_rtx (Pmode
);
5955 emit_move_insn (reg
, new);
5961 if (GET_CODE (addr
) == CONST
)
5963 addr
= XEXP (addr
, 0);
5965 /* We must match stuff we generate before. Assume the only
5966 unspecs that can get here are ours. Not that we could do
5967 anything with them anyway... */
5968 if (GET_CODE (addr
) == UNSPEC
5969 || (GET_CODE (addr
) == PLUS
5970 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5972 if (GET_CODE (addr
) != PLUS
)
5975 if (GET_CODE (addr
) == PLUS
)
5977 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5979 /* Check first to see if this is a constant offset from a @GOTOFF
5980 symbol reference. */
5981 if (local_symbolic_operand (op0
, Pmode
)
5982 && GET_CODE (op1
) == CONST_INT
)
5986 if (reload_in_progress
)
5987 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5988 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5990 new = gen_rtx_PLUS (Pmode
, new, op1
);
5991 new = gen_rtx_CONST (Pmode
, new);
5992 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5996 emit_move_insn (reg
, new);
6002 if (INTVAL (op1
) < -16*1024*1024
6003 || INTVAL (op1
) >= 16*1024*1024)
6004 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
6009 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
6010 new = legitimize_pic_address (XEXP (addr
, 1),
6011 base
== reg
? NULL_RTX
: reg
);
6013 if (GET_CODE (new) == CONST_INT
)
6014 new = plus_constant (base
, INTVAL (new));
6017 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
6019 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
6020 new = XEXP (new, 1);
6022 new = gen_rtx_PLUS (Pmode
, base
, new);
6031 ix86_encode_section_info (decl
, first
)
6033 int first ATTRIBUTE_UNUSED
;
6035 bool local_p
= (*targetm
.binds_local_p
) (decl
);
6038 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
6039 if (GET_CODE (rtl
) != MEM
)
6041 symbol
= XEXP (rtl
, 0);
6042 if (GET_CODE (symbol
) != SYMBOL_REF
)
6045 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6046 symbol so that we may access it directly in the GOT. */
6049 SYMBOL_REF_FLAG (symbol
) = local_p
;
6051 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6052 "local dynamic", "initial exec" or "local exec" TLS models
6055 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
6057 const char *symbol_str
;
6060 enum tls_model kind
= decl_tls_model (decl
);
6062 if (TARGET_64BIT
&& ! flag_pic
)
6064 /* x86-64 doesn't allow non-pic code for shared libraries,
6065 so don't generate GD/LD TLS models for non-pic code. */
6068 case TLS_MODEL_GLOBAL_DYNAMIC
:
6069 kind
= TLS_MODEL_INITIAL_EXEC
; break;
6070 case TLS_MODEL_LOCAL_DYNAMIC
:
6071 kind
= TLS_MODEL_LOCAL_EXEC
; break;
6077 symbol_str
= XSTR (symbol
, 0);
6079 if (symbol_str
[0] == '%')
6081 if (symbol_str
[1] == tls_model_chars
[kind
])
6085 len
= strlen (symbol_str
) + 1;
6086 newstr
= alloca (len
+ 2);
6089 newstr
[1] = tls_model_chars
[kind
];
6090 memcpy (newstr
+ 2, symbol_str
, len
);
6092 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
6096 /* Undo the above when printing symbol names. */
6099 ix86_strip_name_encoding (str
)
/* Load the thread pointer into a register.  */

static rtx
get_thread_pointer ()
{
  rtx tp;

  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
  tp = gen_rtx_MEM (Pmode, tp);
  RTX_UNCHANGING_P (tp) = 1;
  set_mem_alias_set (tp, ix86_GOT_alias_set ());
  tp = force_reg (Pmode, tp);

  return tp;
}
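
/* On IA-32 with GNU TLS the thread pointer lives at %gs:0, so the MEM
   built above typically assembles to something like "movl %gs:0, %eax".  */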
6125 /* Try machine-dependent ways of modifying an illegitimate address
6126 to be legitimate. If we find one, return the new, valid address.
6127 This macro is used in only one place: `memory_address' in explow.c.
6129 OLDX is the address as it was before break_out_memory_refs was called.
6130 In some cases it is useful to look at this to decide what needs to be done.
6132 MODE and WIN are passed so that this macro can use
6133 GO_IF_LEGITIMATE_ADDRESS.
6135 It is always safe for this macro to do nothing. It exists to recognize
6136 opportunities to optimize the output.
6138 For the 80386, we handle X+REG by loading X into a register R and
6139 using R+REG. R will go in a general reg and indexing will be used.
6140 However, if REG is a broken-out memory address or multiplication,
6141 nothing needs to be done because REG can certainly go in a general reg.
6143 When -fpic is used, special handling is needed for symbolic references.
6144 See comments by legitimize_pic_address in i386.c for details. */
6147 legitimize_address (x
, oldx
, mode
)
6149 register rtx oldx ATTRIBUTE_UNUSED
;
6150 enum machine_mode mode
;
6155 if (TARGET_DEBUG_ADDR
)
6157 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6158 GET_MODE_NAME (mode
));
6162 log
= tls_symbolic_operand (x
, mode
);
6165 rtx dest
, base
, off
, pic
;
6170 case TLS_MODEL_GLOBAL_DYNAMIC
:
6171 dest
= gen_reg_rtx (Pmode
);
6174 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
6177 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
6178 insns
= get_insns ();
6181 emit_libcall_block (insns
, dest
, rax
, x
);
6184 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
6187 case TLS_MODEL_LOCAL_DYNAMIC
:
6188 base
= gen_reg_rtx (Pmode
);
6191 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
6194 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
6195 insns
= get_insns ();
6198 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
6199 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
6200 emit_libcall_block (insns
, base
, rax
, note
);
6203 emit_insn (gen_tls_local_dynamic_base_32 (base
));
6205 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
6206 off
= gen_rtx_CONST (Pmode
, off
);
6208 return gen_rtx_PLUS (Pmode
, base
, off
);
6210 case TLS_MODEL_INITIAL_EXEC
:
6214 type
= UNSPEC_GOTNTPOFF
;
6218 if (reload_in_progress
)
6219 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
6220 pic
= pic_offset_table_rtx
;
6221 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
6223 else if (!TARGET_GNU_TLS
)
6225 pic
= gen_reg_rtx (Pmode
);
6226 emit_insn (gen_set_got (pic
));
6227 type
= UNSPEC_GOTTPOFF
;
6232 type
= UNSPEC_INDNTPOFF
;
6235 base
= get_thread_pointer ();
6237 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
6238 off
= gen_rtx_CONST (Pmode
, off
);
6240 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
6241 off
= gen_rtx_MEM (Pmode
, off
);
6242 RTX_UNCHANGING_P (off
) = 1;
6243 set_mem_alias_set (off
, ix86_GOT_alias_set ());
6244 dest
= gen_reg_rtx (Pmode
);
6246 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6248 emit_move_insn (dest
, off
);
6249 return gen_rtx_PLUS (Pmode
, base
, dest
);
6252 emit_insn (gen_subsi3 (dest
, base
, off
));
6255 case TLS_MODEL_LOCAL_EXEC
:
6256 base
= get_thread_pointer ();
6258 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
6259 (TARGET_64BIT
|| TARGET_GNU_TLS
)
6260 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
6261 off
= gen_rtx_CONST (Pmode
, off
);
6263 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
6264 return gen_rtx_PLUS (Pmode
, base
, off
);
6267 dest
= gen_reg_rtx (Pmode
);
6268 emit_insn (gen_subsi3 (dest
, base
, off
));
6279 if (flag_pic
&& SYMBOLIC_CONST (x
))
6280 return legitimize_pic_address (x
, 0);
6282 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6283 if (GET_CODE (x
) == ASHIFT
6284 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6285 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
6288 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
6289 GEN_INT (1 << log
));
6292 if (GET_CODE (x
) == PLUS
)
6294 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6296 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
6297 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
6298 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
6301 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
6302 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
6303 GEN_INT (1 << log
));
6306 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
6307 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
6308 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
6311 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
6312 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
6313 GEN_INT (1 << log
));
6316 /* Put multiply first if it isn't already. */
6317 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6319 rtx tmp
= XEXP (x
, 0);
6320 XEXP (x
, 0) = XEXP (x
, 1);
6325 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6326 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6327 created by virtual register instantiation, register elimination, and
6328 similar optimizations. */
6329 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
6332 x
= gen_rtx_PLUS (Pmode
,
6333 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
6334 XEXP (XEXP (x
, 1), 0)),
6335 XEXP (XEXP (x
, 1), 1));
6339 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6340 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6341 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
6342 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6343 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
6344 && CONSTANT_P (XEXP (x
, 1)))
6347 rtx other
= NULL_RTX
;
6349 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6351 constant
= XEXP (x
, 1);
6352 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6354 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
6356 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
6357 other
= XEXP (x
, 1);
6365 x
= gen_rtx_PLUS (Pmode
,
6366 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
6367 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
6368 plus_constant (other
, INTVAL (constant
)));
6372 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6375 if (GET_CODE (XEXP (x
, 0)) == MULT
)
6378 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
6381 if (GET_CODE (XEXP (x
, 1)) == MULT
)
6384 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
6388 && GET_CODE (XEXP (x
, 1)) == REG
6389 && GET_CODE (XEXP (x
, 0)) == REG
)
6392 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
6395 x
= legitimize_pic_address (x
, 0);
6398 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
6401 if (GET_CODE (XEXP (x
, 0)) == REG
)
6403 register rtx temp
= gen_reg_rtx (Pmode
);
6404 register rtx val
= force_operand (XEXP (x
, 1), temp
);
6406 emit_move_insn (temp
, val
);
6412 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6414 register rtx temp
= gen_reg_rtx (Pmode
);
6415 register rtx val
= force_operand (XEXP (x
, 0), temp
);
6417 emit_move_insn (temp
, val
);
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

output_pic_addr_const (file, x, code)

  switch (GET_CODE (x))

      assemble_name (file, XSTR (x, 0));
      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
        fputs ("@PLT", file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);

      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));

      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);

      if (GET_MODE (x) == VOIDmode)
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));

      /* We can't handle floating point constants;
         PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
          output_pic_addr_const (file, XEXP (x, 0), code);
          output_pic_addr_const (file, XEXP (x, 1), code);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
          output_pic_addr_const (file, XEXP (x, 1), code);
          output_pic_addr_const (file, XEXP (x, 0), code);

      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      if (XVECLEN (x, 0) != 1)
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
          fputs ("@GOT", file);
          fputs ("@GOTOFF", file);
        case UNSPEC_GOTPCREL:
          fputs ("@GOTPCREL(%rip)", file);
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@GOTTPOFF", file);
          fputs ("@TPOFF", file);
          fputs ("@TPOFF", file);
          fputs ("@NTPOFF", file);
          fputs ("@DTPOFF", file);
        case UNSPEC_GOTNTPOFF:
          fputs ("@GOTTPOFF(%rip)", file);
          fputs ("@GOTNTPOFF", file);
        case UNSPEC_INDNTPOFF:
          fputs ("@INDNTPOFF", file);
          output_operand_lossage ("invalid UNSPEC as operand");

      output_operand_lossage ("invalid expression as operand");
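/* The suffixes printed above are assembler relocation operators: @GOT and
   @GOTOFF for ordinary PIC references, @PLT for calls through the procedure
   linkage table, @GOTPCREL for RIP-relative GOT slots on x86-64, and the
   various @...TPOFF/@NTPOFF/@DTPOFF forms for the thread-local storage
   access models.  The assembler and linker turn them into the corresponding
   relocations.  */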
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

i386_dwarf_output_addr_const (file, x)

  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);

  fprintf (file, "%s", ASM_LONG);

  output_pic_addr_const (file, x, '\0');

  output_addr_const (file, x);
/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

i386_output_dwarf_dtprel (file, size, x)

  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);

  fputs (", 0", file);
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

i386_simplify_dwarf_addr (orig_x)

  if (GET_CODE (x) == MEM)

      if (GET_CODE (x) != CONST
          || GET_CODE (XEXP (x, 0)) != UNSPEC
          || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
          || GET_CODE (orig_x) != MEM)

      return XVECEXP (XEXP (x, 0), 0, 0);

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)

  if (GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    /* %ebx + GOT/GOTOFF */
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
      /* %ebx + %reg * scale + GOT/GOTOFF */
      if (GET_CODE (XEXP (y, 0)) == REG
          && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
      else if (GET_CODE (XEXP (y, 1)) == REG
               && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
      if (GET_CODE (y) != REG
          && GET_CODE (y) != MULT
          && GET_CODE (y) != ASHIFT)

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
        return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
      return XVECEXP (x, 0, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
          || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
              && GET_CODE (orig_x) != MEM)))
      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
      return gen_rtx_PLUS (Pmode, y, x);
put_condition_code (code, mode, reverse, fp, file)
     enum machine_mode mode;

  if (mode == CCFPmode || mode == CCFPUmode)
      enum rtx_code second_code, bypass_code;
      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
      if (bypass_code != NIL || second_code != NIL)
      code = ix86_fp_compare_code_to_integer (code);

    code = reverse_condition (code);

      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)

      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      suffix = fp ? "nbe" : "a";

      if (mode == CCNOmode || mode == CCGOCmode)
      else if (mode == CCmode || mode == CCGCmode)

      if (mode == CCNOmode || mode == CCGOCmode)
      else if (mode == CCmode || mode == CCGCmode)

      suffix = fp ? "nb" : "ae";

      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)

      suffix = fp ? "u" : "p";

      suffix = fp ? "nu" : "np";

  fputs (suffix, file);
6783 print_reg (x
, code
, file
)
6788 if (REGNO (x
) == ARG_POINTER_REGNUM
6789 || REGNO (x
) == FRAME_POINTER_REGNUM
6790 || REGNO (x
) == FLAGS_REG
6791 || REGNO (x
) == FPSR_REG
)
6794 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6797 if (code
== 'w' || MMX_REG_P (x
))
6799 else if (code
== 'b')
6801 else if (code
== 'k')
6803 else if (code
== 'q')
6805 else if (code
== 'y')
6807 else if (code
== 'h')
6810 code
= GET_MODE_SIZE (GET_MODE (x
));
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
  if (REX_INT_REG_P (x))
          error ("extended registers have no high halves");
          fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
          fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
          fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
          fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
          error ("unsupported operand size for extended register");
6844 if (STACK_TOP_P (x
))
6846 fputs ("st(0)", file
);
6853 if (! ANY_FP_REG_P (x
))
6854 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6858 fputs (hi_reg_name
[REGNO (x
)], file
);
6861 fputs (qi_reg_name
[REGNO (x
)], file
);
6864 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6871 /* Locate some local-dynamic symbol still in use by this function
6872 so that we can print its name in some tls_local_dynamic_base
6876 get_some_local_dynamic_name ()
6880 if (cfun
->machine
->some_ld_name
)
6881 return cfun
->machine
->some_ld_name
;
6883 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6885 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6886 return cfun
->machine
->some_ld_name
;
6892 get_some_local_dynamic_name_1 (px
, data
)
6894 void *data ATTRIBUTE_UNUSED
;
6898 if (GET_CODE (x
) == SYMBOL_REF
6899 && local_dynamic_symbolic_operand (x
, Pmode
))
6901 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
        nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
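   For example, with operands[0] being register 0 (the %eax family), %b0
   prints %al, %w0 prints %ax, %k0 prints %eax, %q0 prints %rax (64-bit
   only) and %h0 prints %ah.  (Illustrative summary of the codes listed
   above.)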
6936 print_operand (file
, x
, code
)
6946 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6951 assemble_name (file
, get_some_local_dynamic_name ());
6955 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6957 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6959 /* Intel syntax. For absolute addresses, registers should not
6960 be surrounded by braces. */
6961 if (GET_CODE (x
) != REG
)
6964 PRINT_OPERAND (file
, x
, 0);
6972 PRINT_OPERAND (file
, x
, 0);
6977 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6982 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6987 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6992 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6997 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7002 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7007 /* 387 opcodes don't get size suffixes if the operands are
7009 if (STACK_REG_P (x
))
7012 /* Likewise if using Intel opcodes. */
7013 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7016 /* This is the size of op from size of operand. */
7017 switch (GET_MODE_SIZE (GET_MODE (x
)))
7020 #ifdef HAVE_GAS_FILDS_FISTS
7026 if (GET_MODE (x
) == SFmode
)
7041 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
7043 #ifdef GAS_MNEMONICS
7069 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
7071 PRINT_OPERAND (file
, x
, 0);
      /* Little bit of braindamage here.  The SSE compare instructions
         use completely different names for the comparisons than the
         fp conditional moves.  */
      switch (GET_CODE (x))

          fputs ("unord", file);
          fputs ("neq", file);
          fputs ("nlt", file);
          fputs ("nle", file);
          fputs ("ord", file);
7118 #ifdef CMOV_SUN_AS_SYNTAX
7119 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7121 switch (GET_MODE (x
))
7123 case HImode
: putc ('w', file
); break;
7125 case SFmode
: putc ('l', file
); break;
7127 case DFmode
: putc ('q', file
); break;
7135 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
7138 #ifdef CMOV_SUN_AS_SYNTAX
7139 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7142 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
7145 /* Like above, but reverse condition */
7147 /* Check to see if argument to %c is really a constant
7148 and not a condition code which needs to be reversed. */
7149 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
7151 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7154 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
7157 #ifdef CMOV_SUN_AS_SYNTAX
7158 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7161 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
      if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)

      x = find_reg_note (current_output_insn, REG_BR_PROB, 0);

          int pred_val = INTVAL (XEXP (x, 0));

          if (pred_val < REG_BR_PROB_BASE * 45 / 100
              || pred_val > REG_BR_PROB_BASE * 55 / 100)
              int taken = pred_val > REG_BR_PROB_BASE / 2;
              int cputaken = final_forward_branch_p (current_output_insn) == 0;

              /* Emit hints only when the default branch prediction
                 heuristics would fail.  */
              if (taken != cputaken)
                  /* We use 3e (DS) prefix for taken branches and
                     2e (CS) prefix for not taken branches.  */
                  if (taken)
                    fputs ("ds ; ", file);
                  else
                    fputs ("cs ; ", file);
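          /* These prefix bytes (2Eh and 3Eh) are interpreted as static
             branch hints only by some processors, notably the Pentium 4
             family; other CPUs simply ignore a redundant segment override
             on a branch, so emitting them is harmless there.  */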
7197 output_operand_lossage ("invalid operand code `%c'", code
);
7201 if (GET_CODE (x
) == REG
)
7203 PRINT_REG (x
, code
, file
);
7206 else if (GET_CODE (x
) == MEM
)
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
          switch (GET_MODE_SIZE (GET_MODE (x)))
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            case 16: size = "XMMWORD"; break;

          /* Check for explicit size override (codes 'b', 'w' and 'k').  */
          if (code == 'b')
            size = "BYTE";
          else if (code == 'w')
            size = "WORD";
          else if (code == 'k')
            size = "DWORD";

          fputs (size, file);
          fputs (" PTR ", file);
7237 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
7238 output_pic_addr_const (file
, x
, code
);
7239 /* Avoid (%rip) for call operands. */
7240 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
7241 && GET_CODE (x
) != CONST_INT
)
7242 output_addr_const (file
, x
);
7243 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
7244 output_operand_lossage ("invalid constraints for operand");
7249 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
7254 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7255 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
7257 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7259 fprintf (file
, "0x%lx", l
);
7262 /* These float cases don't actually occur as immediate operands. */
7263 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
7267 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7268 fprintf (file
, "%s", dstr
);
7271 else if (GET_CODE (x
) == CONST_DOUBLE
7272 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
7276 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
7277 fprintf (file
, "%s", dstr
);
7284 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
7286 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7289 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
7290 || GET_CODE (x
) == LABEL_REF
)
7292 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7295 fputs ("OFFSET FLAT:", file
);
7298 if (GET_CODE (x
) == CONST_INT
)
7299 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7301 output_pic_addr_const (file
, x
, code
);
7303 output_addr_const (file
, x
);
/* Print a memory operand whose address is ADDR.  */

print_operand_address (file, addr)

  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
      if (ASSEMBLER_DIALECT == ASM_INTEL)
        fputs ("DWORD PTR ", file);
      if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
        putc ('%', file);
      if (TARGET_64BIT)
        fputs ("fs:0", file);
      else
        fputs ("gs:0", file);
7331 if (! ix86_decompose_address (addr
, &parts
))
7335 index
= parts
.index
;
7337 scale
= parts
.scale
;
7339 if (!base
&& !index
)
7341 /* Displacement only requires special attention. */
7343 if (GET_CODE (disp
) == CONST_INT
)
7345 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
7347 if (USER_LABEL_PREFIX
[0] == 0)
7349 fputs ("ds:", file
);
7351 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
7354 output_pic_addr_const (file
, addr
, 0);
7356 output_addr_const (file
, addr
);
7358 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7360 && ((GET_CODE (addr
) == SYMBOL_REF
7361 && ! tls_symbolic_operand (addr
, GET_MODE (addr
)))
7362 || GET_CODE (addr
) == LABEL_REF
7363 || (GET_CODE (addr
) == CONST
7364 && GET_CODE (XEXP (addr
, 0)) == PLUS
7365 && (GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
7366 || GET_CODE (XEXP (XEXP (addr
, 0), 0)) == LABEL_REF
)
7367 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
7368 fputs ("(%rip)", file
);
7372 if (ASSEMBLER_DIALECT
== ASM_ATT
)
7377 output_pic_addr_const (file
, disp
, 0);
7378 else if (GET_CODE (disp
) == LABEL_REF
)
7379 output_asm_label (disp
);
7381 output_addr_const (file
, disp
);
7386 PRINT_REG (base
, 0, file
);
7390 PRINT_REG (index
, 0, file
);
7392 fprintf (file
, ",%d", scale
);
7398 rtx offset
= NULL_RTX
;
7402 /* Pull out the offset of a symbol; print any symbol itself. */
7403 if (GET_CODE (disp
) == CONST
7404 && GET_CODE (XEXP (disp
, 0)) == PLUS
7405 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7407 offset
= XEXP (XEXP (disp
, 0), 1);
7408 disp
= gen_rtx_CONST (VOIDmode
,
7409 XEXP (XEXP (disp
, 0), 0));
7413 output_pic_addr_const (file
, disp
, 0);
7414 else if (GET_CODE (disp
) == LABEL_REF
)
7415 output_asm_label (disp
);
7416 else if (GET_CODE (disp
) == CONST_INT
)
7419 output_addr_const (file
, disp
);
7425 PRINT_REG (base
, 0, file
);
7428 if (INTVAL (offset
) >= 0)
7430 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7434 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7441 PRINT_REG (index
, 0, file
);
7443 fprintf (file
, "*%d", scale
);
7451 output_addr_const_extra (file
, x
)
7457 if (GET_CODE (x
) != UNSPEC
)
7460 op
= XVECEXP (x
, 0, 0);
7461 switch (XINT (x
, 1))
7463 case UNSPEC_GOTTPOFF
:
7464 output_addr_const (file
, op
);
7465 /* FIXME: This might be @TPOFF in Sun ld. */
7466 fputs ("@GOTTPOFF", file
);
7469 output_addr_const (file
, op
);
7470 fputs ("@TPOFF", file
);
7473 output_addr_const (file
, op
);
7475 fputs ("@TPOFF", file
);
7477 fputs ("@NTPOFF", file
);
7480 output_addr_const (file
, op
);
7481 fputs ("@DTPOFF", file
);
7483 case UNSPEC_GOTNTPOFF
:
7484 output_addr_const (file
, op
);
7486 fputs ("@GOTTPOFF(%rip)", file
);
7488 fputs ("@GOTNTPOFF", file
);
7490 case UNSPEC_INDNTPOFF
:
7491 output_addr_const (file
, op
);
7492 fputs ("@INDNTPOFF", file
);
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_di (operands, num, lo_half, hi_half)
     rtx lo_half[], hi_half[];

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle it.  */
      if (GET_CODE (op) == MEM)
          lo_half[num] = adjust_address (op, SImode, 0);
          hi_half[num] = adjust_address (op, SImode, 4);
      else
          lo_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 0);
          hi_half[num] = simplify_gen_subreg (SImode, op,
                                              GET_MODE (op) == VOIDmode
                                              ? DImode : GET_MODE (op), 4);

/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_ti (operands, num, lo_half, hi_half)
     rtx lo_half[], hi_half[];

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses, but we
         still have to handle it.  */
      if (GET_CODE (op) == MEM)
          lo_half[num] = adjust_address (op, DImode, 0);
          hi_half[num] = adjust_address (op, DImode, 8);
      else
          lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
          hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
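/* A note on the offsets used above: on this little-endian target the low
   half of a double-word value lives at the lower address, so DImode memory
   splits at byte offsets 0 and 4 and TImode memory at offsets 0 and 8.
   For instance, the DImode constant 0x0000000100000002 splits into
   lo = 0x2 and hi = 0x1.  */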
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
7585 output_387_binary_op (insn
, operands
)
7589 static char buf
[30];
7592 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7594 #ifdef ENABLE_CHECKING
7595 /* Even if we do not want to check the inputs, this documents input
7596 constraints. Which helps in understanding the following code. */
7597 if (STACK_REG_P (operands
[0])
7598 && ((REG_P (operands
[1])
7599 && REGNO (operands
[0]) == REGNO (operands
[1])
7600 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7601 || (REG_P (operands
[2])
7602 && REGNO (operands
[0]) == REGNO (operands
[2])
7603 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7604 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7610 switch (GET_CODE (operands
[3]))
7613 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7614 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7622 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7623 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7631 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7632 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7640 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7641 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7655 if (GET_MODE (operands
[0]) == SFmode
)
7656 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7658 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7663 switch (GET_CODE (operands
[3]))
7667 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7669 rtx temp
= operands
[2];
7670 operands
[2] = operands
[1];
7674 /* know operands[0] == operands[1]. */
7676 if (GET_CODE (operands
[2]) == MEM
)
7682 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7684 if (STACK_TOP_P (operands
[0]))
7685 /* How is it that we are storing to a dead operand[2]?
7686 Well, presumably operands[1] is dead too. We can't
7687 store the result to st(0) as st(0) gets popped on this
7688 instruction. Instead store to operands[2] (which I
7689 think has to be st(1)). st(1) will be popped later.
7690 gcc <= 2.8.1 didn't have this check and generated
7691 assembly code that the Unixware assembler rejected. */
7692 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7694 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7698 if (STACK_TOP_P (operands
[0]))
7699 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7701 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7706 if (GET_CODE (operands
[1]) == MEM
)
7712 if (GET_CODE (operands
[2]) == MEM
)
7718 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7721 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7722 derived assemblers, confusingly reverse the direction of
7723 the operation for fsub{r} and fdiv{r} when the
7724 destination register is not st(0). The Intel assembler
7725 doesn't have this brain damage. Read !SYSV386_COMPAT to
7726 figure out what the hardware really does. */
7727 if (STACK_TOP_P (operands
[0]))
7728 p
= "{p\t%0, %2|rp\t%2, %0}";
7730 p
= "{rp\t%2, %0|p\t%0, %2}";
7732 if (STACK_TOP_P (operands
[0]))
7733 /* As above for fmul/fadd, we can't store to st(0). */
7734 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7736 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7741 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7744 if (STACK_TOP_P (operands
[0]))
7745 p
= "{rp\t%0, %1|p\t%1, %0}";
7747 p
= "{p\t%1, %0|rp\t%0, %1}";
7749 if (STACK_TOP_P (operands
[0]))
7750 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7752 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7757 if (STACK_TOP_P (operands
[0]))
7759 if (STACK_TOP_P (operands
[1]))
7760 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7762 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7765 else if (STACK_TOP_P (operands
[1]))
7768 p
= "{\t%1, %0|r\t%0, %1}";
7770 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7776 p
= "{r\t%2, %0|\t%0, %2}";
7778 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Output code to initialize the control word copies used by the
   trunc?f?i patterns.  NORMAL is set to the current control word, while
   ROUND_DOWN is set to the control word rounding downwards.  */

emit_i387_cw_initialization (normal, round_down)
     rtx normal, round_down;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (normal));
  emit_move_insn (reg, normal);
  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
  else
    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
  emit_move_insn (round_down, reg);
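/* A note on the magic constants above: bits 10 and 11 of the 387 control
   word form the rounding-control field.  OR-ing in 0xc00 sets both bits
   (RC = 11b, round toward zero), which is what the truncating conversion
   patterns need; the movsi_insv_1 variant writes the same two bits by
   inserting the value 0xc into the upper byte of the control word.  */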
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

output_fix_trunc (insn, operands)

  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (!STACK_TOP_P (operands[1]))
    abort ();

  if (GET_CODE (operands[0]) != MEM)
    abort ();

  output_asm_insn ("fldcw\t%3", operands);
  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z0\t%0", operands);
  else
    output_asm_insn ("fist%z0\t%0", operands);
  output_asm_insn ("fldcw\t%2", operands);
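/* The emitted sequence is therefore: load the truncating control word
   (operand 3), perform the store ("fistp" when the value is popped,
   plain "fist" otherwise), and finally restore the caller's control word
   from operand 2.  The two control word copies are presumably the ones
   set up by emit_i387_cw_initialization above.  */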
7844 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7845 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7846 when fucom should be used. */
7849 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7852 int eflags_p
, unordered_p
;
7855 rtx cmp_op0
= operands
[0];
7856 rtx cmp_op1
= operands
[1];
7857 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7862 cmp_op1
= operands
[2];
7866 if (GET_MODE (operands
[0]) == SFmode
)
7868 return "ucomiss\t{%1, %0|%0, %1}";
7870 return "comiss\t{%1, %0|%0, %1}";
7873 return "ucomisd\t{%1, %0|%0, %1}";
7875 return "comisd\t{%1, %0|%0, %1}";
7878 if (! STACK_TOP_P (cmp_op0
))
7881 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7883 if (STACK_REG_P (cmp_op1
)
7885 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7886 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7888 /* If both the top of the 387 stack dies, and the other operand
7889 is also a stack register that dies, then this must be a
7890 `fcompp' float compare */
7894 /* There is no double popping fcomi variant. Fortunately,
7895 eflags is immune from the fstp's cc clobbering. */
7897 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7899 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7907 return "fucompp\n\tfnstsw\t%0";
7909 return "fcompp\n\tfnstsw\t%0";
7922 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7924 static const char * const alt
[24] =
7936 "fcomi\t{%y1, %0|%0, %y1}",
7937 "fcomip\t{%y1, %0|%0, %y1}",
7938 "fucomi\t{%y1, %0|%0, %y1}",
7939 "fucomip\t{%y1, %0|%0, %y1}",
7946 "fcom%z2\t%y2\n\tfnstsw\t%0",
7947 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7948 "fucom%z2\t%y2\n\tfnstsw\t%0",
7949 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7951 "ficom%z2\t%y2\n\tfnstsw\t%0",
7952 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7960 mask
= eflags_p
<< 3;
7961 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7962 mask
|= unordered_p
<< 1;
7963 mask
|= stack_top_dies
;
7976 ix86_output_addr_vec_elt (file
, value
)
7980 const char *directive
= ASM_LONG
;
7985 directive
= ASM_QUAD
;
7991 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7995 ix86_output_addr_diff_elt (file
, value
, rel
)
8000 fprintf (file
, "%s%s%d-%s%d\n",
8001 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
8002 else if (HAVE_AS_GOTOFF_IN_DATA
)
8003 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
8005 else if (TARGET_MACHO
)
8006 fprintf (file
, "%s%s%d-%s\n", ASM_LONG
, LPREFIX
, value
,
8007 machopic_function_base_name () + 1);
8010 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
8011 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */

ix86_expand_clear (dest)

  /* We play register width games, which are only valid after reload.  */
  if (!reload_completed)
    abort ();

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));

  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
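/* The xor form is chosen when it is the smaller or cheaper alternative
   (TARGET_USE_MOV0 unset, or when optimizing for size).  Unlike
   "mov $0, reg" it clobbers the flags, which is why the explicit CLOBBER
   of the flags register (hard register 17) is attached above.  */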
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

maybe_get_pool_constant (x)

  if (flag_pic && ! TARGET_64BIT)
      if (GET_CODE (x) != PLUS)
      if (XEXP (x, 0) != pic_offset_table_rtx)
      if (GET_CODE (x) != CONST)
      if (GET_CODE (x) != UNSPEC)
      if (XINT (x, 1) != UNSPEC_GOTOFF)
      x = XVECEXP (x, 0, 0);

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);
8076 ix86_expand_move (mode
, operands
)
8077 enum machine_mode mode
;
8080 int strict
= (reload_in_progress
|| reload_completed
);
8081 rtx insn
, op0
, op1
, tmp
;
8086 if (tls_symbolic_operand (op1
, Pmode
))
8088 op1
= legitimize_address (op1
, op1
, VOIDmode
);
8089 if (GET_CODE (op0
) == MEM
)
8091 tmp
= gen_reg_rtx (mode
);
8092 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
8096 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
8101 rtx temp
= ((reload_in_progress
8102 || ((op0
&& GET_CODE (op0
) == REG
)
8104 ? op0
: gen_reg_rtx (Pmode
));
8105 op1
= machopic_indirect_data_reference (op1
, temp
);
8106 op1
= machopic_legitimize_pic_address (op1
, mode
,
8107 temp
== op1
? 0 : temp
);
8111 if (MACHOPIC_INDIRECT
)
8112 op1
= machopic_indirect_data_reference (op1
, 0);
8116 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
8120 #endif /* TARGET_MACHO */
8121 if (GET_CODE (op0
) == MEM
)
8122 op1
= force_reg (Pmode
, op1
);
8126 if (GET_CODE (temp
) != REG
)
8127 temp
= gen_reg_rtx (Pmode
);
8128 temp
= legitimize_pic_address (op1
, temp
);
8136 if (GET_CODE (op0
) == MEM
8137 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
8138 || !push_operand (op0
, mode
))
8139 && GET_CODE (op1
) == MEM
)
8140 op1
= force_reg (mode
, op1
);
8142 if (push_operand (op0
, mode
)
8143 && ! general_no_elim_operand (op1
, mode
))
8144 op1
= copy_to_mode_reg (mode
, op1
);
8146 /* Force large constants in 64bit compilation into register
8147 to get them CSEed. */
8148 if (TARGET_64BIT
&& mode
== DImode
8149 && immediate_operand (op1
, mode
)
8150 && !x86_64_zero_extended_value (op1
)
8151 && !register_operand (op0
, mode
)
8152 && optimize
&& !reload_completed
&& !reload_in_progress
)
8153 op1
= copy_to_mode_reg (mode
, op1
);
8155 if (FLOAT_MODE_P (mode
))
8157 /* If we are loading a floating point constant to a register,
8158 force the value to memory now, since we'll get better code
8159 out the back end. */
8163 else if (GET_CODE (op1
) == CONST_DOUBLE
8164 && register_operand (op0
, mode
))
8165 op1
= validize_mem (force_const_mem (mode
, op1
));
8169 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
8175 ix86_expand_vector_move (mode
, operands
)
8176 enum machine_mode mode
;
8179 /* Force constants other than zero into memory. We do not know how
8180 the instructions used to build constants modify the upper 64 bits
8181 of the register, once we have that information we may be able
8182 to handle some of them more efficiently. */
8183 if ((reload_in_progress
| reload_completed
) == 0
8184 && register_operand (operands
[0], mode
)
8185 && CONSTANT_P (operands
[1]))
8186 operands
[1] = validize_mem (force_const_mem (mode
, operands
[1]));
8188 /* Make operand1 a register if it isn't already. */
8190 && !register_operand (operands
[0], mode
)
8191 && !register_operand (operands
[1], mode
))
8193 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
8194 emit_move_insn (operands
[0], temp
);
8198 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow three separate
   memory references (one output, two input) in a single insn.  */
8206 ix86_expand_binary_operator (code
, mode
, operands
)
8208 enum machine_mode mode
;
8211 int matching_memory
;
8212 rtx src1
, src2
, dst
, op
, clob
;
8218 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8219 if (GET_RTX_CLASS (code
) == 'c'
8220 && (rtx_equal_p (dst
, src2
)
8221 || immediate_operand (src1
, mode
)))
8228 /* If the destination is memory, and we do not have matching source
8229 operands, do things in registers. */
8230 matching_memory
= 0;
8231 if (GET_CODE (dst
) == MEM
)
8233 if (rtx_equal_p (dst
, src1
))
8234 matching_memory
= 1;
8235 else if (GET_RTX_CLASS (code
) == 'c'
8236 && rtx_equal_p (dst
, src2
))
8237 matching_memory
= 2;
8239 dst
= gen_reg_rtx (mode
);
8242 /* Both source operands cannot be in memory. */
8243 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
8245 if (matching_memory
!= 2)
8246 src2
= force_reg (mode
, src2
);
8248 src1
= force_reg (mode
, src1
);
8251 /* If the operation is not commutable, source 1 cannot be a constant
8252 or non-matching memory. */
8253 if ((CONSTANT_P (src1
)
8254 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
8255 && GET_RTX_CLASS (code
) != 'c')
8256 src1
= force_reg (mode
, src1
);
8258 /* If optimizing, copy to regs to improve CSE */
8259 if (optimize
&& ! no_new_pseudos
)
8261 if (GET_CODE (dst
) == MEM
)
8262 dst
= gen_reg_rtx (mode
);
8263 if (GET_CODE (src1
) == MEM
)
8264 src1
= force_reg (mode
, src1
);
8265 if (GET_CODE (src2
) == MEM
)
8266 src2
= force_reg (mode
, src2
);
8269 /* Emit the instruction. */
8271 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
8272 if (reload_in_progress
)
8274 /* Reload doesn't know about the flags register, and doesn't know that
8275 it doesn't want to clobber it. We can only do this with PLUS. */
8282 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8283 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8286 /* Fix up the destination if needed. */
8287 if (dst
!= operands
[0])
8288 emit_move_insn (operands
[0], dst
);
8291 /* Return TRUE or FALSE depending on whether the binary operator meets the
8292 appropriate constraints. */
8295 ix86_binary_operator_ok (code
, mode
, operands
)
8297 enum machine_mode mode ATTRIBUTE_UNUSED
;
8300 /* Both source operands cannot be in memory. */
8301 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
8303 /* If the operation is not commutable, source 1 cannot be a constant. */
8304 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
8306 /* If the destination is memory, we must have a matching source operand. */
8307 if (GET_CODE (operands
[0]) == MEM
8308 && ! (rtx_equal_p (operands
[0], operands
[1])
8309 || (GET_RTX_CLASS (code
) == 'c'
8310 && rtx_equal_p (operands
[0], operands
[2]))))
8312 /* If the operation is not commutable and the source 1 is memory, we must
8313 have a matching destination. */
8314 if (GET_CODE (operands
[1]) == MEM
8315 && GET_RTX_CLASS (code
) != 'c'
8316 && ! rtx_equal_p (operands
[0], operands
[1]))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow two separate
   memory references (one output, one input) in a single insn.  */
8326 ix86_expand_unary_operator (code
, mode
, operands
)
8328 enum machine_mode mode
;
8331 int matching_memory
;
8332 rtx src
, dst
, op
, clob
;
8337 /* If the destination is memory, and we do not have matching source
8338 operands, do things in registers. */
8339 matching_memory
= 0;
8340 if (GET_CODE (dst
) == MEM
)
8342 if (rtx_equal_p (dst
, src
))
8343 matching_memory
= 1;
8345 dst
= gen_reg_rtx (mode
);
8348 /* When source operand is memory, destination must match. */
8349 if (!matching_memory
&& GET_CODE (src
) == MEM
)
8350 src
= force_reg (mode
, src
);
8352 /* If optimizing, copy to regs to improve CSE */
8353 if (optimize
&& ! no_new_pseudos
)
8355 if (GET_CODE (dst
) == MEM
)
8356 dst
= gen_reg_rtx (mode
);
8357 if (GET_CODE (src
) == MEM
)
8358 src
= force_reg (mode
, src
);
8361 /* Emit the instruction. */
8363 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
8364 if (reload_in_progress
|| code
== NOT
)
8366 /* Reload doesn't know about the flags register, and doesn't know that
8367 it doesn't want to clobber it. */
8374 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8375 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8378 /* Fix up the destination if needed. */
8379 if (dst
!= operands
[0])
8380 emit_move_insn (operands
[0], dst
);
8383 /* Return TRUE or FALSE depending on whether the unary operator meets the
8384 appropriate constraints. */
8387 ix86_unary_operator_ok (code
, mode
, operands
)
8388 enum rtx_code code ATTRIBUTE_UNUSED
;
8389 enum machine_mode mode ATTRIBUTE_UNUSED
;
8390 rtx operands
[2] ATTRIBUTE_UNUSED
;
8392 /* If one of operands is memory, source and destination must match. */
8393 if ((GET_CODE (operands
[0]) == MEM
8394 || GET_CODE (operands
[1]) == MEM
)
8395 && ! rtx_equal_p (operands
[0], operands
[1]))
8400 /* Return TRUE or FALSE depending on whether the first SET in INSN
8401 has source and destination with matching CC modes, and that the
8402 CC mode is at least as constrained as REQ_MODE. */
8405 ix86_match_ccmode (insn
, req_mode
)
8407 enum machine_mode req_mode
;
8410 enum machine_mode set_mode
;
8412 set
= PATTERN (insn
);
8413 if (GET_CODE (set
) == PARALLEL
)
8414 set
= XVECEXP (set
, 0, 0);
8415 if (GET_CODE (set
) != SET
)
8417 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8420 set_mode
= GET_MODE (SET_DEST (set
));
8424 if (req_mode
!= CCNOmode
8425 && (req_mode
!= CCmode
8426 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8430 if (req_mode
== CCGCmode
)
8434 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8438 if (req_mode
== CCZmode
)
8448 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8451 /* Generate insn patterns to do an integer compare of OPERANDS. */
8454 ix86_expand_int_compare (code
, op0
, op1
)
8458 enum machine_mode cmpmode
;
8461 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8462 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8464 /* This is very simple, but making the interface the same as in the
8465 FP case makes the rest of the code easier. */
8466 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8467 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8469 /* Return the test that should be put into the flags user, i.e.
8470 the bcc, scc, or cmov instruction. */
8471 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8474 /* Figure out whether to use ordered or unordered fp comparisons.
8475 Return the appropriate mode to use. */
8478 ix86_fp_compare_mode (code
)
8479 enum rtx_code code ATTRIBUTE_UNUSED
;
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
8486 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8490 ix86_cc_mode (code
, op0
, op1
)
8494 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8495 return ix86_fp_compare_mode (code
);
8498 /* Only zero flag is needed. */
8500 case NE
: /* ZF!=0 */
8502 /* Codes needing carry flag. */
8503 case GEU
: /* CF=0 */
8504 case GTU
: /* CF=0 & ZF=0 */
8505 case LTU
: /* CF=1 */
8506 case LEU
: /* CF=1 | ZF=1 */
8508 /* Codes possibly doable only with sign flag when
8509 comparing against zero. */
8510 case GE
: /* SF=OF or SF=0 */
8511 case LT
: /* SF<>OF or SF=1 */
8512 if (op1
== const0_rtx
)
8515 /* For other cases Carry flag is not required. */
8517 /* Codes doable only with sign flag when comparing
8518 against zero, but we miss jump instruction for it
8519 so we need to use relational tests against overflow
8520 that thus needs to be zero. */
8521 case GT
: /* ZF=0 & SF=OF */
8522 case LE
: /* ZF=1 | SF<>OF */
8523 if (op1
== const0_rtx
)
8527 /* strcmp pattern do (use flags) and combine may ask us for proper
8536 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8539 ix86_use_fcomi_compare (code
)
8540 enum rtx_code code ATTRIBUTE_UNUSED
;
8542 enum rtx_code swapped_code
= swap_condition (code
);
8543 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8544 || (ix86_fp_comparison_cost (swapped_code
)
8545 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8548 /* Swap, force into registers, or otherwise massage the two operands
8549 to a fp comparison. The operands are updated in place; the new
8550 comparison code is returned. */
8552 static enum rtx_code
8553 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
8557 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8558 rtx op0
= *pop0
, op1
= *pop1
;
8559 enum machine_mode op_mode
= GET_MODE (op0
);
8560 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8562 /* All of the unordered compare instructions only work on registers.
8563 The same is true of the XFmode compare instructions. The same is
8564 true of the fcomi compare instructions. */
8567 && (fpcmp_mode
== CCFPUmode
8568 || op_mode
== XFmode
8569 || op_mode
== TFmode
8570 || ix86_use_fcomi_compare (code
)))
8572 op0
= force_reg (op_mode
, op0
);
8573 op1
= force_reg (op_mode
, op1
);
8577 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8578 things around if they appear profitable, otherwise force op0
8581 if (standard_80387_constant_p (op0
) == 0
8582 || (GET_CODE (op0
) == MEM
8583 && ! (standard_80387_constant_p (op1
) == 0
8584 || GET_CODE (op1
) == MEM
)))
8587 tmp
= op0
, op0
= op1
, op1
= tmp
;
8588 code
= swap_condition (code
);
8591 if (GET_CODE (op0
) != REG
)
8592 op0
= force_reg (op_mode
, op0
);
8594 if (CONSTANT_P (op1
))
8596 if (standard_80387_constant_p (op1
))
8597 op1
= force_reg (op_mode
, op1
);
8599 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8603 /* Try to rearrange the comparison to make it cheaper. */
8604 if (ix86_fp_comparison_cost (code
)
8605 > ix86_fp_comparison_cost (swap_condition (code
))
8606 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8609 tmp
= op0
, op0
= op1
, op1
= tmp
;
8610 code
= swap_condition (code
);
8611 if (GET_CODE (op0
) != REG
)
8612 op0
= force_reg (op_mode
, op0
);
8620 /* Convert comparison codes we use to represent FP comparison to integer
8621 code that will result in proper branch. Return UNKNOWN if no such code
8623 static enum rtx_code
8624 ix86_fp_compare_code_to_integer (code
)
8654 /* Split comparison code CODE into comparisons we can do using branch
8655 instructions. BYPASS_CODE is comparison code for branch that will
8656 branch around FIRST_CODE and SECOND_CODE. If some of branches
8657 is not required, set value to NIL.
8658 We never require more than two branches. */
8660 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
8661 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
8667 /* The fcomi comparison sets flags as follows:
8677 case GT
: /* GTU - CF=0 & ZF=0 */
8678 case GE
: /* GEU - CF=0 */
8679 case ORDERED
: /* PF=0 */
8680 case UNORDERED
: /* PF=1 */
8681 case UNEQ
: /* EQ - ZF=1 */
8682 case UNLT
: /* LTU - CF=1 */
8683 case UNLE
: /* LEU - CF=1 | ZF=1 */
8684 case LTGT
: /* EQ - ZF=0 */
8686 case LT
: /* LTU - CF=1 - fails on unordered */
8688 *bypass_code
= UNORDERED
;
8690 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8692 *bypass_code
= UNORDERED
;
8694 case EQ
: /* EQ - ZF=1 - fails on unordered */
8696 *bypass_code
= UNORDERED
;
8698 case NE
: /* NE - ZF=0 - fails on unordered */
8700 *second_code
= UNORDERED
;
8702 case UNGE
: /* GEU - CF=0 - fails on unordered */
8704 *second_code
= UNORDERED
;
8706 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8708 *second_code
= UNORDERED
;
8713 if (!TARGET_IEEE_FP
)
/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as the
   cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and to take into account the performance of various
   instructions on various CPUs.  */
8725 ix86_fp_comparison_arithmetics_cost (code
)
8728 if (!TARGET_IEEE_FP
)
8730 /* The cost of code output by ix86_expand_fp_compare. */
8758 /* Return cost of comparison done using fcomi operation.
8759 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8761 ix86_fp_comparison_fcomi_cost (code
)
8764 enum rtx_code bypass_code
, first_code
, second_code
;
8765 /* Return arbitrarily high cost when instruction is not supported - this
8766 prevents gcc from using it. */
8769 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8770 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8773 /* Return cost of comparison done using sahf operation.
8774 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8776 ix86_fp_comparison_sahf_cost (code
)
8779 enum rtx_code bypass_code
, first_code
, second_code
;
  /* Return an arbitrarily high cost when the instruction is not preferred;
     this keeps gcc from using it.  */
8782 if (!TARGET_USE_SAHF
&& !optimize_size
)
8784 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8785 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8788 /* Compute cost of the comparison done using any method.
8789 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8791 ix86_fp_comparison_cost (code
)
8794 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8797 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8798 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8800 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8801 if (min
> sahf_cost
)
8803 if (min
> fcomi_cost
)
8808 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8811 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8813 rtx op0
, op1
, scratch
;
8817 enum machine_mode fpcmp_mode
, intcmp_mode
;
8819 int cost
= ix86_fp_comparison_cost (code
);
8820 enum rtx_code bypass_code
, first_code
, second_code
;
8822 fpcmp_mode
= ix86_fp_compare_mode (code
);
8823 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8826 *second_test
= NULL_RTX
;
8828 *bypass_test
= NULL_RTX
;
8830 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8832 /* Do fcomi/sahf based test when profitable. */
8833 if ((bypass_code
== NIL
|| bypass_test
)
8834 && (second_code
== NIL
|| second_test
)
8835 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8839 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8840 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8846 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8847 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8849 scratch
= gen_reg_rtx (HImode
);
8850 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8851 emit_insn (gen_x86_sahf_1 (scratch
));
8854 /* The FP codes work out to act like unsigned. */
8855 intcmp_mode
= fpcmp_mode
;
8857 if (bypass_code
!= NIL
)
8858 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8859 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8861 if (second_code
!= NIL
)
8862 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8863 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8868 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8869 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8870 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8872 scratch
= gen_reg_rtx (HImode
);
8873 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8875 /* In the unordered case, we have to check C2 for NaN's, which
8876 doesn't happen to work out to anything nice combination-wise.
8877 So do some bit twiddling on the value we've got in AH to come
8878 up with an appropriate set of condition codes. */
8880 intcmp_mode
= CCNOmode
;
8885 if (code
== GT
|| !TARGET_IEEE_FP
)
8887 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8892 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8893 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8894 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8895 intcmp_mode
= CCmode
;
8901 if (code
== LT
&& TARGET_IEEE_FP
)
8903 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8904 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8905 intcmp_mode
= CCmode
;
8910 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8916 if (code
== GE
|| !TARGET_IEEE_FP
)
8918 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8923 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8924 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8931 if (code
== LE
&& TARGET_IEEE_FP
)
8933 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8934 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8935 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8936 intcmp_mode
= CCmode
;
8941 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8947 if (code
== EQ
&& TARGET_IEEE_FP
)
8949 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8950 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8951 intcmp_mode
= CCmode
;
8956 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8963 if (code
== NE
&& TARGET_IEEE_FP
)
8965 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8966 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8972 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8978 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8982 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8991 /* Return the test that should be put into the flags user, i.e.
8992 the bcc, scc, or cmov instruction. */
8993 return gen_rtx_fmt_ee (code
, VOIDmode
,
8994 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8999 ix86_expand_compare (code
, second_test
, bypass_test
)
9001 rtx
*second_test
, *bypass_test
;
9004 op0
= ix86_compare_op0
;
9005 op1
= ix86_compare_op1
;
9008 *second_test
= NULL_RTX
;
9010 *bypass_test
= NULL_RTX
;
9012 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
9013 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9014 second_test
, bypass_test
);
9016 ret
= ix86_expand_int_compare (code
, op0
, op1
);
9021 /* Return true if the CODE will result in nontrivial jump sequence. */
9023 ix86_fp_jump_nontrivial_p (code
)
9026 enum rtx_code bypass_code
, first_code
, second_code
;
9029 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9030 return bypass_code
!= NIL
|| second_code
!= NIL
;
9034 ix86_expand_branch (code
, label
)
9040 switch (GET_MODE (ix86_compare_op0
))
9046 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
9047 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9048 gen_rtx_LABEL_REF (VOIDmode
, label
),
9050 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
9060 enum rtx_code bypass_code
, first_code
, second_code
;
9062 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
9065 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
9067 /* Check whether we will use the natural sequence with one jump. If
9068 so, we can expand jump early. Otherwise delay expansion by
9069 creating compound insn to not confuse optimizers. */
9070 if (bypass_code
== NIL
&& second_code
== NIL
9073 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
9074 gen_rtx_LABEL_REF (VOIDmode
, label
),
9079 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
9080 ix86_compare_op0
, ix86_compare_op1
);
9081 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
9082 gen_rtx_LABEL_REF (VOIDmode
, label
),
9084 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
9086 use_fcomi
= ix86_use_fcomi_compare (code
);
9087 vec
= rtvec_alloc (3 + !use_fcomi
);
9088 RTVEC_ELT (vec
, 0) = tmp
;
9090 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
9092 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
9095 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
9097 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
9105 /* Expand DImode branch into multiple compare+branch. */
9107 rtx lo
[2], hi
[2], label2
;
9108 enum rtx_code code1
, code2
, code3
;
9110 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
9112 tmp
= ix86_compare_op0
;
9113 ix86_compare_op0
= ix86_compare_op1
;
9114 ix86_compare_op1
= tmp
;
9115 code
= swap_condition (code
);
9117 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
9118 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
9120 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9121 avoid two branches. This costs one extra insn, so disable when
9122 optimizing for size. */
9124 if ((code
== EQ
|| code
== NE
)
9126 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
9131 if (hi
[1] != const0_rtx
)
9132 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
9133 NULL_RTX
, 0, OPTAB_WIDEN
);
9136 if (lo
[1] != const0_rtx
)
9137 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
9138 NULL_RTX
, 0, OPTAB_WIDEN
);
9140 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
9141 NULL_RTX
, 0, OPTAB_WIDEN
);
9143 ix86_compare_op0
= tmp
;
9144 ix86_compare_op1
= const0_rtx
;
9145 ix86_expand_branch (code
, label
);
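        /* Example of the transformation above: a 64-bit "a == b" on a 32-bit
           target becomes "((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0", i.e. two
           xors and an or feeding a single compare against zero, instead of
           two separate compare-and-branch sequences on the halves.  */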
9149 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9150 op1 is a constant and the low word is zero, then we can just
9151 examine the high word. */
9153 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
9156 case LT
: case LTU
: case GE
: case GEU
:
9157 ix86_compare_op0
= hi
[0];
9158 ix86_compare_op1
= hi
[1];
9159 ix86_expand_branch (code
, label
);
9165 /* Otherwise, we need two or three jumps. */
9167 label2
= gen_label_rtx ();
9170 code2
= swap_condition (code
);
9171 code3
= unsigned_condition (code
);
9175 case LT
: case GT
: case LTU
: case GTU
:
9178 case LE
: code1
= LT
; code2
= GT
; break;
9179 case GE
: code1
= GT
; code2
= LT
; break;
9180 case LEU
: code1
= LTU
; code2
= GTU
; break;
9181 case GEU
: code1
= GTU
; code2
= LTU
; break;
9183 case EQ
: code1
= NIL
; code2
= NE
; break;
9184 case NE
: code2
= NIL
; break;
9192 * if (hi(a) < hi(b)) goto true;
9193 * if (hi(a) > hi(b)) goto false;
9194 * if (lo(a) < lo(b)) goto true;
9198 ix86_compare_op0
= hi
[0];
9199 ix86_compare_op1
= hi
[1];
9202 ix86_expand_branch (code1
, label
);
9204 ix86_expand_branch (code2
, label2
);
9206 ix86_compare_op0
= lo
[0];
9207 ix86_compare_op1
= lo
[1];
9208 ix86_expand_branch (code3
, label
);
9211 emit_label (label2
);
9220 /* Split branch based on floating point condition. */
9222 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
9224 rtx op1
, op2
, target1
, target2
, tmp
;
9227 rtx label
= NULL_RTX
;
9229 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
9232 if (target2
!= pc_rtx
)
9235 code
= reverse_condition_maybe_unordered (code
);
9240 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
9241 tmp
, &second
, &bypass
);
9243 if (split_branch_probability
>= 0)
9245 /* Distribute the probabilities across the jumps.
9246 Assume the BYPASS and SECOND to be always test
9248 probability
= split_branch_probability
;
/* A value of 1 is low enough that the probability does not need
   to be updated.  Later we may run some experiments and see
   if unordered values are more frequent in practice.  */
9254 bypass_probability
= 1;
9256 second_probability
= 1;
9258 if (bypass
!= NULL_RTX
)
9260 label
= gen_label_rtx ();
9261 i
= emit_jump_insn (gen_rtx_SET
9263 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9265 gen_rtx_LABEL_REF (VOIDmode
,
9268 if (bypass_probability
>= 0)
9270 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9271 GEN_INT (bypass_probability
),
9274 i
= emit_jump_insn (gen_rtx_SET
9276 gen_rtx_IF_THEN_ELSE (VOIDmode
,
9277 condition
, target1
, target2
)));
9278 if (probability
>= 0)
9280 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9281 GEN_INT (probability
),
9283 if (second
!= NULL_RTX
)
9285 i
= emit_jump_insn (gen_rtx_SET
9287 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
9289 if (second_probability
>= 0)
9291 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
9292 GEN_INT (second_probability
),
9295 if (label
!= NULL_RTX
)
9300 ix86_expand_setcc (code
, dest
)
9304 rtx ret
, tmp
, tmpreg
;
9305 rtx second_test
, bypass_test
;
9307 if (GET_MODE (ix86_compare_op0
) == DImode
9309 return 0; /* FAIL */
9311 if (GET_MODE (dest
) != QImode
)
9314 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9315 PUT_MODE (ret
, QImode
);
9320 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
9321 if (bypass_test
|| second_test
)
9323 rtx test
= second_test
;
9325 rtx tmp2
= gen_reg_rtx (QImode
);
9332 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
9334 PUT_MODE (test
, QImode
);
9335 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
9338 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
9340 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
9343 return 1; /* DONE */
/* Expand a comparison setting or clearing the carry flag.  Return true when
   successful and set pop for the operation.  */
9349 ix86_expand_carry_flag_compare (code
, op0
, op1
, pop
)
9353 enum machine_mode mode
=
9354 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
/* Do not handle DImode compares that go through the special path.  Also we
   can't deal with FP compares yet.  It would be possible to add support.  */
9358 if ((mode
== DImode
&& !TARGET_64BIT
))
9360 if (FLOAT_MODE_P (mode
))
9362 rtx second_test
= NULL
, bypass_test
= NULL
;
9363 rtx compare_op
, compare_seq
;
9365 /* Shortcut: following common codes never translate into carry flag compares. */
9366 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
9367 || code
== ORDERED
|| code
== UNORDERED
)
9370 /* These comparisons require zero flag; swap operands so they won't. */
9371 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
9377 code
= swap_condition (code
);
/* Try to expand the comparison and verify that we end up with a carry-flag
   based comparison.  This fails to be true only when we decide to expand the
   comparison using arithmetic, which is not a common scenario.  */
9384 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
9385 &second_test
, &bypass_test
);
9386 compare_seq
= get_insns ();
9389 if (second_test
|| bypass_test
)
9391 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9392 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9393 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
9395 code
= GET_CODE (compare_op
);
9396 if (code
!= LTU
&& code
!= GEU
)
9398 emit_insn (compare_seq
);
9402 if (!INTEGRAL_MODE_P (mode
))
9410 /* Convert a==0 into (unsigned)a<1. */
9413 if (op1
!= const0_rtx
)
9416 code
= (code
== EQ
? LTU
: GEU
);
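/* Editorial sketch (not part of the original sources): the identities behind
   the conversions in this function, written out as plain C.  Each one turns
   the test into an unsigned "<" or ">=", i.e. into a pure carry-flag check.
   The overflow bail-out below still applies to the b+1 form.  */
#if 0
#include <assert.h>
static void
carry_flag_identities (unsigned int a, unsigned int b)
{
  /* a == 0  <=>  (unsigned) a < 1  */
  assert ((a == 0) == (a < 1u));
  /* a != 0  <=>  (unsigned) a >= 1  */
  assert ((a != 0) == (a >= 1u));
  /* a > b   <=>  a >= b + 1, provided b + 1 does not wrap around.  */
  if (b != ~0u)
    assert ((a > b) == (a >= b + 1));
}
#endif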
9419 /* Convert a>b into b<a or a>=b-1. */
9422 if (GET_CODE (op1
) == CONST_INT
)
9424 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
/* Bail out on overflow.  We could still swap the operands, but that
   would force loading the constant into a register.  */
9427 if (op1
== const0_rtx
9428 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
9430 code
= (code
== GTU
? GEU
: LTU
);
9437 code
= (code
== GTU
? LTU
: GEU
);
9441 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9444 if (mode
== DImode
|| op1
!= const0_rtx
)
9446 op1
= gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode
) - 1)), mode
);
9447 code
= (code
== LT
? GEU
: LTU
);
9451 if (mode
== DImode
|| op1
!= constm1_rtx
)
9453 op1
= gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode
) - 1)), mode
);
9454 code
= (code
== LE
? GEU
: LTU
);
9460 ix86_compare_op0
= op0
;
9461 ix86_compare_op1
= op1
;
9462 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
9463 if (GET_CODE (*pop
) != LTU
&& GET_CODE (*pop
) != GEU
)
9469 ix86_expand_int_movcc (operands
)
9472 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
9473 rtx compare_seq
, compare_op
;
9474 rtx second_test
, bypass_test
;
9475 enum machine_mode mode
= GET_MODE (operands
[0]);
9476 bool sign_bit_compare_p
= false;;
9479 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9480 compare_seq
= get_insns ();
9483 compare_code
= GET_CODE (compare_op
);
9485 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
9486 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
9487 sign_bit_compare_p
= true;
9489 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9490 HImode insns, we'd be swallowed in word prefix ops. */
9492 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
9493 && (mode
!= DImode
|| TARGET_64BIT
)
9494 && GET_CODE (operands
[2]) == CONST_INT
9495 && GET_CODE (operands
[3]) == CONST_INT
)
9497 rtx out
= operands
[0];
9498 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9499 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9503 /* Sign bit compares are better done using shifts than we do by using
9505 if (sign_bit_compare_p
9506 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
9507 ix86_compare_op1
, &compare_op
))
9509 /* Detect overlap between destination and compare sources. */
9512 if (!sign_bit_compare_p
)
9516 compare_code
= GET_CODE (compare_op
);
9518 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
9519 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
9522 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
9525 /* To simplify rest of code, restrict to the GEU case. */
9526 if (compare_code
== LTU
)
9528 HOST_WIDE_INT tmp
= ct
;
9531 compare_code
= reverse_condition (compare_code
);
9532 code
= reverse_condition (code
);
9537 PUT_CODE (compare_op
,
9538 reverse_condition_maybe_unordered
9539 (GET_CODE (compare_op
)));
9541 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
9545 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9546 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9547 tmp
= gen_reg_rtx (mode
);
9550 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
9552 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
9556 if (code
== GT
|| code
== GE
)
9557 code
= reverse_condition (code
);
9560 HOST_WIDE_INT tmp
= ct
;
9565 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
9566 ix86_compare_op1
, VOIDmode
, 0, -1);
9579 tmp
= expand_simple_binop (mode
, PLUS
,
9581 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9592 tmp
= expand_simple_binop (mode
, IOR
,
9594 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9596 else if (diff
== -1 && ct
)
9606 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9608 tmp
= expand_simple_binop (mode
, PLUS
,
9609 copy_rtx (tmp
), GEN_INT (cf
),
9610 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9618 * andl cf - ct, dest
9628 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
9631 tmp
= expand_simple_binop (mode
, AND
,
9633 gen_int_mode (cf
- ct
, mode
),
9634 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
9636 tmp
= expand_simple_binop (mode
, PLUS
,
9637 copy_rtx (tmp
), GEN_INT (ct
),
9638 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
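/* Editorial sketch (not part of the original sources): the branchless select
   built above.  The sbb-style instruction materializes a mask that is -1 when
   the condition holds and 0 otherwise; the AND/ADD pair then picks between
   the two constants without a jump.  Illustrative C only.  */
#if 0
static int
masked_select (int cond, int ct, int cf)
{
  int mask = cond ? -1 : 0;	/* what the sbb idiom produces */
  return (mask & (cf - ct)) + ct;	/* cf when mask is -1, ct otherwise */
}
#endif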
9641 if (!rtx_equal_p (tmp
, out
))
9642 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
9644 return 1; /* DONE */
9650 tmp
= ct
, ct
= cf
, cf
= tmp
;
9652 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
/* We may be reversing an unordered compare to a normal compare; that
   is not valid in general (we may convert a non-trapping condition
   to a trapping one), but on i386 we currently emit all
   comparisons unordered.  */
9658 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9659 code
= reverse_condition_maybe_unordered (code
);
9663 compare_code
= reverse_condition (compare_code
);
9664 code
= reverse_condition (code
);
9669 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9670 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9672 if (ix86_compare_op1
== const0_rtx
9673 && (code
== LT
|| code
== GE
))
9674 compare_code
= code
;
9675 else if (ix86_compare_op1
== constm1_rtx
)
9679 else if (code
== GT
)
9684 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9685 if (compare_code
!= NIL
9686 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9687 && (cf
== -1 || ct
== -1))
9689 /* If lea code below could be used, only optimize
9690 if it results in a 2 insn sequence. */
9692 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9693 || diff
== 3 || diff
== 5 || diff
== 9)
9694 || (compare_code
== LT
&& ct
== -1)
9695 || (compare_code
== GE
&& cf
== -1))
9698 * notl op1 (if necessary)
9706 code
= reverse_condition (code
);
9709 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9710 ix86_compare_op1
, VOIDmode
, 0, -1);
9712 out
= expand_simple_binop (mode
, IOR
,
9714 out
, 1, OPTAB_DIRECT
);
9715 if (out
!= operands
[0])
9716 emit_move_insn (operands
[0], out
);
9718 return 1; /* DONE */
9723 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9724 || diff
== 3 || diff
== 5 || diff
== 9)
9725 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
9726 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9732 * lea cf(dest*(ct-cf)),dest
9736 * This also catches the degenerate setcc-only case.
9742 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9743 ix86_compare_op1
, VOIDmode
, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
9749 tmp
= copy_rtx (out
);
9753 out1
= copy_rtx (out
);
9754 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9758 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9764 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9767 if (!rtx_equal_p (tmp
, out
))
9770 out
= force_operand (tmp
, copy_rtx (out
));
9772 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
9774 if (!rtx_equal_p (out
, operands
[0]))
9775 emit_move_insn (operands
[0], copy_rtx (out
));
9777 return 1; /* DONE */
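/* Editorial sketch (not part of the original sources): the setcc + lea form
   used above.  With a 0/1 flag in hand, the result is flag * (ct - cf) + cf,
   which an lea can compute in one instruction when ct - cf is one of the
   addressing-mode scales.  Illustrative C only.  */
#if 0
static int
setcc_lea_select (int flag /* 0 or 1 */, int ct, int cf)
{
  return flag * (ct - cf) + cf;	/* flag == 1 -> ct, flag == 0 -> cf */
}
#endif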
9781 * General case: Jumpful:
9782 * xorl dest,dest cmpl op1, op2
9783 * cmpl op1, op2 movl ct, dest
9785 * decl dest movl cf, dest
9786 * andl (cf-ct),dest 1:
9791 * This is reasonably steep, but branch mispredict costs are
9792 * high on modern cpus, so consider failing only if optimizing
9796 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
9797 && BRANCH_COST
>= 2)
9803 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
/* We may be reversing an unordered compare to a normal compare,
   which is not valid in general (we may convert a non-trapping
   condition to a trapping one), but on i386 we currently
   emit all comparisons unordered.  */
9808 code
= reverse_condition_maybe_unordered (code
);
9811 code
= reverse_condition (code
);
9812 if (compare_code
!= NIL
)
9813 compare_code
= reverse_condition (compare_code
);
9817 if (compare_code
!= NIL
)
9819 /* notl op1 (if needed)
9824 For x < 0 (resp. x <= -1) there will be no notl,
9825 so if possible swap the constants to get rid of the
9827 True/false will be -1/0 while code below (store flag
9828 followed by decrement) is 0/-1, so the constants need
9829 to be exchanged once more. */
9831 if (compare_code
== GE
|| !cf
)
9833 code
= reverse_condition (code
);
9838 HOST_WIDE_INT tmp
= cf
;
9843 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9844 ix86_compare_op1
, VOIDmode
, 0, -1);
9848 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9849 ix86_compare_op1
, VOIDmode
, 0, 1);
9851 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
9852 copy_rtx (out
), 1, OPTAB_DIRECT
);
9855 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
9856 gen_int_mode (cf
- ct
, mode
),
9857 copy_rtx (out
), 1, OPTAB_DIRECT
);
9859 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
9860 copy_rtx (out
), 1, OPTAB_DIRECT
);
9861 if (!rtx_equal_p (out
, operands
[0]))
9862 emit_move_insn (operands
[0], copy_rtx (out
));
9864 return 1; /* DONE */
9868 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
/* Try a few more things with specific constants and a variable.  */
9873 rtx var
, orig_out
, out
, tmp
;
9875 if (BRANCH_COST
<= 2)
9876 return 0; /* FAIL */
9878 /* If one of the two operands is an interesting constant, load a
9879 constant with the above and mask it in with a logical operation. */
9881 if (GET_CODE (operands
[2]) == CONST_INT
)
9884 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
9885 operands
[3] = constm1_rtx
, op
= and_optab
;
9886 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
9887 operands
[3] = const0_rtx
, op
= ior_optab
;
9889 return 0; /* FAIL */
9891 else if (GET_CODE (operands
[3]) == CONST_INT
)
9894 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
9895 operands
[2] = constm1_rtx
, op
= and_optab
;
9896 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
9897 operands
[2] = const0_rtx
, op
= ior_optab
;
9899 return 0; /* FAIL */
9902 return 0; /* FAIL */
9904 orig_out
= operands
[0];
9905 tmp
= gen_reg_rtx (mode
);
9908 /* Recurse to get the constant loaded. */
9909 if (ix86_expand_int_movcc (operands
) == 0)
9910 return 0; /* FAIL */
9912 /* Mask in the interesting variable. */
9913 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9915 if (!rtx_equal_p (out
, orig_out
))
9916 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
9918 return 1; /* DONE */
9922 * For comparison with above,
9932 if (! nonimmediate_operand (operands
[2], mode
))
9933 operands
[2] = force_reg (mode
, operands
[2]);
9934 if (! nonimmediate_operand (operands
[3], mode
))
9935 operands
[3] = force_reg (mode
, operands
[3]);
9937 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9939 rtx tmp
= gen_reg_rtx (mode
);
9940 emit_move_insn (tmp
, operands
[3]);
9943 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9945 rtx tmp
= gen_reg_rtx (mode
);
9946 emit_move_insn (tmp
, operands
[2]);
9950 if (! register_operand (operands
[2], VOIDmode
)
9952 || ! register_operand (operands
[3], VOIDmode
)))
9953 operands
[2] = force_reg (mode
, operands
[2]);
9956 && ! register_operand (operands
[3], VOIDmode
))
9957 operands
[3] = force_reg (mode
, operands
[3]);
9959 emit_insn (compare_seq
);
9960 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9961 gen_rtx_IF_THEN_ELSE (mode
,
9962 compare_op
, operands
[2],
9965 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9966 gen_rtx_IF_THEN_ELSE (mode
,
9968 copy_rtx (operands
[3]),
9969 copy_rtx (operands
[0]))));
9971 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
9972 gen_rtx_IF_THEN_ELSE (mode
,
9974 copy_rtx (operands
[2]),
9975 copy_rtx (operands
[0]))));
9977 return 1; /* DONE */
9981 ix86_expand_fp_movcc (operands
)
9986 rtx compare_op
, second_test
, bypass_test
;
/* For SF/DFmode conditional moves based on comparisons
   in the same mode, we may want to use SSE min/max instructions.  */
9990 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9991 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9992 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
/* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9995 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9996 /* We may be called from the post-reload splitter. */
9997 && (!REG_P (operands
[0])
9998 || SSE_REG_P (operands
[0])
9999 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
10001 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
10002 code
= GET_CODE (operands
[1]);
10004 /* See if we have (cross) match between comparison operands and
10005 conditional move operands. */
10006 if (rtx_equal_p (operands
[2], op1
))
10011 code
= reverse_condition_maybe_unordered (code
);
10013 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
10015 /* Check for min operation. */
10016 if (code
== LT
|| code
== UNLE
)
10024 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10025 if (memory_operand (op0
, VOIDmode
))
10026 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10027 if (GET_MODE (operands
[0]) == SFmode
)
10028 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
10030 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
10033 /* Check for max operation. */
10034 if (code
== GT
|| code
== UNGE
)
10042 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
10043 if (memory_operand (op0
, VOIDmode
))
10044 op0
= force_reg (GET_MODE (operands
[0]), op0
);
10045 if (GET_MODE (operands
[0]) == SFmode
)
10046 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
10048 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
/* Manage the condition to be an sse_comparison_operator.  In case we are
   in non-IEEE mode, try to canonicalize the destination operand
   to be first in the comparison - this helps reload to avoid extra
10056 if (!sse_comparison_operator (operands
[1], VOIDmode
)
10057 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
10059 rtx tmp
= ix86_compare_op0
;
10060 ix86_compare_op0
= ix86_compare_op1
;
10061 ix86_compare_op1
= tmp
;
10062 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
10063 VOIDmode
, ix86_compare_op0
,
10066 /* Similarly try to manage result to be first operand of conditional
10067 move. We also don't support the NE comparison on SSE, so try to
10069 if ((rtx_equal_p (operands
[0], operands
[3])
10070 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
10071 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
10073 rtx tmp
= operands
[2];
10074 operands
[2] = operands
[3];
10076 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10077 (GET_CODE (operands
[1])),
10078 VOIDmode
, ix86_compare_op0
,
10081 if (GET_MODE (operands
[0]) == SFmode
)
10082 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
10083 operands
[2], operands
[3],
10084 ix86_compare_op0
, ix86_compare_op1
));
10086 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
10087 operands
[2], operands
[3],
10088 ix86_compare_op0
, ix86_compare_op1
));
10092 /* The floating point conditional move instructions don't directly
10093 support conditions resulting from a signed integer comparison. */
10095 code
= GET_CODE (operands
[1]);
10096 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10098 /* The floating point conditional move instructions don't directly
10099 support signed integer comparisons. */
10101 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
10103 if (second_test
!= NULL
|| bypass_test
!= NULL
)
10105 tmp
= gen_reg_rtx (QImode
);
10106 ix86_expand_setcc (code
, tmp
);
10108 ix86_compare_op0
= tmp
;
10109 ix86_compare_op1
= const0_rtx
;
10110 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
10112 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
10114 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10115 emit_move_insn (tmp
, operands
[3]);
10118 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
10120 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
10121 emit_move_insn (tmp
, operands
[2]);
10125 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10126 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10131 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10132 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
10137 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
10138 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
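/* Editorial sketch (not part of the original sources): the idea behind using
   adc/sbb here.  An unsigned compare leaves the carry flag equal to the
   "below" condition, and adc/sbb then folds that 0/1 into the addition, so
   "x += (a < b)" needs no branch and no setcc.  Illustrative C only.  */
#if 0
static unsigned int
cond_increment (unsigned int x, unsigned int a, unsigned int b)
{
  unsigned int carry = (a < b);	/* what "cmp a, b" leaves in CF      */
  return x + carry;		/* what "adc x, 0" then computes     */
}
#endif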
10150 ix86_expand_int_addcc (operands
)
10153 enum rtx_code code
= GET_CODE (operands
[1]);
10155 rtx val
= const0_rtx
;
10156 bool fpcmp
= false;
10157 enum machine_mode mode
= GET_MODE (operands
[0]);
10159 if (operands
[3] != const1_rtx
10160 && operands
[3] != constm1_rtx
)
10162 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
10163 ix86_compare_op1
, &compare_op
))
10165 code
= GET_CODE (compare_op
);
10167 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
10168 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
10171 code
= ix86_fp_compare_code_to_integer (code
);
10178 PUT_CODE (compare_op
,
10179 reverse_condition_maybe_unordered
10180 (GET_CODE (compare_op
)));
10182 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
10184 PUT_MODE (compare_op
, mode
);
10186 /* Construct either adc or sbb insn. */
10187 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
10189 switch (GET_MODE (operands
[0]))
10192 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10195 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10198 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10201 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10209 switch (GET_MODE (operands
[0]))
10212 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
10215 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
10218 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
10221 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
10227 return 1; /* DONE */
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
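/* Editorial sketch (not part of the original sources): what splitting into
   SImode parts means for a 64-bit integer value on a 32-bit target - the low
   word comes first, the high word second.  Illustrative C only.  */
#if 0
static void
split_u64 (unsigned long long x, unsigned int parts[2])
{
  parts[0] = (unsigned int) (x & 0xffffffffu);	/* low SImode word  */
  parts[1] = (unsigned int) (x >> 32);		/* high SImode word */
}
#endif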
10237 ix86_split_to_parts (operand
, parts
, mode
)
10240 enum machine_mode mode
;
10245 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
10247 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
10249 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
10251 if (size
< 2 || size
> 3)
/* Optimize constant pool references to immediates.  This is used by fp
   moves, which force all constants to memory to allow combining.  */
10256 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
10258 rtx tmp
= maybe_get_pool_constant (operand
);
10263 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
/* The only non-offsettable memories we handle are pushes.  */
10266 if (! push_operand (operand
, VOIDmode
))
10269 operand
= copy_rtx (operand
);
10270 PUT_MODE (operand
, Pmode
);
10271 parts
[0] = parts
[1] = parts
[2] = operand
;
10273 else if (!TARGET_64BIT
)
10275 if (mode
== DImode
)
10276 split_di (&operand
, 1, &parts
[0], &parts
[1]);
10279 if (REG_P (operand
))
10281 if (!reload_completed
)
10283 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
10284 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10286 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
10288 else if (offsettable_memref_p (operand
))
10290 operand
= adjust_address (operand
, SImode
, 0);
10291 parts
[0] = operand
;
10292 parts
[1] = adjust_address (operand
, SImode
, 4);
10294 parts
[2] = adjust_address (operand
, SImode
, 8);
10296 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10301 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10306 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10307 parts
[2] = gen_int_mode (l
[2], SImode
);
10310 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
10315 parts
[1] = gen_int_mode (l
[1], SImode
);
10316 parts
[0] = gen_int_mode (l
[0], SImode
);
10324 if (mode
== TImode
)
10325 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
10326 if (mode
== XFmode
|| mode
== TFmode
)
10328 if (REG_P (operand
))
10330 if (!reload_completed
)
10332 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
10333 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
10335 else if (offsettable_memref_p (operand
))
10337 operand
= adjust_address (operand
, DImode
, 0);
10338 parts
[0] = operand
;
10339 parts
[1] = adjust_address (operand
, SImode
, 8);
10341 else if (GET_CODE (operand
) == CONST_DOUBLE
)
10346 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
10347 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
10348 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10349 if (HOST_BITS_PER_WIDE_INT
>= 64)
10352 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
10353 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
10356 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
10357 parts
[1] = gen_int_mode (l
[2], SImode
);
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
10373 ix86_split_long_move (operands
)
10379 int collisions
= 0;
10380 enum machine_mode mode
= GET_MODE (operands
[0]);
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding that fact
   here we simplify the i386.md splitters.  */
10385 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
10390 if (GET_CODE (operands
[1]) == MEM
10391 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
10392 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
10393 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
10394 if (push_operand (operands
[0], VOIDmode
))
10396 operands
[0] = copy_rtx (operands
[0]);
10397 PUT_MODE (operands
[0], Pmode
);
10400 operands
[0] = gen_lowpart (DImode
, operands
[0]);
10401 operands
[1] = gen_lowpart (DImode
, operands
[1]);
10402 emit_move_insn (operands
[0], operands
[1]);
/* The only non-offsettable memory we handle is a push.  */
10407 if (push_operand (operands
[0], VOIDmode
))
10409 else if (GET_CODE (operands
[0]) == MEM
10410 && ! offsettable_memref_p (operands
[0]))
10413 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
10414 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
/* When emitting a push, take care of source operands on the stack.  */
10417 if (push
&& GET_CODE (operands
[1]) == MEM
10418 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
10421 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
10422 XEXP (part
[1][2], 0));
10423 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
10424 XEXP (part
[1][1], 0));
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
10429 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
10431 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
10433 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10436 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
/* A collision in the middle part can be handled by reordering.  */
10440 if (collisions
== 1 && nparts
== 3
10441 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
10444 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
10445 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
10450 else if (collisions
> 1)
10453 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
10454 XEXP (part
[1][0], 0)));
10455 part
[1][0] = change_address (part
[1][0],
10456 TARGET_64BIT
? DImode
: SImode
,
10457 part
[0][nparts
- 1]);
10458 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
10460 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
/* We use only the first 12 bytes of the TFmode value, but for pushing we
   are required to adjust the stack as if we were pushing a real 16-byte
10473 if (mode
== TFmode
&& !TARGET_64BIT
)
10474 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
10476 emit_move_insn (part
[0][2], part
[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  In case this is a
   register, that is OK - we will just use the larger counterpart.  We also
   retype the memory - these come from an attempt to avoid the REX prefix on
   moving the second half of a TFmode value.  */
10485 if (GET_MODE (part
[1][1]) == SImode
)
10487 if (GET_CODE (part
[1][1]) == MEM
)
10488 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
10489 else if (REG_P (part
[1][1]))
10490 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
10493 if (GET_MODE (part
[1][0]) == SImode
)
10494 part
[1][0] = part
[1][1];
10497 emit_move_insn (part
[0][1], part
[1][1]);
10498 emit_move_insn (part
[0][0], part
[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
10503 if ((REG_P (part
[0][0])
10504 && REG_P (part
[1][1])
10505 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
10507 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
10509 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
10513 operands
[2] = part
[0][2];
10514 operands
[3] = part
[0][1];
10515 operands
[4] = part
[0][0];
10516 operands
[5] = part
[1][2];
10517 operands
[6] = part
[1][1];
10518 operands
[7] = part
[1][0];
10522 operands
[2] = part
[0][1];
10523 operands
[3] = part
[0][0];
10524 operands
[5] = part
[1][1];
10525 operands
[6] = part
[1][0];
10532 operands
[2] = part
[0][0];
10533 operands
[3] = part
[0][1];
10534 operands
[4] = part
[0][2];
10535 operands
[5] = part
[1][0];
10536 operands
[6] = part
[1][1];
10537 operands
[7] = part
[1][2];
10541 operands
[2] = part
[0][0];
10542 operands
[3] = part
[0][1];
10543 operands
[5] = part
[1][0];
10544 operands
[6] = part
[1][1];
10547 emit_move_insn (operands
[2], operands
[5]);
10548 emit_move_insn (operands
[3], operands
[6]);
10550 emit_move_insn (operands
[4], operands
[7]);
10556 ix86_split_ashldi (operands
, scratch
)
10557 rtx
*operands
, scratch
;
10559 rtx low
[2], high
[2];
10562 if (GET_CODE (operands
[2]) == CONST_INT
)
10564 split_di (operands
, 2, low
, high
);
10565 count
= INTVAL (operands
[2]) & 63;
10569 emit_move_insn (high
[0], low
[1]);
10570 emit_move_insn (low
[0], const0_rtx
);
10573 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
10577 if (!rtx_equal_p (operands
[0], operands
[1]))
10578 emit_move_insn (operands
[0], operands
[1]);
10579 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
10580 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
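/* Editorial sketch (not part of the original sources): the word-level effect
   of splitting a 64-bit left shift into 32-bit operations, matching the two
   constant-count cases handled above (count >= 32 versus count < 32).
   Illustrative C only.  */
#if 0
static void
shl64_split (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* old low word, shifted      */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* shld */
      *lo = *lo << count;				/* shl  */
    }
}
#endif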
10585 if (!rtx_equal_p (operands
[0], operands
[1]))
10586 emit_move_insn (operands
[0], operands
[1]);
10588 split_di (operands
, 1, low
, high
);
10590 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
10591 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
10593 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10595 if (! no_new_pseudos
)
10596 scratch
= force_reg (SImode
, const0_rtx
);
10598 emit_move_insn (scratch
, const0_rtx
);
10600 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
10604 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10609 ix86_split_ashrdi (operands
, scratch
)
10610 rtx
*operands
, scratch
;
10612 rtx low
[2], high
[2];
10615 if (GET_CODE (operands
[2]) == CONST_INT
)
10617 split_di (operands
, 2, low
, high
);
10618 count
= INTVAL (operands
[2]) & 63;
10622 emit_move_insn (low
[0], high
[1]);
10624 if (! reload_completed
)
10625 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10628 emit_move_insn (high
[0], low
[0]);
10629 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10633 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10637 if (!rtx_equal_p (operands
[0], operands
[1]))
10638 emit_move_insn (operands
[0], operands
[1]);
10639 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10640 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10645 if (!rtx_equal_p (operands
[0], operands
[1]))
10646 emit_move_insn (operands
[0], operands
[1]);
10648 split_di (operands
, 1, low
, high
);
10650 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10651 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10653 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10655 if (! no_new_pseudos
)
10656 scratch
= gen_reg_rtx (SImode
);
10657 emit_move_insn (scratch
, high
[0]);
10658 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10659 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10663 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10668 ix86_split_lshrdi (operands
, scratch
)
10669 rtx
*operands
, scratch
;
10671 rtx low
[2], high
[2];
10674 if (GET_CODE (operands
[2]) == CONST_INT
)
10676 split_di (operands
, 2, low
, high
);
10677 count
= INTVAL (operands
[2]) & 63;
10681 emit_move_insn (low
[0], high
[1]);
10682 emit_move_insn (high
[0], const0_rtx
);
10685 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10689 if (!rtx_equal_p (operands
[0], operands
[1]))
10690 emit_move_insn (operands
[0], operands
[1]);
10691 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10692 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10697 if (!rtx_equal_p (operands
[0], operands
[1]))
10698 emit_move_insn (operands
[0], operands
[1]);
10700 split_di (operands
, 1, low
, high
);
10702 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10703 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10705 /* Heh. By reversing the arguments, we can reuse this pattern. */
10706 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10708 if (! no_new_pseudos
)
10709 scratch
= force_reg (SImode
, const0_rtx
);
10711 emit_move_insn (scratch
, const0_rtx
);
10713 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10717 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Helper function for the string operations below.  Tests VARIABLE for whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
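/* Editorial sketch (not part of the original sources): the test this helper
   emits, as plain C.  It jumps past the fix-up code when the chosen low bit
   of VARIABLE is clear, so the single move placed before the returned label
   runs only when that alignment step is actually needed.  Illustrative only;
   the label handling is schematic.  */
#if 0
static void
aligntest_sketch (unsigned long variable, unsigned long value)
{
  if ((variable & value) == 0)
    goto aligned;		/* already aligned: skip the fix-up move */
  /* ... one strmov/strset of the corresponding size would go here ... */
 aligned:
  ;
}
#endif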
10724 ix86_expand_aligntest (variable
, value
)
10728 rtx label
= gen_label_rtx ();
10729 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10730 if (GET_MODE (variable
) == DImode
)
10731 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10733 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10734 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10739 /* Adjust COUNTER by the VALUE. */
10741 ix86_adjust_counter (countreg
, value
)
10743 HOST_WIDE_INT value
;
10745 if (GET_MODE (countreg
) == DImode
)
10746 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10748 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10751 /* Zero extend possibly SImode EXP to Pmode register. */
ix86_zero_extend_to_Pmode (exp)
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
10766 /* Expand string move (memcpy) operation. Use i386 string operations when
10767 profitable. expand_clrstr contains similar code. */
10769 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
10770 rtx dst
, src
, count_exp
, align_exp
;
10772 rtx srcreg
, destreg
, countreg
;
10773 enum machine_mode counter_mode
;
10774 HOST_WIDE_INT align
= 0;
10775 unsigned HOST_WIDE_INT count
= 0;
10779 if (GET_CODE (align_exp
) == CONST_INT
)
10780 align
= INTVAL (align_exp
);
10782 /* This simple hack avoids all inlining code and simplifies code below. */
10783 if (!TARGET_ALIGN_STRINGOPS
)
10786 if (GET_CODE (count_exp
) == CONST_INT
)
10788 count
= INTVAL (count_exp
);
10789 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
10793 /* Figure out proper mode for counter. For 32bits it is always SImode,
10794 for 64bits use SImode when possible, otherwise DImode.
10795 Set count to number of bytes copied when known at compile time. */
10796 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10797 || x86_64_zero_extended_value (count_exp
))
10798 counter_mode
= SImode
;
10800 counter_mode
= DImode
;
10804 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10807 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10808 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10810 emit_insn (gen_cld ());
/* When optimizing for size, emit a simple rep ; movsb instruction for
   counts not divisible by 4.  */
10815 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10817 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10819 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
10820 destreg
, srcreg
, countreg
));
10822 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
10823 destreg
, srcreg
, countreg
));
10826 /* For constant aligned (or small unaligned) copies use rep movsl
10827 followed by code copying the rest. For PentiumPro ensure 8 byte
10828 alignment to allow rep movsl acceleration. */
10830 else if (count
!= 0
10832 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10833 || optimize_size
|| count
< (unsigned int) 64))
10835 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10836 if (count
& ~(size
- 1))
10838 countreg
= copy_to_mode_reg (counter_mode
,
10839 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10840 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10841 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10845 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
10846 destreg
, srcreg
, countreg
));
10848 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
10849 destreg
, srcreg
, countreg
));
10852 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
10853 destreg
, srcreg
, countreg
));
10855 if (size
== 8 && (count
& 0x04))
10856 emit_insn (gen_strmovsi (destreg
, srcreg
));
10858 emit_insn (gen_strmovhi (destreg
, srcreg
));
10860 emit_insn (gen_strmovqi (destreg
, srcreg
));
10862 /* The generic code based on the glibc implementation:
10863 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10864 allowing accelerated copying there)
10865 - copy the data using rep movsl
10866 - copy the rest. */
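/* Editorial sketch (not part of the original sources): the overall shape of
   the glibc-style strategy listed above - align the destination byte by
   byte, bulk-copy with word-sized moves (the rep movsl part), then finish
   the tail.  Illustrative C only; it ignores the PentiumPro 8-byte
   preference.  */
#if 0
static void
memcpy_sketch (unsigned char *dst, const unsigned char *src, unsigned long n)
{
  /* Head: copy single bytes until the destination is 4-byte aligned.  */
  while (n && ((unsigned long) dst & 3))
    {
      *dst++ = *src++;
      n--;
    }
  /* Body: copy 4-byte words (what rep movsl does).  */
  while (n >= 4)
    {
      *(unsigned int *) dst = *(const unsigned int *) src;
      dst += 4, src += 4, n -= 4;
    }
  /* Tail: copy the remaining 0-3 bytes.  */
  while (n--)
    *dst++ = *src++;
}
#endif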
10871 int desired_alignment
= (TARGET_PENTIUMPRO
10872 && (count
== 0 || count
>= (unsigned int) 260)
10873 ? 8 : UNITS_PER_WORD
);
10875 /* In case we don't know anything about the alignment, default to
10876 library version, since it is usually equally fast and result in
10879 Also emit call when we know that the count is large and call overhead
10880 will not be important. */
10881 if (!TARGET_INLINE_ALL_STRINGOPS
10882 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
10888 if (TARGET_SINGLE_STRINGOP
)
10889 emit_insn (gen_cld ());
10891 countreg2
= gen_reg_rtx (Pmode
);
10892 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10894 /* We don't use loops to align destination and to copy parts smaller
10895 than 4 bytes, because gcc is able to optimize such code better (in
10896 the case the destination or the count really is aligned, gcc is often
10897 able to predict the branches) and also it is friendlier to the
10898 hardware branch prediction.
Using loops is beneficial for the generic case, because we can
   handle small counts using the loops.  Many CPUs (such as Athlon)
10902 have large REP prefix setup costs.
10904 This is quite costly. Maybe we can revisit this decision later or
10905 add some customizability to this code. */
10907 if (count
== 0 && align
< desired_alignment
)
10909 label
= gen_label_rtx ();
10910 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10911 LEU
, 0, counter_mode
, 1, label
);
10915 rtx label
= ix86_expand_aligntest (destreg
, 1);
10916 emit_insn (gen_strmovqi (destreg
, srcreg
));
10917 ix86_adjust_counter (countreg
, 1);
10918 emit_label (label
);
10919 LABEL_NUSES (label
) = 1;
10923 rtx label
= ix86_expand_aligntest (destreg
, 2);
10924 emit_insn (gen_strmovhi (destreg
, srcreg
));
10925 ix86_adjust_counter (countreg
, 2);
10926 emit_label (label
);
10927 LABEL_NUSES (label
) = 1;
10929 if (align
<= 4 && desired_alignment
> 4)
10931 rtx label
= ix86_expand_aligntest (destreg
, 4);
10932 emit_insn (gen_strmovsi (destreg
, srcreg
));
10933 ix86_adjust_counter (countreg
, 4);
10934 emit_label (label
);
10935 LABEL_NUSES (label
) = 1;
10938 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10940 emit_label (label
);
10941 LABEL_NUSES (label
) = 1;
10944 if (!TARGET_SINGLE_STRINGOP
)
10945 emit_insn (gen_cld ());
10948 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10950 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
10951 destreg
, srcreg
, countreg2
));
10955 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10956 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
10957 destreg
, srcreg
, countreg2
));
10962 emit_label (label
);
10963 LABEL_NUSES (label
) = 1;
10965 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10966 emit_insn (gen_strmovsi (destreg
, srcreg
));
10967 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10969 rtx label
= ix86_expand_aligntest (countreg
, 4);
10970 emit_insn (gen_strmovsi (destreg
, srcreg
));
10971 emit_label (label
);
10972 LABEL_NUSES (label
) = 1;
10974 if (align
> 2 && count
!= 0 && (count
& 2))
10975 emit_insn (gen_strmovhi (destreg
, srcreg
));
10976 if (align
<= 2 || count
== 0)
10978 rtx label
= ix86_expand_aligntest (countreg
, 2);
10979 emit_insn (gen_strmovhi (destreg
, srcreg
));
10980 emit_label (label
);
10981 LABEL_NUSES (label
) = 1;
10983 if (align
> 1 && count
!= 0 && (count
& 1))
10984 emit_insn (gen_strmovqi (destreg
, srcreg
));
10985 if (align
<= 1 || count
== 0)
10987 rtx label
= ix86_expand_aligntest (countreg
, 1);
10988 emit_insn (gen_strmovqi (destreg
, srcreg
));
10989 emit_label (label
);
10990 LABEL_NUSES (label
) = 1;
10994 insns
= get_insns ();
10997 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
11002 /* Expand string clear operation (bzero). Use i386 string operations when
11003 profitable. expand_movstr contains similar code. */
11005 ix86_expand_clrstr (src
, count_exp
, align_exp
)
11006 rtx src
, count_exp
, align_exp
;
11008 rtx destreg
, zeroreg
, countreg
;
11009 enum machine_mode counter_mode
;
11010 HOST_WIDE_INT align
= 0;
11011 unsigned HOST_WIDE_INT count
= 0;
11013 if (GET_CODE (align_exp
) == CONST_INT
)
11014 align
= INTVAL (align_exp
);
11016 /* This simple hack avoids all inlining code and simplifies code below. */
11017 if (!TARGET_ALIGN_STRINGOPS
)
11020 if (GET_CODE (count_exp
) == CONST_INT
)
11022 count
= INTVAL (count_exp
);
11023 if (!TARGET_INLINE_ALL_STRINGOPS
&& count
> 64)
11026 /* Figure out proper mode for counter. For 32bits it is always SImode,
11027 for 64bits use SImode when possible, otherwise DImode.
11028 Set count to number of bytes copied when known at compile time. */
11029 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
11030 || x86_64_zero_extended_value (count_exp
))
11031 counter_mode
= SImode
;
11033 counter_mode
= DImode
;
11035 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
11037 emit_insn (gen_cld ());
/* When optimizing for size, emit a simple rep ; movsb instruction for
   counts not divisible by 4.  */
11042 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
11044 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
11045 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
11047 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
11048 destreg
, countreg
));
11050 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
11051 destreg
, countreg
));
11053 else if (count
!= 0
11055 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
11056 || optimize_size
|| count
< (unsigned int) 64))
11058 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
11059 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
11060 if (count
& ~(size
- 1))
11062 countreg
= copy_to_mode_reg (counter_mode
,
11063 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
11064 & (TARGET_64BIT
? -1 : 0x3fffffff)));
11065 countreg
= ix86_zero_extend_to_Pmode (countreg
);
11069 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
11070 destreg
, countreg
));
11072 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
11073 destreg
, countreg
));
11076 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
11077 destreg
, countreg
));
11079 if (size
== 8 && (count
& 0x04))
11080 emit_insn (gen_strsetsi (destreg
,
11081 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11083 emit_insn (gen_strsethi (destreg
,
11084 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11086 emit_insn (gen_strsetqi (destreg
,
11087 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11093 /* Compute desired alignment of the string operation. */
11094 int desired_alignment
= (TARGET_PENTIUMPRO
11095 && (count
== 0 || count
>= (unsigned int) 260)
11096 ? 8 : UNITS_PER_WORD
);
11098 /* In case we don't know anything about the alignment, default to
11099 library version, since it is usually equally fast and result in
11102 Also emit call when we know that the count is large and call overhead
11103 will not be important. */
11104 if (!TARGET_INLINE_ALL_STRINGOPS
11105 && (align
< UNITS_PER_WORD
|| !TARGET_REP_MOVL_OPTIMAL
))
11108 if (TARGET_SINGLE_STRINGOP
)
11109 emit_insn (gen_cld ());
11111 countreg2
= gen_reg_rtx (Pmode
);
11112 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
11113 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
11115 if (count
== 0 && align
< desired_alignment
)
11117 label
= gen_label_rtx ();
11118 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
11119 LEU
, 0, counter_mode
, 1, label
);
11123 rtx label
= ix86_expand_aligntest (destreg
, 1);
11124 emit_insn (gen_strsetqi (destreg
,
11125 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11126 ix86_adjust_counter (countreg
, 1);
11127 emit_label (label
);
11128 LABEL_NUSES (label
) = 1;
11132 rtx label
= ix86_expand_aligntest (destreg
, 2);
11133 emit_insn (gen_strsethi (destreg
,
11134 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11135 ix86_adjust_counter (countreg
, 2);
11136 emit_label (label
);
11137 LABEL_NUSES (label
) = 1;
11139 if (align
<= 4 && desired_alignment
> 4)
11141 rtx label
= ix86_expand_aligntest (destreg
, 4);
11142 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
11143 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
11145 ix86_adjust_counter (countreg
, 4);
11146 emit_label (label
);
11147 LABEL_NUSES (label
) = 1;
11150 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
11152 emit_label (label
);
11153 LABEL_NUSES (label
) = 1;
11157 if (!TARGET_SINGLE_STRINGOP
)
11158 emit_insn (gen_cld ());
11161 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
11163 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
11164 destreg
, countreg2
));
11168 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
11169 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
11170 destreg
, countreg2
));
11174 emit_label (label
);
11175 LABEL_NUSES (label
) = 1;
11178 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
11179 emit_insn (gen_strsetsi (destreg
,
11180 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11181 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
11183 rtx label
= ix86_expand_aligntest (countreg
, 4);
11184 emit_insn (gen_strsetsi (destreg
,
11185 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
11186 emit_label (label
);
11187 LABEL_NUSES (label
) = 1;
11189 if (align
> 2 && count
!= 0 && (count
& 2))
11190 emit_insn (gen_strsethi (destreg
,
11191 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11192 if (align
<= 2 || count
== 0)
11194 rtx label
= ix86_expand_aligntest (countreg
, 2);
11195 emit_insn (gen_strsethi (destreg
,
11196 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
11197 emit_label (label
);
11198 LABEL_NUSES (label
) = 1;
11200 if (align
> 1 && count
!= 0 && (count
& 1))
11201 emit_insn (gen_strsetqi (destreg
,
11202 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11203 if (align
<= 1 || count
== 0)
11205 rtx label
= ix86_expand_aligntest (countreg
, 1);
11206 emit_insn (gen_strsetqi (destreg
,
11207 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
11208 emit_label (label
);
11209 LABEL_NUSES (label
) = 1;
11214 /* Expand strlen. */
11216 ix86_expand_strlen (out
, src
, eoschar
, align
)
11217 rtx out
, src
, eoschar
, align
;
11219 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
/* The generic case of the strlen expander is long.  Avoid its
   expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
11224 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
11225 && !TARGET_INLINE_ALL_STRINGOPS
11227 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
11230 addr
= force_reg (Pmode
, XEXP (src
, 0));
11231 scratch1
= gen_reg_rtx (Pmode
);
11233 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
/* Well, it seems that some optimizer does not combine a call like
   foo (strlen (bar), strlen (bar));
   when the move and the subtraction are done here.  It does calculate
   the length just once when these instructions are done inside of
   output_strlen_unroll().  But I think since &bar[strlen(bar)] is
   often used and I use one fewer register for the lifetime of
   output_strlen_unroll() this is better.  */
11244 emit_move_insn (out
, addr
);
11246 ix86_expand_strlensi_unroll_1 (out
, align
);
11248 /* strlensi_unroll_1 returns the address of the zero at the end of
11249 the string, like memchr(), so compute the length by subtracting
11250 the start address. */
11252 emit_insn (gen_subdi3 (out
, out
, addr
));
11254 emit_insn (gen_subsi3 (out
, out
, addr
));
11258 scratch2
= gen_reg_rtx (Pmode
);
11259 scratch3
= gen_reg_rtx (Pmode
);
11260 scratch4
= force_reg (Pmode
, constm1_rtx
);
11262 emit_move_insn (scratch3
, addr
);
11263 eoschar
= force_reg (QImode
, eoschar
);
11265 emit_insn (gen_cld ());
11268 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
11269 align
, scratch4
, scratch3
));
11270 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
11271 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
11275 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
11276 align
, scratch4
, scratch3
));
11277 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
11278 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
11284 /* Expand the appropriate insns for doing strlen if not just doing
11287 out = result, initialized with the start address
11288 align_rtx = alignment of the address.
scratch = scratch register, initialized with the start address when
   not aligned, otherwise undefined
11292 This is just the body. It needs the initialisations mentioned above and
11293 some address computing at the end. These things are done in i386.md. */
11296 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
11297 rtx out
, align_rtx
;
11301 rtx align_2_label
= NULL_RTX
;
11302 rtx align_3_label
= NULL_RTX
;
11303 rtx align_4_label
= gen_label_rtx ();
11304 rtx end_0_label
= gen_label_rtx ();
11306 rtx tmpreg
= gen_reg_rtx (SImode
);
11307 rtx scratch
= gen_reg_rtx (SImode
);
11311 if (GET_CODE (align_rtx
) == CONST_INT
)
11312 align
= INTVAL (align_rtx
);
11314 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11316 /* Is there a known alignment and is it less than 4? */
11319 rtx scratch1
= gen_reg_rtx (Pmode
);
11320 emit_move_insn (scratch1
, out
);
11321 /* Is there a known alignment and is it not 2? */
11324 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
11325 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
11327 /* Leave just the 3 lower bits. */
11328 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
11329 NULL_RTX
, 0, OPTAB_WIDEN
);
11331 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11332 Pmode
, 1, align_4_label
);
11333 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
11334 Pmode
, 1, align_2_label
);
11335 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
11336 Pmode
, 1, align_3_label
);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to 4 bytes.  */
11343 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
11344 NULL_RTX
, 0, OPTAB_WIDEN
);
11346 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
11347 Pmode
, 1, align_4_label
);
11350 mem
= gen_rtx_MEM (QImode
, out
);
11352 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11355 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
11356 QImode
, 1, end_0_label
);
11358 /* Increment the address. */
11360 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11362 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11364 /* Not needed with an alignment of 2 */
11367 emit_label (align_2_label
);
11369 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11373 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11375 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
11377 emit_label (align_3_label
);
11380 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
11384 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
11386 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
/* Generate the loop to check 4 bytes at a time.  It is not a good idea to
   align this loop.  It only makes programs huge, but does not help to
11392 emit_label (align_4_label
);
11394 mem
= gen_rtx_MEM (SImode
, out
);
11395 emit_move_insn (scratch
, mem
);
11397 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
11399 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
   This saves three branches inside the loop and many cycles.  */
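/* Editorial sketch (not part of the original sources): the formula built by
   the insns above, written as plain C.  The subtraction sets a byte's top
   bit when that byte wraps below zero, and the & ~x keeps only bytes whose
   top bit was clear in X; the combination is nonzero exactly when X
   contains a zero byte.  Illustrative only.  */
#if 0
static int
has_zero_byte (unsigned int x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
#endif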
11404 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
11405 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
11406 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
11407 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
11408 gen_int_mode (0x80808080, SImode
)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,

      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,

      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,

      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
  emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));

  emit_label (end_0_label);
ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
     rtx retval, fnaddr, callarg1, callarg2, pop;

  rtx use = NULL, call;

  if (pop == const0_rtx)
  if (TARGET_64BIT && pop)

  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
    fnaddr = machopic_indirect_call_target (fnaddr);

  /* Static functions and indirect calls don't need the pic register.  */
  if (! TARGET_64BIT && flag_pic
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
    use_reg (&use, pic_offset_table_rtx);

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
      rtx al = gen_rtx_REG (QImode, 0);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
#endif /* TARGET_MACHO */

  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_MEM (QImode, fnaddr);

  if (sibcall && TARGET_64BIT
      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
      fnaddr = gen_rtx_REG (Pmode, 40);
      emit_move_insn (fnaddr, addr);
      fnaddr = gen_rtx_MEM (QImode, fnaddr);

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
    call = gen_rtx_SET (VOIDmode, retval, call);

      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));

  call = emit_call_insn (call);
  CALL_INSN_FUNCTION_USAGE (call) = use;
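
/* Illustrative note (not from the sources): for a call that returns a value
   in a register and pops 8 bytes of its own arguments, the code above builds
   a PARALLEL of roughly this shape,

     (parallel [(set (reg retval)
                     (call (mem:QI fnaddr) callarg1))
                (set (reg esp) (plus (reg esp) (const_int 8)))])

   so that both the returned value and the stack adjustment are visible to
   the rest of the compiler in a single insn.  */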

/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status ()
  return ggc_alloc_cleared (sizeof (struct machine_function));

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

assign_386_stack_local (mode, n)
     enum machine_mode mode;

  if (n < 0 || n >= MAX_386_STACK_LOCALS)

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

ix86_tls_get_addr ()
  if (!ix86_tls_symbol)
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_GNU_TLS && !TARGET_64BIT)
                                            ? "___tls_get_addr"
                                            : "__tls_get_addr");

  return ix86_tls_symbol;

/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

memory_address_length (addr)
  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)

  if (! ix86_decompose_address (addr, &parts))

  index = parts.index;

  /* Register Indirect.  */
  if (base && !index && !disp)
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)

  /* Direct Addressing.  */
  else if (disp && !base && !index)

      /* Find the length of the displacement constant.  */
      if (GET_CODE (disp) == CONST_INT
          && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))

      /* An index requires the two-byte modrm form.  */
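
/* Illustrative sketch (not part of GCC): the length computed above can be
   approximated by the following plain C over an already-decomposed address.
   The struct and the small-constant test are simplified stand-ins for
   ix86_address and the 'K' constraint check, and the ebp/esp special cases
   are left out.  */
#if 0
struct simple_address { int has_base, has_index, disp_known; long disp; };

static int
simple_address_length (const struct simple_address *a)
{
  int len = 0;

  if (a->has_index)
    len += 1;                       /* an index register forces a SIB byte */

  if (!a->has_base && !a->has_index)
    len += 4;                       /* direct addressing: 32-bit displacement */
  else if (a->disp_known && a->disp != 0)
    len += (a->disp >= -128 && a->disp <= 127) ? 1 : 4;  /* disp8 or disp32 */

  return len;                       /* excludes opcode, modrm and prefixes */
}
#endif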

/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */

ix86_attr_length_immediate_default (insn, shortform)

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
            && GET_CODE (recog_data.operand[i]) == CONST_INT
            && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
        switch (get_attr_mode (insn))
            /* Immediates for DImode instructions are encoded as 32-bit
               sign-extended values.  */
            fatal_insn ("unknown insn mode", insn);

/* Compute default value for "length_address" attribute.  */

ix86_attr_length_address_default (insn)

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      return memory_address_length (XEXP (recog_data.operand[i], 0));

/* Return the maximum number of instructions a cpu can issue.  */

    case PROCESSOR_PENTIUM:

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_ATHLON:

/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
   set by DEP_INSN and reads nothing else that DEP_INSN sets.  */

ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)

  if ((set = single_set (dep_insn)) != 0)
      set = SET_DEST (set);
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))

/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;

  if (insn_type == TYPE_LEA
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      addr = SET_SRC (addr);

      extract_insn_cached (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
            addr = XEXP (recog_data.operand[i], 0);

  return modified_in_p (addr, dep_insn);

ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;

  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory, dep_memory;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
          && dep_insn_type != TYPE_FMOV
          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* There is one cycle of extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)

      /* Show the ability of the reorder buffer to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)

      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);
      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
        cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))

      /* Show the ability of the reorder buffer to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)

    case PROCESSOR_ATHLON:
      memory = get_attr_memory (insn);
      dep_memory = get_attr_memory (dep_insn);

      /* Show the ability of the reorder buffer to hide the latency of a load
         by executing it in parallel with the previous instruction when the
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
          else if (cost >= 3)

  struct ppro_sched_data
    int issued_this_cycle;

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  return PPRO_UOPS_MANY;

ix86_dump_ppro_packet (dump)
  if (ix86_sched_data.ppro.decode[0])
      fprintf (dump, "PPRO packet: %d",
               INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);

/* We're beginning a new block.  Initialize data structures as necessary.  */

ix86_sched_init (dump, sched_verbose, veclen)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int veclen ATTRIBUTE_UNUSED;
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));

/* Shift INSN to SLOT, and shift everything else down.  */

ix86_reorder_insn (insnp, slot)
    insnp[0] = insnp[1];
  while (++insnp != slot);

ix86_sched_reorder_ppro (ready, e_ready)
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
      if (cur_uops == PPRO_UOPS_MANY)
          decode[0] = *insnp;

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
          if (insnp == ready)
          cur_uops = ix86_safe_ppro_uops (*--insnp);

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
        ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
        if (ready > e_ready)

        cur_uops = ix86_safe_ppro_uops (*insnp);
        while (cur_uops != PPRO_UOPS_ONE)
            if (insnp == ready)
            cur_uops = ix86_safe_ppro_uops (*--insnp);

        /* Found one.  Move it to the head of the queue and issue it.  */
        if (cur_uops == PPRO_UOPS_ONE)
            ix86_reorder_insn (insnp, e_ready);
            decode[i] = *e_ready--;
            issued_this_cycle++;

        /* ??? Didn't find one.  Ideally, here we would do a lazy split
           of 2-uop insns, issue one and queue the other.  */

  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int clock_var ATTRIBUTE_UNUSED;

  int n_ready = *n_readyp;
  rtx *e_ready = ready + n_ready - 1;

  /* Make sure to go ahead and initialize key items in
     ix86_sched_data if we are not going to bother trying to
     reorder the ready queue.  */
      ix86_sched_data.ppro.issued_this_cycle = 1;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);

  return ix86_issue_rate ();

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     int can_issue_more;

      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
        enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

        if (uops == PPRO_UOPS_MANY)
            ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = NULL;
        else if (uops == PPRO_UOPS_FEW)
            ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            for (i = 0; i < 3; ++i)
              if (ix86_sched_data.ppro.decode[i] == NULL)
                  ix86_sched_data.ppro.decode[i] = insn;
                ix86_dump_ppro_packet (dump);
                ix86_sched_data.ppro.decode[0] = NULL;
                ix86_sched_data.ppro.decode[1] = NULL;
                ix86_sched_data.ppro.decode[2] = NULL;
      return --ix86_sched_data.ppro.issued_this_cycle;

ia32_use_dfa_pipeline_interface ()
  if (TARGET_PENTIUM || TARGET_ATHLON_K8)

/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

ia32_multipass_dfa_lookahead ()
  if (ix86_cpu == PROCESSOR_PENTIUM)

/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
   appropriate.  */

ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;

  for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
      ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,

/* Subroutine of above to actually do the updating by recursively walking
   the rtx.  */

ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
     rtx dstref, srcref, dstreg, srcreg;

  enum rtx_code code = GET_CODE (x);
  const char *format_ptr = GET_RTX_FORMAT (code);

  if (code == MEM && XEXP (x, 0) == dstreg)
    MEM_COPY_ATTRIBUTES (x, dstref);
  else if (code == MEM && XEXP (x, 0) == srcreg)
    MEM_COPY_ATTRIBUTES (x, srcref);

  for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
      if (*format_ptr == 'e')
        ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
      else if (*format_ptr == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,

/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

ix86_constant_alignment (exp, align)

  if (TREE_CODE (exp) == REAL_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

ix86_data_alignment (type, align)

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)

/* Compute the alignment for a local variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this macro is used
   instead of that alignment to align the object.  */

ix86_local_alignment (type, align)

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
          || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)

  if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
  else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
      if (TYPE_MODE (type) == XCmode && align < 128)
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

x86_initialize_trampoline (tramp, fnaddr, cxt)
     rtx tramp, fnaddr, cxt;

      /* Compute offset from the end of the jmp to the target function.  */
      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
                               plus_constant (tramp, 10),
                               NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (gen_rtx_MEM (QImode, tramp),
                      gen_int_mode (0xb9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
                      gen_int_mode (0xe9, QImode));
      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);

      /* Try to load the address using the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does not
         use trampolines at the moment.  */
      if (x86_64_zero_extended_value (fnaddr))
          fnaddr = copy_to_mode_reg (DImode, fnaddr);
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb41, HImode));
          emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
                          gen_lowpart (SImode, fnaddr));
          emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                          gen_int_mode (0xbb49, HImode));
          emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),

      /* Load the static chain using movabs to r10.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xba49, HImode));
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),

      /* Jump to r11.  */
      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
                      gen_int_mode (0xff49, HImode));
      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
                      gen_int_mode (0xe3, QImode));

      if (offset > TRAMPOLINE_SIZE)

#ifdef TRANSFER_FROM_TRAMPOLINE
  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
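
/* Illustrative sketch (not part of GCC): on 32-bit targets the code above
   fills in a 10-byte trampoline of the shape

       offset 0:  b9 xx xx xx xx    movl  $CXT, %ecx
       offset 5:  e9 xx xx xx xx    jmp   FNADDR     (DISP = FNADDR - (TRAMP + 10))

   A hypothetical byte-level rendering in plain C, assuming a little-endian
   32-bit target:  */
#if 0
static void
fill_trampoline_32 (unsigned char *tramp, unsigned long cxt, unsigned long fnaddr)
{
  unsigned long disp = fnaddr - ((unsigned long) tramp + 10);
  int i;

  tramp[0] = 0xb9;                             /* movl $imm32, %ecx */
  for (i = 0; i < 4; i++)
    tramp[1 + i] = (cxt >> (8 * i)) & 0xff;    /* CXT, little-endian */
  tramp[5] = 0xe9;                             /* jmp rel32 */
  for (i = 0; i < 4; i++)
    tramp[6 + i] = (disp >> (8 * i)) & 0xff;   /* DISP, little-endian */
}
#endif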

#define def_builtin(MASK, NAME, TYPE, CODE)			\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\

struct builtin_description
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const unsigned int flag;

/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
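
/* Illustrative note (not from this excerpt): with the macro above, a builtin
   is registered only when its ISA mask is enabled in target_flags.  A
   hypothetical registration might look like

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   and the builtin_description tables that follow are presumably used to
   perform the same registration in bulk, one def_builtin per table entry.  */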

static const struct builtin_description bdesc_comi[] =
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },

static const struct builtin_description bdesc_2arg[] =
  { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
  { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
  { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
  { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }

static const struct builtin_description bdesc_1arg[] =
  { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
  { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
  { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }

ix86_init_builtins ()
    ix86_init_mmx_sse_builtins ();

/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   builtins.  */

ix86_init_mmx_sse_builtins ()
12847 const struct builtin_description
* d
;
12850 tree pchar_type_node
= build_pointer_type (char_type_node
);
12851 tree pcchar_type_node
= build_pointer_type (
12852 build_type_variant (char_type_node
, 1, 0));
12853 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12854 tree pcfloat_type_node
= build_pointer_type (
12855 build_type_variant (float_type_node
, 1, 0));
12856 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12857 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12858 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12861 tree int_ftype_v4sf_v4sf
12862 = build_function_type_list (integer_type_node
,
12863 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12864 tree v4si_ftype_v4sf_v4sf
12865 = build_function_type_list (V4SI_type_node
,
12866 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12867 /* MMX/SSE/integer conversions. */
12868 tree int_ftype_v4sf
12869 = build_function_type_list (integer_type_node
,
12870 V4SF_type_node
, NULL_TREE
);
12871 tree int_ftype_v8qi
12872 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12873 tree v4sf_ftype_v4sf_int
12874 = build_function_type_list (V4SF_type_node
,
12875 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12876 tree v4sf_ftype_v4sf_v2si
12877 = build_function_type_list (V4SF_type_node
,
12878 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12879 tree int_ftype_v4hi_int
12880 = build_function_type_list (integer_type_node
,
12881 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12882 tree v4hi_ftype_v4hi_int_int
12883 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12884 integer_type_node
, integer_type_node
,
12886 /* Miscellaneous. */
12887 tree v8qi_ftype_v4hi_v4hi
12888 = build_function_type_list (V8QI_type_node
,
12889 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12890 tree v4hi_ftype_v2si_v2si
12891 = build_function_type_list (V4HI_type_node
,
12892 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12893 tree v4sf_ftype_v4sf_v4sf_int
12894 = build_function_type_list (V4SF_type_node
,
12895 V4SF_type_node
, V4SF_type_node
,
12896 integer_type_node
, NULL_TREE
);
12897 tree v2si_ftype_v4hi_v4hi
12898 = build_function_type_list (V2SI_type_node
,
12899 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12900 tree v4hi_ftype_v4hi_int
12901 = build_function_type_list (V4HI_type_node
,
12902 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12903 tree v4hi_ftype_v4hi_di
12904 = build_function_type_list (V4HI_type_node
,
12905 V4HI_type_node
, long_long_unsigned_type_node
,
12907 tree v2si_ftype_v2si_di
12908 = build_function_type_list (V2SI_type_node
,
12909 V2SI_type_node
, long_long_unsigned_type_node
,
12911 tree void_ftype_void
12912 = build_function_type (void_type_node
, void_list_node
);
12913 tree void_ftype_unsigned
12914 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12915 tree unsigned_ftype_void
12916 = build_function_type (unsigned_type_node
, void_list_node
);
12918 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12919 tree v4sf_ftype_void
12920 = build_function_type (V4SF_type_node
, void_list_node
);
12921 tree v2si_ftype_v4sf
12922 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12923 /* Loads/stores. */
12924 tree void_ftype_v8qi_v8qi_pchar
12925 = build_function_type_list (void_type_node
,
12926 V8QI_type_node
, V8QI_type_node
,
12927 pchar_type_node
, NULL_TREE
);
12928 tree v4sf_ftype_pcfloat
12929 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
12930 /* @@@ the type is bogus */
12931 tree v4sf_ftype_v4sf_pv2si
12932 = build_function_type_list (V4SF_type_node
,
12933 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12934 tree void_ftype_pv2si_v4sf
12935 = build_function_type_list (void_type_node
,
12936 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12937 tree void_ftype_pfloat_v4sf
12938 = build_function_type_list (void_type_node
,
12939 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12940 tree void_ftype_pdi_di
12941 = build_function_type_list (void_type_node
,
12942 pdi_type_node
, long_long_unsigned_type_node
,
12944 tree void_ftype_pv2di_v2di
12945 = build_function_type_list (void_type_node
,
12946 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12947 /* Normal vector unops. */
12948 tree v4sf_ftype_v4sf
12949 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12951 /* Normal vector binops. */
12952 tree v4sf_ftype_v4sf_v4sf
12953 = build_function_type_list (V4SF_type_node
,
12954 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12955 tree v8qi_ftype_v8qi_v8qi
12956 = build_function_type_list (V8QI_type_node
,
12957 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12958 tree v4hi_ftype_v4hi_v4hi
12959 = build_function_type_list (V4HI_type_node
,
12960 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12961 tree v2si_ftype_v2si_v2si
12962 = build_function_type_list (V2SI_type_node
,
12963 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12964 tree di_ftype_di_di
12965 = build_function_type_list (long_long_unsigned_type_node
,
12966 long_long_unsigned_type_node
,
12967 long_long_unsigned_type_node
, NULL_TREE
);
12969 tree v2si_ftype_v2sf
12970 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12971 tree v2sf_ftype_v2si
12972 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12973 tree v2si_ftype_v2si
12974 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12975 tree v2sf_ftype_v2sf
12976 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12977 tree v2sf_ftype_v2sf_v2sf
12978 = build_function_type_list (V2SF_type_node
,
12979 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12980 tree v2si_ftype_v2sf_v2sf
12981 = build_function_type_list (V2SI_type_node
,
12982 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12983 tree pint_type_node
= build_pointer_type (integer_type_node
);
12984 tree pcint_type_node
= build_pointer_type (
12985 build_type_variant (integer_type_node
, 1, 0));
12986 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12987 tree pcdouble_type_node
= build_pointer_type (
12988 build_type_variant (double_type_node
, 1, 0));
12989 tree int_ftype_v2df_v2df
12990 = build_function_type_list (integer_type_node
,
12991 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
				intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
				long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
				V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
				V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node,
				integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
				pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
				pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
				pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
				V16QI_type_node, V16QI_type_node,
				pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
				V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
				double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node,
				integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
				V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
				V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
				V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
				V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
				V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
				V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
				pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
				pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
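
  /* Illustrative note (added commentary, not from the original source): the
     _ftype_ naming convention above spells out each builtin prototype with
     the return type first, so v4sf_ftype_v4sf_v4sf corresponds roughly to a
     user-level declaration such as

	 v4sf __builtin_fn (v4sf a, v4sf b);

     built from the vector type nodes created earlier in this function.  */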
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V16QImode:
	  type = v16qi_ftype_v16qi_v16qi;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi_v8hi;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si_v4si;
	  break;
	case V2DImode:
	  type = v2di_ftype_v2di_v2di;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df_v2df;
	  break;
	case TImode:
	  type = ti_ftype_ti_ti;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf_v4sf;
	  break;
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  abort ();
	}

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
	  || d->icode == CODE_FOR_maskncmpv4sf3
	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
	type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
	  || d->icode == CODE_FOR_maskncmpv2df3
	  || d->icode == CODE_FOR_vmmaskcmpv2df3
	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
	type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
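
  /* Orientation sketch (an assumption for illustration, not copied from this
     file): a bdesc_2arg entry pairs a builtin with the insn pattern that
     implements it, roughly

	 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
	   IX86_BUILTIN_ADDPS, 0, 0 },

     so the loop above only needs to pick the function type whose operand
     modes match the named pattern.  */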
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
}
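
/* Usage sketch (illustrative only, not part of the original source): once
   registered above, a builtin such as __builtin_ia32_addps is callable from
   user code, usually through the wrappers in xmmintrin.h, e.g.

       #include <xmmintrin.h>

       __m128 add (__m128 a, __m128 b)
       {
         return _mm_add_ps (a, b);
       }

   where _mm_add_ps is typically a thin wrapper around __builtin_ia32_addps;
   ix86_expand_builtin below turns each such call into the matching insn.  */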
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (x, mode)
     rtx x;
     enum machine_mode mode;
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
			      : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
				: gen_rtx_SUBREG (V4SFmode, x, 0),
				CONST0_RTX (V4SFmode)));
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
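
/* Worked example (an illustrative assumption, not additional code in this
   file): for a call to __builtin_ia32_addps, the bdesc_2arg walk at the end
   of ix86_expand_builtin passes CODE_FOR_addv4sf3 to the routine above, so
   the GEN_FCN call amounts to something like

       pat = gen_addv4sf3 (target, op0, op1);

   i.e. a single V4SFmode add insn whose operands have been forced into
   forms accepted by the pattern's predicates.  */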
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (icode, arglist)
     enum insn_code icode;
     tree arglist;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));

  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (icode, arglist, target, do_load)
     enum insn_code icode;
     tree arglist;
     rtx target;
     int do_load;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (icode, arglist, target)
     enum insn_code icode;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (d, arglist, target)
     const struct builtin_description *d;
     tree arglist;
     rtx target;
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
	       ? CODE_FOR_mmx_pextrw
	       : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
	       ? CODE_FOR_mmx_pinsrw
	       : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("selector must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
		  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
	       : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
	       ? CODE_FOR_sse_shufps
	       : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return gen_reg_rtx (tmode);
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
	       : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  /* @@@ better error message */
	  error ("mask must be an immediate");
	  return const0_rtx;
	}
      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
	       : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
	{
	  op0 = copy_to_reg (op0);
	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
	}
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
	{
	  error ("shift must be an immediate");
	  return const0_rtx;
	}
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
					 gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);

    default:
      break;
    }

  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
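
/* Usage sketch (illustrative, not from the original source): callers pair
   the two helpers above around a temporary stack slot created after reload,
   roughly

       rtx mem = ix86_force_to_memory (DImode, operand);
       ...emit insns that read MEM...
       ix86_free_from_memory (DImode);

   On 64-bit targets with a red zone the store goes below the stack pointer
   and no deallocation is needed, which is why ix86_free_from_memory checks
   TARGET_RED_ZONE first.  */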
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	      && (mode) != SImode)
	  || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
	      && (mode) != SImode));
}
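
/* Worked example (added commentary, not code from this file): a DFmode copy
   between FLOAT_REGS and SSE_REGS makes the function above return nonzero,
   so the allocator must spill the value through memory; an SImode copy
   between SSE_REGS and GENERAL_REGS returns zero because the
   "(mode) != SImode" tests reject that case, and such moves may stay in
   registers.  */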
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
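/* Illustrative sketch (standalone, hypothetical cost numbers, not part of
   the original file): when a copy has to go through memory, the model
   above charges the worse of load and store for each of the two classes,
   so the result never undercuts the corresponding MEMORY_MOVE_COSTs.  */
#if 0
static int
sketch_through_memory_cost (int load1, int store1, int load2, int store2)
{
  int cost = 1;                               /* base copy cost */
  cost += load1 > store1 ? load1 : store1;    /* MAX for CLASS1 */
  cost += load2 > store2 ? load2 : store2;    /* MAX for CLASS2 */
  return cost;
}
#endif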
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */

int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integer and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care for QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
              : ix86_cost->movzbl_load) * in
             + (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4) * !in;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * ((int) GET_MODE_SIZE (mode)
                 + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
}
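/* Illustrative sketch (standalone, not part of the original file): for
   wide integer modes the cost above is the per-word load/store cost
   multiplied by the number of word-sized moves, i.e. the mode size
   rounded up to whole words.  */
#if 0
static int
sketch_wide_int_move_cost (int per_word_cost, int mode_size, int word_size)
{
  int words = (mode_size + word_size - 1) / word_size;   /* round up */
  return per_word_cost * words;
}
#endif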
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (x, code, outer_code, total)
     rtx x;
     int code, outer_code;
     int *total;
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          case 2: /* 1.0 */
            *total = 2;
            break;
          default:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0 && !TARGET_64BIT)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    default:
      return false;
    }
}
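/* Illustrative sketch (standalone, not part of the original file): the
   MULT case above charges mult_init plus mult_bit for each iteration of
   the shift loop over a constant multiplier, i.e. once for every bit up
   to and including its highest set bit.  A standalone equivalent of that
   count, assuming the loop body simply increments nbits, is:  */
#if 0
static int
sketch_multiplier_bit_count (unsigned long long value)
{
  int nbits = 0;
  for (; value != 0; value >>= 1)   /* one iteration per remaining bit */
    nbits++;
  return nbits;
}
#endif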
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}

#endif /* TARGET_MACHO */
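/* Illustrative note (hypothetical names, not part of the original file):
   for a symbol "_foo" with stub label "L_foo$stub", binder "L_foo$binder"
   and label number 1, the non-PIC branch above prints roughly

       L_foo$stub:
               .indirect_symbol _foo
               jmp *L1$lz
       L_foo$binder:
               pushl $L1$lz
               jmp dyld_stub_binding_helper
       L1$lz:
               .indirect_symbol _foo
               .long L_foo$binder

   i.e. the stub jumps through the lazy pointer, which initially points at
   the binder so that the first call resolves the symbol through dyld.  */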
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
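/* Summary comment added for clarity (not in the original sources): the
   resulting allocation order is caller-saved general registers first,
   then callee-saved general registers, then the x87 stack registers when
   they carry the FP math, then SSE and REX SSE registers, then the x87
   registers otherwise, and finally MMX; remaining slots are zero-filled
   because those registers are never allocated.  */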
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (record_type)
     tree record_type;
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
          !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (function)
     tree function;
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If not, the this parameter is in %eax.  */
      if (parm)
        return gen_rtx_REG (SImode, 0);
    }

  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = XEXP (xops[0], 0);
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
          tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%0", xops);
        }
      else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
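/* Illustrative sketch (standalone C with hypothetical names, not the
   emitted assembly): the thunk produced above is semantically equivalent
   to adjusting `this' by DELTA, optionally by the word found at
   *(*this + VCALL_OFFSET), and then tail-jumping to the real function.  */
#if 0
static void *
sketch_thunk (void *this_ptr)
{
  this_ptr = (char *) this_ptr + DELTA;
  if (VCALL_OFFSET != 0)
    {
      char *vtable = *(char **) this_ptr;
      this_ptr = (char *) this_ptr + *(long *) (vtable + VCALL_OFFSET);
    }
  return sketch_real_function (this_ptr);    /* really a sibling jump */
}
#endif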
int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
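/* Illustrative sketch (hypothetical struct, not part of the original
   file): because DFmode fields are capped at 32-bit alignment here, a
   layout such as the one below places the double at offset 4 on ia32
   unless -malign-double is in effect.  */
#if 0
struct sketch_layout
{
  int a;       /* offset 0 */
  double b;    /* offset 4 with the 32-bit cap, offset 8 with -malign-double */
};
#endif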
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (file, labelno)
     FILE *file;
     int labelno ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    {
      if (flag_pic)
        {
#ifndef NO_PROFILE_COUNTERS
          fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
          fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
        }
      else
        {
#ifndef NO_PROFILE_COUNTERS
          fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
          fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
        }
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* Implement machine specific optimizations.
   At the moment we implement single transformation: AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */

void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              insert = 1;
        }
      if (!insert)
        {
          prev = prev_active_insn (ret);
          if (prev && GET_CODE (prev) == JUMP_INSN
              && any_condjump_p (prev))
            insert = 1;
          /* Empty functions get branch misspredict even when the jump destination
             is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            insert = 1;
        }
      if (insert)
        emit_insn_before (gen_nop (), ret);
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (p, data)
     rtx *p;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (insn)
     rtx insn;
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
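/* Illustrative sketch (hypothetical predicate, not part of the original
   file): for_each_rtx walks every sub-rtx of the pattern and stops as soon
   as the callback returns nonzero, so the same idiom used above can test
   any property of an insn, e.g. whether it mentions a MEM.  */
#if 0
static int
sketch_mentions_mem_1 (p, data)
     rtx *p;
     void *data ATTRIBUTE_UNUSED;
{
  return GET_CODE (*p) == MEM;
}

static bool
sketch_mentions_mem_p (insn)
     rtx insn;
{
  return for_each_rtx (&PATTERN (insn), sketch_mentions_mem_1, NULL);
}
#endif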
#include "gt-i386.h"