1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
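/* Illustrative use (a sketch, not code from this file): the per-mode
   multiply and divide cost arrays in struct processor_costs are indexed
   through MODE_INDEX, roughly

     k6_cost.mult_init[MODE_INDEX (SImode)]   -> 3
     athlon_cost.divide[MODE_INDEX (DImode)]  -> 74

   The field names mult_init and divide follow the processor_costs layout
   declared in i386.h and are assumptions here, shown only to illustrate
   the indexing.  */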
62 /* Processor costs (relative to an add) */
63 static const
 64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
 84 {2, 2, 2}, /* cost of storing fp registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
98 1, /* Branch cost */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
120 3, /* MOVE_RATIO */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
 129 {8, 8, 8}, /* cost of storing fp registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
143 1, /* Branch cost */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
152 static const
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
164 3, /* MOVE_RATIO */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
 173 {8, 8, 8}, /* cost of storing fp registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
187 1, /* Branch cost */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
196 static const
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
208 6, /* MOVE_RATIO */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
 217 {4, 4, 6}, /* cost of storing fp registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
231 2, /* Branch cost */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
240 static const
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
 261 {4, 4, 6}, /* cost of storing fp registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
275 2, /* Branch cost */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
284 static const
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
296 4, /* MOVE_RATIO */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
 305 {4, 4, 4}, /* cost of storing fp registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
319 1, /* Branch cost */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
328 static const
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
340 9, /* MOVE_RATIO */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
 349 {6, 6, 8}, /* cost of storing fp registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
363 2, /* Branch cost */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
372 static const
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
 393 {6, 6, 8}, /* cost of storing fp registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
416 static const
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
 437 {4, 4, 6}, /* cost of storing fp registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
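/* Note: &pentium_cost here is only an initial value; override_options
   below re-points ix86_cost at size_cost when optimizing for size, or at
   the processor_target_table entry selected by -mtune.  */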
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
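/* How these masks are consumed (a sketch; the real macros live in
   i386.h): each x86_* word above is tested against the bit of the CPU
   currently being tuned for, in the same style used later in this file,
   e.g.

     if (x86_accumulate_outgoing_args & (1 << ix86_tune))
       ... enable -maccumulate-outgoing-args by default ...

   where (1 << ix86_tune) is presumably what the TUNEMASK shorthand used
   further down expands to.  */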
 528 /* If the average insn count for a single function invocation is
 529 lower than this constant, emit fast (but longer) prologue and
 530 epilogue code. */
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
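/* For illustration: with the ordering below, regclass_map[0] is AREG
   (%eax), regclass_map[1] is DREG (%edx), and regclass_map[7] is
   NON_Q_REGS (%esp); REGNO_REG_CLASS in i386.h is presumably just a
   lookup into this table.  */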
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
543 /* ax, dx, cx, bx */
544 AREG, DREG, CREG, BREG,
545 /* si, di, bp, sp */
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
547 /* FP registers */
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
550 /* arg pointer */
551 NON_Q_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
555 SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
557 MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
561 SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
 585 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
644 numbers.
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
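/* A worked example, assuming the usual 64-bit values from i386.h
   (REGPARM_MAX == 6, SSE_REGPARM_MAX == 8, UNITS_PER_WORD == 8):
   X86_64_VARARGS_SIZE == 6 * 8 + 8 * 16 == 176 bytes, matching the
   register save area the x86-64 psABI reserves for varargs functions.  */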
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
679 unsigned short mode;
680 unsigned short n;
681 rtx rtl;
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
688 [arguments]
689 <- ARG_POINTER
690 saved pc
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
694 [saved regs]
696 [padding1] \
698 [va_arg registers] (
699 > to_allocate <- FRAME_POINTER
700 [frame] (
702 [padding2] /
704 struct ix86_frame
706 int nregs;
707 int padding1;
708 int va_arg_size;
709 HOST_WIDE_INT frame;
710 int padding2;
711 int outgoing_arguments_size;
712 int red_zone_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
729 /* Parsed value. */
730 enum cmodel ix86_cmodel;
731 /* Asm dialect. */
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 734 /* TLS dialect. */
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
758 int ix86_regparm;
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
786 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
787 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
788 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
789 int, int, FILE *));
790 static const char *get_some_local_dynamic_name PARAMS ((void));
791 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
792 static rtx maybe_get_pool_constant PARAMS ((rtx));
793 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
794 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
795 rtx *, rtx *));
796 static rtx get_thread_pointer PARAMS ((int));
797 static rtx legitimize_tls_address PARAMS ((rtx, enum tls_model, int));
798 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
799 static rtx gen_push PARAMS ((rtx));
800 static int memory_address_length PARAMS ((rtx addr));
801 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
802 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
803 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
804 static void ix86_dump_ppro_packet PARAMS ((FILE *));
805 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
806 static struct machine_function * ix86_init_machine_status PARAMS ((void));
807 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
808 static int ix86_nsaved_regs PARAMS ((void));
809 static void ix86_emit_save_regs PARAMS ((void));
810 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
811 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
812 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
813 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
814 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
815 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
816 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
817 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
818 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
819 static int ix86_issue_rate PARAMS ((void));
820 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
821 static void ix86_sched_init PARAMS ((FILE *, int, int));
822 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
823 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
824 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
825 static int ia32_multipass_dfa_lookahead PARAMS ((void));
826 static void ix86_init_mmx_sse_builtins PARAMS ((void));
827 static rtx x86_this_parameter PARAMS ((tree));
828 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree));
830 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static void x86_file_start PARAMS ((void));
833 static void ix86_reorg PARAMS ((void));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
836 struct ix86_address
838 rtx base, index, disp;
839 HOST_WIDE_INT scale;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
850 tree, rtx));
851 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
852 tree, rtx));
853 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
854 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
855 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
856 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
857 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
858 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
859 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
860 enum rtx_code *,
861 enum rtx_code *,
862 enum rtx_code *));
863 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
864 rtx *, rtx *));
865 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
866 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
867 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
869 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
870 static int ix86_save_reg PARAMS ((unsigned int, int));
871 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
872 static int ix86_comp_type_attributes PARAMS ((tree, tree));
873 static int ix86_fntype_regparm PARAMS ((tree));
874 const struct attribute_spec ix86_attribute_table[];
875 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
876 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
877 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
878 static int ix86_value_regno PARAMS ((enum machine_mode));
879 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
880 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
881 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
883 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
884 static int min_insn_size PARAMS ((rtx));
885 static void k8_avoid_jump_misspredicts PARAMS ((void));
887 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
888 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
889 #endif
 891 /* Register class used for passing a given 64-bit part of the argument.
 892 These represent classes as documented by the psABI, with the exception
 893 of the SSESF and SSEDF classes, which are basically the SSE class, except that
 894 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
 896 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 897 whenever possible (the upper half then contains only padding). */
899 enum x86_64_reg_class
901 X86_64_NO_CLASS,
902 X86_64_INTEGER_CLASS,
903 X86_64_INTEGERSI_CLASS,
904 X86_64_SSE_CLASS,
905 X86_64_SSESF_CLASS,
906 X86_64_SSEDF_CLASS,
907 X86_64_SSEUP_CLASS,
908 X86_64_X87_CLASS,
909 X86_64_X87UP_CLASS,
910 X86_64_MEMORY_CLASS
912 static const char * const x86_64_reg_class_name[] =
913 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
915 #define MAX_CLASSES 4
916 static int classify_argument PARAMS ((enum machine_mode, tree,
917 enum x86_64_reg_class [MAX_CLASSES],
918 int));
919 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
920 int *));
921 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
922 const int *, int));
923 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
924 enum x86_64_reg_class));
926 /* Table of constants used by fldpi, fldln2, etc... */
927 static REAL_VALUE_TYPE ext_80387_constants_table [5];
928 static bool ext_80387_constants_init = 0;
929 static void init_ext_80387_constants PARAMS ((void));
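/* For reference: the five constants in question are the ones loadable by
   the x87 fldlg2, fldln2, fldl2e, fldl2t and fldpi instructions, i.e.
   log10(2), ln(2), log2(e), log2(10) and pi; see init_ext_80387_constants
   for the table's actual contents and ordering.  */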
931 /* Initialize the GCC target structure. */
932 #undef TARGET_ATTRIBUTE_TABLE
933 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
934 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
935 # undef TARGET_MERGE_DECL_ATTRIBUTES
936 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
937 #endif
939 #undef TARGET_COMP_TYPE_ATTRIBUTES
940 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
942 #undef TARGET_INIT_BUILTINS
943 #define TARGET_INIT_BUILTINS ix86_init_builtins
945 #undef TARGET_EXPAND_BUILTIN
946 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
948 #undef TARGET_ASM_FUNCTION_EPILOGUE
949 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
951 #undef TARGET_ASM_OPEN_PAREN
952 #define TARGET_ASM_OPEN_PAREN ""
953 #undef TARGET_ASM_CLOSE_PAREN
954 #define TARGET_ASM_CLOSE_PAREN ""
956 #undef TARGET_ASM_ALIGNED_HI_OP
957 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
958 #undef TARGET_ASM_ALIGNED_SI_OP
959 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
960 #ifdef ASM_QUAD
961 #undef TARGET_ASM_ALIGNED_DI_OP
962 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
963 #endif
965 #undef TARGET_ASM_UNALIGNED_HI_OP
966 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
967 #undef TARGET_ASM_UNALIGNED_SI_OP
968 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
969 #undef TARGET_ASM_UNALIGNED_DI_OP
970 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
972 #undef TARGET_SCHED_ADJUST_COST
973 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
974 #undef TARGET_SCHED_ISSUE_RATE
975 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
976 #undef TARGET_SCHED_VARIABLE_ISSUE
977 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
978 #undef TARGET_SCHED_INIT
979 #define TARGET_SCHED_INIT ix86_sched_init
980 #undef TARGET_SCHED_REORDER
981 #define TARGET_SCHED_REORDER ix86_sched_reorder
982 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
983 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
984 ia32_use_dfa_pipeline_interface
985 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
986 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
987 ia32_multipass_dfa_lookahead
989 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
990 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
992 #ifdef HAVE_AS_TLS
993 #undef TARGET_HAVE_TLS
994 #define TARGET_HAVE_TLS true
995 #endif
996 #undef TARGET_CANNOT_FORCE_CONST_MEM
997 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
999 #undef TARGET_DELEGITIMIZE_ADDRESS
1000 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1002 #undef TARGET_MS_BITFIELD_LAYOUT_P
1003 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1005 #undef TARGET_ASM_OUTPUT_MI_THUNK
1006 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1007 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1008 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1010 #undef TARGET_ASM_FILE_START
1011 #define TARGET_ASM_FILE_START x86_file_start
1013 #undef TARGET_RTX_COSTS
1014 #define TARGET_RTX_COSTS ix86_rtx_costs
1015 #undef TARGET_ADDRESS_COST
1016 #define TARGET_ADDRESS_COST ix86_address_cost
1018 #undef TARGET_MACHINE_DEPENDENT_REORG
1019 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1021 struct gcc_target targetm = TARGET_INITIALIZER;
1023 /* The svr4 ABI for the i386 says that records and unions are returned
1024 in memory. */
1025 #ifndef DEFAULT_PCC_STRUCT_RETURN
1026 #define DEFAULT_PCC_STRUCT_RETURN 1
1027 #endif
1029 /* Sometimes certain combinations of command options do not make
1030 sense on a particular target machine. You can define a macro
1031 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1032 defined, is executed once just after all the command options have
1033 been parsed.
1035 Don't use this macro to turn on various extra optimizations for
1036 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1038 void
1039 override_options ()
1041 int i;
1042 /* Comes from final.c -- no real reason to change it. */
1043 #define MAX_CODE_ALIGN 16
1045 static struct ptt
1047 const struct processor_costs *cost; /* Processor costs */
1048 const int target_enable; /* Target flags to enable. */
1049 const int target_disable; /* Target flags to disable. */
1050 const int align_loop; /* Default alignments. */
1051 const int align_loop_max_skip;
1052 const int align_jump;
1053 const int align_jump_max_skip;
1054 const int align_func;
1056 const processor_target_table[PROCESSOR_max] =
1058 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1059 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1060 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1061 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1062 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1063 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1064 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1065 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1068 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1069 static struct pta
1071 const char *const name; /* processor name or nickname. */
1072 const enum processor_type processor;
1073 const enum pta_flags
1075 PTA_SSE = 1,
1076 PTA_SSE2 = 2,
1077 PTA_MMX = 4,
1078 PTA_PREFETCH_SSE = 8,
1079 PTA_3DNOW = 16,
1080 PTA_3DNOW_A = 64,
1081 PTA_64BIT = 128
1082 } flags;
1084 const processor_alias_table[] =
1086 {"i386", PROCESSOR_I386, 0},
1087 {"i486", PROCESSOR_I486, 0},
1088 {"i586", PROCESSOR_PENTIUM, 0},
1089 {"pentium", PROCESSOR_PENTIUM, 0},
1090 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1091 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1092 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1093 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1094 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1095 {"i686", PROCESSOR_PENTIUMPRO, 0},
1096 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1097 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1098 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1099 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1100 PTA_MMX | PTA_PREFETCH_SSE},
1101 {"k6", PROCESSOR_K6, PTA_MMX},
1102 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1103 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1104 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A},
1106 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1107 | PTA_3DNOW | PTA_3DNOW_A},
1108 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1113 | PTA_3DNOW_A | PTA_SSE},
1114 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1115 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1118 int const pta_size = ARRAY_SIZE (processor_alias_table);
 1120 /* By default our XFmode is the 80-bit extended format. If we use
 1121 TFmode instead, it's also the 80-bit format, but with padding. */
1122 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1123 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1125 /* Set the default values for switches whose default depends on TARGET_64BIT
1126 in case they weren't overwritten by command line options. */
1127 if (TARGET_64BIT)
1129 if (flag_omit_frame_pointer == 2)
1130 flag_omit_frame_pointer = 1;
1131 if (flag_asynchronous_unwind_tables == 2)
1132 flag_asynchronous_unwind_tables = 1;
1133 if (flag_pcc_struct_return == 2)
1134 flag_pcc_struct_return = 0;
1136 else
1138 if (flag_omit_frame_pointer == 2)
1139 flag_omit_frame_pointer = 0;
1140 if (flag_asynchronous_unwind_tables == 2)
1141 flag_asynchronous_unwind_tables = 0;
1142 if (flag_pcc_struct_return == 2)
1143 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1146 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1147 SUBTARGET_OVERRIDE_OPTIONS;
1148 #endif
1150 if (!ix86_tune_string && ix86_arch_string)
1151 ix86_tune_string = ix86_arch_string;
1152 if (!ix86_tune_string)
1153 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1154 if (!ix86_arch_string)
1155 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1157 if (ix86_cmodel_string != 0)
1159 if (!strcmp (ix86_cmodel_string, "small"))
1160 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1161 else if (flag_pic)
1162 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1163 else if (!strcmp (ix86_cmodel_string, "32"))
1164 ix86_cmodel = CM_32;
1165 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1166 ix86_cmodel = CM_KERNEL;
1167 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1168 ix86_cmodel = CM_MEDIUM;
1169 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1170 ix86_cmodel = CM_LARGE;
1171 else
1172 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1174 else
1176 ix86_cmodel = CM_32;
1177 if (TARGET_64BIT)
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1180 if (ix86_asm_string != 0)
1182 if (!strcmp (ix86_asm_string, "intel"))
1183 ix86_asm_dialect = ASM_INTEL;
1184 else if (!strcmp (ix86_asm_string, "att"))
1185 ix86_asm_dialect = ASM_ATT;
1186 else
1187 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1189 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1190 error ("code model `%s' not supported in the %s bit mode",
1191 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1192 if (ix86_cmodel == CM_LARGE)
1193 sorry ("code model `large' not supported yet");
1194 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1195 sorry ("%i-bit mode not compiled in",
1196 (target_flags & MASK_64BIT) ? 64 : 32);
1198 for (i = 0; i < pta_size; i++)
1199 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1201 ix86_arch = processor_alias_table[i].processor;
1202 /* Default cpu tuning to the architecture. */
1203 ix86_tune = ix86_arch;
1204 if (processor_alias_table[i].flags & PTA_MMX
1205 && !(target_flags_explicit & MASK_MMX))
1206 target_flags |= MASK_MMX;
1207 if (processor_alias_table[i].flags & PTA_3DNOW
1208 && !(target_flags_explicit & MASK_3DNOW))
1209 target_flags |= MASK_3DNOW;
1210 if (processor_alias_table[i].flags & PTA_3DNOW_A
1211 && !(target_flags_explicit & MASK_3DNOW_A))
1212 target_flags |= MASK_3DNOW_A;
1213 if (processor_alias_table[i].flags & PTA_SSE
1214 && !(target_flags_explicit & MASK_SSE))
1215 target_flags |= MASK_SSE;
1216 if (processor_alias_table[i].flags & PTA_SSE2
1217 && !(target_flags_explicit & MASK_SSE2))
1218 target_flags |= MASK_SSE2;
1219 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1220 x86_prefetch_sse = true;
1221 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1222 error ("CPU you selected does not support x86-64 instruction set");
1223 break;
1226 if (i == pta_size)
1227 error ("bad value (%s) for -march= switch", ix86_arch_string);
1229 for (i = 0; i < pta_size; i++)
1230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1232 ix86_tune = processor_alias_table[i].processor;
1233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1234 error ("CPU you selected does not support x86-64 instruction set");
1235 break;
1237 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1238 x86_prefetch_sse = true;
1239 if (i == pta_size)
1240 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1242 if (optimize_size)
1243 ix86_cost = &size_cost;
1244 else
1245 ix86_cost = processor_target_table[ix86_tune].cost;
1246 target_flags |= processor_target_table[ix86_tune].target_enable;
1247 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1249 /* Arrange to set up i386_stack_locals for all functions. */
1250 init_machine_status = ix86_init_machine_status;
1252 /* Validate -mregparm= value. */
1253 if (ix86_regparm_string)
1255 i = atoi (ix86_regparm_string);
1256 if (i < 0 || i > REGPARM_MAX)
1257 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1258 else
1259 ix86_regparm = i;
1261 else
1262 if (TARGET_64BIT)
1263 ix86_regparm = REGPARM_MAX;
1265 /* If the user has provided any of the -malign-* options,
1266 warn and use that value only if -falign-* is not set.
1267 Remove this code in GCC 3.2 or later. */
1268 if (ix86_align_loops_string)
1270 warning ("-malign-loops is obsolete, use -falign-loops");
1271 if (align_loops == 0)
1273 i = atoi (ix86_align_loops_string);
1274 if (i < 0 || i > MAX_CODE_ALIGN)
1275 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1276 else
1277 align_loops = 1 << i;
1281 if (ix86_align_jumps_string)
1283 warning ("-malign-jumps is obsolete, use -falign-jumps");
1284 if (align_jumps == 0)
1286 i = atoi (ix86_align_jumps_string);
1287 if (i < 0 || i > MAX_CODE_ALIGN)
1288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1289 else
1290 align_jumps = 1 << i;
1294 if (ix86_align_funcs_string)
1296 warning ("-malign-functions is obsolete, use -falign-functions");
1297 if (align_functions == 0)
1299 i = atoi (ix86_align_funcs_string);
1300 if (i < 0 || i > MAX_CODE_ALIGN)
1301 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1302 else
1303 align_functions = 1 << i;
1307 /* Default align_* from the processor table. */
1308 if (align_loops == 0)
1310 align_loops = processor_target_table[ix86_tune].align_loop;
1311 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1313 if (align_jumps == 0)
1315 align_jumps = processor_target_table[ix86_tune].align_jump;
1316 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1318 if (align_functions == 0)
1320 align_functions = processor_target_table[ix86_tune].align_func;
1323 /* Validate -mpreferred-stack-boundary= value, or provide default.
1324 The default of 128 bits is for Pentium III's SSE __m128, but we
1325 don't want additional code to keep the stack aligned when
1326 optimizing for code size. */
1327 ix86_preferred_stack_boundary = (optimize_size
1328 ? TARGET_64BIT ? 128 : 32
1329 : 128);
1330 if (ix86_preferred_stack_boundary_string)
1332 i = atoi (ix86_preferred_stack_boundary_string);
1333 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1334 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1335 TARGET_64BIT ? 4 : 2);
1336 else
1337 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1340 /* Validate -mbranch-cost= value, or provide default. */
1341 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1342 if (ix86_branch_cost_string)
1344 i = atoi (ix86_branch_cost_string);
1345 if (i < 0 || i > 5)
1346 error ("-mbranch-cost=%d is not between 0 and 5", i);
1347 else
1348 ix86_branch_cost = i;
1351 if (ix86_tls_dialect_string)
1353 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1354 ix86_tls_dialect = TLS_DIALECT_GNU;
1355 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1356 ix86_tls_dialect = TLS_DIALECT_SUN;
1357 else
1358 error ("bad value (%s) for -mtls-dialect= switch",
1359 ix86_tls_dialect_string);
1362 /* Keep nonleaf frame pointers. */
1363 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1364 flag_omit_frame_pointer = 1;
1366 /* If we're doing fast math, we don't care about comparison order
1367 wrt NaNs. This lets us use a shorter comparison sequence. */
1368 if (flag_unsafe_math_optimizations)
1369 target_flags &= ~MASK_IEEE_FP;
1371 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1372 since the insns won't need emulation. */
1373 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1374 target_flags &= ~MASK_NO_FANCY_MATH_387;
1376 if (TARGET_64BIT)
1378 if (TARGET_ALIGN_DOUBLE)
1379 error ("-malign-double makes no sense in the 64bit mode");
1380 if (TARGET_RTD)
1381 error ("-mrtd calling convention not supported in the 64bit mode");
1382 /* Enable by default the SSE and MMX builtins. */
1383 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1384 ix86_fpmath = FPMATH_SSE;
1386 else
1388 ix86_fpmath = FPMATH_387;
 1389 /* The i386 ABI does not specify a red zone. It still makes sense to use one
 1390 when the programmer takes care to keep the stack from being destroyed. */
1391 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1392 target_flags |= MASK_NO_RED_ZONE;
1395 if (ix86_fpmath_string != 0)
1397 if (! strcmp (ix86_fpmath_string, "387"))
1398 ix86_fpmath = FPMATH_387;
1399 else if (! strcmp (ix86_fpmath_string, "sse"))
1401 if (!TARGET_SSE)
1403 warning ("SSE instruction set disabled, using 387 arithmetics");
1404 ix86_fpmath = FPMATH_387;
1406 else
1407 ix86_fpmath = FPMATH_SSE;
1409 else if (! strcmp (ix86_fpmath_string, "387,sse")
1410 || ! strcmp (ix86_fpmath_string, "sse,387"))
1412 if (!TARGET_SSE)
1414 warning ("SSE instruction set disabled, using 387 arithmetics");
1415 ix86_fpmath = FPMATH_387;
1417 else if (!TARGET_80387)
1419 warning ("387 instruction set disabled, using SSE arithmetics");
1420 ix86_fpmath = FPMATH_SSE;
1422 else
1423 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1425 else
1426 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1429 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1430 on by -msse. */
1431 if (TARGET_SSE)
1433 target_flags |= MASK_MMX;
1434 x86_prefetch_sse = true;
1437 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1438 if (TARGET_3DNOW)
1440 target_flags |= MASK_MMX;
1441 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1442 extensions it adds. */
1443 if (x86_3dnow_a & (1 << ix86_arch))
1444 target_flags |= MASK_3DNOW_A;
1446 if ((x86_accumulate_outgoing_args & TUNEMASK)
1447 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1448 && !optimize_size)
1449 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1451 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1453 char *p;
1454 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1455 p = strchr (internal_label_prefix, 'X');
1456 internal_label_prefix_len = p - internal_label_prefix;
1457 *p = '\0';
1461 void
1462 optimization_options (level, size)
1463 int level;
1464 int size ATTRIBUTE_UNUSED;
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1469 if (level > 1)
1470 flag_schedule_insns = 0;
1471 #endif
 1473 /* The default values of these switches depend on TARGET_64BIT,
 1474 which is not known at this moment. Mark these values with 2 and
 1475 let the user override them. If there is no command line option
 1476 specifying them, we will set the defaults in override_options. */
1477 if (optimize >= 1)
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1502 #endif
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
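/* As an informal illustration only (these declarations are hypothetical user
   code, not part of the compiler): the calling-convention attributes above
   would typically appear as

     int __attribute__ ((stdcall))  f (int a, int b);
     int __attribute__ ((fastcall)) g (int a, int b);
     int __attribute__ ((regparm (3))) h (int a, int b, int c);

   For f the callee pops the 8 bytes of arguments on return; for g the first
   two integer arguments go in ECX and EDX; for h up to three integer
   arguments are passed in EAX, EDX and ECX.  */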
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1512 static bool
1513 ix86_function_ok_for_sibcall (decl, exp)
1514 tree decl;
1515 tree exp;
1517 /* If we are generating position-independent code, we cannot sibcall
1518 optimize any indirect call, or a direct call to a global function,
1519 as the PLT requires %ebx be live. */
1520 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1521 return false;
1523 /* If we are returning floats on the 80387 register stack, we cannot
1524 make a sibcall from a function that doesn't return a float to a
1525 function that does or, conversely, from a function that does return
1526 a float to a function that doesn't; the necessary stack adjustment
1527 would not be executed. */
1528 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1529 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1530 return false;
1532 /* If this call is indirect, we'll need to be able to use a call-clobbered
1533 register for the address of the target function. Make sure that all
1534 such registers are not used for passing parameters. */
1535 if (!decl && !TARGET_64BIT)
1537 int regparm = ix86_regparm;
1538 tree attr, type;
1540 /* We're looking at the CALL_EXPR, we need the type of the function. */
1541 type = TREE_OPERAND (exp, 0); /* pointer expression */
1542 type = TREE_TYPE (type); /* pointer type */
1543 type = TREE_TYPE (type); /* function type */
1545 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1546 if (attr)
1547 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1549 if (regparm >= 3)
1551 /* ??? Need to count the actual number of registers to be used,
1552 not the possible number of registers. Fix later. */
1553 return false;
1557 /* Otherwise okay. That also includes certain types of indirect calls. */
1558 return true;
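/* Illustrative example of the PIC restriction checked above (hypothetical
   user code): with -fpic on ia32,

     extern int bar (int);
     int foo (int x) { return bar (x); }

   cannot have its call to bar turned into a sibcall, because the call is
   made through the PLT and the PLT entry requires %ebx to hold the GOT
   pointer at the call site.  */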
1561 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1562 arguments as in struct attribute_spec.handler. */
1563 static tree
1564 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1565 tree *node;
1566 tree name;
1567 tree args ATTRIBUTE_UNUSED;
1568 int flags ATTRIBUTE_UNUSED;
1569 bool *no_add_attrs;
1571 if (TREE_CODE (*node) != FUNCTION_TYPE
1572 && TREE_CODE (*node) != METHOD_TYPE
1573 && TREE_CODE (*node) != FIELD_DECL
1574 && TREE_CODE (*node) != TYPE_DECL)
1576 warning ("`%s' attribute only applies to functions",
1577 IDENTIFIER_POINTER (name));
1578 *no_add_attrs = true;
1580 else
1582 if (is_attribute_p ("fastcall", name))
1584 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1586 error ("fastcall and stdcall attributes are not compatible");
1588 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1590 error ("fastcall and regparm attributes are not compatible");
1593 else if (is_attribute_p ("stdcall", name))
1595 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1597 error ("fastcall and stdcall attributes are not compatible");
1602 if (TARGET_64BIT)
1604 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1605 *no_add_attrs = true;
1608 return NULL_TREE;
1611 /* Handle a "regparm" attribute;
1612 arguments as in struct attribute_spec.handler. */
1613 static tree
1614 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1615 tree *node;
1616 tree name;
1617 tree args;
1618 int flags ATTRIBUTE_UNUSED;
1619 bool *no_add_attrs;
1621 if (TREE_CODE (*node) != FUNCTION_TYPE
1622 && TREE_CODE (*node) != METHOD_TYPE
1623 && TREE_CODE (*node) != FIELD_DECL
1624 && TREE_CODE (*node) != TYPE_DECL)
1626 warning ("`%s' attribute only applies to functions",
1627 IDENTIFIER_POINTER (name));
1628 *no_add_attrs = true;
1630 else
1632 tree cst;
1634 cst = TREE_VALUE (args);
1635 if (TREE_CODE (cst) != INTEGER_CST)
1637 warning ("`%s' attribute requires an integer constant argument",
1638 IDENTIFIER_POINTER (name));
1639 *no_add_attrs = true;
1641 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1643 warning ("argument to `%s' attribute larger than %d",
1644 IDENTIFIER_POINTER (name), REGPARM_MAX);
1645 *no_add_attrs = true;
1648 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1650 error ("fastcall and regparm attributes are not compatible");
1654 return NULL_TREE;
1657 /* Return 0 if the attributes for two types are incompatible, 1 if they
1658 are compatible, and 2 if they are nearly compatible (which causes a
1659 warning to be generated). */
1661 static int
1662 ix86_comp_type_attributes (type1, type2)
1663 tree type1;
1664 tree type2;
1666 /* Check for mismatch of non-default calling convention. */
1667 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1669 if (TREE_CODE (type1) != FUNCTION_TYPE)
1670 return 1;
1672 /* Check for mismatched fastcall types */
1673 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1674 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1675 return 0;
1677 /* Check for mismatched return types (cdecl vs stdcall). */
1678 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1679 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1680 return 0;
1681 return 1;
1684 /* Return the regparm value for a function with the indicated TYPE. */
1686 static int
1687 ix86_fntype_regparm (type)
1688 tree type;
1690 tree attr;
1692 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1693 if (attr)
1694 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1695 else
1696 return ix86_regparm;
1699 /* Value is the number of bytes of arguments automatically
1700 popped when returning from a subroutine call.
1701 FUNDECL is the declaration node of the function (as a tree),
1702 FUNTYPE is the data type of the function (as a tree),
1703 or for a library call it is an identifier node for the subroutine name.
1704 SIZE is the number of bytes of arguments passed on the stack.
1706 On the 80386, the RTD insn may be used to pop them if the number
1707 of args is fixed, but if the number is variable then the caller
1708 must pop them all. RTD can't be used for library calls now
1709 because the library is compiled with the Unix compiler.
1710 Use of RTD is a selectable option, since it is incompatible with
1711 standard Unix calling sequences. If the option is not selected,
1712 the caller must always pop the args.
1714 The attribute stdcall is equivalent to RTD on a per module basis. */
1717 ix86_return_pops_args (fundecl, funtype, size)
1718 tree fundecl;
1719 tree funtype;
1720 int size;
1722 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1724 /* Cdecl functions override -mrtd, and never pop the stack. */
1725 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1727 /* Stdcall and fastcall functions will pop the stack unless they take variable arguments. */
1728 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1729 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1730 rtd = 1;
1732 if (rtd
1733 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1734 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1735 == void_type_node)))
1736 return size;
1739 /* Lose any fake structure return argument if it is passed on the stack. */
1740 if (aggregate_value_p (TREE_TYPE (funtype))
1741 && !TARGET_64BIT)
1743 int nregs = ix86_fntype_regparm (funtype);
1745 if (!nregs)
1746 return GET_MODE_SIZE (Pmode);
1749 return 0;
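/* A small worked example (hypothetical declaration): for

     void __attribute__ ((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8, so the callee pops both arguments with a
   `ret 8'-style return; a variadic version such as f (int, ...) keeps the
   caller-pops convention and 0 is returned.  */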
1752 /* Argument support functions. */
1754 /* Return true when register may be used to pass function parameters. */
1755 bool
1756 ix86_function_arg_regno_p (regno)
1757 int regno;
1759 int i;
1760 if (!TARGET_64BIT)
1761 return (regno < REGPARM_MAX
1762 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1763 if (SSE_REGNO_P (regno) && TARGET_SSE)
1764 return true;
1765 /* RAX is used as a hidden argument to varargs functions. */
1766 if (!regno)
1767 return true;
1768 for (i = 0; i < REGPARM_MAX; i++)
1769 if (regno == x86_64_int_parameter_registers[i])
1770 return true;
1771 return false;
1774 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1775 for a call to a function whose data type is FNTYPE.
1776 For a library call, FNTYPE is 0. */
1778 void
1779 init_cumulative_args (cum, fntype, libname, fndecl)
1780 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1781 tree fntype; /* tree ptr for function decl */
1782 rtx libname; /* SYMBOL_REF of library name or 0 */
1783 tree fndecl;
1785 static CUMULATIVE_ARGS zero_cum;
1786 tree param, next_param;
1787 bool user_convention = false;
1789 if (TARGET_DEBUG_ARG)
1791 fprintf (stderr, "\ninit_cumulative_args (");
1792 if (fntype)
1793 fprintf (stderr, "fntype code = %s, ret code = %s",
1794 tree_code_name[(int) TREE_CODE (fntype)],
1795 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1796 else
1797 fprintf (stderr, "no fntype");
1799 if (libname)
1800 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1803 *cum = zero_cum;
1805 /* Set up the number of registers to use for passing arguments. */
1806 cum->nregs = ix86_regparm;
1807 cum->sse_nregs = SSE_REGPARM_MAX;
1808 if (fntype && !TARGET_64BIT)
1810 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1812 if (attr)
1814 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1815 user_convention = true;
1818 cum->maybe_vaarg = false;
1820 /* Use ecx and edx registers if function has fastcall attribute */
1821 if (fntype && !TARGET_64BIT)
1823 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1825 cum->nregs = 2;
1826 cum->fastcall = 1;
1827 user_convention = true;
1831 /* Use register calling convention for local functions when possible. */
1832 if (!TARGET_64BIT && !user_convention && fndecl
1833 && flag_unit_at_a_time)
1835 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1836 if (i && i->local)
1838 /* We can't use regparm(3) for nested functions as these use the
1839 static chain pointer in the third argument. */
1840 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1841 cum->nregs = 2;
1842 else
1843 cum->nregs = 3;
1848 /* Determine if this function has variable arguments.  This is
1849 indicated by the last argument being 'void_type_node' if there
1850 are no variable arguments.  If there are variable arguments, then
1851 we won't pass anything in registers.  */
1853 if (cum->nregs)
1855 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1856 param != 0; param = next_param)
1858 next_param = TREE_CHAIN (param);
1859 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1861 if (!TARGET_64BIT)
1863 cum->nregs = 0;
1864 cum->fastcall = 0;
1866 cum->maybe_vaarg = true;
1870 if ((!fntype && !libname)
1871 || (fntype && !TYPE_ARG_TYPES (fntype)))
1872 cum->maybe_vaarg = 1;
1874 if (TARGET_DEBUG_ARG)
1875 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1877 return;
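/* Rough sketch of the local-function case above (assuming -O2
   -funit-at-a-time on ia32): a static function whose address is never taken
   is recorded as local by cgraph, so it is called with up to three integer
   arguments in registers (EAX, EDX, ECX) even without an explicit regparm
   attribute; nested functions get only two because the static chain uses the
   third register.  */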
1880 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
1881 of this code is to classify each 8-byte chunk of the incoming argument by the register
1882 class and assign registers accordingly. */
1884 /* Return the union class of CLASS1 and CLASS2.
1885 See the x86-64 PS ABI for details. */
1887 static enum x86_64_reg_class
1888 merge_classes (class1, class2)
1889 enum x86_64_reg_class class1, class2;
1891 /* Rule #1: If both classes are equal, this is the resulting class. */
1892 if (class1 == class2)
1893 return class1;
1895 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1896 the other class. */
1897 if (class1 == X86_64_NO_CLASS)
1898 return class2;
1899 if (class2 == X86_64_NO_CLASS)
1900 return class1;
1902 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1903 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1904 return X86_64_MEMORY_CLASS;
1906 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1907 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1908 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1909 return X86_64_INTEGERSI_CLASS;
1910 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1911 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1912 return X86_64_INTEGER_CLASS;
1914 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1915 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1916 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1917 return X86_64_MEMORY_CLASS;
1919 /* Rule #6: Otherwise class SSE is used. */
1920 return X86_64_SSE_CLASS;
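/* Worked example (informal): classifying

     union u { int i; float f; };

   puts INTEGERSI (from the int) and SSESF (from the float) into the same
   eightbyte; rule #4 above merges them to INTEGERSI, so the union is passed
   in a general purpose register.  */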
1923 /* Classify the argument of type TYPE and mode MODE.
1924 CLASSES will be filled by the register class used to pass each word
1925 of the operand. The number of words is returned. In case the parameter
1926 should be passed in memory, 0 is returned. As a special case for zero
1927 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1929 BIT_OFFSET is used internally for handling records and specifies the offset
1930 of the argument within the enclosing record, in bits modulo 256, to avoid overflow cases.
1932 See the x86-64 PS ABI for details.
1935 static int
1936 classify_argument (mode, type, classes, bit_offset)
1937 enum machine_mode mode;
1938 tree type;
1939 enum x86_64_reg_class classes[MAX_CLASSES];
1940 int bit_offset;
1942 int bytes =
1943 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1944 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1946 /* Variable sized entities are always passed/returned in memory. */
1947 if (bytes < 0)
1948 return 0;
1950 if (mode != VOIDmode
1951 && MUST_PASS_IN_STACK (mode, type))
1952 return 0;
1954 if (type && AGGREGATE_TYPE_P (type))
1956 int i;
1957 tree field;
1958 enum x86_64_reg_class subclasses[MAX_CLASSES];
1960 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1961 if (bytes > 16)
1962 return 0;
1964 for (i = 0; i < words; i++)
1965 classes[i] = X86_64_NO_CLASS;
1967 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1968 signal the memory class, so handle this as a special case. */
1969 if (!words)
1971 classes[0] = X86_64_NO_CLASS;
1972 return 1;
1975 /* Classify each field of record and merge classes. */
1976 if (TREE_CODE (type) == RECORD_TYPE)
1978 /* For C++ classes, first merge in the fields of the base classes. */
1979 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1981 tree bases = TYPE_BINFO_BASETYPES (type);
1982 int n_bases = TREE_VEC_LENGTH (bases);
1983 int i;
1985 for (i = 0; i < n_bases; ++i)
1987 tree binfo = TREE_VEC_ELT (bases, i);
1988 int num;
1989 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1990 tree type = BINFO_TYPE (binfo);
1992 num = classify_argument (TYPE_MODE (type),
1993 type, subclasses,
1994 (offset + bit_offset) % 256);
1995 if (!num)
1996 return 0;
1997 for (i = 0; i < num; i++)
1999 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2000 classes[i + pos] =
2001 merge_classes (subclasses[i], classes[i + pos]);
2005 /* And now merge the fields of structure. */
2006 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2008 if (TREE_CODE (field) == FIELD_DECL)
2010 int num;
2012 /* Bitfields are always classified as integer. Handle them
2013 early, since later code would consider them to be
2014 misaligned integers. */
2015 if (DECL_BIT_FIELD (field))
2017 for (i = int_bit_position (field) / 8 / 8;
2018 i < (int_bit_position (field)
2019 + tree_low_cst (DECL_SIZE (field), 0)
2020 + 63) / 8 / 8; i++)
2021 classes[i] =
2022 merge_classes (X86_64_INTEGER_CLASS,
2023 classes[i]);
2025 else
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2028 TREE_TYPE (field), subclasses,
2029 (int_bit_position (field)
2030 + bit_offset) % 256);
2031 if (!num)
2032 return 0;
2033 for (i = 0; i < num; i++)
2035 int pos =
2036 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2037 classes[i + pos] =
2038 merge_classes (subclasses[i], classes[i + pos]);
2044 /* Arrays are handled as small records. */
2045 else if (TREE_CODE (type) == ARRAY_TYPE)
2047 int num;
2048 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2049 TREE_TYPE (type), subclasses, bit_offset);
2050 if (!num)
2051 return 0;
2053 /* The partial classes are now full classes. */
2054 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2055 subclasses[0] = X86_64_SSE_CLASS;
2056 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2057 subclasses[0] = X86_64_INTEGER_CLASS;
2059 for (i = 0; i < words; i++)
2060 classes[i] = subclasses[i % num];
2062 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2063 else if (TREE_CODE (type) == UNION_TYPE
2064 || TREE_CODE (type) == QUAL_UNION_TYPE)
2066 /* For C++ classes, first merge in the fields of the base classes. */
2067 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2069 tree bases = TYPE_BINFO_BASETYPES (type);
2070 int n_bases = TREE_VEC_LENGTH (bases);
2071 int i;
2073 for (i = 0; i < n_bases; ++i)
2075 tree binfo = TREE_VEC_ELT (bases, i);
2076 int num;
2077 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2078 tree type = BINFO_TYPE (binfo);
2080 num = classify_argument (TYPE_MODE (type),
2081 type, subclasses,
2082 (offset + (bit_offset % 64)) % 256);
2083 if (!num)
2084 return 0;
2085 for (i = 0; i < num; i++)
2087 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2088 classes[i + pos] =
2089 merge_classes (subclasses[i], classes[i + pos]);
2093 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2095 if (TREE_CODE (field) == FIELD_DECL)
2097 int num;
2098 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2099 TREE_TYPE (field), subclasses,
2100 bit_offset);
2101 if (!num)
2102 return 0;
2103 for (i = 0; i < num; i++)
2104 classes[i] = merge_classes (subclasses[i], classes[i]);
2108 else
2109 abort ();
2111 /* Final merger cleanup. */
2112 for (i = 0; i < words; i++)
2114 /* If one class is MEMORY, everything should be passed in
2115 memory. */
2116 if (classes[i] == X86_64_MEMORY_CLASS)
2117 return 0;
2119 /* The X86_64_SSEUP_CLASS should always be preceded by
2120 X86_64_SSE_CLASS. */
2121 if (classes[i] == X86_64_SSEUP_CLASS
2122 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2123 classes[i] = X86_64_SSE_CLASS;
2125 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2126 if (classes[i] == X86_64_X87UP_CLASS
2127 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2128 classes[i] = X86_64_SSE_CLASS;
2130 return words;
2133 /* Compute the alignment needed.  We align all types to their natural boundaries, with
2134 the exception of XFmode, which is aligned to 64 bits. */
2135 if (mode != VOIDmode && mode != BLKmode)
2137 int mode_alignment = GET_MODE_BITSIZE (mode);
2139 if (mode == XFmode)
2140 mode_alignment = 128;
2141 else if (mode == XCmode)
2142 mode_alignment = 256;
2143 /* Misaligned fields are always returned in memory. */
2144 if (bit_offset % mode_alignment)
2145 return 0;
2148 /* Classification of atomic types. */
2149 switch (mode)
2151 case DImode:
2152 case SImode:
2153 case HImode:
2154 case QImode:
2155 case CSImode:
2156 case CHImode:
2157 case CQImode:
2158 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2159 classes[0] = X86_64_INTEGERSI_CLASS;
2160 else
2161 classes[0] = X86_64_INTEGER_CLASS;
2162 return 1;
2163 case CDImode:
2164 case TImode:
2165 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2166 return 2;
2167 case CTImode:
2168 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2169 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2170 return 4;
2171 case SFmode:
2172 if (!(bit_offset % 64))
2173 classes[0] = X86_64_SSESF_CLASS;
2174 else
2175 classes[0] = X86_64_SSE_CLASS;
2176 return 1;
2177 case DFmode:
2178 classes[0] = X86_64_SSEDF_CLASS;
2179 return 1;
2180 case TFmode:
2181 classes[0] = X86_64_X87_CLASS;
2182 classes[1] = X86_64_X87UP_CLASS;
2183 return 2;
2184 case TCmode:
2185 classes[0] = X86_64_X87_CLASS;
2186 classes[1] = X86_64_X87UP_CLASS;
2187 classes[2] = X86_64_X87_CLASS;
2188 classes[3] = X86_64_X87UP_CLASS;
2189 return 4;
2190 case DCmode:
2191 classes[0] = X86_64_SSEDF_CLASS;
2192 classes[1] = X86_64_SSEDF_CLASS;
2193 return 2;
2194 case SCmode:
2195 classes[0] = X86_64_SSE_CLASS;
2196 return 1;
2197 case V4SFmode:
2198 case V4SImode:
2199 case V16QImode:
2200 case V8HImode:
2201 case V2DFmode:
2202 case V2DImode:
2203 classes[0] = X86_64_SSE_CLASS;
2204 classes[1] = X86_64_SSEUP_CLASS;
2205 return 2;
2206 case V2SFmode:
2207 case V2SImode:
2208 case V4HImode:
2209 case V8QImode:
2210 return 0;
2211 case BLKmode:
2212 case VOIDmode:
2213 return 0;
2214 default:
2215 abort ();
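/* A worked example for the aggregate path above (informal):

     struct s { double d; int i; };

   occupies 16 bytes, so two eightbytes are classified; the first becomes
   SSEDF (the double), the second INTEGER (the int), and 2 is returned.  An
   aggregate larger than 16 bytes would return 0 and be passed in memory.  */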
2219 /* Examine the argument and set the number of registers required in each
2220 class.  Return 0 iff the parameter should be passed in memory. */
2221 static int
2222 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2223 enum machine_mode mode;
2224 tree type;
2225 int *int_nregs, *sse_nregs;
2226 int in_return;
2228 enum x86_64_reg_class class[MAX_CLASSES];
2229 int n = classify_argument (mode, type, class, 0);
2231 *int_nregs = 0;
2232 *sse_nregs = 0;
2233 if (!n)
2234 return 0;
2235 for (n--; n >= 0; n--)
2236 switch (class[n])
2238 case X86_64_INTEGER_CLASS:
2239 case X86_64_INTEGERSI_CLASS:
2240 (*int_nregs)++;
2241 break;
2242 case X86_64_SSE_CLASS:
2243 case X86_64_SSESF_CLASS:
2244 case X86_64_SSEDF_CLASS:
2245 (*sse_nregs)++;
2246 break;
2247 case X86_64_NO_CLASS:
2248 case X86_64_SSEUP_CLASS:
2249 break;
2250 case X86_64_X87_CLASS:
2251 case X86_64_X87UP_CLASS:
2252 if (!in_return)
2253 return 0;
2254 break;
2255 case X86_64_MEMORY_CLASS:
2256 abort ();
2258 return 1;
2260 /* Construct the container (as used by the GCC interface) for the argument.  See
2261 FUNCTION_ARG for the detailed description. */
2262 static rtx
2263 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2264 enum machine_mode mode;
2265 tree type;
2266 int in_return;
2267 int nintregs, nsseregs;
2268 const int * intreg;
2269 int sse_regno;
2271 enum machine_mode tmpmode;
2272 int bytes =
2273 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2274 enum x86_64_reg_class class[MAX_CLASSES];
2275 int n;
2276 int i;
2277 int nexps = 0;
2278 int needed_sseregs, needed_intregs;
2279 rtx exp[MAX_CLASSES];
2280 rtx ret;
2282 n = classify_argument (mode, type, class, 0);
2283 if (TARGET_DEBUG_ARG)
2285 if (!n)
2286 fprintf (stderr, "Memory class\n");
2287 else
2289 fprintf (stderr, "Classes:");
2290 for (i = 0; i < n; i++)
2292 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2294 fprintf (stderr, "\n");
2297 if (!n)
2298 return NULL;
2299 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2300 return NULL;
2301 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2302 return NULL;
2304 /* First construct the simple cases.  Avoid SCmode, since we want to use a
2305 single register to pass this type. */
2306 if (n == 1 && mode != SCmode)
2307 switch (class[0])
2309 case X86_64_INTEGER_CLASS:
2310 case X86_64_INTEGERSI_CLASS:
2311 return gen_rtx_REG (mode, intreg[0]);
2312 case X86_64_SSE_CLASS:
2313 case X86_64_SSESF_CLASS:
2314 case X86_64_SSEDF_CLASS:
2315 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2316 case X86_64_X87_CLASS:
2317 return gen_rtx_REG (mode, FIRST_STACK_REG);
2318 case X86_64_NO_CLASS:
2319 /* Zero sized array, struct or class. */
2320 return NULL;
2321 default:
2322 abort ();
2324 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2325 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2326 if (n == 2
2327 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2328 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2329 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2330 && class[1] == X86_64_INTEGER_CLASS
2331 && (mode == CDImode || mode == TImode)
2332 && intreg[0] + 1 == intreg[1])
2333 return gen_rtx_REG (mode, intreg[0]);
2334 if (n == 4
2335 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2336 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2337 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2339 /* Otherwise figure out the entries of the PARALLEL. */
2340 for (i = 0; i < n; i++)
2342 switch (class[i])
2344 case X86_64_NO_CLASS:
2345 break;
2346 case X86_64_INTEGER_CLASS:
2347 case X86_64_INTEGERSI_CLASS:
2348 /* Merge TImodes on aligned occasions here too. */
2349 if (i * 8 + 8 > bytes)
2350 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2351 else if (class[i] == X86_64_INTEGERSI_CLASS)
2352 tmpmode = SImode;
2353 else
2354 tmpmode = DImode;
2355 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2356 if (tmpmode == BLKmode)
2357 tmpmode = DImode;
2358 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2359 gen_rtx_REG (tmpmode, *intreg),
2360 GEN_INT (i*8));
2361 intreg++;
2362 break;
2363 case X86_64_SSESF_CLASS:
2364 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2365 gen_rtx_REG (SFmode,
2366 SSE_REGNO (sse_regno)),
2367 GEN_INT (i*8));
2368 sse_regno++;
2369 break;
2370 case X86_64_SSEDF_CLASS:
2371 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2372 gen_rtx_REG (DFmode,
2373 SSE_REGNO (sse_regno)),
2374 GEN_INT (i*8));
2375 sse_regno++;
2376 break;
2377 case X86_64_SSE_CLASS:
2378 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2379 tmpmode = TImode;
2380 else
2381 tmpmode = DImode;
2382 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2383 gen_rtx_REG (tmpmode,
2384 SSE_REGNO (sse_regno)),
2385 GEN_INT (i*8));
2386 if (tmpmode == TImode)
2387 i++;
2388 sse_regno++;
2389 break;
2390 default:
2391 abort ();
2394 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2395 for (i = 0; i < nexps; i++)
2396 XVECEXP (ret, 0, i) = exp [i];
2397 return ret;
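/* Continuing the struct s { double d; int i; } sketch from classify_argument:
   assuming it is the first argument of a function, the container built here
   is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in the first SSE register and the int (padded out
   to an eightbyte) in the first integer argument register.  */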
2400 /* Update the data in CUM to advance over an argument
2401 of mode MODE and data type TYPE.
2402 (TYPE is null for libcalls where that information may not be available.) */
2404 void
2405 function_arg_advance (cum, mode, type, named)
2406 CUMULATIVE_ARGS *cum; /* current arg information */
2407 enum machine_mode mode; /* current arg mode */
2408 tree type; /* type of the argument or 0 if lib support */
2409 int named; /* whether or not the argument was named */
2411 int bytes =
2412 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2413 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2415 if (TARGET_DEBUG_ARG)
2416 fprintf (stderr,
2417 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2418 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2419 if (TARGET_64BIT)
2421 int int_nregs, sse_nregs;
2422 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2423 cum->words += words;
2424 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2426 cum->nregs -= int_nregs;
2427 cum->sse_nregs -= sse_nregs;
2428 cum->regno += int_nregs;
2429 cum->sse_regno += sse_nregs;
2431 else
2432 cum->words += words;
2434 else
2436 if (TARGET_SSE && mode == TImode)
2438 cum->sse_words += words;
2439 cum->sse_nregs -= 1;
2440 cum->sse_regno += 1;
2441 if (cum->sse_nregs <= 0)
2443 cum->sse_nregs = 0;
2444 cum->sse_regno = 0;
2447 else
2449 cum->words += words;
2450 cum->nregs -= words;
2451 cum->regno += words;
2453 if (cum->nregs <= 0)
2455 cum->nregs = 0;
2456 cum->regno = 0;
2460 return;
2463 /* Define where to put the arguments to a function.
2464 Value is zero to push the argument on the stack,
2465 or a hard register in which to store the argument.
2467 MODE is the argument's machine mode.
2468 TYPE is the data type of the argument (as a tree).
2469 This is null for libcalls where that information may
2470 not be available.
2471 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2472 the preceding args and about the function being called.
2473 NAMED is nonzero if this argument is a named parameter
2474 (otherwise it is an extra parameter matching an ellipsis). */
2477 function_arg (cum, mode, type, named)
2478 CUMULATIVE_ARGS *cum; /* current arg information */
2479 enum machine_mode mode; /* current arg mode */
2480 tree type; /* type of the argument or 0 if lib support */
2481 int named; /* != 0 for normal args, == 0 for ... args */
2483 rtx ret = NULL_RTX;
2484 int bytes =
2485 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2486 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2488 /* Handle a hidden AL argument containing the number of SSE registers used by varargs
2489 x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2490 any AL settings. */
2491 if (mode == VOIDmode)
2493 if (TARGET_64BIT)
2494 return GEN_INT (cum->maybe_vaarg
2495 ? (cum->sse_nregs < 0
2496 ? SSE_REGPARM_MAX
2497 : cum->sse_regno)
2498 : -1);
2499 else
2500 return constm1_rtx;
2502 if (TARGET_64BIT)
2503 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2504 &x86_64_int_parameter_registers [cum->regno],
2505 cum->sse_regno);
2506 else
2507 switch (mode)
2509 /* For now, pass fp/complex values on the stack. */
2510 default:
2511 break;
2513 case BLKmode:
2514 if (bytes < 0)
2515 break;
2516 /* FALLTHRU */
2517 case DImode:
2518 case SImode:
2519 case HImode:
2520 case QImode:
2521 if (words <= cum->nregs)
2523 int regno = cum->regno;
2525 /* Fastcall allocates the first two DWORD (SImode) or
2526 smaller arguments to ECX and EDX. */
2527 if (cum->fastcall)
2529 if (mode == BLKmode || mode == DImode)
2530 break;
2532 /* ECX, not EAX, is the first allocated register. */
2533 if (regno == 0)
2534 regno = 2;
2536 ret = gen_rtx_REG (mode, regno);
2538 break;
2539 case TImode:
2540 if (cum->sse_nregs)
2541 ret = gen_rtx_REG (mode, cum->sse_regno);
2542 break;
2545 if (TARGET_DEBUG_ARG)
2547 fprintf (stderr,
2548 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2549 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2551 if (ret)
2552 print_simple_rtl (stderr, ret);
2553 else
2554 fprintf (stderr, ", stack");
2556 fprintf (stderr, " )\n");
2559 return ret;
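/* Example of the hidden AL value computed above (informal): for a variadic
   call such as printf ("%f", d) with d a double, one SSE register is used,
   so 1 is loaded into AL before the call; for a non-variadic prototype -1 is
   returned and no AL setup is emitted.  */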
2562 /* A C expression that indicates when an argument must be passed by
2563 reference. If nonzero for an argument, a copy of that argument is
2564 made in memory and a pointer to the argument is passed instead of
2565 the argument itself. The pointer is passed in whatever way is
2566 appropriate for passing a pointer to that type. */
2569 function_arg_pass_by_reference (cum, mode, type, named)
2570 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2571 enum machine_mode mode ATTRIBUTE_UNUSED;
2572 tree type;
2573 int named ATTRIBUTE_UNUSED;
2575 if (!TARGET_64BIT)
2576 return 0;
2578 if (type && int_size_in_bytes (type) == -1)
2580 if (TARGET_DEBUG_ARG)
2581 fprintf (stderr, "function_arg_pass_by_reference\n");
2582 return 1;
2585 return 0;
2588 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2589 passing ABI. */
2590 static bool
2591 contains_128bit_aligned_vector_p (type)
2592 tree type;
2594 enum machine_mode mode = TYPE_MODE (type);
2595 if (SSE_REG_MODE_P (mode)
2596 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2597 return true;
2598 if (TYPE_ALIGN (type) < 128)
2599 return false;
2601 if (AGGREGATE_TYPE_P (type))
2603 /* Walk the aggregates recursively. */
2604 if (TREE_CODE (type) == RECORD_TYPE
2605 || TREE_CODE (type) == UNION_TYPE
2606 || TREE_CODE (type) == QUAL_UNION_TYPE)
2608 tree field;
2610 if (TYPE_BINFO (type) != NULL
2611 && TYPE_BINFO_BASETYPES (type) != NULL)
2613 tree bases = TYPE_BINFO_BASETYPES (type);
2614 int n_bases = TREE_VEC_LENGTH (bases);
2615 int i;
2617 for (i = 0; i < n_bases; ++i)
2619 tree binfo = TREE_VEC_ELT (bases, i);
2620 tree type = BINFO_TYPE (binfo);
2622 if (contains_128bit_aligned_vector_p (type))
2623 return true;
2626 /* And now merge the fields of structure. */
2627 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2629 if (TREE_CODE (field) == FIELD_DECL
2630 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2631 return true;
2634 /* Just for use if some language passes arrays by value. */
2635 else if (TREE_CODE (type) == ARRAY_TYPE)
2637 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2638 return true;
2640 else
2641 abort ();
2643 return false;
2646 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2647 and type. */
2650 ix86_function_arg_boundary (mode, type)
2651 enum machine_mode mode;
2652 tree type;
2654 int align;
2655 if (type)
2656 align = TYPE_ALIGN (type);
2657 else
2658 align = GET_MODE_ALIGNMENT (mode);
2659 if (align < PARM_BOUNDARY)
2660 align = PARM_BOUNDARY;
2661 if (!TARGET_64BIT)
2663 /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
2664 make an exception for SSE modes since these require 128-bit
2665 alignment.
2667 The handling here differs from field_alignment.  ICC aligns MMX
2668 arguments to 4 byte boundaries, while structure fields are aligned
2669 to 8 byte boundaries. */
2670 if (!type)
2672 if (!SSE_REG_MODE_P (mode))
2673 align = PARM_BOUNDARY;
2675 else
2677 if (!contains_128bit_aligned_vector_p (type))
2678 align = PARM_BOUNDARY;
2680 if (align != PARM_BOUNDARY && !TARGET_SSE)
2681 abort();
2683 if (align > 128)
2684 align = 128;
2685 return align;
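/* Example of the 32-bit rules above (informal): a plain int argument keeps
   the 32-bit PARM_BOUNDARY, while a __m128 argument, or a structure that
   contains a 128-bit vector member, is given a 128-bit boundary so that SSE
   loads from the argument area stay aligned.  */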
2688 /* Return true if REGNO is a possible register number for a function value. */
2689 bool
2690 ix86_function_value_regno_p (regno)
2691 int regno;
2693 if (!TARGET_64BIT)
2695 return ((regno) == 0
2696 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2697 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2699 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2700 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2701 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2704 /* Define how to find the value returned by a function.
2705 VALTYPE is the data type of the value (as a tree).
2706 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2707 otherwise, FUNC is 0. */
2709 ix86_function_value (valtype)
2710 tree valtype;
2712 if (TARGET_64BIT)
2714 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2715 REGPARM_MAX, SSE_REGPARM_MAX,
2716 x86_64_int_return_registers, 0);
2717 /* For zero sized structures, construct_container returns NULL, but we need
2718 to keep the rest of the compiler happy by returning a meaningful value. */
2719 if (!ret)
2720 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2721 return ret;
2723 else
2724 return gen_rtx_REG (TYPE_MODE (valtype),
2725 ix86_value_regno (TYPE_MODE (valtype)));
2728 /* Return nonzero iff TYPE must be returned in memory. */
2730 ix86_return_in_memory (type)
2731 tree type;
2733 int needed_intregs, needed_sseregs;
2734 if (TARGET_64BIT)
2736 return !examine_argument (TYPE_MODE (type), type, 1,
2737 &needed_intregs, &needed_sseregs);
2739 else
2741 if (TYPE_MODE (type) == BLKmode)
2742 return 1;
2743 else if (MS_AGGREGATE_RETURN
2744 && AGGREGATE_TYPE_P (type)
2745 && int_size_in_bytes(type) <= 8)
2746 return 0;
2747 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2748 && int_size_in_bytes (type) == 8)
2749 || (int_size_in_bytes (type) > 12
2750 && TYPE_MODE (type) != TImode
2751 && TYPE_MODE (type) != TFmode
2752 && !VECTOR_MODE_P (TYPE_MODE (type))))
2753 return 1;
2754 return 0;
2758 /* Define how to find the value returned by a library function
2759 assuming the value has mode MODE. */
2761 ix86_libcall_value (mode)
2762 enum machine_mode mode;
2764 if (TARGET_64BIT)
2766 switch (mode)
2768 case SFmode:
2769 case SCmode:
2770 case DFmode:
2771 case DCmode:
2772 return gen_rtx_REG (mode, FIRST_SSE_REG);
2773 case TFmode:
2774 case TCmode:
2775 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2776 default:
2777 return gen_rtx_REG (mode, 0);
2780 else
2781 return gen_rtx_REG (mode, ix86_value_regno (mode));
2784 /* Given a mode, return the register to use for a return value. */
2786 static int
2787 ix86_value_regno (mode)
2788 enum machine_mode mode;
2790 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2791 return FIRST_FLOAT_REG;
2792 if (mode == TImode || VECTOR_MODE_P (mode))
2793 return FIRST_SSE_REG;
2794 return 0;
2797 /* Create the va_list data type. */
2799 tree
2800 ix86_build_va_list ()
2802 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2804 /* For i386 we use plain pointer to argument area. */
2805 if (!TARGET_64BIT)
2806 return build_pointer_type (char_type_node);
2808 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2809 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2811 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2812 unsigned_type_node);
2813 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2814 unsigned_type_node);
2815 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2816 ptr_type_node);
2817 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2818 ptr_type_node);
2820 DECL_FIELD_CONTEXT (f_gpr) = record;
2821 DECL_FIELD_CONTEXT (f_fpr) = record;
2822 DECL_FIELD_CONTEXT (f_ovf) = record;
2823 DECL_FIELD_CONTEXT (f_sav) = record;
2825 TREE_CHAIN (record) = type_decl;
2826 TYPE_NAME (record) = type_decl;
2827 TYPE_FIELDS (record) = f_gpr;
2828 TREE_CHAIN (f_gpr) = f_fpr;
2829 TREE_CHAIN (f_fpr) = f_ovf;
2830 TREE_CHAIN (f_ovf) = f_sav;
2832 layout_type (record);
2834 /* The correct type is an array type of one element. */
2835 return build_array_type (record, build_index_type (size_zero_node));
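/* The record laid out above corresponds, informally, to the C declaration
   given in the x86-64 psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */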
2838 /* Perform any actions needed for a function that is receiving a
2839 variable number of arguments.
2841 CUM is as above.
2843 MODE and TYPE are the mode and type of the current parameter.
2845 PRETEND_SIZE is a variable that should be set to the amount of stack
2846 that must be pushed by the prologue to pretend that our caller pushed it.
2849 Normally, this macro will push all remaining incoming registers on the
2850 stack and set PRETEND_SIZE to the length of the registers pushed. */
2852 void
2853 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2854 CUMULATIVE_ARGS *cum;
2855 enum machine_mode mode;
2856 tree type;
2857 int *pretend_size ATTRIBUTE_UNUSED;
2858 int no_rtl;
2861 CUMULATIVE_ARGS next_cum;
2862 rtx save_area = NULL_RTX, mem;
2863 rtx label;
2864 rtx label_ref;
2865 rtx tmp_reg;
2866 rtx nsse_reg;
2867 int set;
2868 tree fntype;
2869 int stdarg_p;
2870 int i;
2872 if (!TARGET_64BIT)
2873 return;
2875 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2876 ix86_save_varrargs_registers = 1;
2878 cfun->stack_alignment_needed = 128;
2880 fntype = TREE_TYPE (current_function_decl);
2881 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2882 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2883 != void_type_node));
2885 /* For varargs, we do not want to skip the dummy va_dcl argument.
2886 For stdargs, we do want to skip the last named argument. */
2887 next_cum = *cum;
2888 if (stdarg_p)
2889 function_arg_advance (&next_cum, mode, type, 1);
2891 if (!no_rtl)
2892 save_area = frame_pointer_rtx;
2894 set = get_varargs_alias_set ();
2896 for (i = next_cum.regno; i < ix86_regparm; i++)
2898 mem = gen_rtx_MEM (Pmode,
2899 plus_constant (save_area, i * UNITS_PER_WORD));
2900 set_mem_alias_set (mem, set);
2901 emit_move_insn (mem, gen_rtx_REG (Pmode,
2902 x86_64_int_parameter_registers[i]));
2905 if (next_cum.sse_nregs)
2907 /* Now emit code to save the SSE registers.  The AX parameter contains the number
2908 of SSE parameter registers used to call this function.  We use the
2909 sse_prologue_save insn template, which produces a computed jump across the
2910 SSE saves.  We need some preparation work to get this working. */
2912 label = gen_label_rtx ();
2913 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2915 /* Compute address to jump to :
2916 label - 5*eax + nnamed_sse_arguments*5 */
2917 tmp_reg = gen_reg_rtx (Pmode);
2918 nsse_reg = gen_reg_rtx (Pmode);
2919 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2920 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2921 gen_rtx_MULT (Pmode, nsse_reg,
2922 GEN_INT (4))));
2923 if (next_cum.sse_regno)
2924 emit_move_insn
2925 (nsse_reg,
2926 gen_rtx_CONST (DImode,
2927 gen_rtx_PLUS (DImode,
2928 label_ref,
2929 GEN_INT (next_cum.sse_regno * 4))));
2930 else
2931 emit_move_insn (nsse_reg, label_ref);
2932 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2934 /* Compute the address of the memory block we save into.  We always use a pointer
2935 pointing 127 bytes after the first byte to store - this is needed to keep
2936 the instruction size limited to 4 bytes. */
2937 tmp_reg = gen_reg_rtx (Pmode);
2938 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2939 plus_constant (save_area,
2940 8 * REGPARM_MAX + 127)));
2941 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2942 set_mem_alias_set (mem, set);
2943 set_mem_align (mem, BITS_PER_WORD);
2945 /* And finally do the dirty job! */
2946 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2947 GEN_INT (next_cum.sse_regno), label));
2952 /* Implement va_start. */
2954 void
2955 ix86_va_start (valist, nextarg)
2956 tree valist;
2957 rtx nextarg;
2959 HOST_WIDE_INT words, n_gpr, n_fpr;
2960 tree f_gpr, f_fpr, f_ovf, f_sav;
2961 tree gpr, fpr, ovf, sav, t;
2963 /* Only 64bit target needs something special. */
2964 if (!TARGET_64BIT)
2966 std_expand_builtin_va_start (valist, nextarg);
2967 return;
2970 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2971 f_fpr = TREE_CHAIN (f_gpr);
2972 f_ovf = TREE_CHAIN (f_fpr);
2973 f_sav = TREE_CHAIN (f_ovf);
2975 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2976 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2977 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2978 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2979 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2981 /* Count number of gp and fp argument registers used. */
2982 words = current_function_args_info.words;
2983 n_gpr = current_function_args_info.regno;
2984 n_fpr = current_function_args_info.sse_regno;
2986 if (TARGET_DEBUG_ARG)
2987 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2988 (int) words, (int) n_gpr, (int) n_fpr);
2990 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2991 build_int_2 (n_gpr * 8, 0));
2992 TREE_SIDE_EFFECTS (t) = 1;
2993 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2995 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2996 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2997 TREE_SIDE_EFFECTS (t) = 1;
2998 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3000 /* Find the overflow area. */
3001 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3002 if (words != 0)
3003 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3004 build_int_2 (words * UNITS_PER_WORD, 0));
3005 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3006 TREE_SIDE_EFFECTS (t) = 1;
3007 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3009 /* Find the register save area.
3010 The prologue of the function saves it right above the stack frame. */
3011 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3012 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3013 TREE_SIDE_EFFECTS (t) = 1;
3014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
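/* Sketch of the resulting initialization (informal): for

     int f (const char *fmt, ...)

   the single named argument uses one GP register, so va_start sets
   gp_offset to 8, fp_offset to 8 * REGPARM_MAX (48, since no SSE registers
   were consumed by named arguments), overflow_arg_area to the first stack
   argument slot and reg_save_area to the block saved by the prologue.  */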
3017 /* Implement va_arg. */
3019 ix86_va_arg (valist, type)
3020 tree valist, type;
3022 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3023 tree f_gpr, f_fpr, f_ovf, f_sav;
3024 tree gpr, fpr, ovf, sav, t;
3025 int size, rsize;
3026 rtx lab_false, lab_over = NULL_RTX;
3027 rtx addr_rtx, r;
3028 rtx container;
3029 int indirect_p = 0;
3031 /* Only 64bit target needs something special. */
3032 if (!TARGET_64BIT)
3034 return std_expand_builtin_va_arg (valist, type);
3037 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3038 f_fpr = TREE_CHAIN (f_gpr);
3039 f_ovf = TREE_CHAIN (f_fpr);
3040 f_sav = TREE_CHAIN (f_ovf);
3042 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3043 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3044 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3045 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3046 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3048 size = int_size_in_bytes (type);
3049 if (size == -1)
3051 /* Passed by reference. */
3052 indirect_p = 1;
3053 type = build_pointer_type (type);
3054 size = int_size_in_bytes (type);
3056 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3058 container = construct_container (TYPE_MODE (type), type, 0,
3059 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3061 /* Pull the value out of the saved registers ...  */
3064 addr_rtx = gen_reg_rtx (Pmode);
3066 if (container)
3068 rtx int_addr_rtx, sse_addr_rtx;
3069 int needed_intregs, needed_sseregs;
3070 int need_temp;
3072 lab_over = gen_label_rtx ();
3073 lab_false = gen_label_rtx ();
3075 examine_argument (TYPE_MODE (type), type, 0,
3076 &needed_intregs, &needed_sseregs);
3079 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3080 || TYPE_ALIGN (type) > 128);
3082 /* In case we are passing a structure, verify that it is a consecutive block
3083 on the register save area.  If not, we need to do moves. */
3084 if (!need_temp && !REG_P (container))
3086 /* Verify that all registers are strictly consecutive */
3087 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3089 int i;
3091 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3093 rtx slot = XVECEXP (container, 0, i);
3094 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3095 || INTVAL (XEXP (slot, 1)) != i * 16)
3096 need_temp = 1;
3099 else
3101 int i;
3103 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3105 rtx slot = XVECEXP (container, 0, i);
3106 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3107 || INTVAL (XEXP (slot, 1)) != i * 8)
3108 need_temp = 1;
3112 if (!need_temp)
3114 int_addr_rtx = addr_rtx;
3115 sse_addr_rtx = addr_rtx;
3117 else
3119 int_addr_rtx = gen_reg_rtx (Pmode);
3120 sse_addr_rtx = gen_reg_rtx (Pmode);
3122 /* First ensure that we fit completely in registers. */
3123 if (needed_intregs)
3125 emit_cmp_and_jump_insns (expand_expr
3126 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3127 GEN_INT ((REGPARM_MAX - needed_intregs +
3128 1) * 8), GE, const1_rtx, SImode,
3129 1, lab_false);
3131 if (needed_sseregs)
3133 emit_cmp_and_jump_insns (expand_expr
3134 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3135 GEN_INT ((SSE_REGPARM_MAX -
3136 needed_sseregs + 1) * 16 +
3137 REGPARM_MAX * 8), GE, const1_rtx,
3138 SImode, 1, lab_false);
3141 /* Compute index to start of area used for integer regs. */
3142 if (needed_intregs)
3144 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3145 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3146 if (r != int_addr_rtx)
3147 emit_move_insn (int_addr_rtx, r);
3149 if (needed_sseregs)
3151 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3152 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3153 if (r != sse_addr_rtx)
3154 emit_move_insn (sse_addr_rtx, r);
3156 if (need_temp)
3158 int i;
3159 rtx mem;
3161 /* Never use the memory itself, as it has the alias set. */
3162 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3163 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3164 set_mem_alias_set (mem, get_varargs_alias_set ());
3165 set_mem_align (mem, BITS_PER_UNIT);
3167 for (i = 0; i < XVECLEN (container, 0); i++)
3169 rtx slot = XVECEXP (container, 0, i);
3170 rtx reg = XEXP (slot, 0);
3171 enum machine_mode mode = GET_MODE (reg);
3172 rtx src_addr;
3173 rtx src_mem;
3174 int src_offset;
3175 rtx dest_mem;
3177 if (SSE_REGNO_P (REGNO (reg)))
3179 src_addr = sse_addr_rtx;
3180 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3182 else
3184 src_addr = int_addr_rtx;
3185 src_offset = REGNO (reg) * 8;
3187 src_mem = gen_rtx_MEM (mode, src_addr);
3188 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3189 src_mem = adjust_address (src_mem, mode, src_offset);
3190 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3191 emit_move_insn (dest_mem, src_mem);
3195 if (needed_intregs)
3198 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3199 build_int_2 (needed_intregs * 8, 0));
3200 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3201 TREE_SIDE_EFFECTS (t) = 1;
3202 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3204 if (needed_sseregs)
3207 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3208 build_int_2 (needed_sseregs * 16, 0));
3209 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3210 TREE_SIDE_EFFECTS (t) = 1;
3211 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3214 emit_jump_insn (gen_jump (lab_over));
3215 emit_barrier ();
3216 emit_label (lab_false);
3219 /* ... otherwise out of the overflow area. */
3221 /* Care for on-stack alignment if needed. */
3222 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3223 t = ovf;
3224 else
3226 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3227 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3228 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3230 t = save_expr (t);
3232 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3233 if (r != addr_rtx)
3234 emit_move_insn (addr_rtx, r);
3237 build (PLUS_EXPR, TREE_TYPE (t), t,
3238 build_int_2 (rsize * UNITS_PER_WORD, 0));
3239 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3240 TREE_SIDE_EFFECTS (t) = 1;
3241 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3243 if (container)
3244 emit_label (lab_over);
3246 if (indirect_p)
3248 r = gen_rtx_MEM (Pmode, addr_rtx);
3249 set_mem_alias_set (r, get_varargs_alias_set ());
3250 emit_move_insn (addr_rtx, r);
3253 return addr_rtx;
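/* Informal C-level sketch of the code emitted above for an argument needing
   a single GP register (names refer to the va_list layout described in
   ix86_build_va_list; this is illustrative pseudocode, not compiler output):

     if (ap->gp_offset >= REGPARM_MAX * 8)
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area = (char *) addr + rsize * UNITS_PER_WORD;
       }
     else
       {
         addr = (char *) ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }

   and the value is then read from ADDR (after an extra indirection when the
   argument was passed by reference).  */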
3256 /* Return nonzero if OP is either an i387 or SSE fp register. */
3258 any_fp_register_operand (op, mode)
3259 rtx op;
3260 enum machine_mode mode ATTRIBUTE_UNUSED;
3262 return ANY_FP_REG_P (op);
3265 /* Return nonzero if OP is an i387 fp register. */
3267 fp_register_operand (op, mode)
3268 rtx op;
3269 enum machine_mode mode ATTRIBUTE_UNUSED;
3271 return FP_REG_P (op);
3274 /* Return nonzero if OP is a non-fp register_operand. */
3276 register_and_not_any_fp_reg_operand (op, mode)
3277 rtx op;
3278 enum machine_mode mode;
3280 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3283 /* Return nonzero if OP is a register operand other than an
3284 i387 fp register. */
3286 register_and_not_fp_reg_operand (op, mode)
3287 rtx op;
3288 enum machine_mode mode;
3290 return register_operand (op, mode) && !FP_REG_P (op);
3293 /* Return nonzero if OP is general operand representable on x86_64. */
3296 x86_64_general_operand (op, mode)
3297 rtx op;
3298 enum machine_mode mode;
3300 if (!TARGET_64BIT)
3301 return general_operand (op, mode);
3302 if (nonimmediate_operand (op, mode))
3303 return 1;
3304 return x86_64_sign_extended_value (op);
3307 /* Return nonzero if OP is general operand representable on x86_64
3308 as either sign extended or zero extended constant. */
3311 x86_64_szext_general_operand (op, mode)
3312 rtx op;
3313 enum machine_mode mode;
3315 if (!TARGET_64BIT)
3316 return general_operand (op, mode);
3317 if (nonimmediate_operand (op, mode))
3318 return 1;
3319 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3322 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3325 x86_64_nonmemory_operand (op, mode)
3326 rtx op;
3327 enum machine_mode mode;
3329 if (!TARGET_64BIT)
3330 return nonmemory_operand (op, mode);
3331 if (register_operand (op, mode))
3332 return 1;
3333 return x86_64_sign_extended_value (op);
3336 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3339 x86_64_movabs_operand (op, mode)
3340 rtx op;
3341 enum machine_mode mode;
3343 if (!TARGET_64BIT || !flag_pic)
3344 return nonmemory_operand (op, mode);
3345 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3346 return 1;
3347 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3348 return 1;
3349 return 0;
3352 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3355 x86_64_szext_nonmemory_operand (op, mode)
3356 rtx op;
3357 enum machine_mode mode;
3359 if (!TARGET_64BIT)
3360 return nonmemory_operand (op, mode);
3361 if (register_operand (op, mode))
3362 return 1;
3363 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3366 /* Return nonzero if OP is immediate operand representable on x86_64. */
3369 x86_64_immediate_operand (op, mode)
3370 rtx op;
3371 enum machine_mode mode;
3373 if (!TARGET_64BIT)
3374 return immediate_operand (op, mode);
3375 return x86_64_sign_extended_value (op);
3378 /* Return nonzero if OP is immediate operand representable on x86_64. */
3381 x86_64_zext_immediate_operand (op, mode)
3382 rtx op;
3383 enum machine_mode mode ATTRIBUTE_UNUSED;
3385 return x86_64_zero_extended_value (op);
3388 /* Return nonzero if OP is (const_int 1), else return zero. */
3391 const_int_1_operand (op, mode)
3392 rtx op;
3393 enum machine_mode mode ATTRIBUTE_UNUSED;
3395 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3398 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3399 for shift & compare patterns, as shifting by 0 does not change flags),
3400 else return zero. */
3403 const_int_1_31_operand (op, mode)
3404 rtx op;
3405 enum machine_mode mode ATTRIBUTE_UNUSED;
3407 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3410 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3411 reference and a constant. */
3414 symbolic_operand (op, mode)
3415 register rtx op;
3416 enum machine_mode mode ATTRIBUTE_UNUSED;
3418 switch (GET_CODE (op))
3420 case SYMBOL_REF:
3421 case LABEL_REF:
3422 return 1;
3424 case CONST:
3425 op = XEXP (op, 0);
3426 if (GET_CODE (op) == SYMBOL_REF
3427 || GET_CODE (op) == LABEL_REF
3428 || (GET_CODE (op) == UNSPEC
3429 && (XINT (op, 1) == UNSPEC_GOT
3430 || XINT (op, 1) == UNSPEC_GOTOFF
3431 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3432 return 1;
3433 if (GET_CODE (op) != PLUS
3434 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3435 return 0;
3437 op = XEXP (op, 0);
3438 if (GET_CODE (op) == SYMBOL_REF
3439 || GET_CODE (op) == LABEL_REF)
3440 return 1;
3441 /* Only @GOTOFF gets offsets. */
3442 if (GET_CODE (op) != UNSPEC
3443 || XINT (op, 1) != UNSPEC_GOTOFF)
3444 return 0;
3446 op = XVECEXP (op, 0, 0);
3447 if (GET_CODE (op) == SYMBOL_REF
3448 || GET_CODE (op) == LABEL_REF)
3449 return 1;
3450 return 0;
3452 default:
3453 return 0;
3457 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3460 pic_symbolic_operand (op, mode)
3461 register rtx op;
3462 enum machine_mode mode ATTRIBUTE_UNUSED;
3464 if (GET_CODE (op) != CONST)
3465 return 0;
3466 op = XEXP (op, 0);
3467 if (TARGET_64BIT)
3469 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3470 return 1;
3472 else
3474 if (GET_CODE (op) == UNSPEC)
3475 return 1;
3476 if (GET_CODE (op) != PLUS
3477 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3478 return 0;
3479 op = XEXP (op, 0);
3480 if (GET_CODE (op) == UNSPEC)
3481 return 1;
3483 return 0;
3486 /* Return true if OP is a symbolic operand that resolves locally. */
3488 static int
3489 local_symbolic_operand (op, mode)
3490 rtx op;
3491 enum machine_mode mode ATTRIBUTE_UNUSED;
3493 if (GET_CODE (op) == CONST
3494 && GET_CODE (XEXP (op, 0)) == PLUS
3495 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3496 op = XEXP (XEXP (op, 0), 0);
3498 if (GET_CODE (op) == LABEL_REF)
3499 return 1;
3501 if (GET_CODE (op) != SYMBOL_REF)
3502 return 0;
3504 if (SYMBOL_REF_LOCAL_P (op))
3505 return 1;
3507 /* There is, however, a not insubstantial body of code in the rest of
3508 the compiler that assumes it can just stick the results of
3509 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3510 /* ??? This is a hack.  Should update the body of the compiler to
3511 always create a DECL and invoke targetm.encode_section_info. */
3512 if (strncmp (XSTR (op, 0), internal_label_prefix,
3513 internal_label_prefix_len) == 0)
3514 return 1;
3516 return 0;
3519 /* Test for various thread-local symbols. */
3522 tls_symbolic_operand (op, mode)
3523 register rtx op;
3524 enum machine_mode mode ATTRIBUTE_UNUSED;
3526 if (GET_CODE (op) != SYMBOL_REF)
3527 return 0;
3528 return SYMBOL_REF_TLS_MODEL (op);
3531 static inline int
3532 tls_symbolic_operand_1 (op, kind)
3533 rtx op;
3534 enum tls_model kind;
3536 if (GET_CODE (op) != SYMBOL_REF)
3537 return 0;
3538 return SYMBOL_REF_TLS_MODEL (op) == kind;
3542 global_dynamic_symbolic_operand (op, mode)
3543 register rtx op;
3544 enum machine_mode mode ATTRIBUTE_UNUSED;
3546 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3550 local_dynamic_symbolic_operand (op, mode)
3551 register rtx op;
3552 enum machine_mode mode ATTRIBUTE_UNUSED;
3554 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3558 initial_exec_symbolic_operand (op, mode)
3559 register rtx op;
3560 enum machine_mode mode ATTRIBUTE_UNUSED;
3562 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3566 local_exec_symbolic_operand (op, mode)
3567 register rtx op;
3568 enum machine_mode mode ATTRIBUTE_UNUSED;
3570 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3573 /* Test for a valid operand for a call instruction. Don't allow the
3574 arg pointer register or virtual regs since they may decay into
3575 reg + const, which the patterns can't handle. */
3578 call_insn_operand (op, mode)
3579 rtx op;
3580 enum machine_mode mode ATTRIBUTE_UNUSED;
3582 /* Disallow indirect through a virtual register. This leads to
3583 compiler aborts when trying to eliminate them. */
3584 if (GET_CODE (op) == REG
3585 && (op == arg_pointer_rtx
3586 || op == frame_pointer_rtx
3587 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3588 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3589 return 0;
3591 /* Disallow `call 1234'. Due to varying assembler lameness this
3592 gets either rejected or translated to `call .+1234'. */
3593 if (GET_CODE (op) == CONST_INT)
3594 return 0;
3596 /* Explicitly allow SYMBOL_REF even if pic. */
3597 if (GET_CODE (op) == SYMBOL_REF)
3598 return 1;
3600 /* Otherwise we can allow any general_operand in the address. */
3601 return general_operand (op, Pmode);
3604 /* Test for a valid operand for a call instruction. Don't allow the
3605 arg pointer register or virtual regs since they may decay into
3606 reg + const, which the patterns can't handle. */
3609 sibcall_insn_operand (op, mode)
3610 rtx op;
3611 enum machine_mode mode ATTRIBUTE_UNUSED;
3613 /* Disallow indirect through a virtual register. This leads to
3614 compiler aborts when trying to eliminate them. */
3615 if (GET_CODE (op) == REG
3616 && (op == arg_pointer_rtx
3617 || op == frame_pointer_rtx
3618 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3619 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3620 return 0;
3622 /* Explicitly allow SYMBOL_REF even if pic. */
3623 if (GET_CODE (op) == SYMBOL_REF)
3624 return 1;
3626 /* Otherwise we can only allow register operands. */
3627 return register_operand (op, Pmode);
3631 constant_call_address_operand (op, mode)
3632 rtx op;
3633 enum machine_mode mode ATTRIBUTE_UNUSED;
3635 if (GET_CODE (op) == CONST
3636 && GET_CODE (XEXP (op, 0)) == PLUS
3637 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3638 op = XEXP (XEXP (op, 0), 0);
3639 return GET_CODE (op) == SYMBOL_REF;
3642 /* Match exactly zero and one. */
3645 const0_operand (op, mode)
3646 register rtx op;
3647 enum machine_mode mode;
3649 return op == CONST0_RTX (mode);
3653 const1_operand (op, mode)
3654 register rtx op;
3655 enum machine_mode mode ATTRIBUTE_UNUSED;
3657 return op == const1_rtx;
3660 /* Match 2, 4, or 8. Used for leal multiplicands. */
3663 const248_operand (op, mode)
3664 register rtx op;
3665 enum machine_mode mode ATTRIBUTE_UNUSED;
3667 return (GET_CODE (op) == CONST_INT
3668 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
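/* For illustration: 1, 2, 4 and 8 are the only index scales the SIB byte can
   encode, so an operand matching this predicate lets an address such as
   (plus (mult (reg) (const_int 4)) (reg)) be emitted as a single lea,
   e.g. "leal (%ebx,%eax,4), %ecx".  A multiplicand of 3 would have to be
   synthesized some other way.  */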
3671 /* True if this is a constant appropriate for an increment or decrement. */
3674 incdec_operand (op, mode)
3675 register rtx op;
3676 enum machine_mode mode ATTRIBUTE_UNUSED;
3678 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3679 flags register, since the carry flag is not set. */
3680 if (TARGET_PENTIUM4 && !optimize_size)
3681 return 0;
3682 return op == const1_rtx || op == constm1_rtx;
3685 /* Return nonzero if OP is acceptable as operand of DImode shift
3686 expander. */
3689 shiftdi_operand (op, mode)
3690 rtx op;
3691 enum machine_mode mode ATTRIBUTE_UNUSED;
3693 if (TARGET_64BIT)
3694 return nonimmediate_operand (op, mode);
3695 else
3696 return register_operand (op, mode);
3699 /* Return false if this is the stack pointer, or any other fake
3700 register eliminable to the stack pointer. Otherwise, this is
3701 a register operand.
3703 This is used to prevent esp from being used as an index reg,
3704 which would only happen in pathological cases. */
3707 reg_no_sp_operand (op, mode)
3708 register rtx op;
3709 enum machine_mode mode;
3711 rtx t = op;
3712 if (GET_CODE (t) == SUBREG)
3713 t = SUBREG_REG (t);
3714 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3715 return 0;
3717 return register_operand (op, mode);
3721 mmx_reg_operand (op, mode)
3722 register rtx op;
3723 enum machine_mode mode ATTRIBUTE_UNUSED;
3725 return MMX_REG_P (op);
3728 /* Return false if this is any eliminable register. Otherwise
3729 general_operand. */
3732 general_no_elim_operand (op, mode)
3733 register rtx op;
3734 enum machine_mode mode;
3736 rtx t = op;
3737 if (GET_CODE (t) == SUBREG)
3738 t = SUBREG_REG (t);
3739 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3740 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3741 || t == virtual_stack_dynamic_rtx)
3742 return 0;
3743 if (REG_P (t)
3744 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3745 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3746 return 0;
3748 return general_operand (op, mode);
3751 /* Return false if this is any eliminable register. Otherwise
3752 register_operand or const_int. */
3755 nonmemory_no_elim_operand (op, mode)
3756 register rtx op;
3757 enum machine_mode mode;
3759 rtx t = op;
3760 if (GET_CODE (t) == SUBREG)
3761 t = SUBREG_REG (t);
3762 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3763 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3764 || t == virtual_stack_dynamic_rtx)
3765 return 0;
3767 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3770 /* Return false if this is any eliminable register or stack register,
3771 otherwise work like register_operand. */
3774 index_register_operand (op, mode)
3775 register rtx op;
3776 enum machine_mode mode;
3778 rtx t = op;
3779 if (GET_CODE (t) == SUBREG)
3780 t = SUBREG_REG (t);
3781 if (!REG_P (t))
3782 return 0;
3783 if (t == arg_pointer_rtx
3784 || t == frame_pointer_rtx
3785 || t == virtual_incoming_args_rtx
3786 || t == virtual_stack_vars_rtx
3787 || t == virtual_stack_dynamic_rtx
3788 || REGNO (t) == STACK_POINTER_REGNUM)
3789 return 0;
3791 return general_operand (op, mode);
3794 /* Return true if op is a Q_REGS class register. */
3797 q_regs_operand (op, mode)
3798 register rtx op;
3799 enum machine_mode mode;
3801 if (mode != VOIDmode && GET_MODE (op) != mode)
3802 return 0;
3803 if (GET_CODE (op) == SUBREG)
3804 op = SUBREG_REG (op);
3805 return ANY_QI_REG_P (op);
3808 /* Return true if op is a flags register. */
3811 flags_reg_operand (op, mode)
3812 register rtx op;
3813 enum machine_mode mode;
3815 if (mode != VOIDmode && GET_MODE (op) != mode)
3816 return 0;
3817 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3820 /* Return true if op is a NON_Q_REGS class register. */
3823 non_q_regs_operand (op, mode)
3824 register rtx op;
3825 enum machine_mode mode;
3827 if (mode != VOIDmode && GET_MODE (op) != mode)
3828 return 0;
3829 if (GET_CODE (op) == SUBREG)
3830 op = SUBREG_REG (op);
3831 return NON_QI_REG_P (op);
3835 zero_extended_scalar_load_operand (op, mode)
3836 rtx op;
3837 enum machine_mode mode ATTRIBUTE_UNUSED;
3839 unsigned n_elts;
3840 if (GET_CODE (op) != MEM)
3841 return 0;
3842 op = maybe_get_pool_constant (op);
3843 if (!op)
3844 return 0;
3845 if (GET_CODE (op) != CONST_VECTOR)
3846 return 0;
3847 n_elts =
3848 (GET_MODE_SIZE (GET_MODE (op)) /
3849 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3850 for (n_elts--; n_elts > 0; n_elts--)
3852 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3853 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3854 return 0;
3856 return 1;
3859 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3861 vector_move_operand (op, mode)
3862 rtx op;
3863 enum machine_mode mode;
3865 if (nonimmediate_operand (op, mode))
3866 return 1;
3867 if (GET_MODE (op) != mode && mode != VOIDmode)
3868 return 0;
3869 return (op == CONST0_RTX (GET_MODE (op)));
3872 /* Return true if op is a valid address, and does not contain
3873 a segment override. */
3876 no_seg_address_operand (op, mode)
3877 register rtx op;
3878 enum machine_mode mode;
3880 struct ix86_address parts;
3882 if (! address_operand (op, mode))
3883 return 0;
3885 if (! ix86_decompose_address (op, &parts))
3886 abort ();
3888 return parts.seg == SEG_DEFAULT;
3891 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3892 insns. */
3894 sse_comparison_operator (op, mode)
3895 rtx op;
3896 enum machine_mode mode ATTRIBUTE_UNUSED;
3898 enum rtx_code code = GET_CODE (op);
3899 switch (code)
3901 /* Operations supported directly. */
3902 case EQ:
3903 case LT:
3904 case LE:
3905 case UNORDERED:
3906 case NE:
3907 case UNGE:
3908 case UNGT:
3909 case ORDERED:
3910 return 1;
3911 /* These are equivalent to the ones above in non-IEEE comparisons. */
3912 case UNEQ:
3913 case UNLT:
3914 case UNLE:
3915 case LTGT:
3916 case GE:
3917 case GT:
3918 return !TARGET_IEEE_FP;
3919 default:
3920 return 0;
3923 /* Return 1 if OP is a valid comparison operator in a valid mode. */
3925 ix86_comparison_operator (op, mode)
3926 register rtx op;
3927 enum machine_mode mode;
3929 enum machine_mode inmode;
3930 enum rtx_code code = GET_CODE (op);
3931 if (mode != VOIDmode && GET_MODE (op) != mode)
3932 return 0;
3933 if (GET_RTX_CLASS (code) != '<')
3934 return 0;
3935 inmode = GET_MODE (XEXP (op, 0));
3937 if (inmode == CCFPmode || inmode == CCFPUmode)
3939 enum rtx_code second_code, bypass_code;
3940 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3941 return (bypass_code == NIL && second_code == NIL);
3943 switch (code)
3945 case EQ: case NE:
3946 return 1;
3947 case LT: case GE:
3948 if (inmode == CCmode || inmode == CCGCmode
3949 || inmode == CCGOCmode || inmode == CCNOmode)
3950 return 1;
3951 return 0;
3952 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3953 if (inmode == CCmode)
3954 return 1;
3955 return 0;
3956 case GT: case LE:
3957 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3958 return 1;
3959 return 0;
3960 default:
3961 return 0;
3965 /* Return 1 if OP is a valid comparison operator that tests whether the
3966 carry flag is set. */
3968 ix86_carry_flag_operator (op, mode)
3969 register rtx op;
3970 enum machine_mode mode;
3972 enum machine_mode inmode;
3973 enum rtx_code code = GET_CODE (op);
3975 if (mode != VOIDmode && GET_MODE (op) != mode)
3976 return 0;
3977 if (GET_RTX_CLASS (code) != '<')
3978 return 0;
3979 inmode = GET_MODE (XEXP (op, 0));
3980 if (GET_CODE (XEXP (op, 0)) != REG
3981 || REGNO (XEXP (op, 0)) != 17
3982 || XEXP (op, 1) != const0_rtx)
3983 return 0;
3985 if (inmode == CCFPmode || inmode == CCFPUmode)
3987 enum rtx_code second_code, bypass_code;
3989 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3990 if (bypass_code != NIL || second_code != NIL)
3991 return 0;
3992 code = ix86_fp_compare_code_to_integer (code);
3994 else if (inmode != CCmode)
3995 return 0;
3996 return code == LTU;
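/* In other words, only a test of the flags register (hard register 17,
   FLAGS_REG) against zero that reduces to LTU -- "carry set" -- is accepted
   here; such operands typically come from the add-with-carry and
   subtract-with-borrow (adc/sbb) patterns.  */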
3999 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4002 fcmov_comparison_operator (op, mode)
4003 register rtx op;
4004 enum machine_mode mode;
4006 enum machine_mode inmode;
4007 enum rtx_code code = GET_CODE (op);
4009 if (mode != VOIDmode && GET_MODE (op) != mode)
4010 return 0;
4011 if (GET_RTX_CLASS (code) != '<')
4012 return 0;
4013 inmode = GET_MODE (XEXP (op, 0));
4014 if (inmode == CCFPmode || inmode == CCFPUmode)
4016 enum rtx_code second_code, bypass_code;
4018 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4019 if (bypass_code != NIL || second_code != NIL)
4020 return 0;
4021 code = ix86_fp_compare_code_to_integer (code);
4023 /* The i387 supports only a limited set of condition codes. */
4024 switch (code)
4026 case LTU: case GTU: case LEU: case GEU:
4027 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4028 return 1;
4029 return 0;
4030 case ORDERED: case UNORDERED:
4031 case EQ: case NE:
4032 return 1;
4033 default:
4034 return 0;
4038 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4041 promotable_binary_operator (op, mode)
4042 register rtx op;
4043 enum machine_mode mode ATTRIBUTE_UNUSED;
4045 switch (GET_CODE (op))
4047 case MULT:
4048 /* Modern CPUs have the same latency for HImode and SImode multiply,
4049 but the 386 and 486 do HImode multiply faster. */
4050 return ix86_tune > PROCESSOR_I486;
4051 case PLUS:
4052 case AND:
4053 case IOR:
4054 case XOR:
4055 case ASHIFT:
4056 return 1;
4057 default:
4058 return 0;
4062 /* Nearly general operand, but accept any const_double, since we wish
4063 to be able to drop them into memory rather than have them get pulled
4064 into registers. */
4067 cmp_fp_expander_operand (op, mode)
4068 register rtx op;
4069 enum machine_mode mode;
4071 if (mode != VOIDmode && mode != GET_MODE (op))
4072 return 0;
4073 if (GET_CODE (op) == CONST_DOUBLE)
4074 return 1;
4075 return general_operand (op, mode);
4078 /* Match an SI or HImode register for a zero_extract. */
4081 ext_register_operand (op, mode)
4082 register rtx op;
4083 enum machine_mode mode ATTRIBUTE_UNUSED;
4085 int regno;
4086 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4087 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4088 return 0;
4090 if (!register_operand (op, VOIDmode))
4091 return 0;
4093 /* Be careful to accept only registers having upper parts. */
4094 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4095 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4098 /* Return 1 if this is a valid binary floating-point operation.
4099 OP is the expression matched, and MODE is its mode. */
4102 binary_fp_operator (op, mode)
4103 register rtx op;
4104 enum machine_mode mode;
4106 if (mode != VOIDmode && mode != GET_MODE (op))
4107 return 0;
4109 switch (GET_CODE (op))
4111 case PLUS:
4112 case MINUS:
4113 case MULT:
4114 case DIV:
4115 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4117 default:
4118 return 0;
4123 mult_operator (op, mode)
4124 register rtx op;
4125 enum machine_mode mode ATTRIBUTE_UNUSED;
4127 return GET_CODE (op) == MULT;
4131 div_operator (op, mode)
4132 register rtx op;
4133 enum machine_mode mode ATTRIBUTE_UNUSED;
4135 return GET_CODE (op) == DIV;
4139 arith_or_logical_operator (op, mode)
4140 rtx op;
4141 enum machine_mode mode;
4143 return ((mode == VOIDmode || GET_MODE (op) == mode)
4144 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4145 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4148 /* Returns 1 if OP is a memory operand with a displacement. */
4151 memory_displacement_operand (op, mode)
4152 register rtx op;
4153 enum machine_mode mode;
4155 struct ix86_address parts;
4157 if (! memory_operand (op, mode))
4158 return 0;
4160 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4161 abort ();
4163 return parts.disp != NULL_RTX;
4166 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4167 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4169 ??? It seems likely that this will only work because cmpsi is an
4170 expander, and no actual insns use this. */
4173 cmpsi_operand (op, mode)
4174 rtx op;
4175 enum machine_mode mode;
4177 if (nonimmediate_operand (op, mode))
4178 return 1;
4180 if (GET_CODE (op) == AND
4181 && GET_MODE (op) == SImode
4182 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4183 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4184 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4185 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4186 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4187 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4188 return 1;
4190 return 0;
4193 /* Returns 1 if OP is a memory operand that cannot be represented by the
4194 modRM array. */
4197 long_memory_operand (op, mode)
4198 register rtx op;
4199 enum machine_mode mode;
4201 if (! memory_operand (op, mode))
4202 return 0;
4204 return memory_address_length (op) != 0;
4207 /* Return nonzero if the rtx is known aligned. */
4210 aligned_operand (op, mode)
4211 rtx op;
4212 enum machine_mode mode;
4214 struct ix86_address parts;
4216 if (!general_operand (op, mode))
4217 return 0;
4219 /* Registers and immediate operands are always "aligned". */
4220 if (GET_CODE (op) != MEM)
4221 return 1;
4223 /* Don't even try to do any aligned optimizations with volatiles. */
4224 if (MEM_VOLATILE_P (op))
4225 return 0;
4227 op = XEXP (op, 0);
4229 /* Pushes and pops are only valid on the stack pointer. */
4230 if (GET_CODE (op) == PRE_DEC
4231 || GET_CODE (op) == POST_INC)
4232 return 1;
4234 /* Decode the address. */
4235 if (! ix86_decompose_address (op, &parts))
4236 abort ();
4238 if (parts.base && GET_CODE (parts.base) == SUBREG)
4239 parts.base = SUBREG_REG (parts.base);
4240 if (parts.index && GET_CODE (parts.index) == SUBREG)
4241 parts.index = SUBREG_REG (parts.index);
4243 /* Look for some component that isn't known to be aligned. */
4244 if (parts.index)
4246 if (parts.scale < 4
4247 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4248 return 0;
4250 if (parts.base)
4252 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4253 return 0;
4255 if (parts.disp)
4257 if (GET_CODE (parts.disp) != CONST_INT
4258 || (INTVAL (parts.disp) & 3) != 0)
4259 return 0;
4262 /* Didn't find one -- this must be an aligned address. */
4263 return 1;
4266 /* Initialize the table of extra 80387 mathematical constants. */
4268 static void
4269 init_ext_80387_constants ()
4271 static const char * cst[5] =
4273 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4274 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4275 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4276 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4277 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4279 int i;
4281 for (i = 0; i < 5; i++)
4283 real_from_string (&ext_80387_constants_table[i], cst[i]);
4284 /* Ensure each constant is rounded to XFmode precision. */
4285 real_convert (&ext_80387_constants_table[i], XFmode,
4286 &ext_80387_constants_table[i]);
4289 ext_80387_constants_init = 1;
4292 /* Return true if the constant is something that can be loaded with
4293 a special instruction. */
4296 standard_80387_constant_p (x)
4297 rtx x;
4299 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4300 return -1;
4302 if (x == CONST0_RTX (GET_MODE (x)))
4303 return 1;
4304 if (x == CONST1_RTX (GET_MODE (x)))
4305 return 2;
4307 /* For XFmode constants, try to find a special 80387 instruction on
4308 those CPUs that benefit from them. */
4309 if (GET_MODE (x) == XFmode
4310 && x86_ext_80387_constants & TUNEMASK)
4312 REAL_VALUE_TYPE r;
4313 int i;
4315 if (! ext_80387_constants_init)
4316 init_ext_80387_constants ();
4318 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4319 for (i = 0; i < 5; i++)
4320 if (real_identical (&r, &ext_80387_constants_table[i]))
4321 return i + 3;
4324 return 0;
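/* Example: CONST0_RTX (DFmode) yields 1 (fldz) and CONST1_RTX (DFmode)
   yields 2 (fld1); an XFmode constant equal to pi yields 7 on CPUs whose
   tuning sets x86_ext_80387_constants, which the function below translates
   into "fldpi".  */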
4327 /* Return the opcode of the special instruction to be used to load
4328 the constant X. */
4330 const char *
4331 standard_80387_constant_opcode (x)
4332 rtx x;
4334 switch (standard_80387_constant_p (x))
4336 case 1:
4337 return "fldz";
4338 case 2:
4339 return "fld1";
4340 case 3:
4341 return "fldlg2";
4342 case 4:
4343 return "fldln2";
4344 case 5:
4345 return "fldl2e";
4346 case 6:
4347 return "fldl2t";
4348 case 7:
4349 return "fldpi";
4351 abort ();
4354 /* Return the CONST_DOUBLE representing the 80387 constant that is
4355 loaded by the specified special instruction. The argument IDX
4356 matches the return value from standard_80387_constant_p. */
4359 standard_80387_constant_rtx (idx)
4360 int idx;
4362 int i;
4364 if (! ext_80387_constants_init)
4365 init_ext_80387_constants ();
4367 switch (idx)
4369 case 3:
4370 case 4:
4371 case 5:
4372 case 6:
4373 case 7:
4374 i = idx - 3;
4375 break;
4377 default:
4378 abort ();
4381 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4384 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4387 standard_sse_constant_p (x)
4388 rtx x;
4390 if (x == const0_rtx)
4391 return 1;
4392 return (x == CONST0_RTX (GET_MODE (x)));
4395 /* Returns 1 if OP contains a symbol reference */
4398 symbolic_reference_mentioned_p (op)
4399 rtx op;
4401 register const char *fmt;
4402 register int i;
4404 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4405 return 1;
4407 fmt = GET_RTX_FORMAT (GET_CODE (op));
4408 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4410 if (fmt[i] == 'E')
4412 register int j;
4414 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4415 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4416 return 1;
4419 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4420 return 1;
4423 return 0;
4426 /* Return 1 if it is appropriate to emit `ret' instructions in the
4427 body of a function. Do this only if the epilogue is simple, needing a
4428 couple of insns. Prior to reloading, we can't tell how many registers
4429 must be saved, so return 0 then. Return 0 if there is no frame
4430 marker to de-allocate.
4432 If NON_SAVING_SETJMP is defined and true, then it is not possible
4433 for the epilogue to be simple, so return 0. This is a special case
4434 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4435 until final, but jump_optimize may need to know sooner if a
4436 `return' is OK. */
4439 ix86_can_use_return_insn_p ()
4441 struct ix86_frame frame;
4443 #ifdef NON_SAVING_SETJMP
4444 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4445 return 0;
4446 #endif
4448 if (! reload_completed || frame_pointer_needed)
4449 return 0;
4451 /* Don't allow popping more than 32K bytes of args, since that's all we can
4452 do with one instruction. */
4453 if (current_function_pops_args
4454 && current_function_args_size >= 32768)
4455 return 0;
4457 ix86_compute_frame_layout (&frame);
4458 return frame.to_allocate == 0 && frame.nregs == 0;
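/* That is, a bare "ret" is usable only when the epilogue would otherwise be
   empty: no frame pointer, nothing to deallocate, and no call-saved
   registers to restore.  */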
4461 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4463 x86_64_sign_extended_value (value)
4464 rtx value;
4466 switch (GET_CODE (value))
4468 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4469 to be at least 32 and thus all acceptable constants are
4470 represented as CONST_INT. */
4471 case CONST_INT:
4472 if (HOST_BITS_PER_WIDE_INT == 32)
4473 return 1;
4474 else
4476 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4477 return trunc_int_for_mode (val, SImode) == val;
4479 break;
4481 /* For certain code models, the symbolic references are known to fit.
4482 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4483 library. Don't count TLS SYMBOL_REFs here, since they should fit
4484 only if inside an UNSPEC handled below. */
4485 case SYMBOL_REF:
4486 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4488 /* For certain code models, the code is near as well. */
4489 case LABEL_REF:
4490 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4491 || ix86_cmodel == CM_KERNEL);
4493 /* We also may accept the offsetted memory references in certain special
4494 cases. */
4495 case CONST:
4496 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4497 switch (XINT (XEXP (value, 0), 1))
4499 case UNSPEC_GOTPCREL:
4500 case UNSPEC_DTPOFF:
4501 case UNSPEC_GOTNTPOFF:
4502 case UNSPEC_NTPOFF:
4503 return 1;
4504 default:
4505 break;
4507 if (GET_CODE (XEXP (value, 0)) == PLUS)
4509 rtx op1 = XEXP (XEXP (value, 0), 0);
4510 rtx op2 = XEXP (XEXP (value, 0), 1);
4511 HOST_WIDE_INT offset;
4513 if (ix86_cmodel == CM_LARGE)
4514 return 0;
4515 if (GET_CODE (op2) != CONST_INT)
4516 return 0;
4517 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4518 switch (GET_CODE (op1))
4520 case SYMBOL_REF:
4521 /* For CM_SMALL assume that the last object is 16MB below the
4522 end of the 31-bit boundary. We may also accept pretty
4523 large negative constants knowing that all objects are
4524 in the positive half of the address space. */
4525 if (ix86_cmodel == CM_SMALL
4526 && offset < 16*1024*1024
4527 && trunc_int_for_mode (offset, SImode) == offset)
4528 return 1;
4529 /* For CM_KERNEL we know that all objects reside in the
4530 negative half of the 32-bit address space. We may not
4531 accept negative offsets, since they may land just outside
4532 that range, and we may accept pretty large positive ones. */
4533 if (ix86_cmodel == CM_KERNEL
4534 && offset > 0
4535 && trunc_int_for_mode (offset, SImode) == offset)
4536 return 1;
4537 break;
4538 case LABEL_REF:
4539 /* These conditions are similar to SYMBOL_REF ones, just the
4540 constraints for code models differ. */
4541 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4542 && offset < 16*1024*1024
4543 && trunc_int_for_mode (offset, SImode) == offset)
4544 return 1;
4545 if (ix86_cmodel == CM_KERNEL
4546 && offset > 0
4547 && trunc_int_for_mode (offset, SImode) == offset)
4548 return 1;
4549 break;
4550 case UNSPEC:
4551 switch (XINT (op1, 1))
4553 case UNSPEC_DTPOFF:
4554 case UNSPEC_NTPOFF:
4555 if (offset > 0
4556 && trunc_int_for_mode (offset, SImode) == offset)
4557 return 1;
4559 break;
4560 default:
4561 return 0;
4564 return 0;
4565 default:
4566 return 0;
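/* For example, (const_int -0x70000000) is accepted because sign-extending
   its low 32 bits reproduces the value, while (const_int 0x100000000)
   (on a 64-bit host) is rejected because it does not survive truncation
   to SImode.  */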
4570 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4572 x86_64_zero_extended_value (value)
4573 rtx value;
4575 switch (GET_CODE (value))
4577 case CONST_DOUBLE:
4578 if (HOST_BITS_PER_WIDE_INT == 32)
4579 return (GET_MODE (value) == VOIDmode
4580 && !CONST_DOUBLE_HIGH (value));
4581 else
4582 return 0;
4583 case CONST_INT:
4584 if (HOST_BITS_PER_WIDE_INT == 32)
4585 return INTVAL (value) >= 0;
4586 else
4587 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4588 break;
4590 /* For certain code models, the symbolic references are known to fit. */
4591 case SYMBOL_REF:
4592 return ix86_cmodel == CM_SMALL;
4594 /* For certain code models, the code is near as well. */
4595 case LABEL_REF:
4596 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4598 /* We also may accept the offsetted memory references in certain special
4599 cases. */
4600 case CONST:
4601 if (GET_CODE (XEXP (value, 0)) == PLUS)
4603 rtx op1 = XEXP (XEXP (value, 0), 0);
4604 rtx op2 = XEXP (XEXP (value, 0), 1);
4606 if (ix86_cmodel == CM_LARGE)
4607 return 0;
4608 switch (GET_CODE (op1))
4610 case SYMBOL_REF:
4611 return 0;
4612 /* For the small code model we may accept pretty large positive
4613 offsets, since one bit is available for free. Negative
4614 offsets are limited by the size of the NULL pointer area
4615 specified by the ABI. */
4616 if (ix86_cmodel == CM_SMALL
4617 && GET_CODE (op2) == CONST_INT
4618 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4619 && (trunc_int_for_mode (INTVAL (op2), SImode)
4620 == INTVAL (op2)))
4621 return 1;
4622 /* ??? For the kernel, we may accept adjustment of
4623 -0x10000000, since we know that it will just convert
4624 negative address space to positive, but perhaps this
4625 is not worthwhile. */
4626 break;
4627 case LABEL_REF:
4628 /* These conditions are similar to SYMBOL_REF ones, just the
4629 constraints for code models differ. */
4630 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4631 && GET_CODE (op2) == CONST_INT
4632 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4633 && (trunc_int_for_mode (INTVAL (op2), SImode)
4634 == INTVAL (op2)))
4635 return 1;
4636 break;
4637 default:
4638 return 0;
4641 return 0;
4642 default:
4643 return 0;
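/* For example, on a host with 64-bit HOST_WIDE_INT, (const_int 0xffffffff)
   fits the zero-extended field, while (const_int -1), whose upper 32 bits
   are set, does not.  */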
4647 /* Value should be nonzero if functions must have frame pointers.
4648 Zero means the frame pointer need not be set up (and parms may
4649 be accessed via the stack pointer) in functions that seem suitable. */
4652 ix86_frame_pointer_required ()
4654 /* If we accessed previous frames, then the generated code expects
4655 to be able to access the saved ebp value in our frame. */
4656 if (cfun->machine->accesses_prev_frame)
4657 return 1;
4659 /* Several x86 OSes need a frame pointer for other reasons,
4660 usually pertaining to setjmp. */
4661 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4662 return 1;
4664 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4665 the frame pointer by default. Turn it back on now if we've not
4666 got a leaf function. */
4667 if (TARGET_OMIT_LEAF_FRAME_POINTER
4668 && (!current_function_is_leaf))
4669 return 1;
4671 if (current_function_profile)
4672 return 1;
4674 return 0;
4677 /* Record that the current function accesses previous call frames. */
4679 void
4680 ix86_setup_frame_addresses ()
4682 cfun->machine->accesses_prev_frame = 1;
4685 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4686 # define USE_HIDDEN_LINKONCE 1
4687 #else
4688 # define USE_HIDDEN_LINKONCE 0
4689 #endif
4691 static int pic_labels_used;
4693 /* Fills in the label name that should be used for a pc thunk for
4694 the given register. */
4696 static void
4697 get_pc_thunk_name (name, regno)
4698 char name[32];
4699 unsigned int regno;
4701 if (USE_HIDDEN_LINKONCE)
4702 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4703 else
4704 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
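/* For example, with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label built from the
   "LPR" prefix and the register number is used (its exact spelling depends
   on ASM_GENERATE_INTERNAL_LABEL).  */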
4708 /* This function generates the -fpic PC thunks: each one loads its target
4709 register with the return address of its caller and then returns. */
4711 void
4712 ix86_file_end ()
4714 rtx xops[2];
4715 int regno;
4717 for (regno = 0; regno < 8; ++regno)
4719 char name[32];
4721 if (! ((pic_labels_used >> regno) & 1))
4722 continue;
4724 get_pc_thunk_name (name, regno);
4726 if (USE_HIDDEN_LINKONCE)
4728 tree decl;
4730 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4731 error_mark_node);
4732 TREE_PUBLIC (decl) = 1;
4733 TREE_STATIC (decl) = 1;
4734 DECL_ONE_ONLY (decl) = 1;
4736 (*targetm.asm_out.unique_section) (decl, 0);
4737 named_section (decl, NULL, 0);
4739 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4740 fputs ("\t.hidden\t", asm_out_file);
4741 assemble_name (asm_out_file, name);
4742 fputc ('\n', asm_out_file);
4743 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4745 else
4747 text_section ();
4748 ASM_OUTPUT_LABEL (asm_out_file, name);
4751 xops[0] = gen_rtx_REG (SImode, regno);
4752 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4753 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4754 output_asm_insn ("ret", xops);
4757 if (NEED_INDICATE_EXEC_STACK)
4758 file_end_indicate_exec_stack ();
4761 /* Emit code for the SET_GOT patterns. */
4763 const char *
4764 output_set_got (dest)
4765 rtx dest;
4767 rtx xops[3];
4769 xops[0] = dest;
4770 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4772 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4774 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4776 if (!flag_pic)
4777 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4778 else
4779 output_asm_insn ("call\t%a2", xops);
4781 #if TARGET_MACHO
4782 /* Output the "canonical" label name ("Lxx$pb") here too. This
4783 is what will be referred to by the Mach-O PIC subsystem. */
4784 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4785 #endif
4786 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4787 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4789 if (flag_pic)
4790 output_asm_insn ("pop{l}\t%0", xops);
4792 else
4794 char name[32];
4795 get_pc_thunk_name (name, REGNO (dest));
4796 pic_labels_used |= 1 << REGNO (dest);
4798 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4799 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4800 output_asm_insn ("call\t%X2", xops);
4803 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4804 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4805 else if (!TARGET_MACHO)
4806 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4808 return "";
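/* Roughly: for -fpic without deep-branch-prediction tuning this emits the
   classic "call next insn; pop %reg" sequence to obtain the PC, otherwise it
   calls the __i686.get_pc_thunk.<reg> helper emitted by ix86_file_end; in
   either case the _GLOBAL_OFFSET_TABLE_ displacement is then added to form
   the GOT pointer.  */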
4811 /* Generate a "push" pattern for input ARG. */
4813 static rtx
4814 gen_push (arg)
4815 rtx arg;
4817 return gen_rtx_SET (VOIDmode,
4818 gen_rtx_MEM (Pmode,
4819 gen_rtx_PRE_DEC (Pmode,
4820 stack_pointer_rtx)),
4821 arg);
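/* The RTL built here is (set (mem (pre_dec (reg sp))) arg), the form that
   the backend's push patterns recognize as a single push instruction.  */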
4824 /* Return >= 0 if there is an unused call-clobbered register available
4825 for the entire function. */
4827 static unsigned int
4828 ix86_select_alt_pic_regnum ()
4830 if (current_function_is_leaf && !current_function_profile)
4832 int i;
4833 for (i = 2; i >= 0; --i)
4834 if (!regs_ever_live[i])
4835 return i;
4838 return INVALID_REGNUM;
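/* Hard registers 2, 1 and 0 scanned above are %ecx, %edx and %eax; in a
   leaf, non-profiled function any of them that is never live can carry the
   PIC pointer, sparing the usual save and restore of %ebx.  */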
4841 /* Return 1 if we need to save REGNO. */
4842 static int
4843 ix86_save_reg (regno, maybe_eh_return)
4844 unsigned int regno;
4845 int maybe_eh_return;
4847 if (pic_offset_table_rtx
4848 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4849 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4850 || current_function_profile
4851 || current_function_calls_eh_return
4852 || current_function_uses_const_pool))
4854 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4855 return 0;
4856 return 1;
4859 if (current_function_calls_eh_return && maybe_eh_return)
4861 unsigned i;
4862 for (i = 0; ; i++)
4864 unsigned test = EH_RETURN_DATA_REGNO (i);
4865 if (test == INVALID_REGNUM)
4866 break;
4867 if (test == regno)
4868 return 1;
4872 return (regs_ever_live[regno]
4873 && !call_used_regs[regno]
4874 && !fixed_regs[regno]
4875 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4878 /* Return number of registers to be saved on the stack. */
4880 static int
4881 ix86_nsaved_regs ()
4883 int nregs = 0;
4884 int regno;
4886 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4887 if (ix86_save_reg (regno, true))
4888 nregs++;
4889 return nregs;
4892 /* Return the offset between two registers, one to be eliminated, and the other
4893 its replacement, at the start of a routine. */
4895 HOST_WIDE_INT
4896 ix86_initial_elimination_offset (from, to)
4897 int from;
4898 int to;
4900 struct ix86_frame frame;
4901 ix86_compute_frame_layout (&frame);
4903 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4904 return frame.hard_frame_pointer_offset;
4905 else if (from == FRAME_POINTER_REGNUM
4906 && to == HARD_FRAME_POINTER_REGNUM)
4907 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4908 else
4910 if (to != STACK_POINTER_REGNUM)
4911 abort ();
4912 else if (from == ARG_POINTER_REGNUM)
4913 return frame.stack_pointer_offset;
4914 else if (from != FRAME_POINTER_REGNUM)
4915 abort ();
4916 else
4917 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4921 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
4923 static void
4924 ix86_compute_frame_layout (frame)
4925 struct ix86_frame *frame;
4927 HOST_WIDE_INT total_size;
4928 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4929 int offset;
4930 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4931 HOST_WIDE_INT size = get_frame_size ();
4933 frame->nregs = ix86_nsaved_regs ();
4934 total_size = size;
4936 /* During the reload iteration the number of registers saved can change.
4937 Recompute the value as needed. Do not recompute when the number of registers
4938 didn't change, as reload makes multiple calls to this function and does not
4939 expect the decision to change within a single iteration. */
4940 if (!optimize_size
4941 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4943 int count = frame->nregs;
4945 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4946 /* The fast prologue uses move instead of push to save registers. This
4947 is significantly longer, but also executes faster as modern hardware
4948 can execute the moves in parallel, but can't do that for push/pop.
4950 Be careful about choosing which prologue to emit: when the function takes
4951 many instructions to execute we may as well use the slow version, likewise
4952 when the function is known to be outside a hot spot (this is known only
4953 with profile feedback). Weight the size of the function by the number of
4954 registers to save, as it is cheap to use one or two push instructions but
4955 very slow to use many of them. */
4956 if (count)
4957 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4958 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4959 || (flag_branch_probabilities
4960 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4961 cfun->machine->use_fast_prologue_epilogue = false;
4962 else
4963 cfun->machine->use_fast_prologue_epilogue
4964 = !expensive_function_p (count);
4966 if (TARGET_PROLOGUE_USING_MOVE
4967 && cfun->machine->use_fast_prologue_epilogue)
4968 frame->save_regs_using_mov = true;
4969 else
4970 frame->save_regs_using_mov = false;
4973 /* Skip return address and saved base pointer. */
4974 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4976 frame->hard_frame_pointer_offset = offset;
4978 /* Do some sanity checking of stack_alignment_needed and
4979 preferred_alignment, since the i386 port is the only one using these
4980 features, and they may break easily. */
4982 if (size && !stack_alignment_needed)
4983 abort ();
4984 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4985 abort ();
4986 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4987 abort ();
4988 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4989 abort ();
4991 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4992 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4994 /* Register save area */
4995 offset += frame->nregs * UNITS_PER_WORD;
4997 /* Va-arg area */
4998 if (ix86_save_varrargs_registers)
5000 offset += X86_64_VARARGS_SIZE;
5001 frame->va_arg_size = X86_64_VARARGS_SIZE;
5003 else
5004 frame->va_arg_size = 0;
5006 /* Align start of frame for local function. */
5007 frame->padding1 = ((offset + stack_alignment_needed - 1)
5008 & -stack_alignment_needed) - offset;
5010 offset += frame->padding1;
5012 /* Frame pointer points here. */
5013 frame->frame_pointer_offset = offset;
5015 offset += size;
5017 /* Add outgoing arguments area. Can be skipped if we eliminated
5018 all the function calls as dead code. */
5019 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
5021 offset += current_function_outgoing_args_size;
5022 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5024 else
5025 frame->outgoing_arguments_size = 0;
5027 /* Align stack boundary. Only needed if we're calling another function
5028 or using alloca. */
5029 if (!current_function_is_leaf || current_function_calls_alloca)
5030 frame->padding2 = ((offset + preferred_alignment - 1)
5031 & -preferred_alignment) - offset;
5032 else
5033 frame->padding2 = 0;
5035 offset += frame->padding2;
5037 /* We've reached end of stack frame. */
5038 frame->stack_pointer_offset = offset;
5040 /* Size prologue needs to allocate. */
5041 frame->to_allocate =
5042 (size + frame->padding1 + frame->padding2
5043 + frame->outgoing_arguments_size + frame->va_arg_size);
5045 if (!frame->to_allocate && frame->nregs <= 1)
5046 frame->save_regs_using_mov = false;
5048 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5049 && current_function_is_leaf)
5051 frame->red_zone_size = frame->to_allocate;
5052 if (frame->save_regs_using_mov)
5053 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5054 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5055 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5057 else
5058 frame->red_zone_size = 0;
5059 frame->to_allocate -= frame->red_zone_size;
5060 frame->stack_pointer_offset -= frame->red_zone_size;
5061 #if 0
5062 fprintf (stderr, "nregs: %i\n", frame->nregs);
5063 fprintf (stderr, "size: %i\n", size);
5064 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5065 fprintf (stderr, "padding1: %i\n", frame->padding1);
5066 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5067 fprintf (stderr, "padding2: %i\n", frame->padding2);
5068 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5069 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5070 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5071 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5072 frame->hard_frame_pointer_offset);
5073 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5074 #endif
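/* Roughly, the layout being computed, from higher to lower addresses, is:
   the return address, the saved %ebp (when frame_pointer_needed, this is
   where the hard frame pointer points), the register save area of nregs
   words, the va-arg register save area, padding1, the local variables
   (frame_pointer_offset marks their top), the outgoing argument area,
   padding2, and then the point the stack pointer is left at; with the red
   zone in use, stack_pointer_offset is reduced by red_zone_size.  */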
5077 /* Emit code to save registers in the prologue. */
5079 static void
5080 ix86_emit_save_regs ()
5082 register int regno;
5083 rtx insn;
5085 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5086 if (ix86_save_reg (regno, true))
5088 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5089 RTX_FRAME_RELATED_P (insn) = 1;
5093 /* Emit code to save registers using MOV insns. The first register
5094 is saved at POINTER + OFFSET. */
5095 static void
5096 ix86_emit_save_regs_using_mov (pointer, offset)
5097 rtx pointer;
5098 HOST_WIDE_INT offset;
5100 int regno;
5101 rtx insn;
5103 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5104 if (ix86_save_reg (regno, true))
5106 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5107 Pmode, offset),
5108 gen_rtx_REG (Pmode, regno));
5109 RTX_FRAME_RELATED_P (insn) = 1;
5110 offset += UNITS_PER_WORD;
5114 /* Expand the prologue into a bunch of separate insns. */
5116 void
5117 ix86_expand_prologue ()
5119 rtx insn;
5120 bool pic_reg_used;
5121 struct ix86_frame frame;
5122 HOST_WIDE_INT allocate;
5124 ix86_compute_frame_layout (&frame);
5126 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5127 slower on all targets. Also sdb doesn't like it. */
5129 if (frame_pointer_needed)
5131 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5132 RTX_FRAME_RELATED_P (insn) = 1;
5134 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5135 RTX_FRAME_RELATED_P (insn) = 1;
5138 allocate = frame.to_allocate;
5140 if (!frame.save_regs_using_mov)
5141 ix86_emit_save_regs ();
5142 else
5143 allocate += frame.nregs * UNITS_PER_WORD;
5145 /* When using the red zone we may start register saving before allocating
5146 the stack frame, saving one cycle of the prologue. */
5147 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5148 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5149 : stack_pointer_rtx,
5150 -frame.nregs * UNITS_PER_WORD);
5152 if (allocate == 0)
5154 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5156 insn = emit_insn (gen_pro_epilogue_adjust_stack
5157 (stack_pointer_rtx, stack_pointer_rtx,
5158 GEN_INT (-allocate)));
5159 RTX_FRAME_RELATED_P (insn) = 1;
5161 else
5163 /* ??? Is this only valid for Win32? */
5165 rtx arg0, sym;
5167 if (TARGET_64BIT)
5168 abort ();
5170 arg0 = gen_rtx_REG (SImode, 0);
5171 emit_move_insn (arg0, GEN_INT (allocate));
5173 sym = gen_rtx_MEM (FUNCTION_MODE,
5174 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5175 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5177 CALL_INSN_FUNCTION_USAGE (insn)
5178 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5179 CALL_INSN_FUNCTION_USAGE (insn));
5181 /* Don't allow the scheduling pass to move insns across the __alloca
5182 call. */
5183 emit_insn (gen_blockage (const0_rtx));
5185 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5187 if (!frame_pointer_needed || !frame.to_allocate)
5188 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5189 else
5190 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5191 -frame.nregs * UNITS_PER_WORD);
5194 pic_reg_used = false;
5195 if (pic_offset_table_rtx
5196 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5197 || current_function_profile))
5199 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5201 if (alt_pic_reg_used != INVALID_REGNUM)
5202 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5204 pic_reg_used = true;
5207 if (pic_reg_used)
5209 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5211 /* Even with accurate pre-reload life analysis, we can wind up
5212 deleting all references to the pic register after reload.
5213 Consider if cross-jumping unifies two sides of a branch
5214 controlled by a comparison vs the only read from a global.
5215 In which case, allow the set_got to be deleted, though we're
5216 too late to do anything about the ebx save in the prologue. */
5217 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5220 /* Prevent function calls from being scheduled before the call to mcount.
5221 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
5222 if (current_function_profile)
5223 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
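/* In summary, the prologue emitted above: optionally pushes %ebp and copies
   %esp into it, saves the call-saved registers (with pushes, or with moves
   for the fast prologue), allocates the frame either by adjusting %esp
   directly or, for large stack-probed frames, by calling _alloca with the
   size in %eax, and finally materializes the PIC register when needed.  */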
5226 /* Emit code to restore saved registers using MOV insns. First register
5227 is restored from POINTER + OFFSET. */
5228 static void
5229 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5230 rtx pointer;
5231 int offset;
5232 int maybe_eh_return;
5234 int regno;
5236 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5237 if (ix86_save_reg (regno, maybe_eh_return))
5239 emit_move_insn (gen_rtx_REG (Pmode, regno),
5240 adjust_address (gen_rtx_MEM (Pmode, pointer),
5241 Pmode, offset));
5242 offset += UNITS_PER_WORD;
5246 /* Restore function stack, frame, and registers. */
5248 void
5249 ix86_expand_epilogue (style)
5250 int style;
5252 int regno;
5253 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5254 struct ix86_frame frame;
5255 HOST_WIDE_INT offset;
5257 ix86_compute_frame_layout (&frame);
5259 /* Calculate start of saved registers relative to ebp. Special care
5260 must be taken for the normal return case of a function using
5261 eh_return: the eax and edx registers are marked as saved, but not
5262 restored along this path. */
5263 offset = frame.nregs;
5264 if (current_function_calls_eh_return && style != 2)
5265 offset -= 2;
5266 offset *= -UNITS_PER_WORD;
5268 /* If we're only restoring one register and sp is not valid then
5269 use a move instruction to restore the register, since it's
5270 less work than reloading sp and popping the register.
5272 The default code results in a stack adjustment using an add/lea instruction,
5273 while this code results in a LEAVE instruction (or discrete equivalent),
5274 so it is profitable in some other cases as well, especially when there
5275 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5276 and there is exactly one register to pop. This heuristic may need some
5277 tuning in future. */
5278 if ((!sp_valid && frame.nregs <= 1)
5279 || (TARGET_EPILOGUE_USING_MOVE
5280 && cfun->machine->use_fast_prologue_epilogue
5281 && (frame.nregs > 1 || frame.to_allocate))
5282 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5283 || (frame_pointer_needed && TARGET_USE_LEAVE
5284 && cfun->machine->use_fast_prologue_epilogue
5285 && frame.nregs == 1)
5286 || current_function_calls_eh_return)
5288 /* Restore registers. We can use ebp or esp to address the memory
5289 locations. If both are available, default to ebp, since offsets
5290 are known to be small. The only exception is esp pointing directly to the
5291 end of the block of saved registers, where we may simplify the addressing
5292 mode. */
5294 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5295 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5296 frame.to_allocate, style == 2);
5297 else
5298 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5299 offset, style == 2);
5301 /* eh_return epilogues need %ecx added to the stack pointer. */
5302 if (style == 2)
5304 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5306 if (frame_pointer_needed)
5308 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5309 tmp = plus_constant (tmp, UNITS_PER_WORD);
5310 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5312 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5313 emit_move_insn (hard_frame_pointer_rtx, tmp);
5315 emit_insn (gen_pro_epilogue_adjust_stack
5316 (stack_pointer_rtx, sa, const0_rtx));
5318 else
5320 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5321 tmp = plus_constant (tmp, (frame.to_allocate
5322 + frame.nregs * UNITS_PER_WORD));
5323 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5326 else if (!frame_pointer_needed)
5327 emit_insn (gen_pro_epilogue_adjust_stack
5328 (stack_pointer_rtx, stack_pointer_rtx,
5329 GEN_INT (frame.to_allocate
5330 + frame.nregs * UNITS_PER_WORD)));
5331 /* If not an i386, mov & pop is faster than "leave". */
5332 else if (TARGET_USE_LEAVE || optimize_size
5333 || !cfun->machine->use_fast_prologue_epilogue)
5334 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5335 else
5337 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5338 hard_frame_pointer_rtx,
5339 const0_rtx));
5340 if (TARGET_64BIT)
5341 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5342 else
5343 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5346 else
5348 /* First step is to deallocate the stack frame so that we can
5349 pop the registers. */
5350 if (!sp_valid)
5352 if (!frame_pointer_needed)
5353 abort ();
5354 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5355 hard_frame_pointer_rtx,
5356 GEN_INT (offset)));
5358 else if (frame.to_allocate)
5359 emit_insn (gen_pro_epilogue_adjust_stack
5360 (stack_pointer_rtx, stack_pointer_rtx,
5361 GEN_INT (frame.to_allocate)));
5363 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5364 if (ix86_save_reg (regno, false))
5366 if (TARGET_64BIT)
5367 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5368 else
5369 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5371 if (frame_pointer_needed)
5373 /* Leave results in shorter dependency chains on CPUs that are
5374 able to grok it fast. */
5375 if (TARGET_USE_LEAVE)
5376 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5377 else if (TARGET_64BIT)
5378 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5379 else
5380 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5384 /* Sibcall epilogues don't want a return instruction. */
5385 if (style == 0)
5386 return;
5388 if (current_function_pops_args && current_function_args_size)
5390 rtx popc = GEN_INT (current_function_pops_args);
5392 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5393 return address, do an explicit add, and jump indirectly to the
5394 caller. */
5396 if (current_function_pops_args >= 65536)
5398 rtx ecx = gen_rtx_REG (SImode, 2);
5400 /* There is no "pascal" calling convention in the 64-bit ABI. */
5401 if (TARGET_64BIT)
5402 abort ();
5404 emit_insn (gen_popsi1 (ecx));
5405 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5406 emit_jump_insn (gen_return_indirect_internal (ecx));
5408 else
5409 emit_jump_insn (gen_return_pop_internal (popc));
5411 else
5412 emit_jump_insn (gen_return_internal ());
5415 /* Reset from the function's potential modifications. */
5417 static void
5418 ix86_output_function_epilogue (file, size)
5419 FILE *file ATTRIBUTE_UNUSED;
5420 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5422 if (pic_offset_table_rtx)
5423 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5426 /* Extract the parts of an RTL expression that is a valid memory address
5427 for an instruction. Return 0 if the structure of the address is
5428 grossly off. Return -1 if the address contains ASHIFT, so it is not
5429 strictly valid, but still useful for computing the length of an lea instruction. */
5431 static int
5432 ix86_decompose_address (addr, out)
5433 register rtx addr;
5434 struct ix86_address *out;
5436 rtx base = NULL_RTX;
5437 rtx index = NULL_RTX;
5438 rtx disp = NULL_RTX;
5439 HOST_WIDE_INT scale = 1;
5440 rtx scale_rtx = NULL_RTX;
5441 int retval = 1;
5442 enum ix86_address_seg seg = SEG_DEFAULT;
5444 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5445 base = addr;
5446 else if (GET_CODE (addr) == PLUS)
5448 rtx addends[4], op;
5449 int n = 0, i;
5451 op = addr;
5454 if (n >= 4)
5455 return 0;
5456 addends[n++] = XEXP (op, 1);
5457 op = XEXP (op, 0);
5459 while (GET_CODE (op) == PLUS);
5460 if (n >= 4)
5461 return 0;
5462 addends[n] = op;
5464 for (i = n; i >= 0; --i)
5466 op = addends[i];
5467 switch (GET_CODE (op))
5469 case MULT:
5470 if (index)
5471 return 0;
5472 index = XEXP (op, 0);
5473 scale_rtx = XEXP (op, 1);
5474 break;
5476 case UNSPEC:
5477 if (XINT (op, 1) == UNSPEC_TP
5478 && TARGET_TLS_DIRECT_SEG_REFS
5479 && seg == SEG_DEFAULT)
5480 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5481 else
5482 return 0;
5483 break;
5485 case REG:
5486 case SUBREG:
5487 if (!base)
5488 base = op;
5489 else if (!index)
5490 index = op;
5491 else
5492 return 0;
5493 break;
5495 case CONST:
5496 case CONST_INT:
5497 case SYMBOL_REF:
5498 case LABEL_REF:
5499 if (disp)
5500 return 0;
5501 disp = op;
5502 break;
5504 default:
5505 return 0;
5509 else if (GET_CODE (addr) == MULT)
5511 index = XEXP (addr, 0); /* index*scale */
5512 scale_rtx = XEXP (addr, 1);
5514 else if (GET_CODE (addr) == ASHIFT)
5516 rtx tmp;
5518 /* We're called for lea too, which implements ashift on occasion. */
5519 index = XEXP (addr, 0);
5520 tmp = XEXP (addr, 1);
5521 if (GET_CODE (tmp) != CONST_INT)
5522 return 0;
5523 scale = INTVAL (tmp);
5524 if ((unsigned HOST_WIDE_INT) scale > 3)
5525 return 0;
5526 scale = 1 << scale;
5527 retval = -1;
5529 else
5530 disp = addr; /* displacement */
5532 /* Extract the integral value of scale. */
5533 if (scale_rtx)
5535 if (GET_CODE (scale_rtx) != CONST_INT)
5536 return 0;
5537 scale = INTVAL (scale_rtx);
5540 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5541 if (base && index && scale == 1
5542 && (index == arg_pointer_rtx
5543 || index == frame_pointer_rtx
5544 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5546 rtx tmp = base;
5547 base = index;
5548 index = tmp;
5551 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5552 if ((base == hard_frame_pointer_rtx
5553 || base == frame_pointer_rtx
5554 || base == arg_pointer_rtx) && !disp)
5555 disp = const0_rtx;
5557 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5558 Avoid this by transforming to [%esi+0]. */
5559 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5560 && base && !index && !disp
5561 && REG_P (base)
5562 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5563 disp = const0_rtx;
5565 /* Special case: encode reg+reg instead of reg*2. */
5566 if (!base && index && scale && scale == 2)
5567 base = index, scale = 1;
5569 /* Special case: scaling cannot be encoded without base or displacement. */
5570 if (!base && !disp && index && scale != 1)
5571 disp = const0_rtx;
5573 out->base = base;
5574 out->index = index;
5575 out->disp = disp;
5576 out->scale = scale;
5577 out->seg = seg;
5579 return retval;
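/* Worked example: for the address
     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
   the addend loop above produces base = %ebx, index = %eax, scale = 4 and
   disp = 12, i.e. the operand that assembles as 12(%ebx,%eax,4).  */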
5582 /* Return cost of the memory address x.
5583 For i386, it is better to use a complex address than let gcc copy
5584 the address into a reg and make a new pseudo. But not if the address
5585 requires two regs - that would mean more pseudos with longer
5586 lifetimes. */
5587 static int
5588 ix86_address_cost (x)
5589 rtx x;
5591 struct ix86_address parts;
5592 int cost = 1;
5594 if (!ix86_decompose_address (x, &parts))
5595 abort ();
5597 if (parts.base && GET_CODE (parts.base) == SUBREG)
5598 parts.base = SUBREG_REG (parts.base);
5599 if (parts.index && GET_CODE (parts.index) == SUBREG)
5600 parts.index = SUBREG_REG (parts.index);
5602 /* More complex memory references are better. */
5603 if (parts.disp && parts.disp != const0_rtx)
5604 cost--;
5605 if (parts.seg != SEG_DEFAULT)
5606 cost--;
5608 /* Attempt to minimize number of registers in the address. */
5609 if ((parts.base
5610 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5611 || (parts.index
5612 && (!REG_P (parts.index)
5613 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5614 cost++;
5616 if (parts.base
5617 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5618 && parts.index
5619 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5620 && parts.base != parts.index)
5621 cost++;
5623 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5624 since its predecode logic can't detect the length of instructions
5625 and decoding degenerates to the vector decoder. Increase the cost of
5626 such addresses here. The penalty is at least 2 cycles. It may be
5627 worthwhile to split such addresses or even refuse them entirely.
5629 The following addressing modes are affected:
5630 [base+scale*index]
5631 [scale*index+disp]
5632 [base+index]
5634 The first and last cases may be avoidable by explicitly coding a zero
5635 displacement into the memory address, but I don't have an AMD-K6 machine
5636 handy to check this theory. */
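/* For illustration: [%ebx+%eax*2] is encoded with mod=00 and r/m=100b
   (an SIB byte follows), i.e. the 00_xxx_100b pattern above, while
   [%ebx+%eax*2+0] forces an explicit disp8 of zero and mod=01, at the
   cost of one extra byte.  */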
5638 if (TARGET_K6
5639 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5640 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5641 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5642 cost += 10;
5644 return cost;
5647 /* If X is a machine specific address (i.e. a symbol or label being
5648 referenced as a displacement from the GOT implemented using an
5649 UNSPEC), then return the base term. Otherwise return X. */
5652 ix86_find_base_term (x)
5653 rtx x;
5655 rtx term;
5657 if (TARGET_64BIT)
5659 if (GET_CODE (x) != CONST)
5660 return x;
5661 term = XEXP (x, 0);
5662 if (GET_CODE (term) == PLUS
5663 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5664 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5665 term = XEXP (term, 0);
5666 if (GET_CODE (term) != UNSPEC
5667 || XINT (term, 1) != UNSPEC_GOTPCREL)
5668 return x;
5670 term = XVECEXP (term, 0, 0);
5672 if (GET_CODE (term) != SYMBOL_REF
5673 && GET_CODE (term) != LABEL_REF)
5674 return x;
5676 return term;
5679 term = ix86_delegitimize_address (x);
5681 if (GET_CODE (term) != SYMBOL_REF
5682 && GET_CODE (term) != LABEL_REF)
5683 return x;
5685 return term;
5688 /* Determine if a given RTX is a valid constant. We already know this
5689 satisfies CONSTANT_P. */
5691 bool
5692 legitimate_constant_p (x)
5693 rtx x;
5695 rtx inner;
5697 switch (GET_CODE (x))
5699 case SYMBOL_REF:
5700 /* TLS symbols are not constant. */
5701 if (tls_symbolic_operand (x, Pmode))
5702 return false;
5703 break;
5705 case CONST:
5706 inner = XEXP (x, 0);
5708 /* Offsets of TLS symbols are never valid.
5709 Discourage CSE from creating them. */
5710 if (GET_CODE (inner) == PLUS
5711 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5712 return false;
5714 if (GET_CODE (inner) == PLUS)
5716 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5717 return false;
5718 inner = XEXP (inner, 0);
5721 /* Only some unspecs are valid as "constants". */
5722 if (GET_CODE (inner) == UNSPEC)
5723 switch (XINT (inner, 1))
5725 case UNSPEC_TPOFF:
5726 case UNSPEC_NTPOFF:
5727 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5728 case UNSPEC_DTPOFF:
5729 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5730 default:
5731 return false;
5733 break;
5735 default:
5736 break;
5739 /* Otherwise we handle everything else in the move patterns. */
5740 return true;
5743 /* Determine if it's legal to put X into the constant pool. This
5744 is not possible for the address of thread-local symbols, which
5745 is checked above. */
5747 static bool
5748 ix86_cannot_force_const_mem (x)
5749 rtx x;
5751 return !legitimate_constant_p (x);
5754 /* Determine if a given RTX is a valid constant address. */
5756 bool
5757 constant_address_p (x)
5758 rtx x;
5760 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5763 /* Nonzero if the constant value X is a legitimate general operand
5764 when generating PIC code. It is given that flag_pic is on and
5765 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5767 bool
5768 legitimate_pic_operand_p (x)
5769 rtx x;
5771 rtx inner;
5773 switch (GET_CODE (x))
5775 case CONST:
5776 inner = XEXP (x, 0);
5778 /* Only some unspecs are valid as "constants". */
5779 if (GET_CODE (inner) == UNSPEC)
5780 switch (XINT (inner, 1))
5782 case UNSPEC_TPOFF:
5783 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5784 default:
5785 return false;
5787 /* FALLTHRU */
5789 case SYMBOL_REF:
5790 case LABEL_REF:
5791 return legitimate_pic_address_disp_p (x);
5793 default:
5794 return true;
5798 /* Determine if a given CONST RTX is a valid memory displacement
5799 in PIC mode. */
5802 legitimate_pic_address_disp_p (disp)
5803 register rtx disp;
5805 bool saw_plus;
5807 /* In 64bit mode we can allow direct addresses of symbols and labels
5808 when they are not dynamic symbols. */
5809 if (TARGET_64BIT)
5811 /* TLS references should always be enclosed in UNSPEC. */
5812 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5813 return 0;
5814 if (GET_CODE (disp) == SYMBOL_REF
5815 && ix86_cmodel == CM_SMALL_PIC
5816 && SYMBOL_REF_LOCAL_P (disp))
5817 return 1;
5818 if (GET_CODE (disp) == LABEL_REF)
5819 return 1;
5820 if (GET_CODE (disp) == CONST
5821 && GET_CODE (XEXP (disp, 0)) == PLUS
5822 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5823 && ix86_cmodel == CM_SMALL_PIC
5824 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5825 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5826 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5827 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5828 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5829 return 1;
5831 if (GET_CODE (disp) != CONST)
5832 return 0;
5833 disp = XEXP (disp, 0);
5835 if (TARGET_64BIT)
5837 /* It is not safe to allow PLUS expressions here, since that would assume
5838 a limit on the distance to GOT entries. We should not need these anyway. */
5839 if (GET_CODE (disp) != UNSPEC
5840 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5841 return 0;
5843 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5844 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5845 return 0;
5846 return 1;
5849 saw_plus = false;
5850 if (GET_CODE (disp) == PLUS)
5852 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5853 return 0;
5854 disp = XEXP (disp, 0);
5855 saw_plus = true;
5858 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5859 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5861 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5862 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5863 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5865 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5866 if (strstr (sym_name, "$pb") != 0)
5867 return 1;
5871 if (GET_CODE (disp) != UNSPEC)
5872 return 0;
5874 switch (XINT (disp, 1))
5876 case UNSPEC_GOT:
5877 if (saw_plus)
5878 return false;
5879 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5880 case UNSPEC_GOTOFF:
5881 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5882 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5883 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5884 return false;
5885 case UNSPEC_GOTTPOFF:
5886 case UNSPEC_GOTNTPOFF:
5887 case UNSPEC_INDNTPOFF:
5888 if (saw_plus)
5889 return false;
5890 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5891 case UNSPEC_NTPOFF:
5892 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5893 case UNSPEC_DTPOFF:
5894 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5897 return 0;
5900 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5901 memory address for an instruction. The MODE argument is the machine mode
5902 for the MEM expression that wants to use this address.
5904 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5905 convert common non-canonical forms to canonical form so that they will
5906 be recognized. */
5909 legitimate_address_p (mode, addr, strict)
5910 enum machine_mode mode;
5911 register rtx addr;
5912 int strict;
5914 struct ix86_address parts;
5915 rtx base, index, disp;
5916 HOST_WIDE_INT scale;
5917 const char *reason = NULL;
5918 rtx reason_rtx = NULL_RTX;
5920 if (TARGET_DEBUG_ADDR)
5922 fprintf (stderr,
5923 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5924 GET_MODE_NAME (mode), strict);
5925 debug_rtx (addr);
5928 if (ix86_decompose_address (addr, &parts) <= 0)
5930 reason = "decomposition failed";
5931 goto report_error;
5934 base = parts.base;
5935 index = parts.index;
5936 disp = parts.disp;
5937 scale = parts.scale;
5939 /* Validate base register.
5941 Don't allow SUBREGs here; they can lead to spill failures when the base
5942 is one word of a two-word structure, which is represented internally
5943 as a DImode int. */
5945 if (base)
5947 rtx reg;
5948 reason_rtx = base;
5950 if (GET_CODE (base) == SUBREG)
5951 reg = SUBREG_REG (base);
5952 else
5953 reg = base;
5955 if (GET_CODE (reg) != REG)
5957 reason = "base is not a register";
5958 goto report_error;
5961 if (GET_MODE (base) != Pmode)
5963 reason = "base is not in Pmode";
5964 goto report_error;
5967 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5968 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5970 reason = "base is not valid";
5971 goto report_error;
5975 /* Validate index register.
5977 Don't allow SUBREGs here; they can lead to spill failures when the index
5978 is one word of a two-word structure, which is represented internally
5979 as a DImode int. */
5981 if (index)
5983 rtx reg;
5984 reason_rtx = index;
5986 if (GET_CODE (index) == SUBREG)
5987 reg = SUBREG_REG (index);
5988 else
5989 reg = index;
5991 if (GET_CODE (reg) != REG)
5993 reason = "index is not a register";
5994 goto report_error;
5997 if (GET_MODE (index) != Pmode)
5999 reason = "index is not in Pmode";
6000 goto report_error;
6003 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6004 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6006 reason = "index is not valid";
6007 goto report_error;
6011 /* Validate scale factor. */
6012 if (scale != 1)
6014 reason_rtx = GEN_INT (scale);
6015 if (!index)
6017 reason = "scale without index";
6018 goto report_error;
6021 if (scale != 2 && scale != 4 && scale != 8)
6023 reason = "scale is not a valid multiplier";
6024 goto report_error;
6028 /* Validate displacement. */
6029 if (disp)
6031 reason_rtx = disp;
6033 if (GET_CODE (disp) == CONST
6034 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6035 switch (XINT (XEXP (disp, 0), 1))
6037 case UNSPEC_GOT:
6038 case UNSPEC_GOTOFF:
6039 case UNSPEC_GOTPCREL:
6040 if (!flag_pic)
6041 abort ();
6042 goto is_legitimate_pic;
6044 case UNSPEC_GOTTPOFF:
6045 case UNSPEC_GOTNTPOFF:
6046 case UNSPEC_INDNTPOFF:
6047 case UNSPEC_NTPOFF:
6048 case UNSPEC_DTPOFF:
6049 break;
6051 default:
6052 reason = "invalid address unspec";
6053 goto report_error;
6056 else if (flag_pic && (SYMBOLIC_CONST (disp)
6057 #if TARGET_MACHO
6058 && !machopic_operand_p (disp)
6059 #endif
6062 is_legitimate_pic:
6063 if (TARGET_64BIT && (index || base))
6065 /* foo@dtpoff(%rX) is ok. */
6066 if (GET_CODE (disp) != CONST
6067 || GET_CODE (XEXP (disp, 0)) != PLUS
6068 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6069 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6070 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6071 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6073 reason = "non-constant pic memory reference";
6074 goto report_error;
6077 else if (! legitimate_pic_address_disp_p (disp))
6079 reason = "displacement is an invalid pic construct";
6080 goto report_error;
6083 /* This code used to verify that a symbolic pic displacement
6084 includes the pic_offset_table_rtx register.
6086 While this is a good idea, unfortunately these constructs may
6087 be created by the "adds using lea" optimization for incorrect
6088 code like:
6090 int a;
6091 int foo(int i)
6093 return *(&a+i);
6096 This code is nonsensical, but results in addressing the
6097 GOT table with a pic_offset_table_rtx base. We can't
6098 just refuse it easily, since it gets matched by the
6099 "addsi3" pattern, which later gets split into an lea in
6100 case the output register differs from the input. While this
6101 could be handled by a separate addsi pattern for this case
6102 that never results in an lea, disabling this test seems to be
6103 the easier and correct fix for the crash. */
6105 else if (GET_CODE (disp) != LABEL_REF
6106 && GET_CODE (disp) != CONST_INT
6107 && (GET_CODE (disp) != CONST
6108 || !legitimate_constant_p (disp))
6109 && (GET_CODE (disp) != SYMBOL_REF
6110 || !legitimate_constant_p (disp)))
6112 reason = "displacement is not constant";
6113 goto report_error;
6115 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6117 reason = "displacement is out of range";
6118 goto report_error;
6122 /* Everything looks valid. */
6123 if (TARGET_DEBUG_ADDR)
6124 fprintf (stderr, "Success.\n");
6125 return TRUE;
6127 report_error:
6128 if (TARGET_DEBUG_ADDR)
6130 fprintf (stderr, "Error: %s\n", reason);
6131 debug_rtx (reason_rtx);
6133 return FALSE;
6136 /* Return a unique alias set for the GOT. */
6138 static HOST_WIDE_INT
6139 ix86_GOT_alias_set ()
6141 static HOST_WIDE_INT set = -1;
6142 if (set == -1)
6143 set = new_alias_set ();
6144 return set;
6147 /* Return a legitimate reference for ORIG (an address) using the
6148 register REG. If REG is 0, a new pseudo is generated.
6150 There are two types of references that must be handled:
6152 1. Global data references must load the address from the GOT, via
6153 the PIC reg. An insn is emitted to do this load, and the reg is
6154 returned.
6156 2. Static data references, constant pool addresses, and code labels
6157 compute the address as an offset from the GOT, whose base is in
6158 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6159 differentiate them from global data objects. The returned
6160 address is the PIC reg + an unspec constant.
6162 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6163 reg also appears in the address. */
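/* For example, with 32-bit -fpic and %ebx holding the PIC register, a
   global symbol is reached through its GOT slot,

	movl	foo@GOT(%ebx), %reg

   which leaves the address of foo in %reg, while a local/static symbol
   is addressed directly as an offset from the GOT base,

	leal	bar@GOTOFF(%ebx), %reg

   In 64-bit mode the GOT slot is instead addressed %rip-relatively,
   via foo@GOTPCREL(%rip).  */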
6166 legitimize_pic_address (orig, reg)
6167 rtx orig;
6168 rtx reg;
6170 rtx addr = orig;
6171 rtx new = orig;
6172 rtx base;
6174 #if TARGET_MACHO
6175 if (reg == 0)
6176 reg = gen_reg_rtx (Pmode);
6177 /* Use the generic Mach-O PIC machinery. */
6178 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6179 #endif
6181 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6182 new = addr;
6183 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6185 /* This symbol may be referenced via a displacement from the PIC
6186 base address (@GOTOFF). */
6188 if (reload_in_progress)
6189 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6190 if (GET_CODE (addr) == CONST)
6191 addr = XEXP (addr, 0);
6192 if (GET_CODE (addr) == PLUS)
6194 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6195 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6197 else
6198 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6199 new = gen_rtx_CONST (Pmode, new);
6200 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6202 if (reg != 0)
6204 emit_move_insn (reg, new);
6205 new = reg;
6208 else if (GET_CODE (addr) == SYMBOL_REF)
6210 if (TARGET_64BIT)
6212 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6213 new = gen_rtx_CONST (Pmode, new);
6214 new = gen_rtx_MEM (Pmode, new);
6215 RTX_UNCHANGING_P (new) = 1;
6216 set_mem_alias_set (new, ix86_GOT_alias_set ());
6218 if (reg == 0)
6219 reg = gen_reg_rtx (Pmode);
6220 /* Use gen_movsi directly; otherwise the address is loaded
6221 into a register for CSE. We don't want to CSE these addresses,
6222 instead we CSE addresses from the GOT table, so skip this. */
6223 emit_insn (gen_movsi (reg, new));
6224 new = reg;
6226 else
6228 /* This symbol must be referenced via a load from the
6229 Global Offset Table (@GOT). */
6231 if (reload_in_progress)
6232 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6233 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6234 new = gen_rtx_CONST (Pmode, new);
6235 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6236 new = gen_rtx_MEM (Pmode, new);
6237 RTX_UNCHANGING_P (new) = 1;
6238 set_mem_alias_set (new, ix86_GOT_alias_set ());
6240 if (reg == 0)
6241 reg = gen_reg_rtx (Pmode);
6242 emit_move_insn (reg, new);
6243 new = reg;
6246 else
6248 if (GET_CODE (addr) == CONST)
6250 addr = XEXP (addr, 0);
6252 /* We must match stuff we generate before. Assume the only
6253 unspecs that can get here are ours. Not that we could do
6254 anything with them anyway... */
6255 if (GET_CODE (addr) == UNSPEC
6256 || (GET_CODE (addr) == PLUS
6257 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6258 return orig;
6259 if (GET_CODE (addr) != PLUS)
6260 abort ();
6262 if (GET_CODE (addr) == PLUS)
6264 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6266 /* Check first to see if this is a constant offset from a @GOTOFF
6267 symbol reference. */
6268 if (local_symbolic_operand (op0, Pmode)
6269 && GET_CODE (op1) == CONST_INT)
6271 if (!TARGET_64BIT)
6273 if (reload_in_progress)
6274 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6275 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6276 UNSPEC_GOTOFF);
6277 new = gen_rtx_PLUS (Pmode, new, op1);
6278 new = gen_rtx_CONST (Pmode, new);
6279 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6281 if (reg != 0)
6283 emit_move_insn (reg, new);
6284 new = reg;
6287 else
6289 if (INTVAL (op1) < -16*1024*1024
6290 || INTVAL (op1) >= 16*1024*1024)
6291 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6294 else
6296 base = legitimize_pic_address (XEXP (addr, 0), reg);
6297 new = legitimize_pic_address (XEXP (addr, 1),
6298 base == reg ? NULL_RTX : reg);
6300 if (GET_CODE (new) == CONST_INT)
6301 new = plus_constant (base, INTVAL (new));
6302 else
6304 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6306 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6307 new = XEXP (new, 1);
6309 new = gen_rtx_PLUS (Pmode, base, new);
6314 return new;
6317 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6319 static rtx
6320 get_thread_pointer (to_reg)
6321 int to_reg;
6323 rtx tp, reg, insn;
6325 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6326 if (!to_reg)
6327 return tp;
6329 reg = gen_reg_rtx (Pmode);
6330 insn = gen_rtx_SET (VOIDmode, reg, tp);
6331 insn = emit_insn (insn);
6333 return reg;
6336 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6337 false if we expect this to be used for a memory address and true if
6338 we expect to load the address into a register. */
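/* Roughly, the four TLS access models handled below are:

     GLOBAL_DYNAMIC  - call the tls_get_addr runtime helper for the
                       symbol itself.
     LOCAL_DYNAMIC   - call the helper once for the module base and add
                       a per-symbol @DTPOFF offset.
     INITIAL_EXEC    - load the thread-pointer offset from the GOT
                       (@GOTTPOFF / @GOTNTPOFF / @INDNTPOFF) and combine
                       it with the thread pointer.
     LOCAL_EXEC      - use a link-time constant offset from the thread
                       pointer (@TPOFF / @NTPOFF).  */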
6340 static rtx
6341 legitimize_tls_address (x, model, for_mov)
6342 rtx x;
6343 enum tls_model model;
6344 int for_mov;
6346 rtx dest, base, off, pic;
6347 int type;
6349 switch (model)
6351 case TLS_MODEL_GLOBAL_DYNAMIC:
6352 dest = gen_reg_rtx (Pmode);
6353 if (TARGET_64BIT)
6355 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6357 start_sequence ();
6358 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6359 insns = get_insns ();
6360 end_sequence ();
6362 emit_libcall_block (insns, dest, rax, x);
6364 else
6365 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6366 break;
6368 case TLS_MODEL_LOCAL_DYNAMIC:
6369 base = gen_reg_rtx (Pmode);
6370 if (TARGET_64BIT)
6372 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6374 start_sequence ();
6375 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6376 insns = get_insns ();
6377 end_sequence ();
6379 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6380 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6381 emit_libcall_block (insns, base, rax, note);
6383 else
6384 emit_insn (gen_tls_local_dynamic_base_32 (base));
6386 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6387 off = gen_rtx_CONST (Pmode, off);
6389 return gen_rtx_PLUS (Pmode, base, off);
6391 case TLS_MODEL_INITIAL_EXEC:
6392 if (TARGET_64BIT)
6394 pic = NULL;
6395 type = UNSPEC_GOTNTPOFF;
6397 else if (flag_pic)
6399 if (reload_in_progress)
6400 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6401 pic = pic_offset_table_rtx;
6402 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6404 else if (!TARGET_GNU_TLS)
6406 pic = gen_reg_rtx (Pmode);
6407 emit_insn (gen_set_got (pic));
6408 type = UNSPEC_GOTTPOFF;
6410 else
6412 pic = NULL;
6413 type = UNSPEC_INDNTPOFF;
6416 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6417 off = gen_rtx_CONST (Pmode, off);
6418 if (pic)
6419 off = gen_rtx_PLUS (Pmode, pic, off);
6420 off = gen_rtx_MEM (Pmode, off);
6421 RTX_UNCHANGING_P (off) = 1;
6422 set_mem_alias_set (off, ix86_GOT_alias_set ());
6424 if (TARGET_64BIT || TARGET_GNU_TLS)
6426 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6427 off = force_reg (Pmode, off);
6428 return gen_rtx_PLUS (Pmode, base, off);
6430 else
6432 base = get_thread_pointer (true);
6433 dest = gen_reg_rtx (Pmode);
6434 emit_insn (gen_subsi3 (dest, base, off));
6436 break;
6438 case TLS_MODEL_LOCAL_EXEC:
6439 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6440 (TARGET_64BIT || TARGET_GNU_TLS)
6441 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6442 off = gen_rtx_CONST (Pmode, off);
6444 if (TARGET_64BIT || TARGET_GNU_TLS)
6446 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6447 return gen_rtx_PLUS (Pmode, base, off);
6449 else
6451 base = get_thread_pointer (true);
6452 dest = gen_reg_rtx (Pmode);
6453 emit_insn (gen_subsi3 (dest, base, off));
6455 break;
6457 default:
6458 abort ();
6461 return dest;
6464 /* Try machine-dependent ways of modifying an illegitimate address
6465 to be legitimate. If we find one, return the new, valid address.
6466 This macro is used in only one place: `memory_address' in explow.c.
6468 OLDX is the address as it was before break_out_memory_refs was called.
6469 In some cases it is useful to look at this to decide what needs to be done.
6471 MODE and WIN are passed so that this macro can use
6472 GO_IF_LEGITIMATE_ADDRESS.
6474 It is always safe for this macro to do nothing. It exists to recognize
6475 opportunities to optimize the output.
6477 For the 80386, we handle X+REG by loading X into a register R and
6478 using R+REG. R will go in a general reg and indexing will be used.
6479 However, if REG is a broken-out memory address or multiplication,
6480 nothing needs to be done because REG can certainly go in a general reg.
6482 When -fpic is used, special handling is needed for symbolic references.
6483 See comments by legitimize_pic_address in i386.c for details. */
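/* For example, the canonicalizations below rewrite

     (plus (ashift (reg A) (const_int 2)) (reg B))
   into
     (plus (mult (reg A) (const_int 4)) (reg B))

   and

     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   into
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   so that the result matches the base + index*scale + disp form that
   ix86_decompose_address expects.  */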
6486 legitimize_address (x, oldx, mode)
6487 register rtx x;
6488 register rtx oldx ATTRIBUTE_UNUSED;
6489 enum machine_mode mode;
6491 int changed = 0;
6492 unsigned log;
6494 if (TARGET_DEBUG_ADDR)
6496 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6497 GET_MODE_NAME (mode));
6498 debug_rtx (x);
6501 log = tls_symbolic_operand (x, mode);
6502 if (log)
6503 return legitimize_tls_address (x, log, false);
6505 if (flag_pic && SYMBOLIC_CONST (x))
6506 return legitimize_pic_address (x, 0);
6508 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6509 if (GET_CODE (x) == ASHIFT
6510 && GET_CODE (XEXP (x, 1)) == CONST_INT
6511 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6513 changed = 1;
6514 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6515 GEN_INT (1 << log));
6518 if (GET_CODE (x) == PLUS)
6520 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6522 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6523 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6524 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6526 changed = 1;
6527 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6528 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6529 GEN_INT (1 << log));
6532 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6533 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6534 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6536 changed = 1;
6537 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6538 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6539 GEN_INT (1 << log));
6542 /* Put multiply first if it isn't already. */
6543 if (GET_CODE (XEXP (x, 1)) == MULT)
6545 rtx tmp = XEXP (x, 0);
6546 XEXP (x, 0) = XEXP (x, 1);
6547 XEXP (x, 1) = tmp;
6548 changed = 1;
6551 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6552 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6553 created by virtual register instantiation, register elimination, and
6554 similar optimizations. */
6555 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6557 changed = 1;
6558 x = gen_rtx_PLUS (Pmode,
6559 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6560 XEXP (XEXP (x, 1), 0)),
6561 XEXP (XEXP (x, 1), 1));
6564 /* Canonicalize
6565 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6566 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6567 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6568 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6569 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6570 && CONSTANT_P (XEXP (x, 1)))
6572 rtx constant;
6573 rtx other = NULL_RTX;
6575 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6577 constant = XEXP (x, 1);
6578 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6580 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6582 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6583 other = XEXP (x, 1);
6585 else
6586 constant = 0;
6588 if (constant)
6590 changed = 1;
6591 x = gen_rtx_PLUS (Pmode,
6592 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6593 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6594 plus_constant (other, INTVAL (constant)));
6598 if (changed && legitimate_address_p (mode, x, FALSE))
6599 return x;
6601 if (GET_CODE (XEXP (x, 0)) == MULT)
6603 changed = 1;
6604 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6607 if (GET_CODE (XEXP (x, 1)) == MULT)
6609 changed = 1;
6610 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6613 if (changed
6614 && GET_CODE (XEXP (x, 1)) == REG
6615 && GET_CODE (XEXP (x, 0)) == REG)
6616 return x;
6618 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6620 changed = 1;
6621 x = legitimize_pic_address (x, 0);
6624 if (changed && legitimate_address_p (mode, x, FALSE))
6625 return x;
6627 if (GET_CODE (XEXP (x, 0)) == REG)
6629 register rtx temp = gen_reg_rtx (Pmode);
6630 register rtx val = force_operand (XEXP (x, 1), temp);
6631 if (val != temp)
6632 emit_move_insn (temp, val);
6634 XEXP (x, 1) = temp;
6635 return x;
6638 else if (GET_CODE (XEXP (x, 1)) == REG)
6640 register rtx temp = gen_reg_rtx (Pmode);
6641 register rtx val = force_operand (XEXP (x, 0), temp);
6642 if (val != temp)
6643 emit_move_insn (temp, val);
6645 XEXP (x, 0) = temp;
6646 return x;
6650 return x;
6653 /* Print an integer constant expression in assembler syntax. Addition
6654 and subtraction are the only arithmetic that may appear in these
6655 expressions. FILE is the stdio stream to write to, X is the rtx, and
6656 CODE is the operand print code from the output string. */
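/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   prints as "foo@GOTOFF", and a non-local symbol printed with code 'P'
   gets an "@PLT" suffix.  */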
6658 static void
6659 output_pic_addr_const (file, x, code)
6660 FILE *file;
6661 rtx x;
6662 int code;
6664 char buf[256];
6666 switch (GET_CODE (x))
6668 case PC:
6669 if (flag_pic)
6670 putc ('.', file);
6671 else
6672 abort ();
6673 break;
6675 case SYMBOL_REF:
6676 assemble_name (file, XSTR (x, 0));
6677 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6678 fputs ("@PLT", file);
6679 break;
6681 case LABEL_REF:
6682 x = XEXP (x, 0);
6683 /* FALLTHRU */
6684 case CODE_LABEL:
6685 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6686 assemble_name (asm_out_file, buf);
6687 break;
6689 case CONST_INT:
6690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6691 break;
6693 case CONST:
6694 /* This used to output parentheses around the expression,
6695 but that does not work on the 386 (either ATT or BSD assembler). */
6696 output_pic_addr_const (file, XEXP (x, 0), code);
6697 break;
6699 case CONST_DOUBLE:
6700 if (GET_MODE (x) == VOIDmode)
6702 /* We can use %d if the number is <32 bits and positive. */
6703 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6704 fprintf (file, "0x%lx%08lx",
6705 (unsigned long) CONST_DOUBLE_HIGH (x),
6706 (unsigned long) CONST_DOUBLE_LOW (x));
6707 else
6708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6710 else
6711 /* We can't handle floating point constants;
6712 PRINT_OPERAND must handle them. */
6713 output_operand_lossage ("floating constant misused");
6714 break;
6716 case PLUS:
6717 /* Some assemblers need integer constants to appear first. */
6718 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6720 output_pic_addr_const (file, XEXP (x, 0), code);
6721 putc ('+', file);
6722 output_pic_addr_const (file, XEXP (x, 1), code);
6724 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6726 output_pic_addr_const (file, XEXP (x, 1), code);
6727 putc ('+', file);
6728 output_pic_addr_const (file, XEXP (x, 0), code);
6730 else
6731 abort ();
6732 break;
6734 case MINUS:
6735 if (!TARGET_MACHO)
6736 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6737 output_pic_addr_const (file, XEXP (x, 0), code);
6738 putc ('-', file);
6739 output_pic_addr_const (file, XEXP (x, 1), code);
6740 if (!TARGET_MACHO)
6741 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6742 break;
6744 case UNSPEC:
6745 if (XVECLEN (x, 0) != 1)
6746 abort ();
6747 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6748 switch (XINT (x, 1))
6750 case UNSPEC_GOT:
6751 fputs ("@GOT", file);
6752 break;
6753 case UNSPEC_GOTOFF:
6754 fputs ("@GOTOFF", file);
6755 break;
6756 case UNSPEC_GOTPCREL:
6757 fputs ("@GOTPCREL(%rip)", file);
6758 break;
6759 case UNSPEC_GOTTPOFF:
6760 /* FIXME: This might be @TPOFF in Sun ld too. */
6761 fputs ("@GOTTPOFF", file);
6762 break;
6763 case UNSPEC_TPOFF:
6764 fputs ("@TPOFF", file);
6765 break;
6766 case UNSPEC_NTPOFF:
6767 if (TARGET_64BIT)
6768 fputs ("@TPOFF", file);
6769 else
6770 fputs ("@NTPOFF", file);
6771 break;
6772 case UNSPEC_DTPOFF:
6773 fputs ("@DTPOFF", file);
6774 break;
6775 case UNSPEC_GOTNTPOFF:
6776 if (TARGET_64BIT)
6777 fputs ("@GOTTPOFF(%rip)", file);
6778 else
6779 fputs ("@GOTNTPOFF", file);
6780 break;
6781 case UNSPEC_INDNTPOFF:
6782 fputs ("@INDNTPOFF", file);
6783 break;
6784 default:
6785 output_operand_lossage ("invalid UNSPEC as operand");
6786 break;
6788 break;
6790 default:
6791 output_operand_lossage ("invalid expression as operand");
6795 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6796 We need to handle our special PIC relocations. */
6798 void
6799 i386_dwarf_output_addr_const (file, x)
6800 FILE *file;
6801 rtx x;
6803 #ifdef ASM_QUAD
6804 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6805 #else
6806 if (TARGET_64BIT)
6807 abort ();
6808 fprintf (file, "%s", ASM_LONG);
6809 #endif
6810 if (flag_pic)
6811 output_pic_addr_const (file, x, '\0');
6812 else
6813 output_addr_const (file, x);
6814 fputc ('\n', file);
6817 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6818 We need to emit DTP-relative relocations. */
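/* For a size-4 request this emits something like ".long foo@DTPOFF";
   for size 8 it emits ".long foo@DTPOFF, 0", i.e. the relocation in
   the low word and zero in the high word.  */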
6820 void
6821 i386_output_dwarf_dtprel (file, size, x)
6822 FILE *file;
6823 int size;
6824 rtx x;
6826 fputs (ASM_LONG, file);
6827 output_addr_const (file, x);
6828 fputs ("@DTPOFF", file);
6829 switch (size)
6831 case 4:
6832 break;
6833 case 8:
6834 fputs (", 0", file);
6835 break;
6836 default:
6837 abort ();
6841 /* In the name of slightly smaller debug output, and to cater to
6842 general assembler lossage, recognize PIC+GOTOFF and turn it back
6843 into a direct symbol reference. */
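/* For example, (plus (reg %ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF))) is rewritten back into (symbol_ref "foo"); in
   64-bit mode a (mem (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTPCREL))) load likewise comes back as the bare symbol.  */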
6845 static rtx
6846 ix86_delegitimize_address (orig_x)
6847 rtx orig_x;
6849 rtx x = orig_x, y;
6851 if (GET_CODE (x) == MEM)
6852 x = XEXP (x, 0);
6854 if (TARGET_64BIT)
6856 if (GET_CODE (x) != CONST
6857 || GET_CODE (XEXP (x, 0)) != UNSPEC
6858 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6859 || GET_CODE (orig_x) != MEM)
6860 return orig_x;
6861 return XVECEXP (XEXP (x, 0), 0, 0);
6864 if (GET_CODE (x) != PLUS
6865 || GET_CODE (XEXP (x, 1)) != CONST)
6866 return orig_x;
6868 if (GET_CODE (XEXP (x, 0)) == REG
6869 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6870 /* %ebx + GOT/GOTOFF */
6871 y = NULL;
6872 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6874 /* %ebx + %reg * scale + GOT/GOTOFF */
6875 y = XEXP (x, 0);
6876 if (GET_CODE (XEXP (y, 0)) == REG
6877 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6878 y = XEXP (y, 1);
6879 else if (GET_CODE (XEXP (y, 1)) == REG
6880 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6881 y = XEXP (y, 0);
6882 else
6883 return orig_x;
6884 if (GET_CODE (y) != REG
6885 && GET_CODE (y) != MULT
6886 && GET_CODE (y) != ASHIFT)
6887 return orig_x;
6889 else
6890 return orig_x;
6892 x = XEXP (XEXP (x, 1), 0);
6893 if (GET_CODE (x) == UNSPEC
6894 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6895 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6897 if (y)
6898 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6899 return XVECEXP (x, 0, 0);
6902 if (GET_CODE (x) == PLUS
6903 && GET_CODE (XEXP (x, 0)) == UNSPEC
6904 && GET_CODE (XEXP (x, 1)) == CONST_INT
6905 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6906 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6907 && GET_CODE (orig_x) != MEM)))
6909 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6910 if (y)
6911 return gen_rtx_PLUS (Pmode, y, x);
6912 return x;
6915 return orig_x;
6918 static void
6919 put_condition_code (code, mode, reverse, fp, file)
6920 enum rtx_code code;
6921 enum machine_mode mode;
6922 int reverse, fp;
6923 FILE *file;
6925 const char *suffix;
6927 if (mode == CCFPmode || mode == CCFPUmode)
6929 enum rtx_code second_code, bypass_code;
6930 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6931 if (bypass_code != NIL || second_code != NIL)
6932 abort ();
6933 code = ix86_fp_compare_code_to_integer (code);
6934 mode = CCmode;
6936 if (reverse)
6937 code = reverse_condition (code);
6939 switch (code)
6941 case EQ:
6942 suffix = "e";
6943 break;
6944 case NE:
6945 suffix = "ne";
6946 break;
6947 case GT:
6948 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6949 abort ();
6950 suffix = "g";
6951 break;
6952 case GTU:
6953 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6954 Those same assemblers have the same but opposite lossage on cmov. */
6955 if (mode != CCmode)
6956 abort ();
6957 suffix = fp ? "nbe" : "a";
6958 break;
6959 case LT:
6960 if (mode == CCNOmode || mode == CCGOCmode)
6961 suffix = "s";
6962 else if (mode == CCmode || mode == CCGCmode)
6963 suffix = "l";
6964 else
6965 abort ();
6966 break;
6967 case LTU:
6968 if (mode != CCmode)
6969 abort ();
6970 suffix = "b";
6971 break;
6972 case GE:
6973 if (mode == CCNOmode || mode == CCGOCmode)
6974 suffix = "ns";
6975 else if (mode == CCmode || mode == CCGCmode)
6976 suffix = "ge";
6977 else
6978 abort ();
6979 break;
6980 case GEU:
6981 /* ??? As above. */
6982 if (mode != CCmode)
6983 abort ();
6984 suffix = fp ? "nb" : "ae";
6985 break;
6986 case LE:
6987 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6988 abort ();
6989 suffix = "le";
6990 break;
6991 case LEU:
6992 if (mode != CCmode)
6993 abort ();
6994 suffix = "be";
6995 break;
6996 case UNORDERED:
6997 suffix = fp ? "u" : "p";
6998 break;
6999 case ORDERED:
7000 suffix = fp ? "nu" : "np";
7001 break;
7002 default:
7003 abort ();
7005 fputs (suffix, file);
7008 void
7009 print_reg (x, code, file)
7010 rtx x;
7011 int code;
7012 FILE *file;
7014 if (REGNO (x) == ARG_POINTER_REGNUM
7015 || REGNO (x) == FRAME_POINTER_REGNUM
7016 || REGNO (x) == FLAGS_REG
7017 || REGNO (x) == FPSR_REG)
7018 abort ();
7020 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7021 putc ('%', file);
7023 if (code == 'w' || MMX_REG_P (x))
7024 code = 2;
7025 else if (code == 'b')
7026 code = 1;
7027 else if (code == 'k')
7028 code = 4;
7029 else if (code == 'q')
7030 code = 8;
7031 else if (code == 'y')
7032 code = 3;
7033 else if (code == 'h')
7034 code = 0;
7035 else
7036 code = GET_MODE_SIZE (GET_MODE (x));
7038 /* Irritatingly, AMD extended registers use a different naming convention
7039 from the normal registers. */
7040 if (REX_INT_REG_P (x))
7042 if (!TARGET_64BIT)
7043 abort ();
7044 switch (code)
7046 case 0:
7047 error ("extended registers have no high halves");
7048 break;
7049 case 1:
7050 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7051 break;
7052 case 2:
7053 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7054 break;
7055 case 4:
7056 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7057 break;
7058 case 8:
7059 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7060 break;
7061 default:
7062 error ("unsupported operand size for extended register");
7063 break;
7065 return;
7067 switch (code)
7069 case 3:
7070 if (STACK_TOP_P (x))
7072 fputs ("st(0)", file);
7073 break;
7075 /* FALLTHRU */
7076 case 8:
7077 case 4:
7078 case 12:
7079 if (! ANY_FP_REG_P (x))
7080 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7081 /* FALLTHRU */
7082 case 16:
7083 case 2:
7084 fputs (hi_reg_name[REGNO (x)], file);
7085 break;
7086 case 1:
7087 fputs (qi_reg_name[REGNO (x)], file);
7088 break;
7089 case 0:
7090 fputs (qi_high_reg_name[REGNO (x)], file);
7091 break;
7092 default:
7093 abort ();
7097 /* Locate some local-dynamic symbol still in use by this function
7098 so that we can print its name in some tls_local_dynamic_base
7099 pattern. */
7101 static const char *
7102 get_some_local_dynamic_name ()
7104 rtx insn;
7106 if (cfun->machine->some_ld_name)
7107 return cfun->machine->some_ld_name;
7109 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7110 if (INSN_P (insn)
7111 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7112 return cfun->machine->some_ld_name;
7114 abort ();
7117 static int
7118 get_some_local_dynamic_name_1 (px, data)
7119 rtx *px;
7120 void *data ATTRIBUTE_UNUSED;
7122 rtx x = *px;
7124 if (GET_CODE (x) == SYMBOL_REF
7125 && local_dynamic_symbolic_operand (x, Pmode))
7127 cfun->machine->some_ld_name = XSTR (x, 0);
7128 return 1;
7131 return 0;
7134 /* Meaning of CODE:
7135 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7136 C -- print opcode suffix for set/cmov insn.
7137 c -- like C, but print reversed condition
7138 F,f -- likewise, but for floating-point.
7139 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7140 nothing
7141 R -- print the prefix for register names.
7142 z -- print the opcode suffix for the size of the current operand.
7143 * -- print a star (in certain assembler syntax)
7144 A -- print an absolute memory reference.
7145 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7146 s -- print a shift double count, followed by the assembler's argument
7147 delimiter.
7148 b -- print the QImode name of the register for the indicated operand.
7149 %b0 would print %al if operands[0] is reg 0.
7150 w -- likewise, print the HImode name of the register.
7151 k -- likewise, print the SImode name of the register.
7152 q -- likewise, print the DImode name of the register.
7153 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7154 y -- print "st(0)" instead of "st" as a register.
7155 D -- print condition for SSE cmp instruction.
7156 P -- if PIC, print an @PLT suffix.
7157 X -- don't print any sort of PIC '@' suffix for a symbol.
7158 & -- print some in-use local-dynamic symbol name.
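/* For example, if operands[0] is the ax register, then %b0 prints %al,
   %w0 prints %ax, %k0 prints %eax, %q0 prints %rax (on a 64-bit
   target), and %h0 prints %ah; %y prints the top of the floating-point
   stack as st(0) rather than st.  */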
7161 void
7162 print_operand (file, x, code)
7163 FILE *file;
7164 rtx x;
7165 int code;
7167 if (code)
7169 switch (code)
7171 case '*':
7172 if (ASSEMBLER_DIALECT == ASM_ATT)
7173 putc ('*', file);
7174 return;
7176 case '&':
7177 assemble_name (file, get_some_local_dynamic_name ());
7178 return;
7180 case 'A':
7181 if (ASSEMBLER_DIALECT == ASM_ATT)
7182 putc ('*', file);
7183 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7185 /* Intel syntax. For absolute addresses, registers should not
7186 be surrounded by brackets. */
7187 if (GET_CODE (x) != REG)
7189 putc ('[', file);
7190 PRINT_OPERAND (file, x, 0);
7191 putc (']', file);
7192 return;
7195 else
7196 abort ();
7198 PRINT_OPERAND (file, x, 0);
7199 return;
7202 case 'L':
7203 if (ASSEMBLER_DIALECT == ASM_ATT)
7204 putc ('l', file);
7205 return;
7207 case 'W':
7208 if (ASSEMBLER_DIALECT == ASM_ATT)
7209 putc ('w', file);
7210 return;
7212 case 'B':
7213 if (ASSEMBLER_DIALECT == ASM_ATT)
7214 putc ('b', file);
7215 return;
7217 case 'Q':
7218 if (ASSEMBLER_DIALECT == ASM_ATT)
7219 putc ('l', file);
7220 return;
7222 case 'S':
7223 if (ASSEMBLER_DIALECT == ASM_ATT)
7224 putc ('s', file);
7225 return;
7227 case 'T':
7228 if (ASSEMBLER_DIALECT == ASM_ATT)
7229 putc ('t', file);
7230 return;
7232 case 'z':
7233 /* 387 opcodes don't get size suffixes if the operands are
7234 registers. */
7235 if (STACK_REG_P (x))
7236 return;
7238 /* Likewise if using Intel opcodes. */
7239 if (ASSEMBLER_DIALECT == ASM_INTEL)
7240 return;
7242 /* Derive the opcode suffix from the size of the operand. */
7243 switch (GET_MODE_SIZE (GET_MODE (x)))
7245 case 2:
7246 #ifdef HAVE_GAS_FILDS_FISTS
7247 putc ('s', file);
7248 #endif
7249 return;
7251 case 4:
7252 if (GET_MODE (x) == SFmode)
7254 putc ('s', file);
7255 return;
7257 else
7258 putc ('l', file);
7259 return;
7261 case 12:
7262 case 16:
7263 putc ('t', file);
7264 return;
7266 case 8:
7267 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7269 #ifdef GAS_MNEMONICS
7270 putc ('q', file);
7271 #else
7272 putc ('l', file);
7273 putc ('l', file);
7274 #endif
7276 else
7277 putc ('l', file);
7278 return;
7280 default:
7281 abort ();
7284 case 'b':
7285 case 'w':
7286 case 'k':
7287 case 'q':
7288 case 'h':
7289 case 'y':
7290 case 'X':
7291 case 'P':
7292 break;
7294 case 's':
7295 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7297 PRINT_OPERAND (file, x, 0);
7298 putc (',', file);
7300 return;
7302 case 'D':
7303 /* A little bit of brain damage here. The SSE compare instructions
7304 use completely different names for the comparisons than the
7305 fp conditional moves do. */
7306 switch (GET_CODE (x))
7308 case EQ:
7309 case UNEQ:
7310 fputs ("eq", file);
7311 break;
7312 case LT:
7313 case UNLT:
7314 fputs ("lt", file);
7315 break;
7316 case LE:
7317 case UNLE:
7318 fputs ("le", file);
7319 break;
7320 case UNORDERED:
7321 fputs ("unord", file);
7322 break;
7323 case NE:
7324 case LTGT:
7325 fputs ("neq", file);
7326 break;
7327 case UNGE:
7328 case GE:
7329 fputs ("nlt", file);
7330 break;
7331 case UNGT:
7332 case GT:
7333 fputs ("nle", file);
7334 break;
7335 case ORDERED:
7336 fputs ("ord", file);
7337 break;
7338 default:
7339 abort ();
7340 break;
7342 return;
7343 case 'O':
7344 #ifdef CMOV_SUN_AS_SYNTAX
7345 if (ASSEMBLER_DIALECT == ASM_ATT)
7347 switch (GET_MODE (x))
7349 case HImode: putc ('w', file); break;
7350 case SImode:
7351 case SFmode: putc ('l', file); break;
7352 case DImode:
7353 case DFmode: putc ('q', file); break;
7354 default: abort ();
7356 putc ('.', file);
7358 #endif
7359 return;
7360 case 'C':
7361 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7362 return;
7363 case 'F':
7364 #ifdef CMOV_SUN_AS_SYNTAX
7365 if (ASSEMBLER_DIALECT == ASM_ATT)
7366 putc ('.', file);
7367 #endif
7368 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7369 return;
7371 /* Like above, but reverse condition */
7372 case 'c':
7373 /* Check to see if argument to %c is really a constant
7374 and not a condition code which needs to be reversed. */
7375 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7377 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7378 return;
7380 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7381 return;
7382 case 'f':
7383 #ifdef CMOV_SUN_AS_SYNTAX
7384 if (ASSEMBLER_DIALECT == ASM_ATT)
7385 putc ('.', file);
7386 #endif
7387 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7388 return;
7389 case '+':
7391 rtx x;
7393 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7394 return;
7396 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7397 if (x)
7399 int pred_val = INTVAL (XEXP (x, 0));
7401 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7402 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7404 int taken = pred_val > REG_BR_PROB_BASE / 2;
7405 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7407 /* Emit hints only in the case where the default branch prediction
7408 heuristics would fail. */
7409 if (taken != cputaken)
7411 /* We use the 3e (DS) prefix for taken branches and
7412 the 2e (CS) prefix for not-taken branches. */
7413 if (taken)
7414 fputs ("ds ; ", file);
7415 else
7416 fputs ("cs ; ", file);
7420 return;
7422 default:
7423 output_operand_lossage ("invalid operand code `%c'", code);
7427 if (GET_CODE (x) == REG)
7429 PRINT_REG (x, code, file);
7432 else if (GET_CODE (x) == MEM)
7434 /* No `byte ptr' prefix for call instructions. */
7435 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7437 const char * size;
7438 switch (GET_MODE_SIZE (GET_MODE (x)))
7440 case 1: size = "BYTE"; break;
7441 case 2: size = "WORD"; break;
7442 case 4: size = "DWORD"; break;
7443 case 8: size = "QWORD"; break;
7444 case 12: size = "XWORD"; break;
7445 case 16: size = "XMMWORD"; break;
7446 default:
7447 abort ();
7450 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7451 if (code == 'b')
7452 size = "BYTE";
7453 else if (code == 'w')
7454 size = "WORD";
7455 else if (code == 'k')
7456 size = "DWORD";
7458 fputs (size, file);
7459 fputs (" PTR ", file);
7462 x = XEXP (x, 0);
7463 /* Avoid (%rip) for call operands. */
7464 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7465 && GET_CODE (x) != CONST_INT)
7466 output_addr_const (file, x);
7467 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7468 output_operand_lossage ("invalid constraints for operand");
7469 else
7470 output_address (x);
7473 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7475 REAL_VALUE_TYPE r;
7476 long l;
7478 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7479 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7481 if (ASSEMBLER_DIALECT == ASM_ATT)
7482 putc ('$', file);
7483 fprintf (file, "0x%lx", l);
7486 /* These float cases don't actually occur as immediate operands. */
7487 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7489 char dstr[30];
7491 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7492 fprintf (file, "%s", dstr);
7495 else if (GET_CODE (x) == CONST_DOUBLE
7496 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7498 char dstr[30];
7500 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7501 fprintf (file, "%s", dstr);
7504 else
7506 if (code != 'P')
7508 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7510 if (ASSEMBLER_DIALECT == ASM_ATT)
7511 putc ('$', file);
7513 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7514 || GET_CODE (x) == LABEL_REF)
7516 if (ASSEMBLER_DIALECT == ASM_ATT)
7517 putc ('$', file);
7518 else
7519 fputs ("OFFSET FLAT:", file);
7522 if (GET_CODE (x) == CONST_INT)
7523 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7524 else if (flag_pic)
7525 output_pic_addr_const (file, x, code);
7526 else
7527 output_addr_const (file, x);
7531 /* Print a memory operand whose address is ADDR. */
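/* For example, with AT&T syntax the address

     (plus (plus (reg %ebp) (mult (reg %ecx) (const_int 4))) (const_int -4))

   is printed as -4(%ebp,%ecx,4); with Intel syntax the same address
   comes out bracketed, roughly [ebp-4+ecx*4].  */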
7533 void
7534 print_operand_address (file, addr)
7535 FILE *file;
7536 register rtx addr;
7538 struct ix86_address parts;
7539 rtx base, index, disp;
7540 int scale;
7542 if (! ix86_decompose_address (addr, &parts))
7543 abort ();
7545 base = parts.base;
7546 index = parts.index;
7547 disp = parts.disp;
7548 scale = parts.scale;
7550 switch (parts.seg)
7552 case SEG_DEFAULT:
7553 break;
7554 case SEG_FS:
7555 case SEG_GS:
7556 if (USER_LABEL_PREFIX[0] == 0)
7557 putc ('%', file);
7558 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7559 break;
7560 default:
7561 abort ();
7564 if (!base && !index)
7566 /* A displacement-only address requires special attention. */
7568 if (GET_CODE (disp) == CONST_INT)
7570 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7572 if (USER_LABEL_PREFIX[0] == 0)
7573 putc ('%', file);
7574 fputs ("ds:", file);
7576 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7578 else if (flag_pic)
7579 output_pic_addr_const (file, disp, 0);
7580 else
7581 output_addr_const (file, disp);
7583 /* In 64bit mode, use the one byte shorter RIP-relative addressing. */
7584 if (TARGET_64BIT
7585 && ((GET_CODE (disp) == SYMBOL_REF
7586 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7587 || GET_CODE (disp) == LABEL_REF
7588 || (GET_CODE (disp) == CONST
7589 && GET_CODE (XEXP (disp, 0)) == PLUS
7590 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7591 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7592 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7593 fputs ("(%rip)", file);
7595 else
7597 if (ASSEMBLER_DIALECT == ASM_ATT)
7599 if (disp)
7601 if (flag_pic)
7602 output_pic_addr_const (file, disp, 0);
7603 else if (GET_CODE (disp) == LABEL_REF)
7604 output_asm_label (disp);
7605 else
7606 output_addr_const (file, disp);
7609 putc ('(', file);
7610 if (base)
7611 PRINT_REG (base, 0, file);
7612 if (index)
7614 putc (',', file);
7615 PRINT_REG (index, 0, file);
7616 if (scale != 1)
7617 fprintf (file, ",%d", scale);
7619 putc (')', file);
7621 else
7623 rtx offset = NULL_RTX;
7625 if (disp)
7627 /* Pull out the offset of a symbol; print any symbol itself. */
7628 if (GET_CODE (disp) == CONST
7629 && GET_CODE (XEXP (disp, 0)) == PLUS
7630 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7632 offset = XEXP (XEXP (disp, 0), 1);
7633 disp = gen_rtx_CONST (VOIDmode,
7634 XEXP (XEXP (disp, 0), 0));
7637 if (flag_pic)
7638 output_pic_addr_const (file, disp, 0);
7639 else if (GET_CODE (disp) == LABEL_REF)
7640 output_asm_label (disp);
7641 else if (GET_CODE (disp) == CONST_INT)
7642 offset = disp;
7643 else
7644 output_addr_const (file, disp);
7647 putc ('[', file);
7648 if (base)
7650 PRINT_REG (base, 0, file);
7651 if (offset)
7653 if (INTVAL (offset) >= 0)
7654 putc ('+', file);
7655 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7658 else if (offset)
7659 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7660 else
7661 putc ('0', file);
7663 if (index)
7665 putc ('+', file);
7666 PRINT_REG (index, 0, file);
7667 if (scale != 1)
7668 fprintf (file, "*%d", scale);
7670 putc (']', file);
7675 bool
7676 output_addr_const_extra (file, x)
7677 FILE *file;
7678 rtx x;
7680 rtx op;
7682 if (GET_CODE (x) != UNSPEC)
7683 return false;
7685 op = XVECEXP (x, 0, 0);
7686 switch (XINT (x, 1))
7688 case UNSPEC_GOTTPOFF:
7689 output_addr_const (file, op);
7690 /* FIXME: This might be @TPOFF in Sun ld. */
7691 fputs ("@GOTTPOFF", file);
7692 break;
7693 case UNSPEC_TPOFF:
7694 output_addr_const (file, op);
7695 fputs ("@TPOFF", file);
7696 break;
7697 case UNSPEC_NTPOFF:
7698 output_addr_const (file, op);
7699 if (TARGET_64BIT)
7700 fputs ("@TPOFF", file);
7701 else
7702 fputs ("@NTPOFF", file);
7703 break;
7704 case UNSPEC_DTPOFF:
7705 output_addr_const (file, op);
7706 fputs ("@DTPOFF", file);
7707 break;
7708 case UNSPEC_GOTNTPOFF:
7709 output_addr_const (file, op);
7710 if (TARGET_64BIT)
7711 fputs ("@GOTTPOFF(%rip)", file);
7712 else
7713 fputs ("@GOTNTPOFF", file);
7714 break;
7715 case UNSPEC_INDNTPOFF:
7716 output_addr_const (file, op);
7717 fputs ("@INDNTPOFF", file);
7718 break;
7720 default:
7721 return false;
7724 return true;
7727 /* Split one or more DImode RTL references into pairs of SImode
7728 references. The RTL can be REG, offsettable MEM, integer constant, or
7729 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7730 split and "num" is its length. lo_half and hi_half are output arrays
7731 that parallel "operands". */
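/* For example, a DImode memory reference at address X splits into an
   SImode reference at X for the low half and one at X+4 for the high
   half (little-endian layout); registers and constants are split via
   simplify_gen_subreg into their low and high 32-bit words.  */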
7733 void
7734 split_di (operands, num, lo_half, hi_half)
7735 rtx operands[];
7736 int num;
7737 rtx lo_half[], hi_half[];
7739 while (num--)
7741 rtx op = operands[num];
7743 /* simplify_subreg refuses to split volatile memory references,
7744 but we still have to handle them. */
7745 if (GET_CODE (op) == MEM)
7747 lo_half[num] = adjust_address (op, SImode, 0);
7748 hi_half[num] = adjust_address (op, SImode, 4);
7750 else
7752 lo_half[num] = simplify_gen_subreg (SImode, op,
7753 GET_MODE (op) == VOIDmode
7754 ? DImode : GET_MODE (op), 0);
7755 hi_half[num] = simplify_gen_subreg (SImode, op,
7756 GET_MODE (op) == VOIDmode
7757 ? DImode : GET_MODE (op), 4);
7761 /* Split one or more TImode RTL references into pairs of DImode
7762 references. The RTL can be REG, offsettable MEM, integer constant, or
7763 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7764 split and "num" is its length. lo_half and hi_half are output arrays
7765 that parallel "operands". */
7767 void
7768 split_ti (operands, num, lo_half, hi_half)
7769 rtx operands[];
7770 int num;
7771 rtx lo_half[], hi_half[];
7773 while (num--)
7775 rtx op = operands[num];
7777 /* simplify_subreg refuses to split volatile memory references, but we
7778 still have to handle them. */
7779 if (GET_CODE (op) == MEM)
7781 lo_half[num] = adjust_address (op, DImode, 0);
7782 hi_half[num] = adjust_address (op, DImode, 8);
7784 else
7786 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7787 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7792 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7793 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7794 is the expression of the binary operation. The output may either be
7795 emitted here, or returned to the caller, like all output_* functions.
7797 There is no guarantee that the operands are the same mode, as they
7798 might be within FLOAT or FLOAT_EXTEND expressions. */
7800 #ifndef SYSV386_COMPAT
7801 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7802 wants to fix the assemblers because that causes incompatibility
7803 with gcc. No-one wants to fix gcc because that causes
7804 incompatibility with assemblers... You can use the option of
7805 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7806 #define SYSV386_COMPAT 1
7807 #endif
7809 const char *
7810 output_387_binary_op (insn, operands)
7811 rtx insn;
7812 rtx *operands;
7814 static char buf[30];
7815 const char *p;
7816 const char *ssep;
7817 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7819 #ifdef ENABLE_CHECKING
7820 /* Even if we do not want to check the inputs, this documents the input
7821 constraints, which helps in understanding the following code. */
7822 if (STACK_REG_P (operands[0])
7823 && ((REG_P (operands[1])
7824 && REGNO (operands[0]) == REGNO (operands[1])
7825 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7826 || (REG_P (operands[2])
7827 && REGNO (operands[0]) == REGNO (operands[2])
7828 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7829 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7830 ; /* ok */
7831 else if (!is_sse)
7832 abort ();
7833 #endif
7835 switch (GET_CODE (operands[3]))
7837 case PLUS:
7838 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7839 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7840 p = "fiadd";
7841 else
7842 p = "fadd";
7843 ssep = "add";
7844 break;
7846 case MINUS:
7847 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7848 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7849 p = "fisub";
7850 else
7851 p = "fsub";
7852 ssep = "sub";
7853 break;
7855 case MULT:
7856 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7857 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7858 p = "fimul";
7859 else
7860 p = "fmul";
7861 ssep = "mul";
7862 break;
7864 case DIV:
7865 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7866 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7867 p = "fidiv";
7868 else
7869 p = "fdiv";
7870 ssep = "div";
7871 break;
7873 default:
7874 abort ();
7877 if (is_sse)
7879 strcpy (buf, ssep);
7880 if (GET_MODE (operands[0]) == SFmode)
7881 strcat (buf, "ss\t{%2, %0|%0, %2}");
7882 else
7883 strcat (buf, "sd\t{%2, %0|%0, %2}");
7884 return buf;
7886 strcpy (buf, p);
7888 switch (GET_CODE (operands[3]))
7890 case MULT:
7891 case PLUS:
7892 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7894 rtx temp = operands[2];
7895 operands[2] = operands[1];
7896 operands[1] = temp;
7899 /* We now know operands[0] == operands[1]. */
7901 if (GET_CODE (operands[2]) == MEM)
7903 p = "%z2\t%2";
7904 break;
7907 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7909 if (STACK_TOP_P (operands[0]))
7910 /* How is it that we are storing to a dead operand[2]?
7911 Well, presumably operands[1] is dead too. We can't
7912 store the result to st(0) as st(0) gets popped on this
7913 instruction. Instead store to operands[2] (which I
7914 think has to be st(1)). st(1) will be popped later.
7915 gcc <= 2.8.1 didn't have this check and generated
7916 assembly code that the Unixware assembler rejected. */
7917 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7918 else
7919 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7920 break;
7923 if (STACK_TOP_P (operands[0]))
7924 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7925 else
7926 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7927 break;
7929 case MINUS:
7930 case DIV:
7931 if (GET_CODE (operands[1]) == MEM)
7933 p = "r%z1\t%1";
7934 break;
7937 if (GET_CODE (operands[2]) == MEM)
7939 p = "%z2\t%2";
7940 break;
7943 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7945 #if SYSV386_COMPAT
7946 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7947 derived assemblers, confusingly reverse the direction of
7948 the operation for fsub{r} and fdiv{r} when the
7949 destination register is not st(0). The Intel assembler
7950 doesn't have this brain damage. Read !SYSV386_COMPAT to
7951 figure out what the hardware really does. */
7952 if (STACK_TOP_P (operands[0]))
7953 p = "{p\t%0, %2|rp\t%2, %0}";
7954 else
7955 p = "{rp\t%2, %0|p\t%0, %2}";
7956 #else
7957 if (STACK_TOP_P (operands[0]))
7958 /* As above for fmul/fadd, we can't store to st(0). */
7959 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7960 else
7961 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7962 #endif
7963 break;
7966 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7968 #if SYSV386_COMPAT
7969 if (STACK_TOP_P (operands[0]))
7970 p = "{rp\t%0, %1|p\t%1, %0}";
7971 else
7972 p = "{p\t%1, %0|rp\t%0, %1}";
7973 #else
7974 if (STACK_TOP_P (operands[0]))
7975 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7976 else
7977 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7978 #endif
7979 break;
7982 if (STACK_TOP_P (operands[0]))
7984 if (STACK_TOP_P (operands[1]))
7985 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7986 else
7987 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7988 break;
7990 else if (STACK_TOP_P (operands[1]))
7992 #if SYSV386_COMPAT
7993 p = "{\t%1, %0|r\t%0, %1}";
7994 #else
7995 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7996 #endif
7998 else
8000 #if SYSV386_COMPAT
8001 p = "{r\t%2, %0|\t%0, %2}";
8002 #else
8003 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8004 #endif
8006 break;
8008 default:
8009 abort ();
8012 strcat (buf, p);
8013 return buf;
8016 /* Output code to initialize the control word copies used by the
8017 trunc?f?i patterns. NORMAL is set to the current control word, while
8018 ROUND_DOWN is set to a control word that rounds toward zero, as the truncation patterns require. */
8019 void
8020 emit_i387_cw_initialization (normal, round_down)
8021 rtx normal, round_down;
8023 rtx reg = gen_reg_rtx (HImode);
8025 emit_insn (gen_x86_fnstcw_1 (normal));
8026 emit_move_insn (reg, normal);
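/* Descriptive note: both alternatives below force the rounding-control
   field (bits 10-11 of the x87 control word) to 0b11, i.e. round toward
   zero, which is what the truncation patterns need; the insv form is
   assumed to write 0x0c into the high byte of the copy, while the iorhi3
   form ORs in 0xc00 directly. */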
8027 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8028 && !TARGET_64BIT)
8029 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8030 else
8031 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8032 emit_move_insn (round_down, reg);
8035 /* Output code for INSN to convert a float to a signed int. OPERANDS
8036 are the insn operands. The output may be [HSD]Imode and the input
8037 operand may be [SDX]Fmode. */
8039 const char *
8040 output_fix_trunc (insn, operands)
8041 rtx insn;
8042 rtx *operands;
8044 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8045 int dimode_p = GET_MODE (operands[0]) == DImode;
8047 /* Jump through a hoop or two for DImode, since the hardware has no
8048 non-popping instruction. We used to do this a different way, but
8049 that was somewhat fragile and broke with post-reload splitters. */
8050 if (dimode_p && !stack_top_dies)
8051 output_asm_insn ("fld\t%y1", operands);
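/* Descriptive note: only the popping fistp form can store a DImode
   integer, so when the value is still live we duplicate it with the fld
   above and let the fistp emitted below pop the copy instead. */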
8053 if (!STACK_TOP_P (operands[1]))
8054 abort ();
8056 if (GET_CODE (operands[0]) != MEM)
8057 abort ();
8059 output_asm_insn ("fldcw\t%3", operands);
8060 if (stack_top_dies || dimode_p)
8061 output_asm_insn ("fistp%z0\t%0", operands);
8062 else
8063 output_asm_insn ("fist%z0\t%0", operands);
8064 output_asm_insn ("fldcw\t%2", operands);
8066 return "";
8069 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8070 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8071 when fucom should be used. */
8073 const char *
8074 output_fp_compare (insn, operands, eflags_p, unordered_p)
8075 rtx insn;
8076 rtx *operands;
8077 int eflags_p, unordered_p;
8079 int stack_top_dies;
8080 rtx cmp_op0 = operands[0];
8081 rtx cmp_op1 = operands[1];
8082 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
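/* Descriptive note: when fnstsw is used (eflags_p == 2), operand 0 is the
   HImode register receiving the status word, so the values actually being
   compared are operands 1 and 2; the swap below accounts for that. */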
8084 if (eflags_p == 2)
8086 cmp_op0 = cmp_op1;
8087 cmp_op1 = operands[2];
8089 if (is_sse)
8091 if (GET_MODE (operands[0]) == SFmode)
8092 if (unordered_p)
8093 return "ucomiss\t{%1, %0|%0, %1}";
8094 else
8095 return "comiss\t{%1, %0|%0, %1}";
8096 else
8097 if (unordered_p)
8098 return "ucomisd\t{%1, %0|%0, %1}";
8099 else
8100 return "comisd\t{%1, %0|%0, %1}";
8103 if (! STACK_TOP_P (cmp_op0))
8104 abort ();
8106 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8108 if (STACK_REG_P (cmp_op1)
8109 && stack_top_dies
8110 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8111 && REGNO (cmp_op1) != FIRST_STACK_REG)
8113 /* If the top of the 387 stack dies, and the other operand
8114 is also a stack register that dies, then this must be a
8115 `fcompp' float compare. */
8117 if (eflags_p == 1)
8119 /* There is no double popping fcomi variant. Fortunately,
8120 eflags is immune from the fstp's cc clobbering. */
8121 if (unordered_p)
8122 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8123 else
8124 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8125 return "fstp\t%y0";
8127 else
8129 if (eflags_p == 2)
8131 if (unordered_p)
8132 return "fucompp\n\tfnstsw\t%0";
8133 else
8134 return "fcompp\n\tfnstsw\t%0";
8136 else
8138 if (unordered_p)
8139 return "fucompp";
8140 else
8141 return "fcompp";
8145 else
8147 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8149 static const char * const alt[24] =
8151 "fcom%z1\t%y1",
8152 "fcomp%z1\t%y1",
8153 "fucom%z1\t%y1",
8154 "fucomp%z1\t%y1",
8156 "ficom%z1\t%y1",
8157 "ficomp%z1\t%y1",
8158 NULL,
8159 NULL,
8161 "fcomi\t{%y1, %0|%0, %y1}",
8162 "fcomip\t{%y1, %0|%0, %y1}",
8163 "fucomi\t{%y1, %0|%0, %y1}",
8164 "fucomip\t{%y1, %0|%0, %y1}",
8166 NULL,
8167 NULL,
8168 NULL,
8169 NULL,
8171 "fcom%z2\t%y2\n\tfnstsw\t%0",
8172 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8173 "fucom%z2\t%y2\n\tfnstsw\t%0",
8174 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8176 "ficom%z2\t%y2\n\tfnstsw\t%0",
8177 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8178 NULL,
8179 NULL
8182 int mask;
8183 const char *ret;
8185 mask = eflags_p << 3;
8186 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8187 mask |= unordered_p << 1;
8188 mask |= stack_top_dies;
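/* Illustrative example: fcomi requested (eflags_p == 1) with FP operands,
   an unordered compare and a surviving stack top gives
   mask = 8 + 0 + 2 + 0 = 10, selecting "fucomi\t{%y1, %0|%0, %y1}". */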
8190 if (mask >= 24)
8191 abort ();
8192 ret = alt[mask];
8193 if (ret == NULL)
8194 abort ();
8196 return ret;
8200 void
8201 ix86_output_addr_vec_elt (file, value)
8202 FILE *file;
8203 int value;
8205 const char *directive = ASM_LONG;
8207 if (TARGET_64BIT)
8209 #ifdef ASM_QUAD
8210 directive = ASM_QUAD;
8211 #else
8212 abort ();
8213 #endif
8216 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8219 void
8220 ix86_output_addr_diff_elt (file, value, rel)
8221 FILE *file;
8222 int value, rel;
8224 if (TARGET_64BIT)
8225 fprintf (file, "%s%s%d-%s%d\n",
8226 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8227 else if (HAVE_AS_GOTOFF_IN_DATA)
8228 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8229 #if TARGET_MACHO
8230 else if (TARGET_MACHO)
8231 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8232 machopic_function_base_name () + 1);
8233 #endif
8234 else
8235 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8236 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8239 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8240 for the target. */
8242 void
8243 ix86_expand_clear (dest)
8244 rtx dest;
8246 rtx tmp;
8248 /* We play register width games, which are only valid after reload. */
8249 if (!reload_completed)
8250 abort ();
8252 /* Avoid HImode and its attendant prefix byte. */
8253 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8254 dest = gen_rtx_REG (SImode, REGNO (dest));
8256 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8258 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8259 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
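/* Descriptive note: the xor form clobbers the flags, so the SET is wrapped
   in a PARALLEL together with a clobber of the flags register (hard
   register 17 in this port). */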
8261 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8262 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8265 emit_insn (tmp);
8268 /* X is an unchanging MEM. If it is a constant pool reference, return
8269 the constant pool rtx, else NULL. */
8271 static rtx
8272 maybe_get_pool_constant (x)
8273 rtx x;
8275 x = ix86_delegitimize_address (XEXP (x, 0));
8277 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8278 return get_pool_constant (x);
8280 return NULL_RTX;
8283 void
8284 ix86_expand_move (mode, operands)
8285 enum machine_mode mode;
8286 rtx operands[];
8288 int strict = (reload_in_progress || reload_completed);
8289 rtx op0, op1;
8290 enum tls_model model;
8292 op0 = operands[0];
8293 op1 = operands[1];
8295 model = tls_symbolic_operand (op1, Pmode);
8296 if (model)
8298 op1 = legitimize_tls_address (op1, model, true);
8299 op1 = force_operand (op1, op0);
8300 if (op1 == op0)
8301 return;
8304 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8306 #if TARGET_MACHO
8307 if (MACHOPIC_PURE)
8309 rtx temp = ((reload_in_progress
8310 || ((op0 && GET_CODE (op0) == REG)
8311 && mode == Pmode))
8312 ? op0 : gen_reg_rtx (Pmode));
8313 op1 = machopic_indirect_data_reference (op1, temp);
8314 op1 = machopic_legitimize_pic_address (op1, mode,
8315 temp == op1 ? 0 : temp);
8317 else if (MACHOPIC_INDIRECT)
8318 op1 = machopic_indirect_data_reference (op1, 0);
8319 if (op0 == op1)
8320 return;
8321 #else
8322 if (GET_CODE (op0) == MEM)
8323 op1 = force_reg (Pmode, op1);
8324 else
8326 rtx temp = op0;
8327 if (GET_CODE (temp) != REG)
8328 temp = gen_reg_rtx (Pmode);
8329 temp = legitimize_pic_address (op1, temp);
8330 if (temp == op0)
8331 return;
8332 op1 = temp;
8334 #endif /* TARGET_MACHO */
8336 else
8338 if (GET_CODE (op0) == MEM
8339 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8340 || !push_operand (op0, mode))
8341 && GET_CODE (op1) == MEM)
8342 op1 = force_reg (mode, op1);
8344 if (push_operand (op0, mode)
8345 && ! general_no_elim_operand (op1, mode))
8346 op1 = copy_to_mode_reg (mode, op1);
8348 /* Force large constants in 64-bit compilation into a register
8349 to get them CSEed. */
8350 if (TARGET_64BIT && mode == DImode
8351 && immediate_operand (op1, mode)
8352 && !x86_64_zero_extended_value (op1)
8353 && !register_operand (op0, mode)
8354 && optimize && !reload_completed && !reload_in_progress)
8355 op1 = copy_to_mode_reg (mode, op1);
8357 if (FLOAT_MODE_P (mode))
8359 /* If we are loading a floating point constant to a register,
8360 force the value to memory now, since we'll get better code
8361 out the back end. */
8363 if (strict)
8365 else if (GET_CODE (op1) == CONST_DOUBLE)
8367 op1 = validize_mem (force_const_mem (mode, op1));
8368 if (!register_operand (op0, mode))
8370 rtx temp = gen_reg_rtx (mode);
8371 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8372 emit_move_insn (op0, temp);
8373 return;
8379 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8382 void
8383 ix86_expand_vector_move (mode, operands)
8384 enum machine_mode mode;
8385 rtx operands[];
8387 /* Force constants other than zero into memory. We do not know how
8388 the instructions used to build constants modify the upper 64 bits
8389 of the register; once we have that information we may be able
8390 to handle some of them more efficiently. */
8391 if ((reload_in_progress | reload_completed) == 0
8392 && register_operand (operands[0], mode)
8393 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8394 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8396 /* Make operand1 a register if it isn't already. */
8397 if (!no_new_pseudos
8398 && !register_operand (operands[0], mode)
8399 && !register_operand (operands[1], mode))
8401 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8402 emit_move_insn (operands[0], temp);
8403 return;
8406 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8409 /* Attempt to expand a binary operator. Make the expansion closer to the
8410 actual machine than just general_operand, which would allow 3 separate
8411 memory references (one output, two input) in a single insn. */
8413 void
8414 ix86_expand_binary_operator (code, mode, operands)
8415 enum rtx_code code;
8416 enum machine_mode mode;
8417 rtx operands[];
8419 int matching_memory;
8420 rtx src1, src2, dst, op, clob;
8422 dst = operands[0];
8423 src1 = operands[1];
8424 src2 = operands[2];
8426 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8427 if (GET_RTX_CLASS (code) == 'c'
8428 && (rtx_equal_p (dst, src2)
8429 || immediate_operand (src1, mode)))
8431 rtx temp = src1;
8432 src1 = src2;
8433 src2 = temp;
8436 /* If the destination is memory, and we do not have matching source
8437 operands, do things in registers. */
8438 matching_memory = 0;
8439 if (GET_CODE (dst) == MEM)
8441 if (rtx_equal_p (dst, src1))
8442 matching_memory = 1;
8443 else if (GET_RTX_CLASS (code) == 'c'
8444 && rtx_equal_p (dst, src2))
8445 matching_memory = 2;
8446 else
8447 dst = gen_reg_rtx (mode);
8450 /* Both source operands cannot be in memory. */
8451 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8453 if (matching_memory != 2)
8454 src2 = force_reg (mode, src2);
8455 else
8456 src1 = force_reg (mode, src1);
8459 /* If the operation is not commutable, source 1 cannot be a constant
8460 or non-matching memory. */
8461 if ((CONSTANT_P (src1)
8462 || (!matching_memory && GET_CODE (src1) == MEM))
8463 && GET_RTX_CLASS (code) != 'c')
8464 src1 = force_reg (mode, src1);
8466 /* If optimizing, copy to regs to improve CSE */
8467 if (optimize && ! no_new_pseudos)
8469 if (GET_CODE (dst) == MEM)
8470 dst = gen_reg_rtx (mode);
8471 if (GET_CODE (src1) == MEM)
8472 src1 = force_reg (mode, src1);
8473 if (GET_CODE (src2) == MEM)
8474 src2 = force_reg (mode, src2);
8477 /* Emit the instruction. */
8479 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8480 if (reload_in_progress)
8482 /* Reload doesn't know about the flags register, and doesn't know that
8483 it doesn't want to clobber it. We can only do this with PLUS. */
8484 if (code != PLUS)
8485 abort ();
8486 emit_insn (op);
8488 else
8490 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8491 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8494 /* Fix up the destination if needed. */
8495 if (dst != operands[0])
8496 emit_move_insn (operands[0], dst);
8499 /* Return TRUE or FALSE depending on whether the binary operator meets the
8500 appropriate constraints. */
int
8503 ix86_binary_operator_ok (code, mode, operands)
8504 enum rtx_code code;
8505 enum machine_mode mode ATTRIBUTE_UNUSED;
8506 rtx operands[3];
8508 /* Both source operands cannot be in memory. */
8509 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8510 return 0;
8511 /* If the operation is not commutable, source 1 cannot be a constant. */
8512 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8513 return 0;
8514 /* If the destination is memory, we must have a matching source operand. */
8515 if (GET_CODE (operands[0]) == MEM
8516 && ! (rtx_equal_p (operands[0], operands[1])
8517 || (GET_RTX_CLASS (code) == 'c'
8518 && rtx_equal_p (operands[0], operands[2]))))
8519 return 0;
8520 /* If the operation is not commutable and the source 1 is memory, we must
8521 have a matching destination. */
8522 if (GET_CODE (operands[1]) == MEM
8523 && GET_RTX_CLASS (code) != 'c'
8524 && ! rtx_equal_p (operands[0], operands[1]))
8525 return 0;
8526 return 1;
8529 /* Attempt to expand a unary operator. Make the expansion closer to the
8530 actual machine than just general_operand, which would allow 2 separate
8531 memory references (one output, one input) in a single insn. */
8533 void
8534 ix86_expand_unary_operator (code, mode, operands)
8535 enum rtx_code code;
8536 enum machine_mode mode;
8537 rtx operands[];
8539 int matching_memory;
8540 rtx src, dst, op, clob;
8542 dst = operands[0];
8543 src = operands[1];
8545 /* If the destination is memory, and we do not have matching source
8546 operands, do things in registers. */
8547 matching_memory = 0;
8548 if (GET_CODE (dst) == MEM)
8550 if (rtx_equal_p (dst, src))
8551 matching_memory = 1;
8552 else
8553 dst = gen_reg_rtx (mode);
8556 /* When source operand is memory, destination must match. */
8557 if (!matching_memory && GET_CODE (src) == MEM)
8558 src = force_reg (mode, src);
8560 /* If optimizing, copy to regs to improve CSE */
8561 if (optimize && ! no_new_pseudos)
8563 if (GET_CODE (dst) == MEM)
8564 dst = gen_reg_rtx (mode);
8565 if (GET_CODE (src) == MEM)
8566 src = force_reg (mode, src);
8569 /* Emit the instruction. */
8571 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8572 if (reload_in_progress || code == NOT)
8574 /* Reload doesn't know about the flags register, and doesn't know that
8575 it doesn't want to clobber it. */
8576 if (code != NOT)
8577 abort ();
8578 emit_insn (op);
8580 else
8582 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8583 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8586 /* Fix up the destination if needed. */
8587 if (dst != operands[0])
8588 emit_move_insn (operands[0], dst);
8591 /* Return TRUE or FALSE depending on whether the unary operator meets the
8592 appropriate constraints. */
int
8595 ix86_unary_operator_ok (code, mode, operands)
8596 enum rtx_code code ATTRIBUTE_UNUSED;
8597 enum machine_mode mode ATTRIBUTE_UNUSED;
8598 rtx operands[2] ATTRIBUTE_UNUSED;
8600 /* If one of operands is memory, source and destination must match. */
8601 if ((GET_CODE (operands[0]) == MEM
8602 || GET_CODE (operands[1]) == MEM)
8603 && ! rtx_equal_p (operands[0], operands[1]))
8604 return FALSE;
8605 return TRUE;
8608 /* Return TRUE or FALSE depending on whether the first SET in INSN
8609 has source and destination with matching CC modes, and whether the
8610 CC mode is at least as constrained as REQ_MODE. */
int
8613 ix86_match_ccmode (insn, req_mode)
8614 rtx insn;
8615 enum machine_mode req_mode;
8617 rtx set;
8618 enum machine_mode set_mode;
8620 set = PATTERN (insn);
8621 if (GET_CODE (set) == PARALLEL)
8622 set = XVECEXP (set, 0, 0);
8623 if (GET_CODE (set) != SET)
8624 abort ();
8625 if (GET_CODE (SET_SRC (set)) != COMPARE)
8626 abort ();
8628 set_mode = GET_MODE (SET_DEST (set));
8629 switch (set_mode)
8631 case CCNOmode:
8632 if (req_mode != CCNOmode
8633 && (req_mode != CCmode
8634 || XEXP (SET_SRC (set), 1) != const0_rtx))
8635 return 0;
8636 break;
8637 case CCmode:
8638 if (req_mode == CCGCmode)
8639 return 0;
8640 /* FALLTHRU */
8641 case CCGCmode:
8642 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8643 return 0;
8644 /* FALLTHRU */
8645 case CCGOCmode:
8646 if (req_mode == CCZmode)
8647 return 0;
8648 /* FALLTHRU */
8649 case CCZmode:
8650 break;
8652 default:
8653 abort ();
8656 return (GET_MODE (SET_SRC (set)) == set_mode);
8659 /* Generate insn patterns to do an integer compare of OPERANDS. */
8661 static rtx
8662 ix86_expand_int_compare (code, op0, op1)
8663 enum rtx_code code;
8664 rtx op0, op1;
8666 enum machine_mode cmpmode;
8667 rtx tmp, flags;
8669 cmpmode = SELECT_CC_MODE (code, op0, op1);
8670 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8672 /* This is very simple, but making the interface the same as in the
8673 FP case makes the rest of the code easier. */
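/* Illustrative example: for (GT op0 op1) this emits
   (set (reg:CCGC flags) (compare:CCGC op0 op1)) and returns
   (gt (reg:CCGC flags) (const_int 0)); the exact CC mode comes from
   SELECT_CC_MODE. */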
8674 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8675 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8677 /* Return the test that should be put into the flags user, i.e.
8678 the bcc, scc, or cmov instruction. */
8679 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8682 /* Figure out whether to use ordered or unordered fp comparisons.
8683 Return the appropriate mode to use. */
8685 enum machine_mode
8686 ix86_fp_compare_mode (code)
8687 enum rtx_code code ATTRIBUTE_UNUSED;
8689 /* ??? In order to make all comparisons reversible, we do all comparisons
8690 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8691 between the trapping and nontrapping forms of comparisons, we can make
8692 inequality comparisons trapping again, since that results in better code
8693 when using FCOM based compares. */
8694 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8697 enum machine_mode
8698 ix86_cc_mode (code, op0, op1)
8699 enum rtx_code code;
8700 rtx op0, op1;
8702 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8703 return ix86_fp_compare_mode (code);
8704 switch (code)
8706 /* Only zero flag is needed. */
8707 case EQ: /* ZF=0 */
8708 case NE: /* ZF!=0 */
8709 return CCZmode;
8710 /* Codes needing carry flag. */
8711 case GEU: /* CF=0 */
8712 case GTU: /* CF=0 & ZF=0 */
8713 case LTU: /* CF=1 */
8714 case LEU: /* CF=1 | ZF=1 */
8715 return CCmode;
8716 /* Codes possibly doable only with sign flag when
8717 comparing against zero. */
8718 case GE: /* SF=OF or SF=0 */
8719 case LT: /* SF<>OF or SF=1 */
8720 if (op1 == const0_rtx)
8721 return CCGOCmode;
8722 else
8723 /* For other cases Carry flag is not required. */
8724 return CCGCmode;
8725 /* Codes doable only with the sign flag when comparing
8726 against zero, but for which we have no jump instruction,
8727 so we need to use relational tests against overflow,
8728 which thus needs to be zero. */
8729 case GT: /* ZF=0 & SF=OF */
8730 case LE: /* ZF=1 | SF<>OF */
8731 if (op1 == const0_rtx)
8732 return CCNOmode;
8733 else
8734 return CCGCmode;
8735 /* The strcmp pattern does (use flags), and combine may ask us for the
8736 proper mode. */
8737 case USE:
8738 return CCmode;
8739 default:
8740 abort ();
8744 /* Return true if we should use an FCOMI instruction for this fp comparison. */
int
8747 ix86_use_fcomi_compare (code)
8748 enum rtx_code code ATTRIBUTE_UNUSED;
8750 enum rtx_code swapped_code = swap_condition (code);
8751 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8752 || (ix86_fp_comparison_cost (swapped_code)
8753 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8756 /* Swap, force into registers, or otherwise massage the two operands
8757 to a fp comparison. The operands are updated in place; the new
8758 comparison code is returned. */
8760 static enum rtx_code
8761 ix86_prepare_fp_compare_args (code, pop0, pop1)
8762 enum rtx_code code;
8763 rtx *pop0, *pop1;
8765 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8766 rtx op0 = *pop0, op1 = *pop1;
8767 enum machine_mode op_mode = GET_MODE (op0);
8768 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8770 /* All of the unordered compare instructions only work on registers.
8771 The same is true of the XFmode compare instructions. The same is
8772 true of the fcomi compare instructions. */
8774 if (!is_sse
8775 && (fpcmp_mode == CCFPUmode
8776 || op_mode == XFmode
8777 || op_mode == TFmode
8778 || ix86_use_fcomi_compare (code)))
8780 op0 = force_reg (op_mode, op0);
8781 op1 = force_reg (op_mode, op1);
8783 else
8785 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8786 things around if they appear profitable, otherwise force op0
8787 into a register. */
8789 if (standard_80387_constant_p (op0) == 0
8790 || (GET_CODE (op0) == MEM
8791 && ! (standard_80387_constant_p (op1) == 0
8792 || GET_CODE (op1) == MEM)))
8794 rtx tmp;
8795 tmp = op0, op0 = op1, op1 = tmp;
8796 code = swap_condition (code);
8799 if (GET_CODE (op0) != REG)
8800 op0 = force_reg (op_mode, op0);
8802 if (CONSTANT_P (op1))
8804 if (standard_80387_constant_p (op1))
8805 op1 = force_reg (op_mode, op1);
8806 else
8807 op1 = validize_mem (force_const_mem (op_mode, op1));
8811 /* Try to rearrange the comparison to make it cheaper. */
8812 if (ix86_fp_comparison_cost (code)
8813 > ix86_fp_comparison_cost (swap_condition (code))
8814 && (GET_CODE (op1) == REG || !no_new_pseudos))
8816 rtx tmp;
8817 tmp = op0, op0 = op1, op1 = tmp;
8818 code = swap_condition (code);
8819 if (GET_CODE (op0) != REG)
8820 op0 = force_reg (op_mode, op0);
8823 *pop0 = op0;
8824 *pop1 = op1;
8825 return code;
8828 /* Convert the comparison codes we use to represent an FP comparison to the
8829 integer code that will result in a proper branch. Return UNKNOWN if no
8830 such code is available. */
8831 static enum rtx_code
8832 ix86_fp_compare_code_to_integer (code)
8833 enum rtx_code code;
8835 switch (code)
8837 case GT:
8838 return GTU;
8839 case GE:
8840 return GEU;
8841 case ORDERED:
8842 case UNORDERED:
8843 return code;
8844 break;
8845 case UNEQ:
8846 return EQ;
8847 break;
8848 case UNLT:
8849 return LTU;
8850 break;
8851 case UNLE:
8852 return LEU;
8853 break;
8854 case LTGT:
8855 return NE;
8856 break;
8857 default:
8858 return UNKNOWN;
8862 /* Split comparison code CODE into comparisons we can do using branch
8863 instructions. BYPASS_CODE is the comparison code for a branch that will
8864 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8865 is not required, its value is set to NIL.
8866 We never require more than two branches. */
8867 static void
8868 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8869 enum rtx_code code, *bypass_code, *first_code, *second_code;
8871 *first_code = code;
8872 *bypass_code = NIL;
8873 *second_code = NIL;
8875 /* The fcomi comparison sets flags as follows:
8877 cmp ZF PF CF
8878 > 0 0 0
8879 < 0 0 1
8880 = 1 0 0
8881 un 1 1 1 */
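/* Descriptive note, summarizing the cases below: codes in the first group
   can be tested with a single branch even for unordered operands.  LT, LE
   and EQ would also fire on unordered operands (which set ZF, PF and CF),
   so they get a bypass branch taken on UNORDERED; NE, UNGE and UNGT would
   wrongly fail on unordered operands, so they get a second branch taken on
   UNORDERED instead. */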
8883 switch (code)
8885 case GT: /* GTU - CF=0 & ZF=0 */
8886 case GE: /* GEU - CF=0 */
8887 case ORDERED: /* PF=0 */
8888 case UNORDERED: /* PF=1 */
8889 case UNEQ: /* EQ - ZF=1 */
8890 case UNLT: /* LTU - CF=1 */
8891 case UNLE: /* LEU - CF=1 | ZF=1 */
8892 case LTGT: /* EQ - ZF=0 */
8893 break;
8894 case LT: /* LTU - CF=1 - fails on unordered */
8895 *first_code = UNLT;
8896 *bypass_code = UNORDERED;
8897 break;
8898 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8899 *first_code = UNLE;
8900 *bypass_code = UNORDERED;
8901 break;
8902 case EQ: /* EQ - ZF=1 - fails on unordered */
8903 *first_code = UNEQ;
8904 *bypass_code = UNORDERED;
8905 break;
8906 case NE: /* NE - ZF=0 - fails on unordered */
8907 *first_code = LTGT;
8908 *second_code = UNORDERED;
8909 break;
8910 case UNGE: /* GEU - CF=0 - fails on unordered */
8911 *first_code = GE;
8912 *second_code = UNORDERED;
8913 break;
8914 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8915 *first_code = GT;
8916 *second_code = UNORDERED;
8917 break;
8918 default:
8919 abort ();
8921 if (!TARGET_IEEE_FP)
8923 *second_code = NIL;
8924 *bypass_code = NIL;
8928 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8929 All of the following functions use the number of instructions as the cost metric.
8930 In the future this should be tweaked to compute bytes for optimize_size and
8931 take into account the performance of various instructions on various CPUs. */
8932 static int
8933 ix86_fp_comparison_arithmetics_cost (code)
8934 enum rtx_code code;
8936 if (!TARGET_IEEE_FP)
8937 return 4;
8938 /* The cost of code output by ix86_expand_fp_compare. */
8939 switch (code)
8941 case UNLE:
8942 case UNLT:
8943 case LTGT:
8944 case GT:
8945 case GE:
8946 case UNORDERED:
8947 case ORDERED:
8948 case UNEQ:
8949 return 4;
8950 break;
8951 case LT:
8952 case NE:
8953 case EQ:
8954 case UNGE:
8955 return 5;
8956 break;
8957 case LE:
8958 case UNGT:
8959 return 6;
8960 break;
8961 default:
8962 abort ();
8966 /* Return cost of comparison done using fcomi operation.
8967 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8968 static int
8969 ix86_fp_comparison_fcomi_cost (code)
8970 enum rtx_code code;
8972 enum rtx_code bypass_code, first_code, second_code;
8973 /* Return an arbitrarily high cost when the instruction is not supported - this
8974 prevents gcc from using it. */
8975 if (!TARGET_CMOVE)
8976 return 1024;
8977 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8978 return (bypass_code != NIL || second_code != NIL) + 2;
8981 /* Return cost of comparison done using sahf operation.
8982 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8983 static int
8984 ix86_fp_comparison_sahf_cost (code)
8985 enum rtx_code code;
8987 enum rtx_code bypass_code, first_code, second_code;
8988 /* Return an arbitrarily high cost when the instruction is not preferred - this
8989 keeps gcc from using it. */
8990 if (!TARGET_USE_SAHF && !optimize_size)
8991 return 1024;
8992 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8993 return (bypass_code != NIL || second_code != NIL) + 3;
8996 /* Compute cost of the comparison done using any method.
8997 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8998 static int
8999 ix86_fp_comparison_cost (code)
9000 enum rtx_code code;
9002 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9003 int min;
9005 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9006 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9008 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9009 if (min > sahf_cost)
9010 min = sahf_cost;
9011 if (min > fcomi_cost)
9012 min = fcomi_cost;
9013 return min;
9016 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9018 static rtx
9019 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9020 enum rtx_code code;
9021 rtx op0, op1, scratch;
9022 rtx *second_test;
9023 rtx *bypass_test;
9025 enum machine_mode fpcmp_mode, intcmp_mode;
9026 rtx tmp, tmp2;
9027 int cost = ix86_fp_comparison_cost (code);
9028 enum rtx_code bypass_code, first_code, second_code;
9030 fpcmp_mode = ix86_fp_compare_mode (code);
9031 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9033 if (second_test)
9034 *second_test = NULL_RTX;
9035 if (bypass_test)
9036 *bypass_test = NULL_RTX;
9038 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9040 /* Do fcomi/sahf based test when profitable. */
9041 if ((bypass_code == NIL || bypass_test)
9042 && (second_code == NIL || second_test)
9043 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9045 if (TARGET_CMOVE)
9047 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9048 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9049 tmp);
9050 emit_insn (tmp);
9052 else
9054 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9055 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9056 if (!scratch)
9057 scratch = gen_reg_rtx (HImode);
9058 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9059 emit_insn (gen_x86_sahf_1 (scratch));
9062 /* The FP codes work out to act like unsigned. */
9063 intcmp_mode = fpcmp_mode;
9064 code = first_code;
9065 if (bypass_code != NIL)
9066 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9067 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9068 const0_rtx);
9069 if (second_code != NIL)
9070 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9071 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9072 const0_rtx);
9074 else
9076 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9077 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9078 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9079 if (!scratch)
9080 scratch = gen_reg_rtx (HImode);
9081 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9083 /* In the unordered case, we have to check C2 for NaN's, which
9084 doesn't happen to work out to anything nice combination-wise.
9085 So do some bit twiddling on the value we've got in AH to come
9086 up with an appropriate set of condition codes. */
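/* Descriptive note: after fnstsw the FPU condition bits sit in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so e.g. the 0x45 masks below select
   C3 | C2 | C0. */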
9088 intcmp_mode = CCNOmode;
9089 switch (code)
9091 case GT:
9092 case UNGT:
9093 if (code == GT || !TARGET_IEEE_FP)
9095 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9096 code = EQ;
9098 else
9100 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9101 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9102 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9103 intcmp_mode = CCmode;
9104 code = GEU;
9106 break;
9107 case LT:
9108 case UNLT:
9109 if (code == LT && TARGET_IEEE_FP)
9111 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9112 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9113 intcmp_mode = CCmode;
9114 code = EQ;
9116 else
9118 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9119 code = NE;
9121 break;
9122 case GE:
9123 case UNGE:
9124 if (code == GE || !TARGET_IEEE_FP)
9126 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9127 code = EQ;
9129 else
9131 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9132 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9133 GEN_INT (0x01)));
9134 code = NE;
9136 break;
9137 case LE:
9138 case UNLE:
9139 if (code == LE && TARGET_IEEE_FP)
9141 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9142 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9143 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9144 intcmp_mode = CCmode;
9145 code = LTU;
9147 else
9149 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9150 code = NE;
9152 break;
9153 case EQ:
9154 case UNEQ:
9155 if (code == EQ && TARGET_IEEE_FP)
9157 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9158 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9159 intcmp_mode = CCmode;
9160 code = EQ;
9162 else
9164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9165 code = NE;
9166 break;
9168 break;
9169 case NE:
9170 case LTGT:
9171 if (code == NE && TARGET_IEEE_FP)
9173 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9174 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9175 GEN_INT (0x40)));
9176 code = NE;
9178 else
9180 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9181 code = EQ;
9183 break;
9185 case UNORDERED:
9186 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9187 code = NE;
9188 break;
9189 case ORDERED:
9190 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9191 code = EQ;
9192 break;
9194 default:
9195 abort ();
9199 /* Return the test that should be put into the flags user, i.e.
9200 the bcc, scc, or cmov instruction. */
9201 return gen_rtx_fmt_ee (code, VOIDmode,
9202 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9203 const0_rtx);
rtx
9207 ix86_expand_compare (code, second_test, bypass_test)
9208 enum rtx_code code;
9209 rtx *second_test, *bypass_test;
9211 rtx op0, op1, ret;
9212 op0 = ix86_compare_op0;
9213 op1 = ix86_compare_op1;
9215 if (second_test)
9216 *second_test = NULL_RTX;
9217 if (bypass_test)
9218 *bypass_test = NULL_RTX;
9220 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9221 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9222 second_test, bypass_test);
9223 else
9224 ret = ix86_expand_int_compare (code, op0, op1);
9226 return ret;
9229 /* Return true if the CODE will result in a nontrivial jump sequence. */
9230 bool
9231 ix86_fp_jump_nontrivial_p (code)
9232 enum rtx_code code;
9234 enum rtx_code bypass_code, first_code, second_code;
9235 if (!TARGET_CMOVE)
9236 return true;
9237 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9238 return bypass_code != NIL || second_code != NIL;
9241 void
9242 ix86_expand_branch (code, label)
9243 enum rtx_code code;
9244 rtx label;
9246 rtx tmp;
9248 switch (GET_MODE (ix86_compare_op0))
9250 case QImode:
9251 case HImode:
9252 case SImode:
9253 simple:
9254 tmp = ix86_expand_compare (code, NULL, NULL);
9255 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9256 gen_rtx_LABEL_REF (VOIDmode, label),
9257 pc_rtx);
9258 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9259 return;
9261 case SFmode:
9262 case DFmode:
9263 case XFmode:
9264 case TFmode:
9266 rtvec vec;
9267 int use_fcomi;
9268 enum rtx_code bypass_code, first_code, second_code;
9270 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9271 &ix86_compare_op1);
9273 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9275 /* Check whether we will use the natural sequence with one jump. If
9276 so, we can expand the jump early. Otherwise delay expansion by
9277 creating a compound insn so as not to confuse the optimizers. */
9278 if (bypass_code == NIL && second_code == NIL
9279 && TARGET_CMOVE)
9281 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9282 gen_rtx_LABEL_REF (VOIDmode, label),
9283 pc_rtx, NULL_RTX);
9285 else
9287 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9288 ix86_compare_op0, ix86_compare_op1);
9289 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9290 gen_rtx_LABEL_REF (VOIDmode, label),
9291 pc_rtx);
9292 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9294 use_fcomi = ix86_use_fcomi_compare (code);
9295 vec = rtvec_alloc (3 + !use_fcomi);
9296 RTVEC_ELT (vec, 0) = tmp;
9297 RTVEC_ELT (vec, 1)
9298 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9299 RTVEC_ELT (vec, 2)
9300 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9301 if (! use_fcomi)
9302 RTVEC_ELT (vec, 3)
9303 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9305 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9307 return;
9310 case DImode:
9311 if (TARGET_64BIT)
9312 goto simple;
9313 /* Expand DImode branch into multiple compare+branch. */
9315 rtx lo[2], hi[2], label2;
9316 enum rtx_code code1, code2, code3;
9318 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9320 tmp = ix86_compare_op0;
9321 ix86_compare_op0 = ix86_compare_op1;
9322 ix86_compare_op1 = tmp;
9323 code = swap_condition (code);
9325 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9326 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9328 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9329 avoid two branches. This costs one extra insn, so disable when
9330 optimizing for size. */
9332 if ((code == EQ || code == NE)
9333 && (!optimize_size
9334 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9336 rtx xor0, xor1;
9338 xor1 = hi[0];
9339 if (hi[1] != const0_rtx)
9340 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9341 NULL_RTX, 0, OPTAB_WIDEN);
9343 xor0 = lo[0];
9344 if (lo[1] != const0_rtx)
9345 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9346 NULL_RTX, 0, OPTAB_WIDEN);
9348 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9349 NULL_RTX, 0, OPTAB_WIDEN);
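/* Descriptive note: the IOR of the two XORs is zero exactly when both
   halves are equal, so the original EQ/NE test reduces to a single compare
   of TMP against zero. */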
9351 ix86_compare_op0 = tmp;
9352 ix86_compare_op1 = const0_rtx;
9353 ix86_expand_branch (code, label);
9354 return;
9357 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
9358 op1 is a constant and its low word is zero, then we can just
9359 examine the high word. */
9361 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9362 switch (code)
9364 case LT: case LTU: case GE: case GEU:
9365 ix86_compare_op0 = hi[0];
9366 ix86_compare_op1 = hi[1];
9367 ix86_expand_branch (code, label);
9368 return;
9369 default:
9370 break;
9373 /* Otherwise, we need two or three jumps. */
9375 label2 = gen_label_rtx ();
9377 code1 = code;
9378 code2 = swap_condition (code);
9379 code3 = unsigned_condition (code);
9381 switch (code)
9383 case LT: case GT: case LTU: case GTU:
9384 break;
9386 case LE: code1 = LT; code2 = GT; break;
9387 case GE: code1 = GT; code2 = LT; break;
9388 case LEU: code1 = LTU; code2 = GTU; break;
9389 case GEU: code1 = GTU; code2 = LTU; break;
9391 case EQ: code1 = NIL; code2 = NE; break;
9392 case NE: code2 = NIL; break;
9394 default:
9395 abort ();
/*
9399 * a < b =>
9400 * if (hi(a) < hi(b)) goto true;
9401 * if (hi(a) > hi(b)) goto false;
9402 * if (lo(a) < lo(b)) goto true;
9403 * false:
*/
9406 ix86_compare_op0 = hi[0];
9407 ix86_compare_op1 = hi[1];
9409 if (code1 != NIL)
9410 ix86_expand_branch (code1, label);
9411 if (code2 != NIL)
9412 ix86_expand_branch (code2, label2);
9414 ix86_compare_op0 = lo[0];
9415 ix86_compare_op1 = lo[1];
9416 ix86_expand_branch (code3, label);
9418 if (code2 != NIL)
9419 emit_label (label2);
9420 return;
9423 default:
9424 abort ();
9428 /* Split branch based on floating point condition. */
9429 void
9430 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9431 enum rtx_code code;
9432 rtx op1, op2, target1, target2, tmp;
9434 rtx second, bypass;
9435 rtx label = NULL_RTX;
9436 rtx condition;
9437 int bypass_probability = -1, second_probability = -1, probability = -1;
9438 rtx i;
9440 if (target2 != pc_rtx)
9442 rtx tmp = target2;
9443 code = reverse_condition_maybe_unordered (code);
9444 target2 = target1;
9445 target1 = tmp;
9448 condition = ix86_expand_fp_compare (code, op1, op2,
9449 tmp, &second, &bypass);
9451 if (split_branch_probability >= 0)
9453 /* Distribute the probabilities across the jumps.
9454 Assume that BYPASS and SECOND always test
9455 for UNORDERED. */
9456 probability = split_branch_probability;
9458 /* A value of 1 is low enough that the probability does not need
9459 to be updated. Later we may run some experiments and see
9460 whether unordered values are more frequent in practice. */
9461 if (bypass)
9462 bypass_probability = 1;
9463 if (second)
9464 second_probability = 1;
9466 if (bypass != NULL_RTX)
9468 label = gen_label_rtx ();
9469 i = emit_jump_insn (gen_rtx_SET
9470 (VOIDmode, pc_rtx,
9471 gen_rtx_IF_THEN_ELSE (VOIDmode,
9472 bypass,
9473 gen_rtx_LABEL_REF (VOIDmode,
9474 label),
9475 pc_rtx)));
9476 if (bypass_probability >= 0)
9477 REG_NOTES (i)
9478 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9479 GEN_INT (bypass_probability),
9480 REG_NOTES (i));
9482 i = emit_jump_insn (gen_rtx_SET
9483 (VOIDmode, pc_rtx,
9484 gen_rtx_IF_THEN_ELSE (VOIDmode,
9485 condition, target1, target2)));
9486 if (probability >= 0)
9487 REG_NOTES (i)
9488 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9489 GEN_INT (probability),
9490 REG_NOTES (i));
9491 if (second != NULL_RTX)
9493 i = emit_jump_insn (gen_rtx_SET
9494 (VOIDmode, pc_rtx,
9495 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9496 target2)));
9497 if (second_probability >= 0)
9498 REG_NOTES (i)
9499 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9500 GEN_INT (second_probability),
9501 REG_NOTES (i));
9503 if (label != NULL_RTX)
9504 emit_label (label);
int
9508 ix86_expand_setcc (code, dest)
9509 enum rtx_code code;
9510 rtx dest;
9512 rtx ret, tmp, tmpreg;
9513 rtx second_test, bypass_test;
9515 if (GET_MODE (ix86_compare_op0) == DImode
9516 && !TARGET_64BIT)
9517 return 0; /* FAIL */
9519 if (GET_MODE (dest) != QImode)
9520 abort ();
9522 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9523 PUT_MODE (ret, QImode);
9525 tmp = dest;
9526 tmpreg = dest;
9528 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9529 if (bypass_test || second_test)
9531 rtx test = second_test;
9532 int bypass = 0;
9533 rtx tmp2 = gen_reg_rtx (QImode);
9534 if (bypass_test)
9536 if (second_test)
9537 abort ();
9538 test = bypass_test;
9539 bypass = 1;
9540 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9542 PUT_MODE (test, QImode);
9543 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9545 if (bypass)
9546 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9547 else
9548 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9551 return 1; /* DONE */
9554 /* Expand a comparison setting or clearing the carry flag. Return true when
9555 successful and set *POP to the comparison operation. */
9556 bool
9557 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9558 rtx op0, op1, *pop;
9559 enum rtx_code code;
9561 enum machine_mode mode =
9562 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9564 /* Do not handle DImode compares that go through the special path. Also we
9565 can't deal with FP compares yet; this would be possible to add. */
9566 if ((mode == DImode && !TARGET_64BIT))
9567 return false;
9568 if (FLOAT_MODE_P (mode))
9570 rtx second_test = NULL, bypass_test = NULL;
9571 rtx compare_op, compare_seq;
9573 /* Shortcut: following common codes never translate into carry flag compares. */
9574 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9575 || code == ORDERED || code == UNORDERED)
9576 return false;
9578 /* These comparisons require zero flag; swap operands so they won't. */
9579 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9580 && !TARGET_IEEE_FP)
9582 rtx tmp = op0;
9583 op0 = op1;
9584 op1 = tmp;
9585 code = swap_condition (code);
9588 /* Try to expand the comparison and verify that we end up with a carry flag
9589 based comparison. This fails to be true only when we decide to expand the
9590 comparison using arithmetic, which is not too common a scenario. */
9591 start_sequence ();
9592 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9593 &second_test, &bypass_test);
9594 compare_seq = get_insns ();
9595 end_sequence ();
9597 if (second_test || bypass_test)
9598 return false;
9599 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9600 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9601 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9602 else
9603 code = GET_CODE (compare_op);
9604 if (code != LTU && code != GEU)
9605 return false;
9606 emit_insn (compare_seq);
9607 *pop = compare_op;
9608 return true;
9610 if (!INTEGRAL_MODE_P (mode))
9611 return false;
9612 switch (code)
9614 case LTU:
9615 case GEU:
9616 break;
9618 /* Convert a==0 into (unsigned)a<1. */
9619 case EQ:
9620 case NE:
9621 if (op1 != const0_rtx)
9622 return false;
9623 op1 = const1_rtx;
9624 code = (code == EQ ? LTU : GEU);
9625 break;
9627 /* Convert a>b into b<a or a>=b+1. */
9628 case GTU:
9629 case LEU:
9630 if (GET_CODE (op1) == CONST_INT)
9632 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9633 /* Bail out on overflow. We could still swap the operands, but that
9634 would force loading of the constant into a register. */
9635 if (op1 == const0_rtx
9636 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9637 return false;
9638 code = (code == GTU ? GEU : LTU);
9640 else
9642 rtx tmp = op1;
9643 op1 = op0;
9644 op0 = tmp;
9645 code = (code == GTU ? LTU : GEU);
9647 break;
9649 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9650 case LT:
9651 case GE:
9652 if (mode == DImode || op1 != const0_rtx)
9653 return false;
9654 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9655 code = (code == LT ? GEU : LTU);
9656 break;
9657 case LE:
9658 case GT:
9659 if (mode == DImode || op1 != constm1_rtx)
9660 return false;
9661 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9662 code = (code == LE ? GEU : LTU);
9663 break;
9665 default:
9666 return false;
9668 ix86_compare_op0 = op0;
9669 ix86_compare_op1 = op1;
9670 *pop = ix86_expand_compare (code, NULL, NULL);
9671 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9672 abort ();
9673 return true;
int
9677 ix86_expand_int_movcc (operands)
9678 rtx operands[];
9680 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9681 rtx compare_seq, compare_op;
9682 rtx second_test, bypass_test;
9683 enum machine_mode mode = GET_MODE (operands[0]);
9684 bool sign_bit_compare_p = false;
9686 start_sequence ();
9687 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9688 compare_seq = get_insns ();
9689 end_sequence ();
9691 compare_code = GET_CODE (compare_op);
9693 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9694 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9695 sign_bit_compare_p = true;
9697 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9698 HImode insns, we'd be swallowed in word prefix ops. */
9700 if ((mode != HImode || TARGET_FAST_PREFIX)
9701 && (mode != DImode || TARGET_64BIT)
9702 && GET_CODE (operands[2]) == CONST_INT
9703 && GET_CODE (operands[3]) == CONST_INT)
9705 rtx out = operands[0];
9706 HOST_WIDE_INT ct = INTVAL (operands[2]);
9707 HOST_WIDE_INT cf = INTVAL (operands[3]);
9708 HOST_WIDE_INT diff;
9710 diff = ct - cf;
9711 /* Sign bit compares are better done using shifts than by using
9712 sbb. */
9713 if (sign_bit_compare_p
9714 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9715 ix86_compare_op1, &compare_op))
9717 /* Detect overlap between destination and compare sources. */
9718 rtx tmp = out;
9720 if (!sign_bit_compare_p)
9722 bool fpcmp = false;
9724 compare_code = GET_CODE (compare_op);
9726 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9727 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9729 fpcmp = true;
9730 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9733 /* To simplify the rest of the code, restrict to the GEU case. */
9734 if (compare_code == LTU)
9736 HOST_WIDE_INT tmp = ct;
9737 ct = cf;
9738 cf = tmp;
9739 compare_code = reverse_condition (compare_code);
9740 code = reverse_condition (code);
9742 else
9744 if (fpcmp)
9745 PUT_CODE (compare_op,
9746 reverse_condition_maybe_unordered
9747 (GET_CODE (compare_op)));
9748 else
9749 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9751 diff = ct - cf;
9753 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9754 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9755 tmp = gen_reg_rtx (mode);
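/* Descriptive note: the movdicc_0_m1/movsicc_0_m1 patterns expand to an
   sbb of a register with itself, leaving all ones when the carry flag is
   set and zero otherwise; the arithmetic below then maps that -1/0 value
   onto ct/cf. */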
9757 if (mode == DImode)
9758 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9759 else
9760 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9762 else
9764 if (code == GT || code == GE)
9765 code = reverse_condition (code);
9766 else
9768 HOST_WIDE_INT tmp = ct;
9769 ct = cf;
9770 cf = tmp;
9771 diff = ct - cf;
9773 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9774 ix86_compare_op1, VOIDmode, 0, -1);
9777 if (diff == 1)
/*
9780 * cmpl op0,op1
9781 * sbbl dest,dest
9782 * [addl dest, ct]
9784 * Size 5 - 8.
*/
9786 if (ct)
9787 tmp = expand_simple_binop (mode, PLUS,
9788 tmp, GEN_INT (ct),
9789 copy_rtx (tmp), 1, OPTAB_DIRECT);
9791 else if (cf == -1)
/*
9794 * cmpl op0,op1
9795 * sbbl dest,dest
9796 * orl $ct, dest
9798 * Size 8.
*/
9800 tmp = expand_simple_binop (mode, IOR,
9801 tmp, GEN_INT (ct),
9802 copy_rtx (tmp), 1, OPTAB_DIRECT);
9804 else if (diff == -1 && ct)
/*
9807 * cmpl op0,op1
9808 * sbbl dest,dest
9809 * notl dest
9810 * [addl dest, cf]
9812 * Size 8 - 11.
*/
9814 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9815 if (cf)
9816 tmp = expand_simple_binop (mode, PLUS,
9817 copy_rtx (tmp), GEN_INT (cf),
9818 copy_rtx (tmp), 1, OPTAB_DIRECT);
9820 else
/*
9823 * cmpl op0,op1
9824 * sbbl dest,dest
9825 * [notl dest]
9826 * andl cf - ct, dest
9827 * [addl dest, ct]
9829 * Size 8 - 11.
*/
9832 if (cf == 0)
9834 cf = ct;
9835 ct = 0;
9836 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9839 tmp = expand_simple_binop (mode, AND,
9840 copy_rtx (tmp),
9841 gen_int_mode (cf - ct, mode),
9842 copy_rtx (tmp), 1, OPTAB_DIRECT);
9843 if (ct)
9844 tmp = expand_simple_binop (mode, PLUS,
9845 copy_rtx (tmp), GEN_INT (ct),
9846 copy_rtx (tmp), 1, OPTAB_DIRECT);
9849 if (!rtx_equal_p (tmp, out))
9850 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9852 return 1; /* DONE */
9855 if (diff < 0)
9857 HOST_WIDE_INT tmp;
9858 tmp = ct, ct = cf, cf = tmp;
9859 diff = -diff;
9860 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9862 /* We may be reversing an unordered compare to a normal compare, which
9863 is not valid in general (we may convert a non-trapping condition
9864 to a trapping one); however, on i386 we currently emit all
9865 comparisons unordered. */
9866 compare_code = reverse_condition_maybe_unordered (compare_code);
9867 code = reverse_condition_maybe_unordered (code);
9869 else
9871 compare_code = reverse_condition (compare_code);
9872 code = reverse_condition (code);
9876 compare_code = NIL;
9877 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9878 && GET_CODE (ix86_compare_op1) == CONST_INT)
9880 if (ix86_compare_op1 == const0_rtx
9881 && (code == LT || code == GE))
9882 compare_code = code;
9883 else if (ix86_compare_op1 == constm1_rtx)
9885 if (code == LE)
9886 compare_code = LT;
9887 else if (code == GT)
9888 compare_code = GE;
9892 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9893 if (compare_code != NIL
9894 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9895 && (cf == -1 || ct == -1))
9897 /* If lea code below could be used, only optimize
9898 if it results in a 2 insn sequence. */
9900 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9901 || diff == 3 || diff == 5 || diff == 9)
9902 || (compare_code == LT && ct == -1)
9903 || (compare_code == GE && cf == -1))
/*
9906 * notl op1 (if necessary)
9907 * sarl $31, op1
9908 * orl cf, op1
*/
9910 if (ct != -1)
9912 cf = ct;
9913 ct = -1;
9914 code = reverse_condition (code);
9917 out = emit_store_flag (out, code, ix86_compare_op0,
9918 ix86_compare_op1, VOIDmode, 0, -1);
9920 out = expand_simple_binop (mode, IOR,
9921 out, GEN_INT (cf),
9922 out, 1, OPTAB_DIRECT);
9923 if (out != operands[0])
9924 emit_move_insn (operands[0], out);
9926 return 1; /* DONE */
9931 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9932 || diff == 3 || diff == 5 || diff == 9)
9933 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9934 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
/*
9937 * xorl dest,dest
9938 * cmpl op1,op2
9939 * setcc dest
9940 * lea cf(dest*(ct-cf)),dest
9942 * Size 14.
9944 * This also catches the degenerate setcc-only case.
*/
9947 rtx tmp;
9948 int nops;
9950 out = emit_store_flag (out, code, ix86_compare_op0,
9951 ix86_compare_op1, VOIDmode, 0, 1);
9953 nops = 0;
9954 /* On x86_64 the lea instruction operates on Pmode, so we need
9955 to get the arithmetic done in the proper mode to match. */
9956 if (diff == 1)
9957 tmp = copy_rtx (out);
9958 else
9960 rtx out1;
9961 out1 = copy_rtx (out);
9962 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9963 nops++;
9964 if (diff & 1)
9966 tmp = gen_rtx_PLUS (mode, tmp, out1);
9967 nops++;
9970 if (cf != 0)
9972 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9973 nops++;
9975 if (!rtx_equal_p (tmp, out))
9977 if (nops == 1)
9978 out = force_operand (tmp, copy_rtx (out));
9979 else
9980 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9982 if (!rtx_equal_p (out, operands[0]))
9983 emit_move_insn (operands[0], copy_rtx (out));
9985 return 1; /* DONE */
/*
9989 * General case: Jumpful:
9990 * xorl dest,dest cmpl op1, op2
9991 * cmpl op1, op2 movl ct, dest
9992 * setcc dest jcc 1f
9993 * decl dest movl cf, dest
9994 * andl (cf-ct),dest 1:
9995 * addl ct,dest
9997 * Size 20. Size 14.
9999 * This is reasonably steep, but branch mispredict costs are
10000 * high on modern cpus, so consider failing only if optimizing
10001 * for space.
*/
10004 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10005 && BRANCH_COST >= 2)
10007 if (cf == 0)
10009 cf = ct;
10010 ct = 0;
10011 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10012 /* We may be reversing an unordered compare to a normal compare,
10013 which is not valid in general (we may convert a non-trapping
10014 condition to a trapping one); however, on i386 we currently
10015 emit all comparisons unordered. */
10016 code = reverse_condition_maybe_unordered (code);
10017 else
10019 code = reverse_condition (code);
10020 if (compare_code != NIL)
10021 compare_code = reverse_condition (compare_code);
10025 if (compare_code != NIL)
10027 /* notl op1 (if needed)
10028 sarl $31, op1
10029 andl (cf-ct), op1
10030 addl ct, op1
10032 For x < 0 (resp. x <= -1) there will be no notl,
10033 so if possible swap the constants to get rid of the
10034 complement.
10035 True/false will be -1/0 while code below (store flag
10036 followed by decrement) is 0/-1, so the constants need
10037 to be exchanged once more. */
10039 if (compare_code == GE || !cf)
10041 code = reverse_condition (code);
10042 compare_code = LT;
10044 else
10046 HOST_WIDE_INT tmp = cf;
10047 cf = ct;
10048 ct = tmp;
10051 out = emit_store_flag (out, code, ix86_compare_op0,
10052 ix86_compare_op1, VOIDmode, 0, -1);
10054 else
10056 out = emit_store_flag (out, code, ix86_compare_op0,
10057 ix86_compare_op1, VOIDmode, 0, 1);
10059 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10060 copy_rtx (out), 1, OPTAB_DIRECT);
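/* Descriptive note: (OUT & (cf - ct)) + ct evaluates to ct when OUT is 0
   and to cf when OUT is -1, which selects the required constant. */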
10063 out = expand_simple_binop (mode, AND, copy_rtx (out),
10064 gen_int_mode (cf - ct, mode),
10065 copy_rtx (out), 1, OPTAB_DIRECT);
10066 if (ct)
10067 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10068 copy_rtx (out), 1, OPTAB_DIRECT);
10069 if (!rtx_equal_p (out, operands[0]))
10070 emit_move_insn (operands[0], copy_rtx (out));
10072 return 1; /* DONE */
10076 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10078 /* Try a few more things with specific constants and a variable. */
10080 optab op;
10081 rtx var, orig_out, out, tmp;
10083 if (BRANCH_COST <= 2)
10084 return 0; /* FAIL */
10086 /* If one of the two operands is an interesting constant, load a 0 or -1
10087 constant by recursing and then mask the variable in with a logical operation. */
10089 if (GET_CODE (operands[2]) == CONST_INT)
10091 var = operands[3];
10092 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10093 operands[3] = constm1_rtx, op = and_optab;
10094 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10095 operands[3] = const0_rtx, op = ior_optab;
10096 else
10097 return 0; /* FAIL */
10099 else if (GET_CODE (operands[3]) == CONST_INT)
10101 var = operands[2];
10102 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10103 operands[2] = constm1_rtx, op = and_optab;
10104 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10105 operands[2] = const0_rtx, op = ior_optab;
10106 else
10107 return 0; /* FAIL */
10109 else
10110 return 0; /* FAIL */
10112 orig_out = operands[0];
10113 tmp = gen_reg_rtx (mode);
10114 operands[0] = tmp;
10116 /* Recurse to get the constant loaded. */
10117 if (ix86_expand_int_movcc (operands) == 0)
10118 return 0; /* FAIL */
10120 /* Mask in the interesting variable. */
10121 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10122 OPTAB_WIDEN);
10123 if (!rtx_equal_p (out, orig_out))
10124 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10126 return 1; /* DONE */
10130 * For comparison with above,
10132 * movl cf,dest
10133 * movl ct,tmp
10134 * cmpl op1,op2
10135 * cmovcc tmp,dest
10137 * Size 15.
10140 if (! nonimmediate_operand (operands[2], mode))
10141 operands[2] = force_reg (mode, operands[2]);
10142 if (! nonimmediate_operand (operands[3], mode))
10143 operands[3] = force_reg (mode, operands[3]);
10145 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10147 rtx tmp = gen_reg_rtx (mode);
10148 emit_move_insn (tmp, operands[3]);
10149 operands[3] = tmp;
10151 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10153 rtx tmp = gen_reg_rtx (mode);
10154 emit_move_insn (tmp, operands[2]);
10155 operands[2] = tmp;
10158 if (! register_operand (operands[2], VOIDmode)
10159 && (mode == QImode
10160 || ! register_operand (operands[3], VOIDmode)))
10161 operands[2] = force_reg (mode, operands[2]);
10163 if (mode == QImode
10164 && ! register_operand (operands[3], VOIDmode))
10165 operands[3] = force_reg (mode, operands[3]);
10167 emit_insn (compare_seq);
10168 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10169 gen_rtx_IF_THEN_ELSE (mode,
10170 compare_op, operands[2],
10171 operands[3])));
10172 if (bypass_test)
10173 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10174 gen_rtx_IF_THEN_ELSE (mode,
10175 bypass_test,
10176 copy_rtx (operands[3]),
10177 copy_rtx (operands[0]))));
10178 if (second_test)
10179 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10180 gen_rtx_IF_THEN_ELSE (mode,
10181 second_test,
10182 copy_rtx (operands[2]),
10183 copy_rtx (operands[0]))));
10185 return 1; /* DONE */
10189 ix86_expand_fp_movcc (operands)
10190 rtx operands[];
10192 enum rtx_code code;
10193 rtx tmp;
10194 rtx compare_op, second_test, bypass_test;
10196 /* For SF/DFmode conditional moves based on comparisons
10197 in same mode, we may want to use SSE min/max instructions. */
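/* For example, (a < b ? a : b) in SFmode can become a single minss
   (maxss for the mirrored case) when the checks below confirm that the
   comparison operands and the move operands line up.  */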
10198 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10199 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10200 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10201 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10202 && (!TARGET_IEEE_FP
10203 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10204 /* We may be called from the post-reload splitter. */
10205 && (!REG_P (operands[0])
10206 || SSE_REG_P (operands[0])
10207 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10209 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10210 code = GET_CODE (operands[1]);
10212 /* See if we have (cross) match between comparison operands and
10213 conditional move operands. */
10214 if (rtx_equal_p (operands[2], op1))
10216 rtx tmp = op0;
10217 op0 = op1;
10218 op1 = tmp;
10219 code = reverse_condition_maybe_unordered (code);
10221 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10223 /* Check for min operation. */
10224 if (code == LT || code == UNLE)
10226 if (code == UNLE)
10228 rtx tmp = op0;
10229 op0 = op1;
10230 op1 = tmp;
10232 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10233 if (memory_operand (op0, VOIDmode))
10234 op0 = force_reg (GET_MODE (operands[0]), op0);
10235 if (GET_MODE (operands[0]) == SFmode)
10236 emit_insn (gen_minsf3 (operands[0], op0, op1));
10237 else
10238 emit_insn (gen_mindf3 (operands[0], op0, op1));
10239 return 1;
10241 /* Check for max operation. */
10242 if (code == GT || code == UNGE)
10244 if (code == UNGE)
10246 rtx tmp = op0;
10247 op0 = op1;
10248 op1 = tmp;
10250 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10251 if (memory_operand (op0, VOIDmode))
10252 op0 = force_reg (GET_MODE (operands[0]), op0);
10253 if (GET_MODE (operands[0]) == SFmode)
10254 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10255 else
10256 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10257 return 1;
10260 /* Make the condition an sse_comparison_operator. When we are
10261 in non-IEEE mode, try to canonicalize the destination operand
10262 to be first in the comparison - this helps reload avoid extra
10263 moves. */
10264 if (!sse_comparison_operator (operands[1], VOIDmode)
10265 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10267 rtx tmp = ix86_compare_op0;
10268 ix86_compare_op0 = ix86_compare_op1;
10269 ix86_compare_op1 = tmp;
10270 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10271 VOIDmode, ix86_compare_op0,
10272 ix86_compare_op1);
10274 /* Similarly, try to make the result the first operand of the conditional
10275 move. We also don't support the NE comparison on SSE, so try to
10276 avoid it. */
10277 if ((rtx_equal_p (operands[0], operands[3])
10278 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10279 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10281 rtx tmp = operands[2];
10282 operands[2] = operands[3];
10283 operands[3] = tmp;
10284 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10285 (GET_CODE (operands[1])),
10286 VOIDmode, ix86_compare_op0,
10287 ix86_compare_op1);
10289 if (GET_MODE (operands[0]) == SFmode)
10290 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10291 operands[2], operands[3],
10292 ix86_compare_op0, ix86_compare_op1));
10293 else
10294 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10295 operands[2], operands[3],
10296 ix86_compare_op0, ix86_compare_op1));
10297 return 1;
10300 /* The floating point conditional move instructions don't directly
10301 support conditions resulting from a signed integer comparison. */
10303 code = GET_CODE (operands[1]);
10304 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10306 /* The floating point conditional move instructions don't directly
10307 support signed integer comparisons. */
10309 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10311 if (second_test != NULL || bypass_test != NULL)
10312 abort ();
10313 tmp = gen_reg_rtx (QImode);
10314 ix86_expand_setcc (code, tmp);
10315 code = NE;
10316 ix86_compare_op0 = tmp;
10317 ix86_compare_op1 = const0_rtx;
10318 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10320 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10322 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10323 emit_move_insn (tmp, operands[3]);
10324 operands[3] = tmp;
10326 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10328 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10329 emit_move_insn (tmp, operands[2]);
10330 operands[2] = tmp;
10333 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10334 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10335 compare_op,
10336 operands[2],
10337 operands[3])));
10338 if (bypass_test)
10339 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10340 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10341 bypass_test,
10342 operands[3],
10343 operands[0])));
10344 if (second_test)
10345 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10346 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10347 second_test,
10348 operands[2],
10349 operands[0])));
10351 return 1;
10354 /* Expand conditional increment or decrement using adc/sbb instructions.
10355 The default case using setcc followed by the conditional move can be
10356 done by generic code. */
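/* Illustrative sketch (AT&T syntax, not the exact RTL emitted): a
   conditional increment such as
     dest = x + (a < b ? 1 : 0)    -- with an unsigned comparison --
   can be produced as
     cmpl  b, a       ; sets the carry flag when a < b
     adcl  $0, dest   ; folds the carry into the sum
   while the mirrored sbb form handles conditional decrement; the code
   below massages the condition so that its result lands in the carry
   flag.  */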
10358 ix86_expand_int_addcc (operands)
10359 rtx operands[];
10361 enum rtx_code code = GET_CODE (operands[1]);
10362 rtx compare_op;
10363 rtx val = const0_rtx;
10364 bool fpcmp = false;
10365 enum machine_mode mode = GET_MODE (operands[0]);
10367 if (operands[3] != const1_rtx
10368 && operands[3] != constm1_rtx)
10369 return 0;
10370 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10371 ix86_compare_op1, &compare_op))
10372 return 0;
10373 code = GET_CODE (compare_op);
10375 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10376 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10378 fpcmp = true;
10379 code = ix86_fp_compare_code_to_integer (code);
10382 if (code != LTU)
10384 val = constm1_rtx;
10385 if (fpcmp)
10386 PUT_CODE (compare_op,
10387 reverse_condition_maybe_unordered
10388 (GET_CODE (compare_op)));
10389 else
10390 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10392 PUT_MODE (compare_op, mode);
10394 /* Construct either adc or sbb insn. */
10395 if ((code == LTU) == (operands[3] == constm1_rtx))
10397 switch (GET_MODE (operands[0]))
10399 case QImode:
10400 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10401 break;
10402 case HImode:
10403 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10404 break;
10405 case SImode:
10406 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10407 break;
10408 case DImode:
10409 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10410 break;
10411 default:
10412 abort ();
10415 else
10417 switch (GET_MODE (operands[0]))
10419 case QImode:
10420 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10421 break;
10422 case HImode:
10423 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10424 break;
10425 case SImode:
10426 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10427 break;
10428 case DImode:
10429 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10430 break;
10431 default:
10432 abort ();
10435 return 1; /* DONE */
10439 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10440 works for floating point parameters and non-offsettable memories.
10441 For pushes, it returns just stack offsets; the values will be saved
10442 in the right order. At most three parts are generated. */
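/* For example, on 32-bit targets a DFmode operand is returned as two
   SImode parts and an XFmode/TFmode operand as three, while on 64-bit
   targets an XFmode/TFmode operand becomes one DImode part plus one
   SImode part; CONST_DOUBLEs are split into the corresponding
   immediates.  */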
10444 static int
10445 ix86_split_to_parts (operand, parts, mode)
10446 rtx operand;
10447 rtx *parts;
10448 enum machine_mode mode;
10450 int size;
10452 if (!TARGET_64BIT)
10453 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10454 else
10455 size = (GET_MODE_SIZE (mode) + 4) / 8;
10457 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10458 abort ();
10459 if (size < 2 || size > 3)
10460 abort ();
10462 /* Optimize constant pool references to immediates. This is used by fp
10463 moves, which force all constants to memory to allow combining. */
10464 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10466 rtx tmp = maybe_get_pool_constant (operand);
10467 if (tmp)
10468 operand = tmp;
10471 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10473 /* The only non-offsettable memories we handle are pushes. */
10474 if (! push_operand (operand, VOIDmode))
10475 abort ();
10477 operand = copy_rtx (operand);
10478 PUT_MODE (operand, Pmode);
10479 parts[0] = parts[1] = parts[2] = operand;
10481 else if (!TARGET_64BIT)
10483 if (mode == DImode)
10484 split_di (&operand, 1, &parts[0], &parts[1]);
10485 else
10487 if (REG_P (operand))
10489 if (!reload_completed)
10490 abort ();
10491 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10492 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10493 if (size == 3)
10494 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10496 else if (offsettable_memref_p (operand))
10498 operand = adjust_address (operand, SImode, 0);
10499 parts[0] = operand;
10500 parts[1] = adjust_address (operand, SImode, 4);
10501 if (size == 3)
10502 parts[2] = adjust_address (operand, SImode, 8);
10504 else if (GET_CODE (operand) == CONST_DOUBLE)
10506 REAL_VALUE_TYPE r;
10507 long l[4];
10509 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10510 switch (mode)
10512 case XFmode:
10513 case TFmode:
10514 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10515 parts[2] = gen_int_mode (l[2], SImode);
10516 break;
10517 case DFmode:
10518 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10519 break;
10520 default:
10521 abort ();
10523 parts[1] = gen_int_mode (l[1], SImode);
10524 parts[0] = gen_int_mode (l[0], SImode);
10526 else
10527 abort ();
10530 else
10532 if (mode == TImode)
10533 split_ti (&operand, 1, &parts[0], &parts[1]);
10534 if (mode == XFmode || mode == TFmode)
10536 if (REG_P (operand))
10538 if (!reload_completed)
10539 abort ();
10540 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10541 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10543 else if (offsettable_memref_p (operand))
10545 operand = adjust_address (operand, DImode, 0);
10546 parts[0] = operand;
10547 parts[1] = adjust_address (operand, SImode, 8);
10549 else if (GET_CODE (operand) == CONST_DOUBLE)
10551 REAL_VALUE_TYPE r;
10552 long l[3];
10554 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10555 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10556 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10557 if (HOST_BITS_PER_WIDE_INT >= 64)
10558 parts[0]
10559 = gen_int_mode
10560 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10561 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10562 DImode);
10563 else
10564 parts[0] = immed_double_const (l[0], l[1], DImode);
10565 parts[1] = gen_int_mode (l[2], SImode);
10567 else
10568 abort ();
10572 return size;
10575 /* Emit insns to perform a move or push of DI, DF, and XF values.
10576 All required insns are emitted by this function. Operands 2-4
10577 receive the destination parts in the correct order; operands 5-7
10578 receive the source parts. */
10580 void
10581 ix86_split_long_move (operands)
10582 rtx operands[];
10584 rtx part[2][3];
10585 int nparts;
10586 int push = 0;
10587 int collisions = 0;
10588 enum machine_mode mode = GET_MODE (operands[0]);
10590 /* The DFmode expanders may ask us to move double.
10591 For 64bit target this is single move. By hiding the fact
10592 here we simplify i386.md splitters. */
10593 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10595 /* Optimize constant pool references to immediates. This is used by
10596 fp moves, which force all constants to memory to allow combining. */
10598 if (GET_CODE (operands[1]) == MEM
10599 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10600 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10601 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10602 if (push_operand (operands[0], VOIDmode))
10604 operands[0] = copy_rtx (operands[0]);
10605 PUT_MODE (operands[0], Pmode);
10607 else
10608 operands[0] = gen_lowpart (DImode, operands[0]);
10609 operands[1] = gen_lowpart (DImode, operands[1]);
10610 emit_move_insn (operands[0], operands[1]);
10611 return;
10614 /* The only non-offsettable memory we handle is push. */
10615 if (push_operand (operands[0], VOIDmode))
10616 push = 1;
10617 else if (GET_CODE (operands[0]) == MEM
10618 && ! offsettable_memref_p (operands[0]))
10619 abort ();
10621 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10622 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10624 /* When emitting push, take care for source operands on the stack. */
10625 if (push && GET_CODE (operands[1]) == MEM
10626 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10628 if (nparts == 3)
10629 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10630 XEXP (part[1][2], 0));
10631 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10632 XEXP (part[1][1], 0));
10635 /* We need to do the copy in the right order in case an address register
10636 of the source overlaps the destination. */
10637 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10639 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10640 collisions++;
10641 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10642 collisions++;
10643 if (nparts == 3
10644 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10645 collisions++;
10647 /* Collision in the middle part can be handled by reordering. */
10648 if (collisions == 1 && nparts == 3
10649 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10651 rtx tmp;
10652 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10653 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10656 /* If there are more collisions, we can't handle it by reordering.
10657 Do an lea to the last part and use only one colliding move. */
10658 else if (collisions > 1)
10660 rtx base;
10662 collisions = 1;
10664 base = part[0][nparts - 1];
10666 /* Handle the case when the last part isn't valid for lea.
10667 Happens in 64-bit mode storing the 12-byte XFmode. */
10668 if (GET_MODE (base) != Pmode)
10669 base = gen_rtx_REG (Pmode, REGNO (base));
10671 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10672 part[1][0] = replace_equiv_address (part[1][0], base);
10673 part[1][1] = replace_equiv_address (part[1][1],
10674 plus_constant (base, UNITS_PER_WORD));
10675 if (nparts == 3)
10676 part[1][2] = replace_equiv_address (part[1][2],
10677 plus_constant (base, 8));
10681 if (push)
10683 if (!TARGET_64BIT)
10685 if (nparts == 3)
10687 /* We use only the first 12 bytes of the TFmode value, but for pushing we
10688 are required to adjust the stack as if we were pushing a real 16-byte
10689 value. */
10690 if (mode == TFmode && !TARGET_64BIT)
10691 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10692 GEN_INT (-4)));
10693 emit_move_insn (part[0][2], part[1][2]);
10696 else
10698 /* In 64bit mode we don't have a 32bit push available. If this is a
10699 register, it is OK - we will just use the larger counterpart. We also
10700 retype memory - these come from the attempt to avoid a REX prefix on
10701 moving the second half of a TFmode value. */
10702 if (GET_MODE (part[1][1]) == SImode)
10704 if (GET_CODE (part[1][1]) == MEM)
10705 part[1][1] = adjust_address (part[1][1], DImode, 0);
10706 else if (REG_P (part[1][1]))
10707 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10708 else
10709 abort ();
10710 if (GET_MODE (part[1][0]) == SImode)
10711 part[1][0] = part[1][1];
10714 emit_move_insn (part[0][1], part[1][1]);
10715 emit_move_insn (part[0][0], part[1][0]);
10716 return;
10719 /* Choose correct order to not overwrite the source before it is copied. */
10720 if ((REG_P (part[0][0])
10721 && REG_P (part[1][1])
10722 && (REGNO (part[0][0]) == REGNO (part[1][1])
10723 || (nparts == 3
10724 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10725 || (collisions > 0
10726 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10728 if (nparts == 3)
10730 operands[2] = part[0][2];
10731 operands[3] = part[0][1];
10732 operands[4] = part[0][0];
10733 operands[5] = part[1][2];
10734 operands[6] = part[1][1];
10735 operands[7] = part[1][0];
10737 else
10739 operands[2] = part[0][1];
10740 operands[3] = part[0][0];
10741 operands[5] = part[1][1];
10742 operands[6] = part[1][0];
10745 else
10747 if (nparts == 3)
10749 operands[2] = part[0][0];
10750 operands[3] = part[0][1];
10751 operands[4] = part[0][2];
10752 operands[5] = part[1][0];
10753 operands[6] = part[1][1];
10754 operands[7] = part[1][2];
10756 else
10758 operands[2] = part[0][0];
10759 operands[3] = part[0][1];
10760 operands[5] = part[1][0];
10761 operands[6] = part[1][1];
10764 emit_move_insn (operands[2], operands[5]);
10765 emit_move_insn (operands[3], operands[6]);
10766 if (nparts == 3)
10767 emit_move_insn (operands[4], operands[7]);
10769 return;
10772 void
10773 ix86_split_ashldi (operands, scratch)
10774 rtx *operands, scratch;
10776 rtx low[2], high[2];
10777 int count;
10779 if (GET_CODE (operands[2]) == CONST_INT)
10781 split_di (operands, 2, low, high);
10782 count = INTVAL (operands[2]) & 63;
10784 if (count >= 32)
10786 emit_move_insn (high[0], low[1]);
10787 emit_move_insn (low[0], const0_rtx);
10789 if (count > 32)
10790 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10792 else
10794 if (!rtx_equal_p (operands[0], operands[1]))
10795 emit_move_insn (operands[0], operands[1]);
10796 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10797 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10800 else
10802 if (!rtx_equal_p (operands[0], operands[1]))
10803 emit_move_insn (operands[0], operands[1]);
10805 split_di (operands, 1, low, high);
10807 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10808 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10810 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10812 if (! no_new_pseudos)
10813 scratch = force_reg (SImode, const0_rtx);
10814 else
10815 emit_move_insn (scratch, const0_rtx);
10817 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10818 scratch));
10820 else
10821 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10825 void
10826 ix86_split_ashrdi (operands, scratch)
10827 rtx *operands, scratch;
10829 rtx low[2], high[2];
10830 int count;
10832 if (GET_CODE (operands[2]) == CONST_INT)
10834 split_di (operands, 2, low, high);
10835 count = INTVAL (operands[2]) & 63;
10837 if (count >= 32)
10839 emit_move_insn (low[0], high[1]);
10841 if (! reload_completed)
10842 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10843 else
10845 emit_move_insn (high[0], low[0]);
10846 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10849 if (count > 32)
10850 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10852 else
10854 if (!rtx_equal_p (operands[0], operands[1]))
10855 emit_move_insn (operands[0], operands[1]);
10856 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10857 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10860 else
10862 if (!rtx_equal_p (operands[0], operands[1]))
10863 emit_move_insn (operands[0], operands[1]);
10865 split_di (operands, 1, low, high);
10867 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10868 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10870 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10872 if (! no_new_pseudos)
10873 scratch = gen_reg_rtx (SImode);
10874 emit_move_insn (scratch, high[0]);
10875 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10876 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10877 scratch));
10879 else
10880 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10884 void
10885 ix86_split_lshrdi (operands, scratch)
10886 rtx *operands, scratch;
10888 rtx low[2], high[2];
10889 int count;
10891 if (GET_CODE (operands[2]) == CONST_INT)
10893 split_di (operands, 2, low, high);
10894 count = INTVAL (operands[2]) & 63;
10896 if (count >= 32)
10898 emit_move_insn (low[0], high[1]);
10899 emit_move_insn (high[0], const0_rtx);
10901 if (count > 32)
10902 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10904 else
10906 if (!rtx_equal_p (operands[0], operands[1]))
10907 emit_move_insn (operands[0], operands[1]);
10908 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10909 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10912 else
10914 if (!rtx_equal_p (operands[0], operands[1]))
10915 emit_move_insn (operands[0], operands[1]);
10917 split_di (operands, 1, low, high);
10919 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10920 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10922 /* Heh. By reversing the arguments, we can reuse this pattern. */
10923 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10925 if (! no_new_pseudos)
10926 scratch = force_reg (SImode, const0_rtx);
10927 else
10928 emit_move_insn (scratch, const0_rtx);
10930 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10931 scratch));
10933 else
10934 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10938 /* Helper function for the string operations below. Test whether the bits
10939 of VARIABLE selected by VALUE are all clear; if so, jump to the returned label. */
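/* Rough shape of the emitted test (assuming a 32-bit VARIABLE):
     andl  $VALUE, tmp     ; tmp is a fresh scratch copy of VARIABLE
     je    label           ; skip the fix-up when the bits are clear
   so callers place the unaligned fix-up between this call and the
   returned label.  */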
10940 static rtx
10941 ix86_expand_aligntest (variable, value)
10942 rtx variable;
10943 int value;
10945 rtx label = gen_label_rtx ();
10946 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10947 if (GET_MODE (variable) == DImode)
10948 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10949 else
10950 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10951 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10952 1, label);
10953 return label;
10956 /* Decrease COUNTREG by VALUE. */
10957 static void
10958 ix86_adjust_counter (countreg, value)
10959 rtx countreg;
10960 HOST_WIDE_INT value;
10962 if (GET_MODE (countreg) == DImode)
10963 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10964 else
10965 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10968 /* Zero extend the possibly SImode EXP to a Pmode register. */
10970 ix86_zero_extend_to_Pmode (exp)
10971 rtx exp;
10973 rtx r;
10974 if (GET_MODE (exp) == VOIDmode)
10975 return force_reg (Pmode, exp);
10976 if (GET_MODE (exp) == Pmode)
10977 return copy_to_mode_reg (Pmode, exp);
10978 r = gen_reg_rtx (Pmode);
10979 emit_insn (gen_zero_extendsidi2 (r, exp));
10980 return r;
10983 /* Expand string move (memcpy) operation. Use i386 string operations when
10984 profitable. expand_clrstr contains similar code. */
10986 ix86_expand_movstr (dst, src, count_exp, align_exp)
10987 rtx dst, src, count_exp, align_exp;
10989 rtx srcreg, destreg, countreg;
10990 enum machine_mode counter_mode;
10991 HOST_WIDE_INT align = 0;
10992 unsigned HOST_WIDE_INT count = 0;
10993 rtx insns;
10995 if (GET_CODE (align_exp) == CONST_INT)
10996 align = INTVAL (align_exp);
10998 /* Can't use any of this if the user has appropriated esi or edi. */
10999 if (global_regs[4] || global_regs[5])
11000 return 0;
11002 /* This simple hack avoids all inlining code and simplifies code below. */
11003 if (!TARGET_ALIGN_STRINGOPS)
11004 align = 64;
11006 if (GET_CODE (count_exp) == CONST_INT)
11008 count = INTVAL (count_exp);
11009 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11010 return 0;
11013 /* Figure out proper mode for counter. For 32bits it is always SImode,
11014 for 64bits use SImode when possible, otherwise DImode.
11015 Set count to number of bytes copied when known at compile time. */
11016 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11017 || x86_64_zero_extended_value (count_exp))
11018 counter_mode = SImode;
11019 else
11020 counter_mode = DImode;
11022 start_sequence ();
11024 if (counter_mode != SImode && counter_mode != DImode)
11025 abort ();
11027 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11028 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11030 emit_insn (gen_cld ());
11032 /* When optimizing for size emit simple rep ; movsb instruction for
11033 counts not divisible by 4. */
11035 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11037 countreg = ix86_zero_extend_to_Pmode (count_exp);
11038 if (TARGET_64BIT)
11039 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11040 destreg, srcreg, countreg));
11041 else
11042 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11043 destreg, srcreg, countreg));
11046 /* For constant aligned (or small unaligned) copies use rep movsl
11047 followed by code copying the rest. For PentiumPro ensure 8 byte
11048 alignment to allow rep movsl acceleration. */
11050 else if (count != 0
11051 && (align >= 8
11052 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11053 || optimize_size || count < (unsigned int) 64))
11055 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11056 if (count & ~(size - 1))
11058 countreg = copy_to_mode_reg (counter_mode,
11059 GEN_INT ((count >> (size == 4 ? 2 : 3))
11060 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11061 countreg = ix86_zero_extend_to_Pmode (countreg);
11062 if (size == 4)
11064 if (TARGET_64BIT)
11065 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11066 destreg, srcreg, countreg));
11067 else
11068 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11069 destreg, srcreg, countreg));
11071 else
11072 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11073 destreg, srcreg, countreg));
11075 if (size == 8 && (count & 0x04))
11076 emit_insn (gen_strmovsi (destreg, srcreg));
11077 if (count & 0x02)
11078 emit_insn (gen_strmovhi (destreg, srcreg));
11079 if (count & 0x01)
11080 emit_insn (gen_strmovqi (destreg, srcreg));
11082 /* The generic code based on the glibc implementation:
11083 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11084 allowing accelerated copying there)
11085 - copy the data using rep movsl
11086 - copy the rest. */
11087 else
11089 rtx countreg2;
11090 rtx label = NULL;
11091 int desired_alignment = (TARGET_PENTIUMPRO
11092 && (count == 0 || count >= (unsigned int) 260)
11093 ? 8 : UNITS_PER_WORD);
11095 /* In case we don't know anything about the alignment, default to the
11096 library version, since it is usually equally fast and results in
11097 shorter code.
11099 Also emit a call when we know that the count is large and call overhead
11100 will not be important. */
11101 if (!TARGET_INLINE_ALL_STRINGOPS
11102 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11104 end_sequence ();
11105 return 0;
11108 if (TARGET_SINGLE_STRINGOP)
11109 emit_insn (gen_cld ());
11111 countreg2 = gen_reg_rtx (Pmode);
11112 countreg = copy_to_mode_reg (counter_mode, count_exp);
11114 /* We don't use loops to align the destination or to copy parts smaller
11115 than 4 bytes, because gcc is able to optimize such code better (when
11116 the destination or the count really is aligned, gcc is often able
11117 to predict the branches) and also it is friendlier to hardware
11118 branch prediction.
11120 Using loops is beneficial for the generic case, because we can
11121 handle small counts using the loops. Many CPUs (such as Athlon)
11122 have large REP prefix setup costs.
11124 This is quite costly. Maybe we can revisit this decision later or
11125 add some customizability to this code. */
11127 if (count == 0 && align < desired_alignment)
11129 label = gen_label_rtx ();
11130 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11131 LEU, 0, counter_mode, 1, label);
11133 if (align <= 1)
11135 rtx label = ix86_expand_aligntest (destreg, 1);
11136 emit_insn (gen_strmovqi (destreg, srcreg));
11137 ix86_adjust_counter (countreg, 1);
11138 emit_label (label);
11139 LABEL_NUSES (label) = 1;
11141 if (align <= 2)
11143 rtx label = ix86_expand_aligntest (destreg, 2);
11144 emit_insn (gen_strmovhi (destreg, srcreg));
11145 ix86_adjust_counter (countreg, 2);
11146 emit_label (label);
11147 LABEL_NUSES (label) = 1;
11149 if (align <= 4 && desired_alignment > 4)
11151 rtx label = ix86_expand_aligntest (destreg, 4);
11152 emit_insn (gen_strmovsi (destreg, srcreg));
11153 ix86_adjust_counter (countreg, 4);
11154 emit_label (label);
11155 LABEL_NUSES (label) = 1;
11158 if (label && desired_alignment > 4 && !TARGET_64BIT)
11160 emit_label (label);
11161 LABEL_NUSES (label) = 1;
11162 label = NULL_RTX;
11164 if (!TARGET_SINGLE_STRINGOP)
11165 emit_insn (gen_cld ());
11166 if (TARGET_64BIT)
11168 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11169 GEN_INT (3)));
11170 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11171 destreg, srcreg, countreg2));
11173 else
11175 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11176 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11177 destreg, srcreg, countreg2));
11180 if (label)
11182 emit_label (label);
11183 LABEL_NUSES (label) = 1;
11185 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11186 emit_insn (gen_strmovsi (destreg, srcreg));
11187 if ((align <= 4 || count == 0) && TARGET_64BIT)
11189 rtx label = ix86_expand_aligntest (countreg, 4);
11190 emit_insn (gen_strmovsi (destreg, srcreg));
11191 emit_label (label);
11192 LABEL_NUSES (label) = 1;
11194 if (align > 2 && count != 0 && (count & 2))
11195 emit_insn (gen_strmovhi (destreg, srcreg));
11196 if (align <= 2 || count == 0)
11198 rtx label = ix86_expand_aligntest (countreg, 2);
11199 emit_insn (gen_strmovhi (destreg, srcreg));
11200 emit_label (label);
11201 LABEL_NUSES (label) = 1;
11203 if (align > 1 && count != 0 && (count & 1))
11204 emit_insn (gen_strmovqi (destreg, srcreg));
11205 if (align <= 1 || count == 0)
11207 rtx label = ix86_expand_aligntest (countreg, 1);
11208 emit_insn (gen_strmovqi (destreg, srcreg));
11209 emit_label (label);
11210 LABEL_NUSES (label) = 1;
11214 insns = get_insns ();
11215 end_sequence ();
11217 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11218 emit_insn (insns);
11219 return 1;
11222 /* Expand string clear operation (bzero). Use i386 string operations when
11223 profitable. expand_movstr contains similar code. */
11225 ix86_expand_clrstr (src, count_exp, align_exp)
11226 rtx src, count_exp, align_exp;
11228 rtx destreg, zeroreg, countreg;
11229 enum machine_mode counter_mode;
11230 HOST_WIDE_INT align = 0;
11231 unsigned HOST_WIDE_INT count = 0;
11233 if (GET_CODE (align_exp) == CONST_INT)
11234 align = INTVAL (align_exp);
11236 /* Can't use any of this if the user has appropriated edi. */
11237 if (global_regs[5])
11238 return 0;
11240 /* This simple hack avoids all inlining code and simplifies code below. */
11241 if (!TARGET_ALIGN_STRINGOPS)
11242 align = 32;
11244 if (GET_CODE (count_exp) == CONST_INT)
11246 count = INTVAL (count_exp);
11247 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11248 return 0;
11250 /* Figure out proper mode for counter. For 32bits it is always SImode,
11251 for 64bits use SImode when possible, otherwise DImode.
11252 Set count to number of bytes copied when known at compile time. */
11253 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11254 || x86_64_zero_extended_value (count_exp))
11255 counter_mode = SImode;
11256 else
11257 counter_mode = DImode;
11259 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11261 emit_insn (gen_cld ());
11263 /* When optimizing for size emit simple rep ; movsb instruction for
11264 counts not divisible by 4. */
11266 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11268 countreg = ix86_zero_extend_to_Pmode (count_exp);
11269 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11270 if (TARGET_64BIT)
11271 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11272 destreg, countreg));
11273 else
11274 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11275 destreg, countreg));
11277 else if (count != 0
11278 && (align >= 8
11279 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11280 || optimize_size || count < (unsigned int) 64))
11282 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11283 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11284 if (count & ~(size - 1))
11286 countreg = copy_to_mode_reg (counter_mode,
11287 GEN_INT ((count >> (size == 4 ? 2 : 3))
11288 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11289 countreg = ix86_zero_extend_to_Pmode (countreg);
11290 if (size == 4)
11292 if (TARGET_64BIT)
11293 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11294 destreg, countreg));
11295 else
11296 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11297 destreg, countreg));
11299 else
11300 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11301 destreg, countreg));
11303 if (size == 8 && (count & 0x04))
11304 emit_insn (gen_strsetsi (destreg,
11305 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11306 if (count & 0x02)
11307 emit_insn (gen_strsethi (destreg,
11308 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11309 if (count & 0x01)
11310 emit_insn (gen_strsetqi (destreg,
11311 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11313 else
11315 rtx countreg2;
11316 rtx label = NULL;
11317 /* Compute desired alignment of the string operation. */
11318 int desired_alignment = (TARGET_PENTIUMPRO
11319 && (count == 0 || count >= (unsigned int) 260)
11320 ? 8 : UNITS_PER_WORD);
11322 /* In case we don't know anything about the alignment, default to the
11323 library version, since it is usually equally fast and results in
11324 shorter code.
11326 Also emit a call when we know that the count is large and call overhead
11327 will not be important. */
11328 if (!TARGET_INLINE_ALL_STRINGOPS
11329 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11330 return 0;
11332 if (TARGET_SINGLE_STRINGOP)
11333 emit_insn (gen_cld ());
11335 countreg2 = gen_reg_rtx (Pmode);
11336 countreg = copy_to_mode_reg (counter_mode, count_exp);
11337 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11339 if (count == 0 && align < desired_alignment)
11341 label = gen_label_rtx ();
11342 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11343 LEU, 0, counter_mode, 1, label);
11345 if (align <= 1)
11347 rtx label = ix86_expand_aligntest (destreg, 1);
11348 emit_insn (gen_strsetqi (destreg,
11349 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11350 ix86_adjust_counter (countreg, 1);
11351 emit_label (label);
11352 LABEL_NUSES (label) = 1;
11354 if (align <= 2)
11356 rtx label = ix86_expand_aligntest (destreg, 2);
11357 emit_insn (gen_strsethi (destreg,
11358 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11359 ix86_adjust_counter (countreg, 2);
11360 emit_label (label);
11361 LABEL_NUSES (label) = 1;
11363 if (align <= 4 && desired_alignment > 4)
11365 rtx label = ix86_expand_aligntest (destreg, 4);
11366 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11367 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11368 : zeroreg)));
11369 ix86_adjust_counter (countreg, 4);
11370 emit_label (label);
11371 LABEL_NUSES (label) = 1;
11374 if (label && desired_alignment > 4 && !TARGET_64BIT)
11376 emit_label (label);
11377 LABEL_NUSES (label) = 1;
11378 label = NULL_RTX;
11381 if (!TARGET_SINGLE_STRINGOP)
11382 emit_insn (gen_cld ());
11383 if (TARGET_64BIT)
11385 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11386 GEN_INT (3)));
11387 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11388 destreg, countreg2));
11390 else
11392 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11393 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11394 destreg, countreg2));
11396 if (label)
11398 emit_label (label);
11399 LABEL_NUSES (label) = 1;
11402 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11403 emit_insn (gen_strsetsi (destreg,
11404 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11405 if (TARGET_64BIT && (align <= 4 || count == 0))
11407 rtx label = ix86_expand_aligntest (countreg, 4);
11408 emit_insn (gen_strsetsi (destreg,
11409 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11410 emit_label (label);
11411 LABEL_NUSES (label) = 1;
11413 if (align > 2 && count != 0 && (count & 2))
11414 emit_insn (gen_strsethi (destreg,
11415 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11416 if (align <= 2 || count == 0)
11418 rtx label = ix86_expand_aligntest (countreg, 2);
11419 emit_insn (gen_strsethi (destreg,
11420 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11421 emit_label (label);
11422 LABEL_NUSES (label) = 1;
11424 if (align > 1 && count != 0 && (count & 1))
11425 emit_insn (gen_strsetqi (destreg,
11426 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11427 if (align <= 1 || count == 0)
11429 rtx label = ix86_expand_aligntest (countreg, 1);
11430 emit_insn (gen_strsetqi (destreg,
11431 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11432 emit_label (label);
11433 LABEL_NUSES (label) = 1;
11436 return 1;
11438 /* Expand strlen. */
11440 ix86_expand_strlen (out, src, eoschar, align)
11441 rtx out, src, eoschar, align;
11443 rtx addr, scratch1, scratch2, scratch3, scratch4;
11445 /* The generic case of the strlen expander is long. Avoid its
11446 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11448 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11449 && !TARGET_INLINE_ALL_STRINGOPS
11450 && !optimize_size
11451 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11452 return 0;
11454 addr = force_reg (Pmode, XEXP (src, 0));
11455 scratch1 = gen_reg_rtx (Pmode);
11457 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11458 && !optimize_size)
11460 /* Well it seems that some optimizer does not combine a call like
11461 foo(strlen(bar), strlen(bar));
11462 when the move and the subtraction are done here. It does calculate
11463 the length just once when these instructions are done inside of
11464 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11465 often used and I use one fewer register for the lifetime of
11466 output_strlen_unroll() this is better. */
11468 emit_move_insn (out, addr);
11470 ix86_expand_strlensi_unroll_1 (out, align);
11472 /* strlensi_unroll_1 returns the address of the zero at the end of
11473 the string, like memchr(), so compute the length by subtracting
11474 the start address. */
11475 if (TARGET_64BIT)
11476 emit_insn (gen_subdi3 (out, out, addr));
11477 else
11478 emit_insn (gen_subsi3 (out, out, addr));
11480 else
11482 scratch2 = gen_reg_rtx (Pmode);
11483 scratch3 = gen_reg_rtx (Pmode);
11484 scratch4 = force_reg (Pmode, constm1_rtx);
11486 emit_move_insn (scratch3, addr);
11487 eoschar = force_reg (QImode, eoschar);
11489 emit_insn (gen_cld ());
11490 if (TARGET_64BIT)
11492 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11493 align, scratch4, scratch3));
11494 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11495 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11497 else
11499 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11500 align, scratch4, scratch3));
11501 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11502 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11505 return 1;
11508 /* Expand the appropriate insns for doing strlen if not just doing
11509 repnz; scasb
11511 out = result, initialized with the start address
11512 align_rtx = alignment of the address.
11513 scratch = scratch register, initialized with the start address when
11514 not aligned, otherwise undefined
11516 This is just the body. It needs the initialisations mentioned above and
11517 some address computing at the end. These things are done in i386.md. */
11519 static void
11520 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11521 rtx out, align_rtx;
11523 int align;
11524 rtx tmp;
11525 rtx align_2_label = NULL_RTX;
11526 rtx align_3_label = NULL_RTX;
11527 rtx align_4_label = gen_label_rtx ();
11528 rtx end_0_label = gen_label_rtx ();
11529 rtx mem;
11530 rtx tmpreg = gen_reg_rtx (SImode);
11531 rtx scratch = gen_reg_rtx (SImode);
11532 rtx cmp;
11534 align = 0;
11535 if (GET_CODE (align_rtx) == CONST_INT)
11536 align = INTVAL (align_rtx);
11538 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11540 /* Is there a known alignment and is it less than 4? */
11541 if (align < 4)
11543 rtx scratch1 = gen_reg_rtx (Pmode);
11544 emit_move_insn (scratch1, out);
11545 /* Is there a known alignment and is it not 2? */
11546 if (align != 2)
11548 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11549 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11551 /* Leave just the 3 lower bits. */
11552 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11553 NULL_RTX, 0, OPTAB_WIDEN);
11555 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11556 Pmode, 1, align_4_label);
11557 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11558 Pmode, 1, align_2_label);
11559 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11560 Pmode, 1, align_3_label);
11562 else
11564 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11565 check whether it is aligned to 4 bytes. */
11567 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11568 NULL_RTX, 0, OPTAB_WIDEN);
11570 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11571 Pmode, 1, align_4_label);
11574 mem = gen_rtx_MEM (QImode, out);
11576 /* Now compare the bytes. */
11578 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11579 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11580 QImode, 1, end_0_label);
11582 /* Increment the address. */
11583 if (TARGET_64BIT)
11584 emit_insn (gen_adddi3 (out, out, const1_rtx));
11585 else
11586 emit_insn (gen_addsi3 (out, out, const1_rtx));
11588 /* Not needed with an alignment of 2 */
11589 if (align != 2)
11591 emit_label (align_2_label);
11593 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11594 end_0_label);
11596 if (TARGET_64BIT)
11597 emit_insn (gen_adddi3 (out, out, const1_rtx));
11598 else
11599 emit_insn (gen_addsi3 (out, out, const1_rtx));
11601 emit_label (align_3_label);
11604 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11605 end_0_label);
11607 if (TARGET_64BIT)
11608 emit_insn (gen_adddi3 (out, out, const1_rtx));
11609 else
11610 emit_insn (gen_addsi3 (out, out, const1_rtx));
11613 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11614 align this loop; doing so only enlarges the program and does not
11615 speed it up. */
11616 emit_label (align_4_label);
11618 mem = gen_rtx_MEM (SImode, out);
11619 emit_move_insn (scratch, mem);
11620 if (TARGET_64BIT)
11621 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11622 else
11623 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11625 /* This formula yields a nonzero result iff one of the bytes is zero.
11626 This saves three branches inside the loop and many cycles. */
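/* Worked example of the test computed below,
     t = (x - 0x01010101) & ~x & 0x80808080:
   a zero byte of x borrows in the subtraction and gets its high bit
   set while ~x keeps that bit, so t is nonzero exactly when some byte
   of x is zero.  E.g. x = 0x41004242 gives t = 0x00800000, whereas
   x = 0x41424344 gives t = 0.  */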
11628 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11629 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11630 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11631 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11632 gen_int_mode (0x80808080, SImode)));
11633 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11634 align_4_label);
11636 if (TARGET_CMOVE)
11638 rtx reg = gen_reg_rtx (SImode);
11639 rtx reg2 = gen_reg_rtx (Pmode);
11640 emit_move_insn (reg, tmpreg);
11641 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11643 /* If zero is not in the first two bytes, move two bytes forward. */
11644 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11645 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11646 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11647 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11648 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11649 reg,
11650 tmpreg)));
11651 /* Emit lea manually to avoid clobbering of flags. */
11652 emit_insn (gen_rtx_SET (SImode, reg2,
11653 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11655 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11656 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11657 emit_insn (gen_rtx_SET (VOIDmode, out,
11658 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11659 reg2,
11660 out)));
11663 else
11665 rtx end_2_label = gen_label_rtx ();
11666 /* Is zero in the first two bytes? */
11668 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11669 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11670 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11671 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11672 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11673 pc_rtx);
11674 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11675 JUMP_LABEL (tmp) = end_2_label;
11677 /* Not in the first two. Move two bytes forward. */
11678 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11679 if (TARGET_64BIT)
11680 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11681 else
11682 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11684 emit_label (end_2_label);
11688 /* Avoid branch in fixing the byte. */
11689 tmpreg = gen_lowpart (QImode, tmpreg);
11690 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11691 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11692 if (TARGET_64BIT)
11693 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11694 else
11695 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11697 emit_label (end_0_label);
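/* Expand a call.  RETVAL is the value returned (NULL for a call whose
   result is unused), FNADDR the MEM giving the call target, CALLARG1
   the argument-bytes operand of the CALL rtx, and CALLARG2 the value
   loaded into %al before 64-bit varargs calls.  POP is the callee-pop
   adjustment and SIBCALL is nonzero for sibling calls; on x86-64 an
   indirect sibcall target is first forced into a call-clobbered hard
   register.  */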
11700 void
11701 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11702 rtx retval, fnaddr, callarg1, callarg2, pop;
11703 int sibcall;
11705 rtx use = NULL, call;
11707 if (pop == const0_rtx)
11708 pop = NULL;
11709 if (TARGET_64BIT && pop)
11710 abort ();
11712 #if TARGET_MACHO
11713 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11714 fnaddr = machopic_indirect_call_target (fnaddr);
11715 #else
11716 /* Static functions and indirect calls don't need the pic register. */
11717 if (! TARGET_64BIT && flag_pic
11718 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11719 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11720 use_reg (&use, pic_offset_table_rtx);
11722 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11724 rtx al = gen_rtx_REG (QImode, 0);
11725 emit_move_insn (al, callarg2);
11726 use_reg (&use, al);
11728 #endif /* TARGET_MACHO */
11730 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11732 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11733 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11735 if (sibcall && TARGET_64BIT
11736 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11738 rtx addr;
11739 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11740 fnaddr = gen_rtx_REG (Pmode, 40);
11741 emit_move_insn (fnaddr, addr);
11742 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11745 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11746 if (retval)
11747 call = gen_rtx_SET (VOIDmode, retval, call);
11748 if (pop)
11750 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11751 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11752 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11755 call = emit_call_insn (call);
11756 if (use)
11757 CALL_INSN_FUNCTION_USAGE (call) = use;
11761 /* Clear stack slot assignments remembered from previous functions.
11762 This is called from INIT_EXPANDERS once before RTL is emitted for each
11763 function. */
11765 static struct machine_function *
11766 ix86_init_machine_status ()
11768 struct machine_function *f;
11770 f = ggc_alloc_cleared (sizeof (struct machine_function));
11771 f->use_fast_prologue_epilogue_nregs = -1;
11773 return f;
11776 /* Return a MEM corresponding to a stack slot with mode MODE.
11777 Allocate a new slot if necessary.
11779 The RTL for a function can have several slots available: N is
11780 which slot to use. */
11783 assign_386_stack_local (mode, n)
11784 enum machine_mode mode;
11785 int n;
11787 struct stack_local_entry *s;
11789 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11790 abort ();
11792 for (s = ix86_stack_locals; s; s = s->next)
11793 if (s->mode == mode && s->n == n)
11794 return s->rtl;
11796 s = (struct stack_local_entry *)
11797 ggc_alloc (sizeof (struct stack_local_entry));
11798 s->n = n;
11799 s->mode = mode;
11800 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11802 s->next = ix86_stack_locals;
11803 ix86_stack_locals = s;
11804 return s->rtl;
11807 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11809 static GTY(()) rtx ix86_tls_symbol;
11811 ix86_tls_get_addr ()
11814 if (!ix86_tls_symbol)
11816 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11817 (TARGET_GNU_TLS && !TARGET_64BIT)
11818 ? "___tls_get_addr"
11819 : "__tls_get_addr");
11822 return ix86_tls_symbol;
11825 /* Calculate the length of the memory address in the instruction
11826 encoding. Does not include the one-byte modrm, opcode, or prefix. */
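/* For example, beyond the modrm byte itself: (%eax) adds 0 bytes,
   8(%eax) adds a 1-byte displacement, 1234(%eax) and a bare symbolic
   address add 4 bytes, and any indexed form such as (%eax,%ebx,4) adds
   one more byte for the SIB byte.  */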
11828 static int
11829 memory_address_length (addr)
11830 rtx addr;
11832 struct ix86_address parts;
11833 rtx base, index, disp;
11834 int len;
11836 if (GET_CODE (addr) == PRE_DEC
11837 || GET_CODE (addr) == POST_INC
11838 || GET_CODE (addr) == PRE_MODIFY
11839 || GET_CODE (addr) == POST_MODIFY)
11840 return 0;
11842 if (! ix86_decompose_address (addr, &parts))
11843 abort ();
11845 base = parts.base;
11846 index = parts.index;
11847 disp = parts.disp;
11848 len = 0;
11850 /* Register Indirect. */
11851 if (base && !index && !disp)
11853 /* Special cases: ebp and esp need the two-byte modrm form. */
11854 if (addr == stack_pointer_rtx
11855 || addr == arg_pointer_rtx
11856 || addr == frame_pointer_rtx
11857 || addr == hard_frame_pointer_rtx)
11858 len = 1;
11861 /* Direct Addressing. */
11862 else if (disp && !base && !index)
11863 len = 4;
11865 else
11867 /* Find the length of the displacement constant. */
11868 if (disp)
11870 if (GET_CODE (disp) == CONST_INT
11871 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11872 && base)
11873 len = 1;
11874 else
11875 len = 4;
11878 /* An index requires the two-byte modrm form. */
11879 if (index)
11880 len += 1;
11883 return len;
11886 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11887 is set, expect that the insn has an 8bit immediate alternative. */
11889 ix86_attr_length_immediate_default (insn, shortform)
11890 rtx insn;
11891 int shortform;
11893 int len = 0;
11894 int i;
11895 extract_insn_cached (insn);
11896 for (i = recog_data.n_operands - 1; i >= 0; --i)
11897 if (CONSTANT_P (recog_data.operand[i]))
11899 if (len)
11900 abort ();
11901 if (shortform
11902 && GET_CODE (recog_data.operand[i]) == CONST_INT
11903 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11904 len = 1;
11905 else
11907 switch (get_attr_mode (insn))
11909 case MODE_QI:
11910 len+=1;
11911 break;
11912 case MODE_HI:
11913 len+=2;
11914 break;
11915 case MODE_SI:
11916 len+=4;
11917 break;
11918 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11919 case MODE_DI:
11920 len+=4;
11921 break;
11922 default:
11923 fatal_insn ("unknown insn mode", insn);
11927 return len;
11929 /* Compute default value for "length_address" attribute. */
11931 ix86_attr_length_address_default (insn)
11932 rtx insn;
11934 int i;
11936 if (get_attr_type (insn) == TYPE_LEA)
11938 rtx set = PATTERN (insn);
11939 if (GET_CODE (set) == SET)
11941 else if (GET_CODE (set) == PARALLEL
11942 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11943 set = XVECEXP (set, 0, 0);
11944 else
11946 #ifdef ENABLE_CHECKING
11947 abort ();
11948 #endif
11949 return 0;
11952 return memory_address_length (SET_SRC (set));
11955 extract_insn_cached (insn);
11956 for (i = recog_data.n_operands - 1; i >= 0; --i)
11957 if (GET_CODE (recog_data.operand[i]) == MEM)
11959 return memory_address_length (XEXP (recog_data.operand[i], 0));
11960 break;
11962 return 0;
11965 /* Return the maximum number of instructions a cpu can issue. */
11967 static int
11968 ix86_issue_rate ()
11970 switch (ix86_tune)
11972 case PROCESSOR_PENTIUM:
11973 case PROCESSOR_K6:
11974 return 2;
11976 case PROCESSOR_PENTIUMPRO:
11977 case PROCESSOR_PENTIUM4:
11978 case PROCESSOR_ATHLON:
11979 case PROCESSOR_K8:
11980 return 3;
11982 default:
11983 return 1;
11987 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11988 by DEP_INSN and nothing else set by DEP_INSN. */
11990 static int
11991 ix86_flags_dependant (insn, dep_insn, insn_type)
11992 rtx insn, dep_insn;
11993 enum attr_type insn_type;
11995 rtx set, set2;
11997 /* Simplify the test for uninteresting insns. */
11998 if (insn_type != TYPE_SETCC
11999 && insn_type != TYPE_ICMOV
12000 && insn_type != TYPE_FCMOV
12001 && insn_type != TYPE_IBR)
12002 return 0;
12004 if ((set = single_set (dep_insn)) != 0)
12006 set = SET_DEST (set);
12007 set2 = NULL_RTX;
12009 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12010 && XVECLEN (PATTERN (dep_insn), 0) == 2
12011 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12012 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12014 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12015 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12017 else
12018 return 0;
12020 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12021 return 0;
12023 /* This test is true if the dependent insn reads the flags but
12024 not any other potentially set register. */
12025 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12026 return 0;
12028 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12029 return 0;
12031 return 1;
12034 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12035 address with operands set by DEP_INSN. */
12037 static int
12038 ix86_agi_dependant (insn, dep_insn, insn_type)
12039 rtx insn, dep_insn;
12040 enum attr_type insn_type;
12042 rtx addr;
12044 if (insn_type == TYPE_LEA
12045 && TARGET_PENTIUM)
12047 addr = PATTERN (insn);
12048 if (GET_CODE (addr) == SET)
12050 else if (GET_CODE (addr) == PARALLEL
12051 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12052 addr = XVECEXP (addr, 0, 0);
12053 else
12054 abort ();
12055 addr = SET_SRC (addr);
12057 else
12059 int i;
12060 extract_insn_cached (insn);
12061 for (i = recog_data.n_operands - 1; i >= 0; --i)
12062 if (GET_CODE (recog_data.operand[i]) == MEM)
12064 addr = XEXP (recog_data.operand[i], 0);
12065 goto found;
12067 return 0;
12068 found:;
12071 return modified_in_p (addr, dep_insn);
12074 static int
12075 ix86_adjust_cost (insn, link, dep_insn, cost)
12076 rtx insn, link, dep_insn;
12077 int cost;
12079 enum attr_type insn_type, dep_insn_type;
12080 enum attr_memory memory, dep_memory;
12081 rtx set, set2;
12082 int dep_insn_code_number;
12084 /* Anti and output dependencies have zero cost on all CPUs. */
12085 if (REG_NOTE_KIND (link) != 0)
12086 return 0;
12088 dep_insn_code_number = recog_memoized (dep_insn);
12090 /* If we can't recognize the insns, we can't really do anything. */
12091 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12092 return cost;
12094 insn_type = get_attr_type (insn);
12095 dep_insn_type = get_attr_type (dep_insn);
12097 switch (ix86_tune)
12099 case PROCESSOR_PENTIUM:
12100 /* Address Generation Interlock adds a cycle of latency. */
12101 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12102 cost += 1;
12104 /* ??? Compares pair with jump/setcc. */
12105 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12106 cost = 0;
12108 /* Floating point stores require value to be ready one cycle earlier. */
12109 if (insn_type == TYPE_FMOV
12110 && get_attr_memory (insn) == MEMORY_STORE
12111 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12112 cost += 1;
12113 break;
12115 case PROCESSOR_PENTIUMPRO:
12116 memory = get_attr_memory (insn);
12117 dep_memory = get_attr_memory (dep_insn);
12119 /* Since we can't represent delayed latencies of load+operation,
12120 increase the cost here for non-imov insns. */
12121 if (dep_insn_type != TYPE_IMOV
12122 && dep_insn_type != TYPE_FMOV
12123 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12124 cost += 1;
12126 /* INT->FP conversion is expensive. */
12127 if (get_attr_fp_int_src (dep_insn))
12128 cost += 5;
12130 /* There is one cycle extra latency between an FP op and a store. */
12131 if (insn_type == TYPE_FMOV
12132 && (set = single_set (dep_insn)) != NULL_RTX
12133 && (set2 = single_set (insn)) != NULL_RTX
12134 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12135 && GET_CODE (SET_DEST (set2)) == MEM)
12136 cost += 1;
12138 /* Show the ability of the reorder buffer to hide the latency of a load
12139 by executing it in parallel with the previous instruction, in case the
12140 previous instruction is not needed to compute the address. */
12141 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12142 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12144 /* Claim moves to take one cycle, as the core can issue one load
12145 at a time and the next load can start a cycle later. */
12146 if (dep_insn_type == TYPE_IMOV
12147 || dep_insn_type == TYPE_FMOV)
12148 cost = 1;
12149 else if (cost > 1)
12150 cost--;
12152 break;
12154 case PROCESSOR_K6:
12155 memory = get_attr_memory (insn);
12156 dep_memory = get_attr_memory (dep_insn);
12157 /* The esp dependency is resolved before the instruction is really
12158 finished. */
12159 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12160 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12161 return 1;
12163 /* Since we can't represent delayed latencies of load+operation,
12164 increase the cost here for non-imov insns. */
12165 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12166 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12168 /* INT->FP conversion is expensive. */
12169 if (get_attr_fp_int_src (dep_insn))
12170 cost += 5;
12172 /* Show the ability of the reorder buffer to hide the latency of a load
12173 by executing it in parallel with the previous instruction, in case the
12174 previous instruction is not needed to compute the address. */
12175 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12176 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12178 /* Claim moves to take one cycle, as the core can issue one load
12179 at a time and the next load can start a cycle later. */
12180 if (dep_insn_type == TYPE_IMOV
12181 || dep_insn_type == TYPE_FMOV)
12182 cost = 1;
12183 else if (cost > 2)
12184 cost -= 2;
12185 else
12186 cost = 1;
12188 break;
12190 case PROCESSOR_ATHLON:
12191 case PROCESSOR_K8:
12192 memory = get_attr_memory (insn);
12193 dep_memory = get_attr_memory (dep_insn);
12195 /* Show the ability of the reorder buffer to hide the latency of a load
12196 by executing it in parallel with the previous instruction, in case the
12197 previous instruction is not needed to compute the address. */
12198 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12199 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12201 enum attr_unit unit = get_attr_unit (insn);
12202 int loadcost = 3;
12204 /* Because of the difference between the length of integer and
12205 floating unit pipeline preparation stages, the memory operands
12206 for floating point are cheaper.
12208 ??? For Athlon the difference is most probably 2. */
12209 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12210 loadcost = 3;
12211 else
12212 loadcost = TARGET_ATHLON ? 2 : 0;
12214 if (cost >= loadcost)
12215 cost -= loadcost;
12216 else
12217 cost = 0;
12220 default:
12221 break;
12224 return cost;
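/* Worked example (illustrative): on PROCESSOR_PENTIUM the sequence

     dep_insn:  movl %ebx, %esi        ; writes %esi
     insn:      movl (%esi), %eax      ; address uses %esi

   triggers ix86_agi_dependant, so the code above adds one cycle to the
   dependence cost, nudging the scheduler to place an unrelated insn
   between the address computation and its use.  */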
12227 static union
12229 struct ppro_sched_data
12231 rtx decode[3];
12232 int issued_this_cycle;
12233 } ppro;
12234 } ix86_sched_data;
12236 static enum attr_ppro_uops
12237 ix86_safe_ppro_uops (insn)
12238 rtx insn;
12240 if (recog_memoized (insn) >= 0)
12241 return get_attr_ppro_uops (insn);
12242 else
12243 return PPRO_UOPS_MANY;
12246 static void
12247 ix86_dump_ppro_packet (dump)
12248 FILE *dump;
12250 if (ix86_sched_data.ppro.decode[0])
12252 fprintf (dump, "PPRO packet: %d",
12253 INSN_UID (ix86_sched_data.ppro.decode[0]));
12254 if (ix86_sched_data.ppro.decode[1])
12255 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12256 if (ix86_sched_data.ppro.decode[2])
12257 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12258 fputc ('\n', dump);
12262 /* We're beginning a new block. Initialize data structures as necessary. */
12264 static void
12265 ix86_sched_init (dump, sched_verbose, veclen)
12266 FILE *dump ATTRIBUTE_UNUSED;
12267 int sched_verbose ATTRIBUTE_UNUSED;
12268 int veclen ATTRIBUTE_UNUSED;
12270 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12273 /* Shift INSN to SLOT, and shift everything else down. */
12275 static void
12276 ix86_reorder_insn (insnp, slot)
12277 rtx *insnp, *slot;
12279 if (insnp != slot)
12281 rtx insn = *insnp;
12283 insnp[0] = insnp[1];
12284 while (++insnp != slot);
12285 *insnp = insn;
12289 static void
12290 ix86_sched_reorder_ppro (ready, e_ready)
12291 rtx *ready;
12292 rtx *e_ready;
12294 rtx decode[3];
12295 enum attr_ppro_uops cur_uops;
12296 int issued_this_cycle;
12297 rtx *insnp;
12298 int i;
12300 /* At this point .ppro.decode contains the state of the three
12301 decoders from last "cycle". That is, those insns that were
12302 actually independent. But here we're scheduling for the
12303 decoder, and we may find things that are decodable in the
12304 same cycle. */
12306 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12307 issued_this_cycle = 0;
12309 insnp = e_ready;
12310 cur_uops = ix86_safe_ppro_uops (*insnp);
12312 /* If the decoders are empty, and we've a complex insn at the
12313 head of the priority queue, let it issue without complaint. */
12314 if (decode[0] == NULL)
12316 if (cur_uops == PPRO_UOPS_MANY)
12318 decode[0] = *insnp;
12319 goto ppro_done;
12322 /* Otherwise, search for a 2-4 uop insn to issue. */
12323 while (cur_uops != PPRO_UOPS_FEW)
12325 if (insnp == ready)
12326 break;
12327 cur_uops = ix86_safe_ppro_uops (*--insnp);
12330 /* If so, move it to the head of the line. */
12331 if (cur_uops == PPRO_UOPS_FEW)
12332 ix86_reorder_insn (insnp, e_ready);
12334 /* Issue the head of the queue. */
12335 issued_this_cycle = 1;
12336 decode[0] = *e_ready--;
12339 /* Look for simple insns to fill in the other two slots. */
12340 for (i = 1; i < 3; ++i)
12341 if (decode[i] == NULL)
12343 if (ready > e_ready)
12344 goto ppro_done;
12346 insnp = e_ready;
12347 cur_uops = ix86_safe_ppro_uops (*insnp);
12348 while (cur_uops != PPRO_UOPS_ONE)
12350 if (insnp == ready)
12351 break;
12352 cur_uops = ix86_safe_ppro_uops (*--insnp);
12355 /* Found one. Move it to the head of the queue and issue it. */
12356 if (cur_uops == PPRO_UOPS_ONE)
12358 ix86_reorder_insn (insnp, e_ready);
12359 decode[i] = *e_ready--;
12360 issued_this_cycle++;
12361 continue;
12364 /* ??? Didn't find one. Ideally, here we would do a lazy split
12365 of 2-uop insns, issue one and queue the other. */
12368 ppro_done:
12369 if (issued_this_cycle == 0)
12370 issued_this_cycle = 1;
12371 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
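/* Background for the reordering above (a sketch, assuming the documented
   4-1-1 decode template of the P6 family): decoder 0 accepts an insn of
   up to four uops (PPRO_UOPS_FEW, or a microcoded PPRO_UOPS_MANY insn on
   its own), while decoders 1 and 2 only accept single-uop insns
   (PPRO_UOPS_ONE); that is why slot 0 is filled with the widest candidate
   and the remaining slots only with simple insns.  */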
12374 /* We are about to begin issuing insns for this clock cycle.
12375 Override the default sort algorithm to better slot instructions. */
12376 static int
12377 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12378 FILE *dump ATTRIBUTE_UNUSED;
12379 int sched_verbose ATTRIBUTE_UNUSED;
12380 rtx *ready;
12381 int *n_readyp;
12382 int clock_var ATTRIBUTE_UNUSED;
12384 int n_ready = *n_readyp;
12385 rtx *e_ready = ready + n_ready - 1;
12387 /* Make sure to go ahead and initialize key items in
12388 ix86_sched_data if we are not going to bother trying to
12389 reorder the ready queue. */
12390 if (n_ready < 2)
12392 ix86_sched_data.ppro.issued_this_cycle = 1;
12393 goto out;
12396 switch (ix86_tune)
12398 default:
12399 break;
12401 case PROCESSOR_PENTIUMPRO:
12402 ix86_sched_reorder_ppro (ready, e_ready);
12403 break;
12406 out:
12407 return ix86_issue_rate ();
12410 /* We are about to issue INSN. Return the number of insns left on the
12411 ready queue that can be issued this cycle. */
12413 static int
12414 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12415 FILE *dump;
12416 int sched_verbose;
12417 rtx insn;
12418 int can_issue_more;
12420 int i;
12421 switch (ix86_tune)
12423 default:
12424 return can_issue_more - 1;
12426 case PROCESSOR_PENTIUMPRO:
12428 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12430 if (uops == PPRO_UOPS_MANY)
12432 if (sched_verbose)
12433 ix86_dump_ppro_packet (dump);
12434 ix86_sched_data.ppro.decode[0] = insn;
12435 ix86_sched_data.ppro.decode[1] = NULL;
12436 ix86_sched_data.ppro.decode[2] = NULL;
12437 if (sched_verbose)
12438 ix86_dump_ppro_packet (dump);
12439 ix86_sched_data.ppro.decode[0] = NULL;
12441 else if (uops == PPRO_UOPS_FEW)
12443 if (sched_verbose)
12444 ix86_dump_ppro_packet (dump);
12445 ix86_sched_data.ppro.decode[0] = insn;
12446 ix86_sched_data.ppro.decode[1] = NULL;
12447 ix86_sched_data.ppro.decode[2] = NULL;
12449 else
12451 for (i = 0; i < 3; ++i)
12452 if (ix86_sched_data.ppro.decode[i] == NULL)
12454 ix86_sched_data.ppro.decode[i] = insn;
12455 break;
12457 if (i == 3)
12458 abort ();
12459 if (i == 2)
12461 if (sched_verbose)
12462 ix86_dump_ppro_packet (dump);
12463 ix86_sched_data.ppro.decode[0] = NULL;
12464 ix86_sched_data.ppro.decode[1] = NULL;
12465 ix86_sched_data.ppro.decode[2] = NULL;
12469 return --ix86_sched_data.ppro.issued_this_cycle;
12473 static int
12474 ia32_use_dfa_pipeline_interface ()
12476 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12477 return 1;
12478 return 0;
12481 /* How many alternative schedules to try. This should be as wide as the
12482 scheduling freedom in the DFA, but no wider. Making this value too
12483 large results in extra work for the scheduler. */
12485 static int
12486 ia32_multipass_dfa_lookahead ()
12488 if (ix86_tune == PROCESSOR_PENTIUM)
12489 return 2;
12490 else
12491 return 0;
12495 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12496 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12497 appropriate. */
12499 void
12500 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12501 rtx insns;
12502 rtx dstref, srcref, dstreg, srcreg;
12504 rtx insn;
12506 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12507 if (INSN_P (insn))
12508 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12509 dstreg, srcreg);
12512 /* Subroutine of above to actually do the updating by recursively walking
12513 the rtx. */
12515 static void
12516 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12517 rtx x;
12518 rtx dstref, srcref, dstreg, srcreg;
12520 enum rtx_code code = GET_CODE (x);
12521 const char *format_ptr = GET_RTX_FORMAT (code);
12522 int i, j;
12524 if (code == MEM && XEXP (x, 0) == dstreg)
12525 MEM_COPY_ATTRIBUTES (x, dstref);
12526 else if (code == MEM && XEXP (x, 0) == srcreg)
12527 MEM_COPY_ATTRIBUTES (x, srcref);
12529 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12531 if (*format_ptr == 'e')
12532 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12533 dstreg, srcreg);
12534 else if (*format_ptr == 'E')
12535 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12536 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12537 dstreg, srcreg);
12541 /* Compute the alignment given to a constant that is being placed in memory.
12542 EXP is the constant and ALIGN is the alignment that the object would
12543 ordinarily have.
12544 The value of this function is used instead of that alignment to align
12545 the object. */
12548 ix86_constant_alignment (exp, align)
12549 tree exp;
12550 int align;
12552 if (TREE_CODE (exp) == REAL_CST)
12554 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12555 return 64;
12556 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12557 return 128;
12559 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12560 && align < 256)
12561 return 256;
12563 return align;
12566 /* Compute the alignment for a static variable.
12567 TYPE is the data type, and ALIGN is the alignment that
12568 the object would ordinarily have. The value of this function is used
12569 instead of that alignment to align the object. */
12572 ix86_data_alignment (type, align)
12573 tree type;
12574 int align;
12576 if (AGGREGATE_TYPE_P (type)
12577 && TYPE_SIZE (type)
12578 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12579 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12580 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12581 return 256;
12583 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
12584 to a 16-byte boundary. */
12585 if (TARGET_64BIT)
12587 if (AGGREGATE_TYPE_P (type)
12588 && TYPE_SIZE (type)
12589 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12590 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12591 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12592 return 128;
12595 if (TREE_CODE (type) == ARRAY_TYPE)
12597 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12598 return 64;
12599 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12600 return 128;
12602 else if (TREE_CODE (type) == COMPLEX_TYPE)
12605 if (TYPE_MODE (type) == DCmode && align < 64)
12606 return 64;
12607 if (TYPE_MODE (type) == XCmode && align < 128)
12608 return 128;
12610 else if ((TREE_CODE (type) == RECORD_TYPE
12611 || TREE_CODE (type) == UNION_TYPE
12612 || TREE_CODE (type) == QUAL_UNION_TYPE)
12613 && TYPE_FIELDS (type))
12615 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12616 return 64;
12617 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12618 return 128;
12620 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12621 || TREE_CODE (type) == INTEGER_TYPE)
12623 if (TYPE_MODE (type) == DFmode && align < 64)
12624 return 64;
12625 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12626 return 128;
12629 return align;
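/* Illustrative reading of the rules above (a sketch, not normative):
   a static "double d[2]" -- an array with DFmode elements -- is raised
   to at least 64-bit alignment, any aggregate of 256 bits or more is
   raised to a 256-bit boundary, and on x86-64 aggregates of 16 bytes or
   more additionally get the 128-bit alignment mentioned above.  */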
12632 /* Compute the alignment for a local variable.
12633 TYPE is the data type, and ALIGN is the alignment that
12634 the object would ordinarily have. The value of this macro is used
12635 instead of that alignment to align the object. */
12638 ix86_local_alignment (type, align)
12639 tree type;
12640 int align;
12642 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
12643 to a 16-byte boundary. */
12644 if (TARGET_64BIT)
12646 if (AGGREGATE_TYPE_P (type)
12647 && TYPE_SIZE (type)
12648 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12649 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12650 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12651 return 128;
12653 if (TREE_CODE (type) == ARRAY_TYPE)
12655 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12656 return 64;
12657 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12658 return 128;
12660 else if (TREE_CODE (type) == COMPLEX_TYPE)
12662 if (TYPE_MODE (type) == DCmode && align < 64)
12663 return 64;
12664 if (TYPE_MODE (type) == XCmode && align < 128)
12665 return 128;
12667 else if ((TREE_CODE (type) == RECORD_TYPE
12668 || TREE_CODE (type) == UNION_TYPE
12669 || TREE_CODE (type) == QUAL_UNION_TYPE)
12670 && TYPE_FIELDS (type))
12672 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12673 return 64;
12674 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12675 return 128;
12677 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12678 || TREE_CODE (type) == INTEGER_TYPE)
12681 if (TYPE_MODE (type) == DFmode && align < 64)
12682 return 64;
12683 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12684 return 128;
12686 return align;
12689 /* Emit RTL insns to initialize the variable parts of a trampoline.
12690 FNADDR is an RTX for the address of the function's pure code.
12691 CXT is an RTX for the static chain value for the function. */
12692 void
12693 x86_initialize_trampoline (tramp, fnaddr, cxt)
12694 rtx tramp, fnaddr, cxt;
12696 if (!TARGET_64BIT)
12698 /* Compute offset from the end of the jmp to the target function. */
12699 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12700 plus_constant (tramp, 10),
12701 NULL_RTX, 1, OPTAB_DIRECT);
12702 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12703 gen_int_mode (0xb9, QImode));
12704 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12705 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12706 gen_int_mode (0xe9, QImode));
12707 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12709 else
12711 int offset = 0;
12712 /* Try to load the address using the shorter movl instead of movabs.
12713 We may want to support movq for kernel mode, but the kernel does not
12714 use trampolines at the moment. */
12715 if (x86_64_zero_extended_value (fnaddr))
12717 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12718 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12719 gen_int_mode (0xbb41, HImode));
12720 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12721 gen_lowpart (SImode, fnaddr));
12722 offset += 6;
12724 else
12726 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12727 gen_int_mode (0xbb49, HImode));
12728 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12729 fnaddr);
12730 offset += 10;
12732 /* Load static chain using movabs to r10. */
12733 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12734 gen_int_mode (0xba49, HImode));
12735 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12736 cxt);
12737 offset += 10;
12738 /* Jump to r11. */
12739 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12740 gen_int_mode (0xff49, HImode));
12741 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12742 gen_int_mode (0xe3, QImode));
12743 offset += 3;
12744 if (offset > TRAMPOLINE_SIZE)
12745 abort ();
12748 #ifdef TRANSFER_FROM_TRAMPOLINE
12749 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12750 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12751 #endif
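/* Sketch of the trampolines emitted above, decoding the byte values
   passed to gen_int_mode (register names follow the usual encoding of
   those opcodes):

     32-bit trampoline (10 bytes):
        b9 <cxt:imm32>          movl  $cxt, %ecx      ; static chain
        e9 <disp:rel32>         jmp   fnaddr          ; relative jump

     64-bit trampoline (19 or 23 bytes):
        49 bb <fnaddr:imm64>    movabs $fnaddr, %r11  ; or "41 bb imm32"
                                                      ; for a zero-extended
                                                      ; address
        49 ba <cxt:imm64>       movabs $cxt, %r10     ; static chain
        49 ff e3                jmp   *%r11  */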
12754 #define def_builtin(MASK, NAME, TYPE, CODE) \
12755 do { \
12756 if ((MASK) & target_flags \
12757 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12758 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12759 NULL, NULL_TREE); \
12760 } while (0)
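/* Example use of the macro above (illustrative; the table-driven loops
   below register each bdesc_* entry through a call of this shape):

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The MASK_64BIT test means entries tagged with MASK_SSE164 or
   MASK_SSE264 (defined below) are only registered when compiling for
   the 64-bit ABI.  */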
12762 struct builtin_description
12764 const unsigned int mask;
12765 const enum insn_code icode;
12766 const char *const name;
12767 const enum ix86_builtins code;
12768 const enum rtx_code comparison;
12769 const unsigned int flag;
12772 /* Used for builtins that are enabled both by -msse and -msse2. */
12773 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12774 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12775 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12777 static const struct builtin_description bdesc_comi[] =
12779 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12780 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12781 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12782 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12783 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12784 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12785 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12786 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12787 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12788 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12789 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12790 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12791 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12792 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12793 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12794 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12795 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12796 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12797 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12805 static const struct builtin_description bdesc_2arg[] =
12807 /* SSE */
12808 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12809 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12810 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12811 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12812 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12813 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12814 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12815 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12817 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12818 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12819 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12820 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12821 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12822 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12823 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12824 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12825 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12826 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12827 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12828 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12829 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12830 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12831 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12832 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12833 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12834 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12835 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12836 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12838 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12839 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12840 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12841 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12843 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12844 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12845 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12846 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12848 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12849 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12850 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12851 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12852 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12854 /* MMX */
12855 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12856 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12857 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12858 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12859 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12860 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12861 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12862 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12864 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12865 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12866 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12867 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12868 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12869 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12870 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12871 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12874 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12875 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12877 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12878 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12879 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12880 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12882 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12883 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12885 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12886 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12887 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12888 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12889 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12890 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12892 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12893 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12894 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12895 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12897 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12898 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12899 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12900 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12901 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12902 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12904 /* Special. */
12905 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12909 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12910 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12911 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12913 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12914 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12915 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12916 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12917 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12918 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12920 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12921 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12922 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12923 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12924 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12925 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12927 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12928 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12932 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12935 /* SSE2 */
12936 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12946 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12947 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12948 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12949 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12950 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12951 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12952 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12953 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12954 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12955 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12956 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12957 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12958 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12959 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12960 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12961 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12962 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12963 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12964 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12966 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12980 /* SSE2 MMX */
12981 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12990 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12991 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12992 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12993 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12994 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12995 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12996 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12997 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13062 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
13067 static const struct builtin_description bdesc_1arg[] =
13069 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13070 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13072 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13073 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13074 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13076 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13077 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13078 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13079 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13080 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13081 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13083 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13084 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13085 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13086 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13088 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13090 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13091 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13093 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13094 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13095 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13103 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13104 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
13113 void
13114 ix86_init_builtins ()
13116 if (TARGET_MMX)
13117 ix86_init_mmx_sse_builtins ();
13120 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13121 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13122 builtins. */
13123 static void
13124 ix86_init_mmx_sse_builtins ()
13126 const struct builtin_description * d;
13127 size_t i;
13129 tree pchar_type_node = build_pointer_type (char_type_node);
13130 tree pcchar_type_node = build_pointer_type (
13131 build_type_variant (char_type_node, 1, 0));
13132 tree pfloat_type_node = build_pointer_type (float_type_node);
13133 tree pcfloat_type_node = build_pointer_type (
13134 build_type_variant (float_type_node, 1, 0));
13135 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13136 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13137 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13139 /* Comparisons. */
13140 tree int_ftype_v4sf_v4sf
13141 = build_function_type_list (integer_type_node,
13142 V4SF_type_node, V4SF_type_node, NULL_TREE);
13143 tree v4si_ftype_v4sf_v4sf
13144 = build_function_type_list (V4SI_type_node,
13145 V4SF_type_node, V4SF_type_node, NULL_TREE);
13146 /* MMX/SSE/integer conversions. */
13147 tree int_ftype_v4sf
13148 = build_function_type_list (integer_type_node,
13149 V4SF_type_node, NULL_TREE);
13150 tree int64_ftype_v4sf
13151 = build_function_type_list (long_long_integer_type_node,
13152 V4SF_type_node, NULL_TREE);
13153 tree int_ftype_v8qi
13154 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13155 tree v4sf_ftype_v4sf_int
13156 = build_function_type_list (V4SF_type_node,
13157 V4SF_type_node, integer_type_node, NULL_TREE);
13158 tree v4sf_ftype_v4sf_int64
13159 = build_function_type_list (V4SF_type_node,
13160 V4SF_type_node, long_long_integer_type_node,
13161 NULL_TREE);
13162 tree v4sf_ftype_v4sf_v2si
13163 = build_function_type_list (V4SF_type_node,
13164 V4SF_type_node, V2SI_type_node, NULL_TREE);
13165 tree int_ftype_v4hi_int
13166 = build_function_type_list (integer_type_node,
13167 V4HI_type_node, integer_type_node, NULL_TREE);
13168 tree v4hi_ftype_v4hi_int_int
13169 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13170 integer_type_node, integer_type_node,
13171 NULL_TREE);
13172 /* Miscellaneous. */
13173 tree v8qi_ftype_v4hi_v4hi
13174 = build_function_type_list (V8QI_type_node,
13175 V4HI_type_node, V4HI_type_node, NULL_TREE);
13176 tree v4hi_ftype_v2si_v2si
13177 = build_function_type_list (V4HI_type_node,
13178 V2SI_type_node, V2SI_type_node, NULL_TREE);
13179 tree v4sf_ftype_v4sf_v4sf_int
13180 = build_function_type_list (V4SF_type_node,
13181 V4SF_type_node, V4SF_type_node,
13182 integer_type_node, NULL_TREE);
13183 tree v2si_ftype_v4hi_v4hi
13184 = build_function_type_list (V2SI_type_node,
13185 V4HI_type_node, V4HI_type_node, NULL_TREE);
13186 tree v4hi_ftype_v4hi_int
13187 = build_function_type_list (V4HI_type_node,
13188 V4HI_type_node, integer_type_node, NULL_TREE);
13189 tree v4hi_ftype_v4hi_di
13190 = build_function_type_list (V4HI_type_node,
13191 V4HI_type_node, long_long_unsigned_type_node,
13192 NULL_TREE);
13193 tree v2si_ftype_v2si_di
13194 = build_function_type_list (V2SI_type_node,
13195 V2SI_type_node, long_long_unsigned_type_node,
13196 NULL_TREE);
13197 tree void_ftype_void
13198 = build_function_type (void_type_node, void_list_node);
13199 tree void_ftype_unsigned
13200 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13201 tree unsigned_ftype_void
13202 = build_function_type (unsigned_type_node, void_list_node);
13203 tree di_ftype_void
13204 = build_function_type (long_long_unsigned_type_node, void_list_node);
13205 tree v4sf_ftype_void
13206 = build_function_type (V4SF_type_node, void_list_node);
13207 tree v2si_ftype_v4sf
13208 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13209 /* Loads/stores. */
13210 tree void_ftype_v8qi_v8qi_pchar
13211 = build_function_type_list (void_type_node,
13212 V8QI_type_node, V8QI_type_node,
13213 pchar_type_node, NULL_TREE);
13214 tree v4sf_ftype_pcfloat
13215 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13216 /* @@@ the type is bogus */
13217 tree v4sf_ftype_v4sf_pv2si
13218 = build_function_type_list (V4SF_type_node,
13219 V4SF_type_node, pv2si_type_node, NULL_TREE);
13220 tree void_ftype_pv2si_v4sf
13221 = build_function_type_list (void_type_node,
13222 pv2si_type_node, V4SF_type_node, NULL_TREE);
13223 tree void_ftype_pfloat_v4sf
13224 = build_function_type_list (void_type_node,
13225 pfloat_type_node, V4SF_type_node, NULL_TREE);
13226 tree void_ftype_pdi_di
13227 = build_function_type_list (void_type_node,
13228 pdi_type_node, long_long_unsigned_type_node,
13229 NULL_TREE);
13230 tree void_ftype_pv2di_v2di
13231 = build_function_type_list (void_type_node,
13232 pv2di_type_node, V2DI_type_node, NULL_TREE);
13233 /* Normal vector unops. */
13234 tree v4sf_ftype_v4sf
13235 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13237 /* Normal vector binops. */
13238 tree v4sf_ftype_v4sf_v4sf
13239 = build_function_type_list (V4SF_type_node,
13240 V4SF_type_node, V4SF_type_node, NULL_TREE);
13241 tree v8qi_ftype_v8qi_v8qi
13242 = build_function_type_list (V8QI_type_node,
13243 V8QI_type_node, V8QI_type_node, NULL_TREE);
13244 tree v4hi_ftype_v4hi_v4hi
13245 = build_function_type_list (V4HI_type_node,
13246 V4HI_type_node, V4HI_type_node, NULL_TREE);
13247 tree v2si_ftype_v2si_v2si
13248 = build_function_type_list (V2SI_type_node,
13249 V2SI_type_node, V2SI_type_node, NULL_TREE);
13250 tree di_ftype_di_di
13251 = build_function_type_list (long_long_unsigned_type_node,
13252 long_long_unsigned_type_node,
13253 long_long_unsigned_type_node, NULL_TREE);
13255 tree v2si_ftype_v2sf
13256 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13257 tree v2sf_ftype_v2si
13258 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13259 tree v2si_ftype_v2si
13260 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13261 tree v2sf_ftype_v2sf
13262 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13263 tree v2sf_ftype_v2sf_v2sf
13264 = build_function_type_list (V2SF_type_node,
13265 V2SF_type_node, V2SF_type_node, NULL_TREE);
13266 tree v2si_ftype_v2sf_v2sf
13267 = build_function_type_list (V2SI_type_node,
13268 V2SF_type_node, V2SF_type_node, NULL_TREE);
13269 tree pint_type_node = build_pointer_type (integer_type_node);
13270 tree pcint_type_node = build_pointer_type (
13271 build_type_variant (integer_type_node, 1, 0));
13272 tree pdouble_type_node = build_pointer_type (double_type_node);
13273 tree pcdouble_type_node = build_pointer_type (
13274 build_type_variant (double_type_node, 1, 0));
13275 tree int_ftype_v2df_v2df
13276 = build_function_type_list (integer_type_node,
13277 V2DF_type_node, V2DF_type_node, NULL_TREE);
13279 tree ti_ftype_void
13280 = build_function_type (intTI_type_node, void_list_node);
13281 tree v2di_ftype_void
13282 = build_function_type (V2DI_type_node, void_list_node);
13283 tree ti_ftype_ti_ti
13284 = build_function_type_list (intTI_type_node,
13285 intTI_type_node, intTI_type_node, NULL_TREE);
13286 tree void_ftype_pcvoid
13287 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13288 tree v2di_ftype_di
13289 = build_function_type_list (V2DI_type_node,
13290 long_long_unsigned_type_node, NULL_TREE);
13291 tree di_ftype_v2di
13292 = build_function_type_list (long_long_unsigned_type_node,
13293 V2DI_type_node, NULL_TREE);
13294 tree v4sf_ftype_v4si
13295 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13296 tree v4si_ftype_v4sf
13297 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13298 tree v2df_ftype_v4si
13299 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13300 tree v4si_ftype_v2df
13301 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13302 tree v2si_ftype_v2df
13303 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13304 tree v4sf_ftype_v2df
13305 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13306 tree v2df_ftype_v2si
13307 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13308 tree v2df_ftype_v4sf
13309 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13310 tree int_ftype_v2df
13311 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13312 tree int64_ftype_v2df
13313 = build_function_type_list (long_long_integer_type_node,
13314 V2DF_type_node, NULL_TREE);
13315 tree v2df_ftype_v2df_int
13316 = build_function_type_list (V2DF_type_node,
13317 V2DF_type_node, integer_type_node, NULL_TREE);
13318 tree v2df_ftype_v2df_int64
13319 = build_function_type_list (V2DF_type_node,
13320 V2DF_type_node, long_long_integer_type_node,
13321 NULL_TREE);
13322 tree v4sf_ftype_v4sf_v2df
13323 = build_function_type_list (V4SF_type_node,
13324 V4SF_type_node, V2DF_type_node, NULL_TREE);
13325 tree v2df_ftype_v2df_v4sf
13326 = build_function_type_list (V2DF_type_node,
13327 V2DF_type_node, V4SF_type_node, NULL_TREE);
13328 tree v2df_ftype_v2df_v2df_int
13329 = build_function_type_list (V2DF_type_node,
13330 V2DF_type_node, V2DF_type_node,
13331 integer_type_node,
13332 NULL_TREE);
13333 tree v2df_ftype_v2df_pv2si
13334 = build_function_type_list (V2DF_type_node,
13335 V2DF_type_node, pv2si_type_node, NULL_TREE);
13336 tree void_ftype_pv2si_v2df
13337 = build_function_type_list (void_type_node,
13338 pv2si_type_node, V2DF_type_node, NULL_TREE);
13339 tree void_ftype_pdouble_v2df
13340 = build_function_type_list (void_type_node,
13341 pdouble_type_node, V2DF_type_node, NULL_TREE);
13342 tree void_ftype_pint_int
13343 = build_function_type_list (void_type_node,
13344 pint_type_node, integer_type_node, NULL_TREE);
13345 tree void_ftype_v16qi_v16qi_pchar
13346 = build_function_type_list (void_type_node,
13347 V16QI_type_node, V16QI_type_node,
13348 pchar_type_node, NULL_TREE);
13349 tree v2df_ftype_pcdouble
13350 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13351 tree v2df_ftype_v2df_v2df
13352 = build_function_type_list (V2DF_type_node,
13353 V2DF_type_node, V2DF_type_node, NULL_TREE);
13354 tree v16qi_ftype_v16qi_v16qi
13355 = build_function_type_list (V16QI_type_node,
13356 V16QI_type_node, V16QI_type_node, NULL_TREE);
13357 tree v8hi_ftype_v8hi_v8hi
13358 = build_function_type_list (V8HI_type_node,
13359 V8HI_type_node, V8HI_type_node, NULL_TREE);
13360 tree v4si_ftype_v4si_v4si
13361 = build_function_type_list (V4SI_type_node,
13362 V4SI_type_node, V4SI_type_node, NULL_TREE);
13363 tree v2di_ftype_v2di_v2di
13364 = build_function_type_list (V2DI_type_node,
13365 V2DI_type_node, V2DI_type_node, NULL_TREE);
13366 tree v2di_ftype_v2df_v2df
13367 = build_function_type_list (V2DI_type_node,
13368 V2DF_type_node, V2DF_type_node, NULL_TREE);
13369 tree v2df_ftype_v2df
13370 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13371 tree v2df_ftype_double
13372 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13373 tree v2df_ftype_double_double
13374 = build_function_type_list (V2DF_type_node,
13375 double_type_node, double_type_node, NULL_TREE);
13376 tree int_ftype_v8hi_int
13377 = build_function_type_list (integer_type_node,
13378 V8HI_type_node, integer_type_node, NULL_TREE);
13379 tree v8hi_ftype_v8hi_int_int
13380 = build_function_type_list (V8HI_type_node,
13381 V8HI_type_node, integer_type_node,
13382 integer_type_node, NULL_TREE);
13383 tree v2di_ftype_v2di_int
13384 = build_function_type_list (V2DI_type_node,
13385 V2DI_type_node, integer_type_node, NULL_TREE);
13386 tree v4si_ftype_v4si_int
13387 = build_function_type_list (V4SI_type_node,
13388 V4SI_type_node, integer_type_node, NULL_TREE);
13389 tree v8hi_ftype_v8hi_int
13390 = build_function_type_list (V8HI_type_node,
13391 V8HI_type_node, integer_type_node, NULL_TREE);
13392 tree v8hi_ftype_v8hi_v2di
13393 = build_function_type_list (V8HI_type_node,
13394 V8HI_type_node, V2DI_type_node, NULL_TREE);
13395 tree v4si_ftype_v4si_v2di
13396 = build_function_type_list (V4SI_type_node,
13397 V4SI_type_node, V2DI_type_node, NULL_TREE);
13398 tree v4si_ftype_v8hi_v8hi
13399 = build_function_type_list (V4SI_type_node,
13400 V8HI_type_node, V8HI_type_node, NULL_TREE);
13401 tree di_ftype_v8qi_v8qi
13402 = build_function_type_list (long_long_unsigned_type_node,
13403 V8QI_type_node, V8QI_type_node, NULL_TREE);
13404 tree v2di_ftype_v16qi_v16qi
13405 = build_function_type_list (V2DI_type_node,
13406 V16QI_type_node, V16QI_type_node, NULL_TREE);
13407 tree int_ftype_v16qi
13408 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13409 tree v16qi_ftype_pcchar
13410 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13411 tree void_ftype_pchar_v16qi
13412 = build_function_type_list (void_type_node,
13413 pchar_type_node, V16QI_type_node, NULL_TREE);
13414 tree v4si_ftype_pcint
13415 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13416 tree void_ftype_pcint_v4si
13417 = build_function_type_list (void_type_node,
13418 pcint_type_node, V4SI_type_node, NULL_TREE);
13419 tree v2di_ftype_v2di
13420 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
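/* Each *_ftype_* node above is simply a FUNCTION_TYPE describing the C
   prototype of a builtin.  For instance, v2df_ftype_v4si corresponds
   roughly to

       __v2df f (__v4si);

   using the vector typedefs from <xmmintrin.h>/<emmintrin.h> (an
   illustrative reading, not a definition made here).  */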
13422 /* Add all builtins that are more or less simple operations on two
13423 operands. */
13424 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13426 /* Use one of the operands; the target can have a different mode for
13427 mask-generating compares. */
13428 enum machine_mode mode;
13429 tree type;
13431 if (d->name == 0)
13432 continue;
13433 mode = insn_data[d->icode].operand[1].mode;
13435 switch (mode)
13437 case V16QImode:
13438 type = v16qi_ftype_v16qi_v16qi;
13439 break;
13440 case V8HImode:
13441 type = v8hi_ftype_v8hi_v8hi;
13442 break;
13443 case V4SImode:
13444 type = v4si_ftype_v4si_v4si;
13445 break;
13446 case V2DImode:
13447 type = v2di_ftype_v2di_v2di;
13448 break;
13449 case V2DFmode:
13450 type = v2df_ftype_v2df_v2df;
13451 break;
13452 case TImode:
13453 type = ti_ftype_ti_ti;
13454 break;
13455 case V4SFmode:
13456 type = v4sf_ftype_v4sf_v4sf;
13457 break;
13458 case V8QImode:
13459 type = v8qi_ftype_v8qi_v8qi;
13460 break;
13461 case V4HImode:
13462 type = v4hi_ftype_v4hi_v4hi;
13463 break;
13464 case V2SImode:
13465 type = v2si_ftype_v2si_v2si;
13466 break;
13467 case DImode:
13468 type = di_ftype_di_di;
13469 break;
13471 default:
13472 abort ();
13475 /* Override for comparisons. */
13476 if (d->icode == CODE_FOR_maskcmpv4sf3
13477 || d->icode == CODE_FOR_maskncmpv4sf3
13478 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13479 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13480 type = v4si_ftype_v4sf_v4sf;
13482 if (d->icode == CODE_FOR_maskcmpv2df3
13483 || d->icode == CODE_FOR_maskncmpv2df3
13484 || d->icode == CODE_FOR_vmmaskcmpv2df3
13485 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13486 type = v2di_ftype_v2df_v2df;
13488 def_builtin (d->mask, d->name, type, d->code);
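/* As an illustration (assuming a typical bdesc_2arg entry; the table
   itself lives elsewhere in this file): an entry along the lines of

       { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
         IX86_BUILTIN_ADDPS, 0, 0 }

   has V4SFmode operands, so the loop above picks
   v4sf_ftype_v4sf_v4sf and registers __builtin_ia32_addps with that
   prototype.  */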
13491 /* Add the remaining MMX insns with somewhat more complicated types. */
13492 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13493 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13494 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13495 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13496 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13498 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13499 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13500 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13502 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13503 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13505 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13506 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13508 /* comi/ucomi insns. */
13509 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13510 if (d->mask == MASK_SSE2)
13511 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13512 else
13513 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13515 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13516 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13517 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13519 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13520 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13521 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13522 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13523 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13524 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13525 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13526 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13527 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13528 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13529 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13531 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13532 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13534 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13536 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13537 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13538 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13539 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13540 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13541 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13543 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13544 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13545 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13546 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13548 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13549 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13550 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13551 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13553 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13555 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13557 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13558 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13559 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13560 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13561 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13562 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13564 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13566 /* Original 3DNow! */
13567 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13568 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13569 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13570 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13571 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13572 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13573 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13574 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13575 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13576 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13577 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13578 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13579 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13583 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13584 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13585 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13586 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13588 /* 3DNow! extension as used in the Athlon CPU. */
13589 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13590 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13591 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13592 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13593 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13594 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13596 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13598 /* SSE2 */
13599 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13614 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13647 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13648 def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13655 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13679 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13703 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13708 /* Errors in the source file can cause expand_expr to return const0_rtx
13709 where we expect a vector. To avoid crashing, use one of the vector
13710 clear instructions. */
13711 static rtx
13712 safe_vector_operand (x, mode)
13713 rtx x;
13714 enum machine_mode mode;
13716 if (x != const0_rtx)
13717 return x;
13718 x = gen_reg_rtx (mode);
13720 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13721 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13722 : gen_rtx_SUBREG (DImode, x, 0)));
13723 else
13724 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13725 : gen_rtx_SUBREG (V4SFmode, x, 0),
13726 CONST0_RTX (V4SFmode)));
13727 return x;
13730 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13732 static rtx
13733 ix86_expand_binop_builtin (icode, arglist, target)
13734 enum insn_code icode;
13735 tree arglist;
13736 rtx target;
13738 rtx pat;
13739 tree arg0 = TREE_VALUE (arglist);
13740 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13741 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13742 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13743 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13744 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13745 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13747 if (VECTOR_MODE_P (mode0))
13748 op0 = safe_vector_operand (op0, mode0);
13749 if (VECTOR_MODE_P (mode1))
13750 op1 = safe_vector_operand (op1, mode1);
13752 if (! target
13753 || GET_MODE (target) != tmode
13754 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13755 target = gen_reg_rtx (tmode);
13757 if (GET_MODE (op1) == SImode && mode1 == TImode)
13759 rtx x = gen_reg_rtx (V4SImode);
13760 emit_insn (gen_sse2_loadd (x, op1));
13761 op1 = gen_lowpart (TImode, x);
13764 /* In case the insn wants input operands in modes different from
13765 the result, abort. */
13766 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13767 abort ();
13769 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13770 op0 = copy_to_mode_reg (mode0, op0);
13771 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13772 op1 = copy_to_mode_reg (mode1, op1);
13774 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13775 yet at most one of the two may be a memory operand. This is normally
13776 enforced by expanders, but we didn't bother to create one here. */
13777 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13778 op0 = copy_to_mode_reg (mode0, op0);
13780 pat = GEN_FCN (icode) (target, op0, op1);
13781 if (! pat)
13782 return 0;
13783 emit_insn (pat);
13784 return target;
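/* For a two-operand builtin such as __builtin_ia32_addps, the routine
   above essentially emits (a sketch of the generated RTL):

       (set (reg:V4SF target)
            (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   after forcing both operands into forms the named pattern accepts.  */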
13787 /* Subroutine of ix86_expand_builtin to take care of stores. */
13789 static rtx
13790 ix86_expand_store_builtin (icode, arglist)
13791 enum insn_code icode;
13792 tree arglist;
13794 rtx pat;
13795 tree arg0 = TREE_VALUE (arglist);
13796 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13797 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13798 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13799 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13800 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13802 if (VECTOR_MODE_P (mode1))
13803 op1 = safe_vector_operand (op1, mode1);
13805 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13806 op1 = copy_to_mode_reg (mode1, op1);
13808 pat = GEN_FCN (icode) (op0, op1);
13809 if (pat)
13810 emit_insn (pat);
13811 return 0;
13814 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13816 static rtx
13817 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13818 enum insn_code icode;
13819 tree arglist;
13820 rtx target;
13821 int do_load;
13823 rtx pat;
13824 tree arg0 = TREE_VALUE (arglist);
13825 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13826 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13827 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13829 if (! target
13830 || GET_MODE (target) != tmode
13831 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13832 target = gen_reg_rtx (tmode);
13833 if (do_load)
13834 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13835 else
13837 if (VECTOR_MODE_P (mode0))
13838 op0 = safe_vector_operand (op0, mode0);
13840 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13841 op0 = copy_to_mode_reg (mode0, op0);
13844 pat = GEN_FCN (icode) (target, op0);
13845 if (! pat)
13846 return 0;
13847 emit_insn (pat);
13848 return target;
13851 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13852 sqrtss, rsqrtss, rcpss. */
13854 static rtx
13855 ix86_expand_unop1_builtin (icode, arglist, target)
13856 enum insn_code icode;
13857 tree arglist;
13858 rtx target;
13860 rtx pat;
13861 tree arg0 = TREE_VALUE (arglist);
13862 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13863 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13864 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13866 if (! target
13867 || GET_MODE (target) != tmode
13868 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13869 target = gen_reg_rtx (tmode);
13871 if (VECTOR_MODE_P (mode0))
13872 op0 = safe_vector_operand (op0, mode0);
13874 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13875 op0 = copy_to_mode_reg (mode0, op0);
13877 op1 = op0;
13878 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13879 op1 = copy_to_mode_reg (mode0, op1);
13881 pat = GEN_FCN (icode) (target, op0, op1);
13882 if (! pat)
13883 return 0;
13884 emit_insn (pat);
13885 return target;
13888 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13890 static rtx
13891 ix86_expand_sse_compare (d, arglist, target)
13892 const struct builtin_description *d;
13893 tree arglist;
13894 rtx target;
13896 rtx pat;
13897 tree arg0 = TREE_VALUE (arglist);
13898 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13899 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13900 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13901 rtx op2;
13902 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13903 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13904 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13905 enum rtx_code comparison = d->comparison;
13907 if (VECTOR_MODE_P (mode0))
13908 op0 = safe_vector_operand (op0, mode0);
13909 if (VECTOR_MODE_P (mode1))
13910 op1 = safe_vector_operand (op1, mode1);
13912 /* Swap operands if we have a comparison that isn't available in
13913 hardware. */
13914 if (d->flag)
13916 rtx tmp = gen_reg_rtx (mode1);
13917 emit_move_insn (tmp, op1);
13918 op1 = op0;
13919 op0 = tmp;
13922 if (! target
13923 || GET_MODE (target) != tmode
13924 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13925 target = gen_reg_rtx (tmode);
13927 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13928 op0 = copy_to_mode_reg (mode0, op0);
13929 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13930 op1 = copy_to_mode_reg (mode1, op1);
13932 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13933 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13934 if (! pat)
13935 return 0;
13936 emit_insn (pat);
13937 return target;
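/* Example of the swap above (assuming the usual bdesc_2arg encoding):
   __builtin_ia32_cmpgtps has d->flag set and d->comparison == LT, so
   cmpgtps (a, b) is emitted as cmpltps with the operands exchanged,
   i.e. a > b is computed as b < a, which the hardware does support.  */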
13940 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13942 static rtx
13943 ix86_expand_sse_comi (d, arglist, target)
13944 const struct builtin_description *d;
13945 tree arglist;
13946 rtx target;
13948 rtx pat;
13949 tree arg0 = TREE_VALUE (arglist);
13950 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13951 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13952 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13953 rtx op2;
13954 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13955 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13956 enum rtx_code comparison = d->comparison;
13958 if (VECTOR_MODE_P (mode0))
13959 op0 = safe_vector_operand (op0, mode0);
13960 if (VECTOR_MODE_P (mode1))
13961 op1 = safe_vector_operand (op1, mode1);
13963 /* Swap operands if we have a comparison that isn't available in
13964 hardware. */
13965 if (d->flag)
13967 rtx tmp = op1;
13968 op1 = op0;
13969 op0 = tmp;
13972 target = gen_reg_rtx (SImode);
13973 emit_move_insn (target, const0_rtx);
13974 target = gen_rtx_SUBREG (QImode, target, 0);
13976 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13977 op0 = copy_to_mode_reg (mode0, op0);
13978 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13979 op1 = copy_to_mode_reg (mode1, op1);
13981 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13982 pat = GEN_FCN (d->icode) (op0, op1);
13983 if (! pat)
13984 return 0;
13985 emit_insn (pat);
13986 emit_insn (gen_rtx_SET (VOIDmode,
13987 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13988 gen_rtx_fmt_ee (comparison, QImode,
13989 SET_DEST (pat),
13990 const0_rtx)));
13992 return SUBREG_REG (target);
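/* The comi expander above emits the (u)comiss/(u)comisd itself and then
   a setcc-style assignment into the low byte of a fresh SImode pseudo,
   roughly (a sketch):

       (set (strict_low_part (subreg:QI target))
            (gt:QI (reg:CCFP flags) (const_int 0)))

   for __builtin_ia32_comigt, so the builtin's int result is 0 or 1.  */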
13995 /* Expand an expression EXP that calls a built-in function,
13996 with result going to TARGET if that's convenient
13997 (and in mode MODE if that's convenient).
13998 SUBTARGET may be used as the target for computing one of EXP's operands.
13999 IGNORE is nonzero if the value is to be ignored. */
14002 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
14003 tree exp;
14004 rtx target;
14005 rtx subtarget ATTRIBUTE_UNUSED;
14006 enum machine_mode mode ATTRIBUTE_UNUSED;
14007 int ignore ATTRIBUTE_UNUSED;
14009 const struct builtin_description *d;
14010 size_t i;
14011 enum insn_code icode;
14012 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14013 tree arglist = TREE_OPERAND (exp, 1);
14014 tree arg0, arg1, arg2;
14015 rtx op0, op1, op2, pat;
14016 enum machine_mode tmode, mode0, mode1, mode2;
14017 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14019 switch (fcode)
14021 case IX86_BUILTIN_EMMS:
14022 emit_insn (gen_emms ());
14023 return 0;
14025 case IX86_BUILTIN_SFENCE:
14026 emit_insn (gen_sfence ());
14027 return 0;
14029 case IX86_BUILTIN_PEXTRW:
14030 case IX86_BUILTIN_PEXTRW128:
14031 icode = (fcode == IX86_BUILTIN_PEXTRW
14032 ? CODE_FOR_mmx_pextrw
14033 : CODE_FOR_sse2_pextrw);
14034 arg0 = TREE_VALUE (arglist);
14035 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14036 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14037 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14038 tmode = insn_data[icode].operand[0].mode;
14039 mode0 = insn_data[icode].operand[1].mode;
14040 mode1 = insn_data[icode].operand[2].mode;
14042 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14043 op0 = copy_to_mode_reg (mode0, op0);
14044 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14046 /* @@@ better error message */
14047 error ("selector must be an immediate");
14048 return gen_reg_rtx (tmode);
14050 if (target == 0
14051 || GET_MODE (target) != tmode
14052 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14053 target = gen_reg_rtx (tmode);
14054 pat = GEN_FCN (icode) (target, op0, op1);
14055 if (! pat)
14056 return 0;
14057 emit_insn (pat);
14058 return target;
14060 case IX86_BUILTIN_PINSRW:
14061 case IX86_BUILTIN_PINSRW128:
14062 icode = (fcode == IX86_BUILTIN_PINSRW
14063 ? CODE_FOR_mmx_pinsrw
14064 : CODE_FOR_sse2_pinsrw);
14065 arg0 = TREE_VALUE (arglist);
14066 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14067 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14068 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14069 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14070 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14071 tmode = insn_data[icode].operand[0].mode;
14072 mode0 = insn_data[icode].operand[1].mode;
14073 mode1 = insn_data[icode].operand[2].mode;
14074 mode2 = insn_data[icode].operand[3].mode;
14076 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14077 op0 = copy_to_mode_reg (mode0, op0);
14078 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14079 op1 = copy_to_mode_reg (mode1, op1);
14080 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14082 /* @@@ better error message */
14083 error ("selector must be an immediate");
14084 return const0_rtx;
14086 if (target == 0
14087 || GET_MODE (target) != tmode
14088 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14089 target = gen_reg_rtx (tmode);
14090 pat = GEN_FCN (icode) (target, op0, op1, op2);
14091 if (! pat)
14092 return 0;
14093 emit_insn (pat);
14094 return target;
14096 case IX86_BUILTIN_MASKMOVQ:
14097 case IX86_BUILTIN_MASKMOVDQU:
14098 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14099 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14100 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14101 : CODE_FOR_sse2_maskmovdqu));
14102 /* Note the arg order is different from the operand order. */
14103 arg1 = TREE_VALUE (arglist);
14104 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14105 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14106 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14107 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14108 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14109 mode0 = insn_data[icode].operand[0].mode;
14110 mode1 = insn_data[icode].operand[1].mode;
14111 mode2 = insn_data[icode].operand[2].mode;
14113 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14114 op0 = copy_to_mode_reg (mode0, op0);
14115 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14116 op1 = copy_to_mode_reg (mode1, op1);
14117 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14118 op2 = copy_to_mode_reg (mode2, op2);
14119 pat = GEN_FCN (icode) (op0, op1, op2);
14120 if (! pat)
14121 return 0;
14122 emit_insn (pat);
14123 return 0;
14125 case IX86_BUILTIN_SQRTSS:
14126 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14127 case IX86_BUILTIN_RSQRTSS:
14128 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14129 case IX86_BUILTIN_RCPSS:
14130 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14132 case IX86_BUILTIN_LOADAPS:
14133 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14135 case IX86_BUILTIN_LOADUPS:
14136 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14138 case IX86_BUILTIN_STOREAPS:
14139 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14141 case IX86_BUILTIN_STOREUPS:
14142 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14144 case IX86_BUILTIN_LOADSS:
14145 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14147 case IX86_BUILTIN_STORESS:
14148 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14150 case IX86_BUILTIN_LOADHPS:
14151 case IX86_BUILTIN_LOADLPS:
14152 case IX86_BUILTIN_LOADHPD:
14153 case IX86_BUILTIN_LOADLPD:
14154 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14155 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14156 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14157 : CODE_FOR_sse2_movlpd);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14162 tmode = insn_data[icode].operand[0].mode;
14163 mode0 = insn_data[icode].operand[1].mode;
14164 mode1 = insn_data[icode].operand[2].mode;
14166 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14167 op0 = copy_to_mode_reg (mode0, op0);
14168 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14169 if (target == 0
14170 || GET_MODE (target) != tmode
14171 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14172 target = gen_reg_rtx (tmode);
14173 pat = GEN_FCN (icode) (target, op0, op1);
14174 if (! pat)
14175 return 0;
14176 emit_insn (pat);
14177 return target;
14179 case IX86_BUILTIN_STOREHPS:
14180 case IX86_BUILTIN_STORELPS:
14181 case IX86_BUILTIN_STOREHPD:
14182 case IX86_BUILTIN_STORELPD:
14183 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14184 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14185 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14186 : CODE_FOR_sse2_movlpd);
14187 arg0 = TREE_VALUE (arglist);
14188 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14189 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14190 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14191 mode0 = insn_data[icode].operand[1].mode;
14192 mode1 = insn_data[icode].operand[2].mode;
14194 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14195 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14196 op1 = copy_to_mode_reg (mode1, op1);
14198 pat = GEN_FCN (icode) (op0, op0, op1);
14199 if (! pat)
14200 return 0;
14201 emit_insn (pat);
14202 return 0;
14204 case IX86_BUILTIN_MOVNTPS:
14205 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14206 case IX86_BUILTIN_MOVNTQ:
14207 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14209 case IX86_BUILTIN_LDMXCSR:
14210 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14211 target = assign_386_stack_local (SImode, 0);
14212 emit_move_insn (target, op0);
14213 emit_insn (gen_ldmxcsr (target));
14214 return 0;
14216 case IX86_BUILTIN_STMXCSR:
14217 target = assign_386_stack_local (SImode, 0);
14218 emit_insn (gen_stmxcsr (target));
14219 return copy_to_mode_reg (SImode, target);
14221 case IX86_BUILTIN_SHUFPS:
14222 case IX86_BUILTIN_SHUFPD:
14223 icode = (fcode == IX86_BUILTIN_SHUFPS
14224 ? CODE_FOR_sse_shufps
14225 : CODE_FOR_sse2_shufpd);
14226 arg0 = TREE_VALUE (arglist);
14227 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14228 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14229 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14230 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14231 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14232 tmode = insn_data[icode].operand[0].mode;
14233 mode0 = insn_data[icode].operand[1].mode;
14234 mode1 = insn_data[icode].operand[2].mode;
14235 mode2 = insn_data[icode].operand[3].mode;
14237 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14238 op0 = copy_to_mode_reg (mode0, op0);
14239 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14240 op1 = copy_to_mode_reg (mode1, op1);
14241 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14243 /* @@@ better error message */
14244 error ("mask must be an immediate");
14245 return gen_reg_rtx (tmode);
14247 if (target == 0
14248 || GET_MODE (target) != tmode
14249 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14250 target = gen_reg_rtx (tmode);
14251 pat = GEN_FCN (icode) (target, op0, op1, op2);
14252 if (! pat)
14253 return 0;
14254 emit_insn (pat);
14255 return target;
14257 case IX86_BUILTIN_PSHUFW:
14258 case IX86_BUILTIN_PSHUFD:
14259 case IX86_BUILTIN_PSHUFHW:
14260 case IX86_BUILTIN_PSHUFLW:
14261 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14262 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14263 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14264 : CODE_FOR_mmx_pshufw);
14265 arg0 = TREE_VALUE (arglist);
14266 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14267 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14268 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14269 tmode = insn_data[icode].operand[0].mode;
14270 mode1 = insn_data[icode].operand[1].mode;
14271 mode2 = insn_data[icode].operand[2].mode;
14273 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14274 op0 = copy_to_mode_reg (mode1, op0);
14275 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14277 /* @@@ better error message */
14278 error ("mask must be an immediate");
14279 return const0_rtx;
14281 if (target == 0
14282 || GET_MODE (target) != tmode
14283 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14284 target = gen_reg_rtx (tmode);
14285 pat = GEN_FCN (icode) (target, op0, op1);
14286 if (! pat)
14287 return 0;
14288 emit_insn (pat);
14289 return target;
14291 case IX86_BUILTIN_PSLLDQI128:
14292 case IX86_BUILTIN_PSRLDQI128:
14293 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14294 : CODE_FOR_sse2_lshrti3);
14295 arg0 = TREE_VALUE (arglist);
14296 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14297 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14298 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14299 tmode = insn_data[icode].operand[0].mode;
14300 mode1 = insn_data[icode].operand[1].mode;
14301 mode2 = insn_data[icode].operand[2].mode;
14303 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14305 op0 = copy_to_reg (op0);
14306 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14308 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14310 error ("shift must be an immediate");
14311 return const0_rtx;
14313 target = gen_reg_rtx (V2DImode);
14314 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14315 if (! pat)
14316 return 0;
14317 emit_insn (pat);
14318 return target;
14320 case IX86_BUILTIN_FEMMS:
14321 emit_insn (gen_femms ());
14322 return NULL_RTX;
14324 case IX86_BUILTIN_PAVGUSB:
14325 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14327 case IX86_BUILTIN_PF2ID:
14328 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14330 case IX86_BUILTIN_PFACC:
14331 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14333 case IX86_BUILTIN_PFADD:
14334 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14336 case IX86_BUILTIN_PFCMPEQ:
14337 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14339 case IX86_BUILTIN_PFCMPGE:
14340 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14342 case IX86_BUILTIN_PFCMPGT:
14343 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14345 case IX86_BUILTIN_PFMAX:
14346 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14348 case IX86_BUILTIN_PFMIN:
14349 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14351 case IX86_BUILTIN_PFMUL:
14352 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14354 case IX86_BUILTIN_PFRCP:
14355 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14357 case IX86_BUILTIN_PFRCPIT1:
14358 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14360 case IX86_BUILTIN_PFRCPIT2:
14361 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14363 case IX86_BUILTIN_PFRSQIT1:
14364 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14366 case IX86_BUILTIN_PFRSQRT:
14367 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14369 case IX86_BUILTIN_PFSUB:
14370 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14372 case IX86_BUILTIN_PFSUBR:
14373 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14375 case IX86_BUILTIN_PI2FD:
14376 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14378 case IX86_BUILTIN_PMULHRW:
14379 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14381 case IX86_BUILTIN_PF2IW:
14382 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14384 case IX86_BUILTIN_PFNACC:
14385 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14387 case IX86_BUILTIN_PFPNACC:
14388 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14390 case IX86_BUILTIN_PI2FW:
14391 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14393 case IX86_BUILTIN_PSWAPDSI:
14394 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14396 case IX86_BUILTIN_PSWAPDSF:
14397 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14399 case IX86_BUILTIN_SSE_ZERO:
14400 target = gen_reg_rtx (V4SFmode);
14401 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14402 return target;
14404 case IX86_BUILTIN_MMX_ZERO:
14405 target = gen_reg_rtx (DImode);
14406 emit_insn (gen_mmx_clrdi (target));
14407 return target;
14409 case IX86_BUILTIN_CLRTI:
14410 target = gen_reg_rtx (V2DImode);
14411 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14412 return target;
14415 case IX86_BUILTIN_SQRTSD:
14416 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14417 case IX86_BUILTIN_LOADAPD:
14418 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14419 case IX86_BUILTIN_LOADUPD:
14420 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14422 case IX86_BUILTIN_STOREAPD:
14423 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14424 case IX86_BUILTIN_STOREUPD:
14425 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14427 case IX86_BUILTIN_LOADSD:
14428 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14430 case IX86_BUILTIN_STORESD:
14431 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14433 case IX86_BUILTIN_SETPD1:
14434 target = assign_386_stack_local (DFmode, 0);
14435 arg0 = TREE_VALUE (arglist);
14436 emit_move_insn (adjust_address (target, DFmode, 0),
14437 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14438 op0 = gen_reg_rtx (V2DFmode);
14439 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14440 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14441 return op0;
14443 case IX86_BUILTIN_SETPD:
14444 target = assign_386_stack_local (V2DFmode, 0);
14445 arg0 = TREE_VALUE (arglist);
14446 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14447 emit_move_insn (adjust_address (target, DFmode, 0),
14448 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14449 emit_move_insn (adjust_address (target, DFmode, 8),
14450 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14451 op0 = gen_reg_rtx (V2DFmode);
14452 emit_insn (gen_sse2_movapd (op0, target));
14453 return op0;
14455 case IX86_BUILTIN_LOADRPD:
14456 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14457 gen_reg_rtx (V2DFmode), 1);
14458 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14459 return target;
14461 case IX86_BUILTIN_LOADPD1:
14462 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14463 gen_reg_rtx (V2DFmode), 1);
14464 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14465 return target;
14467 case IX86_BUILTIN_STOREPD1:
14468 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14469 case IX86_BUILTIN_STORERPD:
14470 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14472 case IX86_BUILTIN_CLRPD:
14473 target = gen_reg_rtx (V2DFmode);
14474 emit_insn (gen_sse_clrv2df (target));
14475 return target;
14477 case IX86_BUILTIN_MFENCE:
14478 emit_insn (gen_sse2_mfence ());
14479 return 0;
14480 case IX86_BUILTIN_LFENCE:
14481 emit_insn (gen_sse2_lfence ());
14482 return 0;
14484 case IX86_BUILTIN_CLFLUSH:
14485 arg0 = TREE_VALUE (arglist);
14486 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14487 icode = CODE_FOR_sse2_clflush;
14488 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14489 op0 = copy_to_mode_reg (Pmode, op0);
14491 emit_insn (gen_sse2_clflush (op0));
14492 return 0;
14494 case IX86_BUILTIN_MOVNTPD:
14495 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14496 case IX86_BUILTIN_MOVNTDQ:
14497 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14498 case IX86_BUILTIN_MOVNTI:
14499 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14501 case IX86_BUILTIN_LOADDQA:
14502 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14503 case IX86_BUILTIN_LOADDQU:
14504 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14505 case IX86_BUILTIN_LOADD:
14506 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14508 case IX86_BUILTIN_STOREDQA:
14509 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14510 case IX86_BUILTIN_STOREDQU:
14511 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14512 case IX86_BUILTIN_STORED:
14513 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14515 default:
14516 break;
14519 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14520 if (d->code == fcode)
14522 /* Compares are treated specially. */
14523 if (d->icode == CODE_FOR_maskcmpv4sf3
14524 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14525 || d->icode == CODE_FOR_maskncmpv4sf3
14526 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14527 || d->icode == CODE_FOR_maskcmpv2df3
14528 || d->icode == CODE_FOR_vmmaskcmpv2df3
14529 || d->icode == CODE_FOR_maskncmpv2df3
14530 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14531 return ix86_expand_sse_compare (d, arglist, target);
14533 return ix86_expand_binop_builtin (d->icode, arglist, target);
14536 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14537 if (d->code == fcode)
14538 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14540 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14541 if (d->code == fcode)
14542 return ix86_expand_sse_comi (d, arglist, target);
14544 /* @@@ Should really do something sensible here. */
14545 return 0;
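/* Overall flow, as an example: a user call to _mm_add_ps becomes a
   CALL_EXPR of __builtin_ia32_addps; DECL_FUNCTION_CODE yields
   IX86_BUILTIN_ADDPS, which is not one of the special cases in the
   switch above, so the bdesc_2arg scan finds it and hands it to
   ix86_expand_binop_builtin, which emits the addv4sf3 pattern.  */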
14548 /* Store OPERAND to memory after reload has completed. This means
14549 that we can't easily use assign_stack_local. */
14551 ix86_force_to_memory (mode, operand)
14552 enum machine_mode mode;
14553 rtx operand;
14555 rtx result;
14556 if (!reload_completed)
14557 abort ();
14558 if (TARGET_RED_ZONE)
14560 result = gen_rtx_MEM (mode,
14561 gen_rtx_PLUS (Pmode,
14562 stack_pointer_rtx,
14563 GEN_INT (-RED_ZONE_SIZE)));
14564 emit_move_insn (result, operand);
14566 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14568 switch (mode)
14570 case HImode:
14571 case SImode:
14572 operand = gen_lowpart (DImode, operand);
14573 /* FALLTHRU */
14574 case DImode:
14575 emit_insn (
14576 gen_rtx_SET (VOIDmode,
14577 gen_rtx_MEM (DImode,
14578 gen_rtx_PRE_DEC (DImode,
14579 stack_pointer_rtx)),
14580 operand));
14581 break;
14582 default:
14583 abort ();
14585 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14587 else
14589 switch (mode)
14591 case DImode:
14593 rtx operands[2];
14594 split_di (&operand, 1, operands, operands + 1);
14595 emit_insn (
14596 gen_rtx_SET (VOIDmode,
14597 gen_rtx_MEM (SImode,
14598 gen_rtx_PRE_DEC (Pmode,
14599 stack_pointer_rtx)),
14600 operands[1]));
14601 emit_insn (
14602 gen_rtx_SET (VOIDmode,
14603 gen_rtx_MEM (SImode,
14604 gen_rtx_PRE_DEC (Pmode,
14605 stack_pointer_rtx)),
14606 operands[0]));
14608 break;
14609 case HImode:
14610 /* It is better to store HImodes as SImodes. */
14611 if (!TARGET_PARTIAL_REG_STALL)
14612 operand = gen_lowpart (SImode, operand);
14613 /* FALLTHRU */
14614 case SImode:
14615 emit_insn (
14616 gen_rtx_SET (VOIDmode,
14617 gen_rtx_MEM (GET_MODE (operand),
14618 gen_rtx_PRE_DEC (SImode,
14619 stack_pointer_rtx)),
14620 operand));
14621 break;
14622 default:
14623 abort ();
14625 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14627 return result;
14630 /* Free the operand from memory. */
14631 void
14632 ix86_free_from_memory (mode)
14633 enum machine_mode mode;
14635 if (!TARGET_RED_ZONE)
14637 int size;
14639 if (mode == DImode || TARGET_64BIT)
14640 size = 8;
14641 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14642 size = 2;
14643 else
14644 size = 4;
14645 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14646 to a pop or add instruction if registers are available. */
14647 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14648 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14649 GEN_INT (size))));
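/* These two routines are meant to be used as a bracketed pair after
   reload, e.g. (a usage sketch, not a call site in this file):

       rtx mem = ix86_force_to_memory (DImode, operands[1]);
       ... emit insns that need the value in memory, via MEM ...
       ix86_free_from_memory (DImode);

   On 64-bit targets with a red zone no stack adjustment is needed;
   otherwise the value is pushed and later released by the LEA above.  */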
14653 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14654 QImode must go into class Q_REGS.
14655 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and
14656 movdf do mem-to-mem moves through integer regs. */
14657 enum reg_class
14658 ix86_preferred_reload_class (x, class)
14659 rtx x;
14660 enum reg_class class;
14662 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14663 return NO_REGS;
14664 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14666 /* SSE can't load any constant directly yet. */
14667 if (SSE_CLASS_P (class))
14668 return NO_REGS;
14669 /* Floats can load 0 and 1. */
14670 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14672 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14673 if (MAYBE_SSE_CLASS_P (class))
14674 return (reg_class_subset_p (class, GENERAL_REGS)
14675 ? GENERAL_REGS : FLOAT_REGS);
14676 else
14677 return class;
14679 /* General regs can load everything. */
14680 if (reg_class_subset_p (class, GENERAL_REGS))
14681 return GENERAL_REGS;
14682 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14683 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14684 return NO_REGS;
14686 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14687 return NO_REGS;
14688 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14689 return Q_REGS;
14690 return class;
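/* Example: asked to reload the CONST_DOUBLE 2.5 into an SSE or x87
   class, the routine above answers NO_REGS, so the constant ends up in
   the constant pool and is loaded from memory; 0.0 or 1.0 may stay in
   FLOAT_REGS because standard_80387_constant_p accepts them (fldz,
   fld1).  */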
14693 /* If we are copying between general and FP registers, we need a memory
14694 location. The same is true for SSE and MMX registers.
14696 The macro can't work reliably when one of the CLASSES is a class containing
14697 registers from multiple units (SSE, MMX, integer). We avoid this by never
14698 combining those units in a single alternative in the machine description.
14699 Ensure that this constraint holds to avoid unexpected surprises.
14701 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14702 enforce these sanity checks. */
14704 ix86_secondary_memory_needed (class1, class2, mode, strict)
14705 enum reg_class class1, class2;
14706 enum machine_mode mode;
14707 int strict;
14709 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14710 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14711 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14712 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14713 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14714 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14716 if (strict)
14717 abort ();
14718 else
14719 return 1;
14721 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14722 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14723 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14724 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14725 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
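/* Example: copying a DFmode value between an SSE register and an x87
   or general register has no direct instruction on ia32, so this
   predicate answers nonzero and the register allocator routes the copy
   through a stack slot; an SImode copy between SSE and general
   registers can instead use movd when inter-unit moves are enabled
   (or when optimizing for size).  */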
14727 /* Return the cost of moving data from a register in class CLASS1 to
14728 one in class CLASS2.
14730 It is not required that the cost always equal 2 when FROM is the same as TO;
14731 on some machines it is expensive to move between registers if they are not
14732 general registers. */
14734 ix86_register_move_cost (mode, class1, class2)
14735 enum machine_mode mode;
14736 enum reg_class class1, class2;
14738 /* In case we require secondary memory, compute the cost of the store
14739 followed by the load. To avoid bad register allocation choices, this
14740 needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14742 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14744 int cost = 1;
14746 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14747 MEMORY_MOVE_COST (mode, class1, 1));
14748 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14749 MEMORY_MOVE_COST (mode, class2, 1));
14751 /* When copying from a general purpose register we may emit multiple
14752 stores followed by a single load, causing a memory size mismatch stall.
14753 Count this as an arbitrarily high cost of 20. */
14754 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14755 cost += 20;
14757 /* In the case of FP/MMX moves, the registers actually overlap, and we
14758 have to switch modes in order to treat them differently. */
14759 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14760 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14761 cost += 20;
14763 return cost;
14766 /* Moves between SSE/MMX and integer unit are expensive. */
14767 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14768 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14769 return ix86_cost->mmxsse_to_integer;
14770 if (MAYBE_FLOAT_CLASS_P (class1))
14771 return ix86_cost->fp_move;
14772 if (MAYBE_SSE_CLASS_P (class1))
14773 return ix86_cost->sse_move;
14774 if (MAYBE_MMX_CLASS_P (class1))
14775 return ix86_cost->mmx_move;
14776 return 2;
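/* Worked example (editorial illustration; the actual numbers come from the
   active processor_costs table): a GENERAL_REGS to GENERAL_REGS move falls
   through to the final "return 2" above.  A DFmode move between FLOAT_REGS
   and SSE_REGS needs secondary memory, so its cost is assembled as

       1 + MAX (fp load, fp store) + MAX (sse load, sse store)

   plus 20 when the source class needs more hard registers than the
   destination, and another 20 for the overlapping FP/MMX register case.  */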
14779 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14781 ix86_hard_regno_mode_ok (regno, mode)
14782 int regno;
14783 enum machine_mode mode;
14785 /* Flags, and only flags, can hold CCmode values. */
14786 if (CC_REGNO_P (regno))
14787 return GET_MODE_CLASS (mode) == MODE_CC;
14788 if (GET_MODE_CLASS (mode) == MODE_CC
14789 || GET_MODE_CLASS (mode) == MODE_RANDOM
14790 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14791 return 0;
14792 if (FP_REGNO_P (regno))
14793 return VALID_FP_MODE_P (mode);
14794 if (SSE_REGNO_P (regno))
14795 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14796 if (MMX_REGNO_P (regno))
14797 return (TARGET_MMX
14798 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14799 /* We handle both integers and floats in the general purpose registers.
14800 In the future we should be able to handle vector modes as well. */
14801 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14802 return 0;
14803 /* Take care with QImode values - they can live in non-QI regs, but then
14804 they do cause partial register stalls. */
14805 if (regno < 4 || mode != QImode || TARGET_64BIT)
14806 return 1;
14807 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14810 /* Return the cost of moving data of mode M between a
14811 register and memory. A value of 2 is the default; this cost is
14812 relative to those in `REGISTER_MOVE_COST'.
14814 If moving between registers and memory is more expensive than
14815 between two registers, you should define this macro to express the
14816 relative cost.
14818 Also model the increased cost of moving QImode registers in non
14819 Q_REGS classes. */
14822 ix86_memory_move_cost (mode, class, in)
14823 enum machine_mode mode;
14824 enum reg_class class;
14825 int in;
14827 if (FLOAT_CLASS_P (class))
14829 int index;
14830 switch (mode)
14832 case SFmode:
14833 index = 0;
14834 break;
14835 case DFmode:
14836 index = 1;
14837 break;
14838 case XFmode:
14839 case TFmode:
14840 index = 2;
14841 break;
14842 default:
14843 return 100;
14845 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14847 if (SSE_CLASS_P (class))
14849 int index;
14850 switch (GET_MODE_SIZE (mode))
14852 case 4:
14853 index = 0;
14854 break;
14855 case 8:
14856 index = 1;
14857 break;
14858 case 16:
14859 index = 2;
14860 break;
14861 default:
14862 return 100;
14864 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14866 if (MMX_CLASS_P (class))
14868 int index;
14869 switch (GET_MODE_SIZE (mode))
14871 case 4:
14872 index = 0;
14873 break;
14874 case 8:
14875 index = 1;
14876 break;
14877 default:
14878 return 100;
14880 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14882 switch (GET_MODE_SIZE (mode))
14884 case 1:
14885 if (in)
14886 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14887 : ix86_cost->movzbl_load);
14888 else
14889 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14890 : ix86_cost->int_store[0] + 4);
14891 break;
14892 case 2:
14893 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14894 default:
14895 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
14896 if (mode == TFmode)
14897 mode = XFmode;
14898 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14899 * (((int) GET_MODE_SIZE (mode)
14900 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
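/* Example of the fallback case above (editorial illustration): on a 32-bit
   target a DImode value kept in GENERAL_REGS takes two word-sized moves,
   so the cost is int_load[2] (or int_store[2]) scaled by
   (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD == 2.  */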
14904 /* Compute a (partial) cost for rtx X. Return true if the complete
14905 cost has been computed, and false if subexpressions should be
14906 scanned. In either case, *TOTAL contains the cost result. */
14908 static bool
14909 ix86_rtx_costs (x, code, outer_code, total)
14910 rtx x;
14911 int code, outer_code;
14912 int *total;
14914 enum machine_mode mode = GET_MODE (x);
14916 switch (code)
14918 case CONST_INT:
14919 case CONST:
14920 case LABEL_REF:
14921 case SYMBOL_REF:
14922 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14923 *total = 3;
14924 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14925 *total = 2;
14926 else if (flag_pic && SYMBOLIC_CONST (x)
14927 && (!TARGET_64BIT
14928 || (GET_CODE (x) != LABEL_REF
14929 && (GET_CODE (x) != SYMBOL_REF
14930 || !SYMBOL_REF_LOCAL_P (x)))))
14931 *total = 1;
14932 else
14933 *total = 0;
14934 return true;
14936 case CONST_DOUBLE:
14937 if (mode == VOIDmode)
14938 *total = 0;
14939 else
14940 switch (standard_80387_constant_p (x))
14942 case 1: /* 0.0 */
14943 *total = 1;
14944 break;
14945 default: /* Other constants */
14946 *total = 2;
14947 break;
14948 case 0:
14949 case -1:
14950 /* Start with (MEM (SYMBOL_REF)), since that's where
14951 it'll probably end up. Add a penalty for size. */
14952 *total = (COSTS_N_INSNS (1)
14953 + (flag_pic != 0 && !TARGET_64BIT)
14954 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14955 break;
14957 return true;
14959 case ZERO_EXTEND:
14960 /* Zero extension is often completely free on x86_64, so make
14961 it as cheap as possible. */
14962 if (TARGET_64BIT && mode == DImode
14963 && GET_MODE (XEXP (x, 0)) == SImode)
14964 *total = 1;
14965 else if (TARGET_ZERO_EXTEND_WITH_AND)
14966 *total = COSTS_N_INSNS (ix86_cost->add);
14967 else
14968 *total = COSTS_N_INSNS (ix86_cost->movzx);
14969 return false;
14971 case SIGN_EXTEND:
14972 *total = COSTS_N_INSNS (ix86_cost->movsx);
14973 return false;
14975 case ASHIFT:
14976 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14977 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14979 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14980 if (value == 1)
14982 *total = COSTS_N_INSNS (ix86_cost->add);
14983 return false;
14985 if ((value == 2 || value == 3)
14986 && !TARGET_DECOMPOSE_LEA
14987 && ix86_cost->lea <= ix86_cost->shift_const)
14989 *total = COSTS_N_INSNS (ix86_cost->lea);
14990 return false;
14993 /* FALLTHRU */
14995 case ROTATE:
14996 case ASHIFTRT:
14997 case LSHIFTRT:
14998 case ROTATERT:
14999 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15001 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15003 if (INTVAL (XEXP (x, 1)) > 32)
15004 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
15005 else
15006 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
15008 else
15010 if (GET_CODE (XEXP (x, 1)) == AND)
15011 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
15012 else
15013 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
15016 else
15018 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15019 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15020 else
15021 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15023 return false;
15025 case MULT:
15026 if (FLOAT_MODE_P (mode))
15027 *total = COSTS_N_INSNS (ix86_cost->fmul);
15028 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15030 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15031 int nbits;
15033 for (nbits = 0; value != 0; value >>= 1)
15034 nbits++;
15036 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15037 + nbits * ix86_cost->mult_bit);
15039 else
15041 /* This is arbitrary */
15042 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15043 + 7 * ix86_cost->mult_bit);
15045 return false;
15047 case DIV:
15048 case UDIV:
15049 case MOD:
15050 case UMOD:
15051 if (FLOAT_MODE_P (mode))
15052 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15053 else
15054 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15055 return false;
15057 case PLUS:
15058 if (FLOAT_MODE_P (mode))
15059 *total = COSTS_N_INSNS (ix86_cost->fadd);
15060 else if (!TARGET_DECOMPOSE_LEA
15061 && GET_MODE_CLASS (mode) == MODE_INT
15062 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15064 if (GET_CODE (XEXP (x, 0)) == PLUS
15065 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15066 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15067 && CONSTANT_P (XEXP (x, 1)))
15069 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15070 if (val == 2 || val == 4 || val == 8)
15072 *total = COSTS_N_INSNS (ix86_cost->lea);
15073 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15074 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15075 outer_code);
15076 *total += rtx_cost (XEXP (x, 1), outer_code);
15077 return true;
15080 else if (GET_CODE (XEXP (x, 0)) == MULT
15081 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15083 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15084 if (val == 2 || val == 4 || val == 8)
15086 *total = COSTS_N_INSNS (ix86_cost->lea);
15087 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15088 *total += rtx_cost (XEXP (x, 1), outer_code);
15089 return true;
15092 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15094 *total = COSTS_N_INSNS (ix86_cost->lea);
15095 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15096 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15097 *total += rtx_cost (XEXP (x, 1), outer_code);
15098 return true;
15101 /* FALLTHRU */
15103 case MINUS:
15104 if (FLOAT_MODE_P (mode))
15106 *total = COSTS_N_INSNS (ix86_cost->fadd);
15107 return false;
15109 /* FALLTHRU */
15111 case AND:
15112 case IOR:
15113 case XOR:
15114 if (!TARGET_64BIT && mode == DImode)
15116 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15117 + (rtx_cost (XEXP (x, 0), outer_code)
15118 << (GET_MODE (XEXP (x, 0)) != DImode))
15119 + (rtx_cost (XEXP (x, 1), outer_code)
15120 << (GET_MODE (XEXP (x, 1)) != DImode)));
15121 return true;
15123 /* FALLTHRU */
15125 case NEG:
15126 if (FLOAT_MODE_P (mode))
15128 *total = COSTS_N_INSNS (ix86_cost->fchs);
15129 return false;
15131 /* FALLTHRU */
15133 case NOT:
15134 if (!TARGET_64BIT && mode == DImode)
15135 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15136 else
15137 *total = COSTS_N_INSNS (ix86_cost->add);
15138 return false;
15140 case FLOAT_EXTEND:
15141 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15142 *total = 0;
15143 return false;
15145 case ABS:
15146 if (FLOAT_MODE_P (mode))
15147 *total = COSTS_N_INSNS (ix86_cost->fabs);
15148 return false;
15150 case SQRT:
15151 if (FLOAT_MODE_P (mode))
15152 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15153 return false;
15155 case UNSPEC:
15156 if (XINT (x, 1) == UNSPEC_TP)
15157 *total = 0;
15158 return false;
15160 default:
15161 return false;
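/* Illustrative note (editorial addition): when lea is not being decomposed,
   the PLUS case above prices an address-shaped expression such as

       (plus (mult (reg) (const_int 4)) (reg))

   as a single lea plus the costs of its sub-operands, rather than as a
   separate shift and add.  */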
15165 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15166 static void
15167 ix86_svr3_asm_out_constructor (symbol, priority)
15168 rtx symbol;
15169 int priority ATTRIBUTE_UNUSED;
15171 init_section ();
15172 fputs ("\tpushl $", asm_out_file);
15173 assemble_name (asm_out_file, XSTR (symbol, 0));
15174 fputc ('\n', asm_out_file);
15176 #endif
15178 #if TARGET_MACHO
15180 static int current_machopic_label_num;
15182 /* Given a symbol name and its associated stub, write out the
15183 definition of the stub. */
15185 void
15186 machopic_output_stub (file, symb, stub)
15187 FILE *file;
15188 const char *symb, *stub;
15190 unsigned int length;
15191 char *binder_name, *symbol_name, lazy_ptr_name[32];
15192 int label = ++current_machopic_label_num;
15194 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15195 symb = (*targetm.strip_name_encoding) (symb);
15197 length = strlen (stub);
15198 binder_name = alloca (length + 32);
15199 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15201 length = strlen (symb);
15202 symbol_name = alloca (length + 32);
15203 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15205 sprintf (lazy_ptr_name, "L%d$lz", label);
15207 if (MACHOPIC_PURE)
15208 machopic_picsymbol_stub_section ();
15209 else
15210 machopic_symbol_stub_section ();
15212 fprintf (file, "%s:\n", stub);
15213 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15215 if (MACHOPIC_PURE)
15217 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15218 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15219 fprintf (file, "\tjmp %%edx\n");
15221 else
15222 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15224 fprintf (file, "%s:\n", binder_name);
15226 if (MACHOPIC_PURE)
15228 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15229 fprintf (file, "\tpushl %%eax\n");
15231 else
15232 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15234 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15236 machopic_lazy_symbol_ptr_section ();
15237 fprintf (file, "%s:\n", lazy_ptr_name);
15238 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15239 fprintf (file, "\t.long %s\n", binder_name);
15241 #endif /* TARGET_MACHO */
15243 /* Order the registers for the register allocator. */
15245 void
15246 x86_order_regs_for_local_alloc ()
15248 int pos = 0;
15249 int i;
15251 /* First allocate the local general purpose registers. */
15252 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15253 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15254 reg_alloc_order [pos++] = i;
15256 /* Global general purpose registers. */
15257 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15258 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15259 reg_alloc_order [pos++] = i;
15261 /* x87 registers come first in case we are doing FP math
15262 using them. */
15263 if (!TARGET_SSE_MATH)
15264 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15265 reg_alloc_order [pos++] = i;
15267 /* SSE registers. */
15268 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15269 reg_alloc_order [pos++] = i;
15270 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15271 reg_alloc_order [pos++] = i;
15273 /* x87 registers. */
15274 if (TARGET_SSE_MATH)
15275 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15276 reg_alloc_order [pos++] = i;
15278 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15279 reg_alloc_order [pos++] = i;
15281 /* Initialize the rest of the array, as we do not allocate some registers
15282 at all. */
15283 while (pos < FIRST_PSEUDO_REGISTER)
15284 reg_alloc_order [pos++] = 0;
15287 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15288 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15289 #endif
15291 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15292 struct attribute_spec.handler. */
15293 static tree
15294 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15295 tree *node;
15296 tree name;
15297 tree args ATTRIBUTE_UNUSED;
15298 int flags ATTRIBUTE_UNUSED;
15299 bool *no_add_attrs;
15301 tree *type = NULL;
15302 if (DECL_P (*node))
15304 if (TREE_CODE (*node) == TYPE_DECL)
15305 type = &TREE_TYPE (*node);
15307 else
15308 type = node;
15310 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15311 || TREE_CODE (*type) == UNION_TYPE)))
15313 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15314 *no_add_attrs = true;
15317 else if ((is_attribute_p ("ms_struct", name)
15318 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15319 || ((is_attribute_p ("gcc_struct", name)
15320 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15322 warning ("`%s' incompatible attribute ignored",
15323 IDENTIFIER_POINTER (name));
15324 *no_add_attrs = true;
15327 return NULL_TREE;
15330 static bool
15331 ix86_ms_bitfield_layout_p (record_type)
15332 tree record_type;
15334 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15335 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15336 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15339 /* Returns an expression indicating where the this parameter is
15340 located on entry to the FUNCTION. */
15342 static rtx
15343 x86_this_parameter (function)
15344 tree function;
15346 tree type = TREE_TYPE (function);
15348 if (TARGET_64BIT)
15350 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15351 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15354 if (ix86_fntype_regparm (type) > 0)
15356 tree parm;
15358 parm = TYPE_ARG_TYPES (type);
15359 /* Figure out whether or not the function has a variable number of
15360 arguments. */
15361 for (; parm; parm = TREE_CHAIN (parm))
15362 if (TREE_VALUE (parm) == void_type_node)
15363 break;
15364 /* If not, the this parameter is in %eax. */
15365 if (parm)
15366 return gen_rtx_REG (SImode, 0);
15369 if (aggregate_value_p (TREE_TYPE (type)))
15370 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15371 else
15372 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
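/* Illustrative summary of the 32-bit cases above (editorial addition):
   without regparm, `this' is the first stack argument and is found at
   4(%esp) on entry; when the function returns an aggregate in memory, the
   hidden return-slot pointer occupies that slot and `this' moves to
   8(%esp).  With regparm and a fixed argument list, `this' arrives in
   %eax.  */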
15375 /* Determine whether x86_output_mi_thunk can succeed. */
15377 static bool
15378 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15379 tree thunk ATTRIBUTE_UNUSED;
15380 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15381 HOST_WIDE_INT vcall_offset;
15382 tree function;
15384 /* 64-bit can handle anything. */
15385 if (TARGET_64BIT)
15386 return true;
15388 /* For 32-bit, everything's fine if we have one free register. */
15389 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15390 return true;
15392 /* Need a free register for vcall_offset. */
15393 if (vcall_offset)
15394 return false;
15396 /* Need a free register for GOT references. */
15397 if (flag_pic && !(*targetm.binds_local_p) (function))
15398 return false;
15400 /* Otherwise ok. */
15401 return true;
15404 /* Output the assembler code for a thunk function. THUNK_DECL is the
15405 declaration for the thunk function itself, FUNCTION is the decl for
15406 the target function. DELTA is an immediate constant offset to be
15407 added to THIS. If VCALL_OFFSET is nonzero, the word at
15408 *(*this + vcall_offset) should be added to THIS. */
15410 static void
15411 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15412 FILE *file ATTRIBUTE_UNUSED;
15413 tree thunk ATTRIBUTE_UNUSED;
15414 HOST_WIDE_INT delta;
15415 HOST_WIDE_INT vcall_offset;
15416 tree function;
15418 rtx xops[3];
15419 rtx this = x86_this_parameter (function);
15420 rtx this_reg, tmp;
15422 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15423 pull it in now and let DELTA benefit. */
15424 if (REG_P (this))
15425 this_reg = this;
15426 else if (vcall_offset)
15428 /* Put the this parameter into %eax. */
15429 xops[0] = this;
15430 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15431 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15433 else
15434 this_reg = NULL_RTX;
15436 /* Adjust the this parameter by a fixed constant. */
15437 if (delta)
15439 xops[0] = GEN_INT (delta);
15440 xops[1] = this_reg ? this_reg : this;
15441 if (TARGET_64BIT)
15443 if (!x86_64_general_operand (xops[0], DImode))
15445 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15446 xops[1] = tmp;
15447 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15448 xops[0] = tmp;
15449 xops[1] = this;
15451 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15453 else
15454 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15457 /* Adjust the this parameter by a value stored in the vtable. */
15458 if (vcall_offset)
15460 if (TARGET_64BIT)
15461 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15462 else
15463 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15465 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15466 xops[1] = tmp;
15467 if (TARGET_64BIT)
15468 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15469 else
15470 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15472 /* Adjust the this parameter. */
15473 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15474 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15476 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15477 xops[0] = GEN_INT (vcall_offset);
15478 xops[1] = tmp2;
15479 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15480 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15482 xops[1] = this_reg;
15483 if (TARGET_64BIT)
15484 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15485 else
15486 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15489 /* If necessary, drop THIS back to its stack slot. */
15490 if (this_reg && this_reg != this)
15492 xops[0] = this_reg;
15493 xops[1] = this;
15494 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15497 xops[0] = DECL_RTL (function);
15498 if (TARGET_64BIT)
15500 if (!flag_pic || (*targetm.binds_local_p) (function))
15501 output_asm_insn ("jmp\t%P0", xops);
15502 else
15504 tmp = XEXP (xops[0], 0);
15505 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15506 tmp = gen_rtx_CONST (Pmode, tmp);
15507 tmp = gen_rtx_MEM (QImode, tmp);
15508 xops[0] = tmp;
15509 output_asm_insn ("jmp\t%A0", xops);
15512 else
15514 if (!flag_pic || (*targetm.binds_local_p) (function))
15515 output_asm_insn ("jmp\t%P0", xops);
15516 else
15517 #if TARGET_MACHO
15518 if (TARGET_MACHO)
15520 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15521 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15522 tmp = gen_rtx_MEM (QImode, tmp);
15523 xops[0] = tmp;
15524 output_asm_insn ("jmp\t%0", xops);
15526 else
15527 #endif /* TARGET_MACHO */
15529 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15530 output_set_got (tmp);
15532 xops[1] = tmp;
15533 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15534 output_asm_insn ("jmp\t{*}%1", xops);
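/* Sketch of the emitted code (editorial illustration) for the simple 32-bit
   non-PIC case where only DELTA is set and `this' lives on the stack:

       addl   $delta, 4(%esp)   ; adjust the incoming `this'
       jmp    target            ; tail call the real function

   When VCALL_OFFSET is nonzero, the code above additionally loads *this
   (the vtable pointer) into a scratch register and adds the word found
   vcall_offset bytes into the vtable before jumping.  */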
15539 static void
15540 x86_file_start ()
15542 default_file_start ();
15543 if (X86_FILE_START_VERSION_DIRECTIVE)
15544 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15545 if (X86_FILE_START_FLTUSED)
15546 fputs ("\t.global\t__fltused\n", asm_out_file);
15547 if (ix86_asm_dialect == ASM_INTEL)
15548 fputs ("\t.intel_syntax\n", asm_out_file);
15552 x86_field_alignment (field, computed)
15553 tree field;
15554 int computed;
15556 enum machine_mode mode;
15557 tree type = TREE_TYPE (field);
15559 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15560 return computed;
15561 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15562 ? get_inner_array_type (type) : type);
15563 if (mode == DFmode || mode == DCmode
15564 || GET_MODE_CLASS (mode) == MODE_INT
15565 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15566 return MIN (32, computed);
15567 return computed;
15570 /* Output assembler code to FILE to increment profiler label # LABELNO
15571 for profiling a function entry. */
15572 void
15573 x86_function_profiler (file, labelno)
15574 FILE *file;
15575 int labelno ATTRIBUTE_UNUSED;
15577 if (TARGET_64BIT)
15578 if (flag_pic)
15580 #ifndef NO_PROFILE_COUNTERS
15581 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
15582 #endif
15583 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15585 else
15587 #ifndef NO_PROFILE_COUNTERS
15588 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15589 #endif
15590 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15592 else if (flag_pic)
15594 #ifndef NO_PROFILE_COUNTERS
15595 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15596 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15597 #endif
15598 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15600 else
15602 #ifndef NO_PROFILE_COUNTERS
15603 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15604 PROFILE_COUNT_REGISTER);
15605 #endif
15606 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
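/* Example of the 32-bit non-PIC output above (editorial illustration; the
   label prefix, counter register and mcount symbol are target dependent,
   and the first insn disappears when NO_PROFILE_COUNTERS is defined):

       movl   $LP0, %edx        ; PROFILE_COUNT_REGISTER
       call   mcount
*/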
15610 /* We don't have exact information about the insn sizes, but we may assume
15611 quite safely that we are informed about all 1-byte insns and memory
15612 address sizes. This is enough to eliminate unnecessary padding in
15613 99% of cases. */
15615 static int
15616 min_insn_size (insn)
15617 rtx insn;
15619 int l = 0;
15621 if (!INSN_P (insn) || !active_insn_p (insn))
15622 return 0;
15624 /* Discard the alignments we have emitted and jump table insns. */
15625 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15626 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15627 return 0;
15628 if (GET_CODE (insn) == JUMP_INSN
15629 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15630 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15631 return 0;
15633 /* Important case - calls are always 5 bytes.
15634 It is common to have many calls in a row. */
15635 if (GET_CODE (insn) == CALL_INSN
15636 && symbolic_reference_mentioned_p (PATTERN (insn))
15637 && !SIBLING_CALL_P (insn))
15638 return 5;
15639 if (get_attr_length (insn) <= 1)
15640 return 1;
15642 /* For normal instructions we may rely on the sizes of addresses
15643 and the presence of a symbol to require 4 bytes of encoding.
15644 This is not the case for jumps, where references are PC relative. */
15645 if (GET_CODE (insn) != JUMP_INSN)
15647 l = get_attr_length_address (insn);
15648 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15649 l = 4;
15651 if (l)
15652 return 1+l;
15653 else
15654 return 2;
15657 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15658 16-byte window. */
15660 static void
15661 k8_avoid_jump_misspredicts ()
15663 rtx insn, start = get_insns ();
15664 int nbytes = 0, njumps = 0;
15665 int isjump = 0;
15667 /* Look for all minimal intervals of instructions containing 4 jumps.
15668 The intervals are bounded by START and INSN. NBYTES is the total
15669 size of the instructions in the interval, including INSN and not
15670 including START. When NBYTES is smaller than 16 bytes, it is possible
15671 that the ends of START and INSN fall into the same 16-byte window.
15673 The smallest offset in the window at which INSN can start is the case where
15674 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15675 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
15677 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15680 nbytes += min_insn_size (insn);
15681 if (rtl_dump_file)
15682 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15683 INSN_UID (insn), min_insn_size (insn));
15684 if ((GET_CODE (insn) == JUMP_INSN
15685 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15686 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15687 || GET_CODE (insn) == CALL_INSN)
15688 njumps++;
15689 else
15690 continue;
15692 while (njumps > 3)
15694 start = NEXT_INSN (start);
15695 if ((GET_CODE (start) == JUMP_INSN
15696 && GET_CODE (PATTERN (start)) != ADDR_VEC
15697 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15698 || GET_CODE (start) == CALL_INSN)
15699 njumps--, isjump = 1;
15700 else
15701 isjump = 0;
15702 nbytes -= min_insn_size (start);
15704 if (njumps < 0)
15705 abort ();
15706 if (rtl_dump_file)
15707 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15708 INSN_UID (start), INSN_UID (insn), nbytes);
15710 if (njumps == 3 && isjump && nbytes < 16)
15712 int padsize = 15 - nbytes + min_insn_size (insn);
15714 if (rtl_dump_file)
15715 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15716 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
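/* Worked example (editorial illustration): if the loop above decides to pad
   and NBYTES == 12 while INSN itself is 2 bytes long, it emits
   gen_align (GEN_INT (15 - 12 + 2)), i.e. a 5 byte pad, pushing INSN out of
   the 16-byte window that begins where START ends.  */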
15721 /* Implement machine specific optimizations.
15722 At the moment we implement a single transformation: AMD Athlon works faster
15723 when RET is not the destination of a conditional jump or directly preceded
15724 by another jump instruction. We avoid the penalty by inserting a NOP just
15725 before such RET instructions. */
15726 static void
15727 ix86_reorg ()
15729 edge e;
15731 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15732 return;
15733 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15735 basic_block bb = e->src;
15736 rtx ret = bb->end;
15737 rtx prev;
15738 bool replace = false;
15740 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15741 || !maybe_hot_bb_p (bb))
15742 continue;
15743 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15744 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15745 break;
15746 if (prev && GET_CODE (prev) == CODE_LABEL)
15748 edge e;
15749 for (e = bb->pred; e; e = e->pred_next)
15750 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15751 && !(e->flags & EDGE_FALLTHRU))
15752 replace = true;
15754 if (!replace)
15756 prev = prev_active_insn (ret);
15757 if (prev
15758 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15759 || GET_CODE (prev) == CALL_INSN))
15760 replace = true;
15761 /* Empty functions get a branch mispredict even when the jump destination
15762 is not visible to us. */
15763 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15764 replace = true;
15766 if (replace)
15768 emit_insn_before (gen_return_internal_long (), ret);
15769 delete_insn (ret);
15772 k8_avoid_jump_misspredicts ();
15775 /* Return nonzero when a QImode register that must be represented via a REX
15776 prefix is used. */
15777 bool
15778 x86_extended_QIreg_mentioned_p (insn)
15779 rtx insn;
15781 int i;
15782 extract_insn_cached (insn);
15783 for (i = 0; i < recog_data.n_operands; i++)
15784 if (REG_P (recog_data.operand[i])
15785 && REGNO (recog_data.operand[i]) >= 4)
15786 return true;
15787 return false;
15790 /* Return nonzero when P points to a register encoded via a REX prefix.
15791 Called via for_each_rtx. */
15792 static int
15793 extended_reg_mentioned_1 (p, data)
15794 rtx *p;
15795 void *data ATTRIBUTE_UNUSED;
15797 unsigned int regno;
15798 if (!REG_P (*p))
15799 return 0;
15800 regno = REGNO (*p);
15801 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15804 /* Return true when INSN mentions a register that must be encoded using a
15805 REX prefix. */
15806 bool
15807 x86_extended_reg_mentioned_p (insn)
15808 rtx insn;
15810 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15813 /* Generate an unsigned DImode to FP conversion. This is the same code
15814 optabs would emit if we didn't have TFmode patterns. */
15816 void
15817 x86_emit_floatuns (operands)
15818 rtx operands[2];
15820 rtx neglab, donelab, i0, i1, f0, in, out;
15821 enum machine_mode mode;
15823 out = operands[0];
15824 in = force_reg (DImode, operands[1]);
15825 mode = GET_MODE (out);
15826 neglab = gen_label_rtx ();
15827 donelab = gen_label_rtx ();
15828 i1 = gen_reg_rtx (Pmode);
15829 f0 = gen_reg_rtx (mode);
15831 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15833 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15834 emit_jump_insn (gen_jump (donelab));
15835 emit_barrier ();
15837 emit_label (neglab);
15839 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15840 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15841 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15842 expand_float (f0, i0, 0);
15843 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15845 emit_label (donelab);
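/* C-level sketch of the sequence emitted above (editorial illustration; the
   real code works on RTL).  Nonnegative inputs take the signed conversion
   directly; inputs with the top bit set are halved with the lost low bit
   folded back in, so the final rounding is unaffected, converted, and then
   doubled:

       double u64_to_fp (unsigned long long x)
       {
         unsigned long long half;
         if ((long long) x >= 0)
           return (double) (long long) x;
         half = (x >> 1) | (x & 1);
         return (double) (long long) half + (double) (long long) half;
       }
*/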
15848 /* Return true if we do not know how to pass TYPE solely in registers. */
15849 bool
15850 ix86_must_pass_in_stack (mode, type)
15851 enum machine_mode mode;
15852 tree type;
15854 if (default_must_pass_in_stack (mode, type))
15855 return true;
15856 return (!TARGET_64BIT && type && mode == TImode);
15859 #include "gt-i386.h"