gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
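/* Editorial note, not part of the original source: the five-entry cost
   arrays in the processor_costs tables below are indexed with this macro,
   so the columns are {QImode, HImode, SImode, DImode, other}.  For
   example, the SImode column of a multiply or divide cost row is selected
   by MODE_INDEX (SImode) == 2.  */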
62 /* Processor costs (relative to an add) */
63 static const
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
98 1, /* Branch cost */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
120 3, /* MOVE_RATIO */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
143 1, /* Branch cost */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
152 static const
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
164 3, /* MOVE_RATIO */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
187 1, /* Branch cost */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
196 static const
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
208 6, /* MOVE_RATIO */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
231 2, /* Branch cost */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
240 static const
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
275 2, /* Branch cost */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
284 static const
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
296 4, /* MOVE_RATIO */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
319 1, /* Branch cost */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
328 static const
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
340 9, /* MOVE_RATIO */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
363 2, /* Branch cost */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
372 static const
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
416 static const
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
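/* Editorial note, not part of the original source: the &pentium_cost
   default above is only a placeholder; override_options below replaces it
   with &size_cost when optimizing for size, or with the table picked by
   -mtune via processor_target_table.  */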
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just the lower part of
516 scalar values in the proper format, leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
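/* Editorial sketch, not part of the original source: each bitmask above is
   consulted by testing it against a mask of the selected processor, e.g.

     if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
       ...
     if (x86_accumulate_outgoing_args & TUNEMASK)
       ...

   as done in override_options below; TUNEMASK is presumably defined in
   i386.h as (1 << ix86_tune).  */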
528 /* If the average insn count for a single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
530 epilogue code. */
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
543 /* ax, dx, cx, bx */
544 AREG, DREG, CREG, BREG,
545 /* si, di, bp, sp */
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
547 /* FP registers */
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
550 /* arg pointer */
551 NON_Q_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
555 SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
557 MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
561 SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
644 numbers.
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
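/* Editorial note, not part of the original source: with the usual 64-bit
   values (REGPARM_MAX == 6, SSE_REGPARM_MAX == 8, UNITS_PER_WORD == 8)
   this evaluates to 6*8 + 8*16 = 176 bytes of va_arg register save area.  */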
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
679 unsigned short mode;
680 unsigned short n;
681 rtx rtl;
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
688 [arguments]
689 <- ARG_POINTER
690 saved pc
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
694 [saved regs]
696 [padding1] \
698 [va_arg registers] (
699 > to_allocate <- FRAME_POINTER
700 [frame] (
702 [padding2] /
704 struct ix86_frame
706 int nregs;
707 int padding1;
708 int va_arg_size;
709 HOST_WIDE_INT frame;
710 int padding2;
711 int outgoing_arguments_size;
712 int red_zone_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
729 /* Parsed value. */
730 enum cmodel ix86_cmodel;
731 /* Asm dialect. */
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
734 /* TLS dialect. */
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* True if the SSE prefetch instruction is not a NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
758 int ix86_regparm;
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
789 int, int, FILE *);
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
795 rtx *);
796 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
797 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
798 enum machine_mode);
799 static rtx get_thread_pointer (int);
800 static rtx legitimize_tls_address (rtx, enum tls_model, int);
801 static void get_pc_thunk_name (char [32], unsigned int);
802 static rtx gen_push (rtx);
803 static int memory_address_length (rtx addr);
804 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
805 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
806 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
807 static void ix86_dump_ppro_packet (FILE *);
808 static void ix86_reorder_insn (rtx *, rtx *);
809 static struct machine_function * ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
814 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
816 static void ix86_sched_reorder_ppro (rtx *, rtx *);
817 static HOST_WIDE_INT ix86_GOT_alias_set (void);
818 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
819 static rtx ix86_expand_aligntest (rtx, int);
820 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
821 static int ix86_issue_rate (void);
822 static int ix86_adjust_cost (rtx, rtx, rtx, int);
823 static void ix86_sched_init (FILE *, int, int);
824 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
825 static int ix86_variable_issue (FILE *, int, rtx, int);
826 static int ia32_use_dfa_pipeline_interface (void);
827 static int ia32_multipass_dfa_lookahead (void);
828 static void ix86_init_mmx_sse_builtins (void);
829 static rtx x86_this_parameter (tree);
830 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree);
832 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
833 static void x86_file_start (void);
834 static void ix86_reorg (void);
835 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
836 static tree ix86_build_builtin_va_list (void);
837 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
838 tree, int *, int);
840 struct ix86_address
842 rtx base, index, disp;
843 HOST_WIDE_INT scale;
844 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
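/* Editorial example, not part of the original source: for the address in
   "movl 8(%ebx,%ecx,4), %eax", i.e. the RTX
   (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx)) (const_int 8)),
   ix86_decompose_address fills in base = %ebx, index = %ecx, scale = 4,
   disp = (const_int 8) and seg = SEG_DEFAULT; an %fs: or %gs: override
   would select SEG_FS or SEG_GS instead.  */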
847 static int ix86_decompose_address (rtx, struct ix86_address *);
848 static int ix86_address_cost (rtx);
849 static bool ix86_cannot_force_const_mem (rtx);
850 static rtx ix86_delegitimize_address (rtx);
852 struct builtin_description;
853 static rtx ix86_expand_sse_comi (const struct builtin_description *,
854 tree, rtx);
855 static rtx ix86_expand_sse_compare (const struct builtin_description *,
856 tree, rtx);
857 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
858 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
859 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
860 static rtx ix86_expand_store_builtin (enum insn_code, tree);
861 static rtx safe_vector_operand (rtx, enum machine_mode);
862 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
863 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
864 enum rtx_code *, enum rtx_code *);
865 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
866 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
867 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
868 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
869 static int ix86_fp_comparison_cost (enum rtx_code code);
870 static unsigned int ix86_select_alt_pic_regnum (void);
871 static int ix86_save_reg (unsigned int, int);
872 static void ix86_compute_frame_layout (struct ix86_frame *);
873 static int ix86_comp_type_attributes (tree, tree);
874 static int ix86_function_regparm (tree, tree);
875 const struct attribute_spec ix86_attribute_table[];
876 static bool ix86_function_ok_for_sibcall (tree, tree);
877 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
878 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
879 static int ix86_value_regno (enum machine_mode);
880 static bool contains_128bit_aligned_vector_p (tree);
881 static bool ix86_ms_bitfield_layout_p (tree);
882 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
883 static int extended_reg_mentioned_1 (rtx *, void *);
884 static bool ix86_rtx_costs (rtx, int, int, int *);
885 static int min_insn_size (rtx);
886 static void k8_avoid_jump_misspredicts (void);
888 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
889 static void ix86_svr3_asm_out_constructor (rtx, int);
890 #endif
892 /* Register class used for passing a given 64-bit part of an argument.
893 These represent classes as documented by the psABI, with the exception
894 of the SSESF and SSEDF classes, which are basically the SSE class, except
895 that gcc uses an SF or DFmode move instead of a DImode move to avoid
896 reformatting penalties.
897 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
898 whenever possible (i.e. when the upper half of the eightbyte is padding). */
900 enum x86_64_reg_class
902 X86_64_NO_CLASS,
903 X86_64_INTEGER_CLASS,
904 X86_64_INTEGERSI_CLASS,
905 X86_64_SSE_CLASS,
906 X86_64_SSESF_CLASS,
907 X86_64_SSEDF_CLASS,
908 X86_64_SSEUP_CLASS,
909 X86_64_X87_CLASS,
910 X86_64_X87UP_CLASS,
911 X86_64_MEMORY_CLASS
913 static const char * const x86_64_reg_class_name[] =
914 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
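/* Editorial example, not part of the original source, following the SysV
   x86-64 ABI classification: struct { double d; int i; } occupies two
   eightbytes; the first is classified X86_64_SSEDF_CLASS (passed in an SSE
   register using a DFmode move) and the second X86_64_INTEGERSI_CLASS
   (passed in an integer register using an SImode move).  */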
916 #define MAX_CLASSES 4
917 static int classify_argument (enum machine_mode, tree,
918 enum x86_64_reg_class [MAX_CLASSES], int);
919 static int examine_argument (enum machine_mode, tree, int, int *, int *);
920 static rtx construct_container (enum machine_mode, tree, int, int, int,
921 const int *, int);
922 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
923 enum x86_64_reg_class);
925 /* Table of constants used by fldpi, fldln2, etc.... */
926 static REAL_VALUE_TYPE ext_80387_constants_table [5];
927 static bool ext_80387_constants_init = 0;
928 static void init_ext_80387_constants (void);
930 /* Initialize the GCC target structure. */
931 #undef TARGET_ATTRIBUTE_TABLE
932 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
933 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
934 # undef TARGET_MERGE_DECL_ATTRIBUTES
935 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
936 #endif
938 #undef TARGET_COMP_TYPE_ATTRIBUTES
939 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
941 #undef TARGET_INIT_BUILTINS
942 #define TARGET_INIT_BUILTINS ix86_init_builtins
944 #undef TARGET_EXPAND_BUILTIN
945 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
947 #undef TARGET_ASM_FUNCTION_EPILOGUE
948 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
950 #undef TARGET_ASM_OPEN_PAREN
951 #define TARGET_ASM_OPEN_PAREN ""
952 #undef TARGET_ASM_CLOSE_PAREN
953 #define TARGET_ASM_CLOSE_PAREN ""
955 #undef TARGET_ASM_ALIGNED_HI_OP
956 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
957 #undef TARGET_ASM_ALIGNED_SI_OP
958 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
959 #ifdef ASM_QUAD
960 #undef TARGET_ASM_ALIGNED_DI_OP
961 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
962 #endif
964 #undef TARGET_ASM_UNALIGNED_HI_OP
965 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
966 #undef TARGET_ASM_UNALIGNED_SI_OP
967 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
968 #undef TARGET_ASM_UNALIGNED_DI_OP
969 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
971 #undef TARGET_SCHED_ADJUST_COST
972 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
973 #undef TARGET_SCHED_ISSUE_RATE
974 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
975 #undef TARGET_SCHED_VARIABLE_ISSUE
976 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
977 #undef TARGET_SCHED_INIT
978 #define TARGET_SCHED_INIT ix86_sched_init
979 #undef TARGET_SCHED_REORDER
980 #define TARGET_SCHED_REORDER ix86_sched_reorder
981 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
982 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
983 ia32_use_dfa_pipeline_interface
984 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
985 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
986 ia32_multipass_dfa_lookahead
988 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
989 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
991 #ifdef HAVE_AS_TLS
992 #undef TARGET_HAVE_TLS
993 #define TARGET_HAVE_TLS true
994 #endif
995 #undef TARGET_CANNOT_FORCE_CONST_MEM
996 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
998 #undef TARGET_DELEGITIMIZE_ADDRESS
999 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1001 #undef TARGET_MS_BITFIELD_LAYOUT_P
1002 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1004 #undef TARGET_ASM_OUTPUT_MI_THUNK
1005 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1006 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1007 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1009 #undef TARGET_ASM_FILE_START
1010 #define TARGET_ASM_FILE_START x86_file_start
1012 #undef TARGET_RTX_COSTS
1013 #define TARGET_RTX_COSTS ix86_rtx_costs
1014 #undef TARGET_ADDRESS_COST
1015 #define TARGET_ADDRESS_COST ix86_address_cost
1017 #undef TARGET_FIXED_CONDITION_CODE_REGS
1018 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1019 #undef TARGET_CC_MODES_COMPATIBLE
1020 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1022 #undef TARGET_MACHINE_DEPENDENT_REORG
1023 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1025 #undef TARGET_BUILD_BUILTIN_VA_LIST
1026 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1028 #undef TARGET_PROMOTE_PROTOTYPES
1029 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1031 #undef TARGET_STRUCT_VALUE_RTX
1032 #define TARGET_STRUCT_VALUE_RTX hook_rtx_tree_int_null
1034 #undef TARGET_SETUP_INCOMING_VARARGS
1035 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1037 struct gcc_target targetm = TARGET_INITIALIZER;
1039 /* The svr4 ABI for the i386 says that records and unions are returned
1040 in memory. */
1041 #ifndef DEFAULT_PCC_STRUCT_RETURN
1042 #define DEFAULT_PCC_STRUCT_RETURN 1
1043 #endif
1045 /* Sometimes certain combinations of command options do not make
1046 sense on a particular target machine. You can define a macro
1047 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1048 defined, is executed once just after all the command options have
1049 been parsed.
1051 Don't use this macro to turn on various extra optimizations for
1052 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1054 void
1055 override_options (void)
1057 int i;
1058 /* Comes from final.c -- no real reason to change it. */
1059 #define MAX_CODE_ALIGN 16
1061 static struct ptt
1063 const struct processor_costs *cost; /* Processor costs */
1064 const int target_enable; /* Target flags to enable. */
1065 const int target_disable; /* Target flags to disable. */
1066 const int align_loop; /* Default alignments. */
1067 const int align_loop_max_skip;
1068 const int align_jump;
1069 const int align_jump_max_skip;
1070 const int align_func;
1072 const processor_target_table[PROCESSOR_max] =
1074 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1075 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1076 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1077 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1078 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1079 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1080 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1081 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1084 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1085 static struct pta
1087 const char *const name; /* processor name or nickname. */
1088 const enum processor_type processor;
1089 const enum pta_flags
1091 PTA_SSE = 1,
1092 PTA_SSE2 = 2,
1093 PTA_MMX = 4,
1094 PTA_PREFETCH_SSE = 8,
1095 PTA_3DNOW = 16,
1096 PTA_3DNOW_A = 64,
1097 PTA_64BIT = 128
1098 } flags;
1100 const processor_alias_table[] =
1102 {"i386", PROCESSOR_I386, 0},
1103 {"i486", PROCESSOR_I486, 0},
1104 {"i586", PROCESSOR_PENTIUM, 0},
1105 {"pentium", PROCESSOR_PENTIUM, 0},
1106 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1107 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1108 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1109 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1110 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1111 {"i686", PROCESSOR_PENTIUMPRO, 0},
1112 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1113 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1114 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1115 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1116 PTA_MMX | PTA_PREFETCH_SSE},
1117 {"k6", PROCESSOR_K6, PTA_MMX},
1118 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1119 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1120 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1121 | PTA_3DNOW_A},
1122 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1123 | PTA_3DNOW | PTA_3DNOW_A},
1124 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1125 | PTA_3DNOW_A | PTA_SSE},
1126 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1127 | PTA_3DNOW_A | PTA_SSE},
1128 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1129 | PTA_3DNOW_A | PTA_SSE},
1130 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1131 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1132 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1133 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1134 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1135 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1136 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1137 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1140 int const pta_size = ARRAY_SIZE (processor_alias_table);
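/* Editorial example, not part of the original source: with
   -march=athlon-xp the loops below set ix86_arch (and, absent -mtune,
   ix86_tune) to PROCESSOR_ATHLON, turn on MASK_MMX, MASK_3DNOW,
   MASK_3DNOW_A and MASK_SSE unless the user disabled them explicitly, and
   set x86_prefetch_sse because of PTA_PREFETCH_SSE.  */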
1142 /* Set the default values for switches whose default depends on TARGET_64BIT
1143 in case they weren't overwritten by command line options. */
1144 if (TARGET_64BIT)
1146 if (flag_omit_frame_pointer == 2)
1147 flag_omit_frame_pointer = 1;
1148 if (flag_asynchronous_unwind_tables == 2)
1149 flag_asynchronous_unwind_tables = 1;
1150 if (flag_pcc_struct_return == 2)
1151 flag_pcc_struct_return = 0;
1153 else
1155 if (flag_omit_frame_pointer == 2)
1156 flag_omit_frame_pointer = 0;
1157 if (flag_asynchronous_unwind_tables == 2)
1158 flag_asynchronous_unwind_tables = 0;
1159 if (flag_pcc_struct_return == 2)
1160 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1163 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1164 SUBTARGET_OVERRIDE_OPTIONS;
1165 #endif
1167 if (!ix86_tune_string && ix86_arch_string)
1168 ix86_tune_string = ix86_arch_string;
1169 if (!ix86_tune_string)
1170 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1171 if (!ix86_arch_string)
1172 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1174 if (ix86_cmodel_string != 0)
1176 if (!strcmp (ix86_cmodel_string, "small"))
1177 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1178 else if (flag_pic)
1179 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1180 else if (!strcmp (ix86_cmodel_string, "32"))
1181 ix86_cmodel = CM_32;
1182 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1183 ix86_cmodel = CM_KERNEL;
1184 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1185 ix86_cmodel = CM_MEDIUM;
1186 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1187 ix86_cmodel = CM_LARGE;
1188 else
1189 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1191 else
1193 ix86_cmodel = CM_32;
1194 if (TARGET_64BIT)
1195 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1197 if (ix86_asm_string != 0)
1199 if (!strcmp (ix86_asm_string, "intel"))
1200 ix86_asm_dialect = ASM_INTEL;
1201 else if (!strcmp (ix86_asm_string, "att"))
1202 ix86_asm_dialect = ASM_ATT;
1203 else
1204 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1206 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1207 error ("code model `%s' not supported in the %s bit mode",
1208 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1209 if (ix86_cmodel == CM_LARGE)
1210 sorry ("code model `large' not supported yet");
1211 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1212 sorry ("%i-bit mode not compiled in",
1213 (target_flags & MASK_64BIT) ? 64 : 32);
1215 for (i = 0; i < pta_size; i++)
1216 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1218 ix86_arch = processor_alias_table[i].processor;
1219 /* Default cpu tuning to the architecture. */
1220 ix86_tune = ix86_arch;
1221 if (processor_alias_table[i].flags & PTA_MMX
1222 && !(target_flags_explicit & MASK_MMX))
1223 target_flags |= MASK_MMX;
1224 if (processor_alias_table[i].flags & PTA_3DNOW
1225 && !(target_flags_explicit & MASK_3DNOW))
1226 target_flags |= MASK_3DNOW;
1227 if (processor_alias_table[i].flags & PTA_3DNOW_A
1228 && !(target_flags_explicit & MASK_3DNOW_A))
1229 target_flags |= MASK_3DNOW_A;
1230 if (processor_alias_table[i].flags & PTA_SSE
1231 && !(target_flags_explicit & MASK_SSE))
1232 target_flags |= MASK_SSE;
1233 if (processor_alias_table[i].flags & PTA_SSE2
1234 && !(target_flags_explicit & MASK_SSE2))
1235 target_flags |= MASK_SSE2;
1236 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1237 x86_prefetch_sse = true;
1238 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1239 error ("CPU you selected does not support x86-64 instruction set");
1240 break;
1243 if (i == pta_size)
1244 error ("bad value (%s) for -march= switch", ix86_arch_string);
1246 for (i = 0; i < pta_size; i++)
1247 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1249 ix86_tune = processor_alias_table[i].processor;
1250 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1251 error ("CPU you selected does not support x86-64 instruction set");
1252 break;
1254 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1255 x86_prefetch_sse = true;
1256 if (i == pta_size)
1257 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1259 if (optimize_size)
1260 ix86_cost = &size_cost;
1261 else
1262 ix86_cost = processor_target_table[ix86_tune].cost;
1263 target_flags |= processor_target_table[ix86_tune].target_enable;
1264 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1266 /* Arrange to set up i386_stack_locals for all functions. */
1267 init_machine_status = ix86_init_machine_status;
1269 /* Validate -mregparm= value. */
1270 if (ix86_regparm_string)
1272 i = atoi (ix86_regparm_string);
1273 if (i < 0 || i > REGPARM_MAX)
1274 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1275 else
1276 ix86_regparm = i;
1278 else
1279 if (TARGET_64BIT)
1280 ix86_regparm = REGPARM_MAX;
1282 /* If the user has provided any of the -malign-* options,
1283 warn and use that value only if -falign-* is not set.
1284 Remove this code in GCC 3.2 or later. */
1285 if (ix86_align_loops_string)
1287 warning ("-malign-loops is obsolete, use -falign-loops");
1288 if (align_loops == 0)
1290 i = atoi (ix86_align_loops_string);
1291 if (i < 0 || i > MAX_CODE_ALIGN)
1292 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1293 else
1294 align_loops = 1 << i;
1298 if (ix86_align_jumps_string)
1300 warning ("-malign-jumps is obsolete, use -falign-jumps");
1301 if (align_jumps == 0)
1303 i = atoi (ix86_align_jumps_string);
1304 if (i < 0 || i > MAX_CODE_ALIGN)
1305 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1306 else
1307 align_jumps = 1 << i;
1311 if (ix86_align_funcs_string)
1313 warning ("-malign-functions is obsolete, use -falign-functions");
1314 if (align_functions == 0)
1316 i = atoi (ix86_align_funcs_string);
1317 if (i < 0 || i > MAX_CODE_ALIGN)
1318 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1319 else
1320 align_functions = 1 << i;
1324 /* Default align_* from the processor table. */
1325 if (align_loops == 0)
1327 align_loops = processor_target_table[ix86_tune].align_loop;
1328 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1330 if (align_jumps == 0)
1332 align_jumps = processor_target_table[ix86_tune].align_jump;
1333 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1335 if (align_functions == 0)
1337 align_functions = processor_target_table[ix86_tune].align_func;
1340 /* Validate -mpreferred-stack-boundary= value, or provide default.
1341 The default of 128 bits is for Pentium III's SSE __m128, but we
1342 don't want additional code to keep the stack aligned when
1343 optimizing for code size. */
1344 ix86_preferred_stack_boundary = (optimize_size
1345 ? TARGET_64BIT ? 128 : 32
1346 : 128);
1347 if (ix86_preferred_stack_boundary_string)
1349 i = atoi (ix86_preferred_stack_boundary_string);
1350 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1351 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1352 TARGET_64BIT ? 4 : 2);
1353 else
1354 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
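/* Editorial note, not part of the original source: e.g.
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. the 16-byte default chosen above.  */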
1357 /* Validate -mbranch-cost= value, or provide default. */
1358 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1359 if (ix86_branch_cost_string)
1361 i = atoi (ix86_branch_cost_string);
1362 if (i < 0 || i > 5)
1363 error ("-mbranch-cost=%d is not between 0 and 5", i);
1364 else
1365 ix86_branch_cost = i;
1368 if (ix86_tls_dialect_string)
1370 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1371 ix86_tls_dialect = TLS_DIALECT_GNU;
1372 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1373 ix86_tls_dialect = TLS_DIALECT_SUN;
1374 else
1375 error ("bad value (%s) for -mtls-dialect= switch",
1376 ix86_tls_dialect_string);
1379 /* Keep nonleaf frame pointers. */
1380 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1381 flag_omit_frame_pointer = 1;
1383 /* If we're doing fast math, we don't care about comparison order
1384 wrt NaNs. This lets us use a shorter comparison sequence. */
1385 if (flag_unsafe_math_optimizations)
1386 target_flags &= ~MASK_IEEE_FP;
1388 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1389 since the insns won't need emulation. */
1390 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1391 target_flags &= ~MASK_NO_FANCY_MATH_387;
1393 /* Turn on SSE2 builtins for -mpni. */
1394 if (TARGET_PNI)
1395 target_flags |= MASK_SSE2;
1397 /* Turn on SSE builtins for -msse2. */
1398 if (TARGET_SSE2)
1399 target_flags |= MASK_SSE;
1401 if (TARGET_64BIT)
1403 if (TARGET_ALIGN_DOUBLE)
1404 error ("-malign-double makes no sense in the 64bit mode");
1405 if (TARGET_RTD)
1406 error ("-mrtd calling convention not supported in the 64bit mode");
1407 /* Enable by default the SSE and MMX builtins. */
1408 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1409 ix86_fpmath = FPMATH_SSE;
1411 else
1413 ix86_fpmath = FPMATH_387;
1414 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1415 when the programmer takes care to keep the stack from being destroyed. */
1416 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1417 target_flags |= MASK_NO_RED_ZONE;
1420 if (ix86_fpmath_string != 0)
1422 if (! strcmp (ix86_fpmath_string, "387"))
1423 ix86_fpmath = FPMATH_387;
1424 else if (! strcmp (ix86_fpmath_string, "sse"))
1426 if (!TARGET_SSE)
1428 warning ("SSE instruction set disabled, using 387 arithmetics");
1429 ix86_fpmath = FPMATH_387;
1431 else
1432 ix86_fpmath = FPMATH_SSE;
1434 else if (! strcmp (ix86_fpmath_string, "387,sse")
1435 || ! strcmp (ix86_fpmath_string, "sse,387"))
1437 if (!TARGET_SSE)
1439 warning ("SSE instruction set disabled, using 387 arithmetics");
1440 ix86_fpmath = FPMATH_387;
1442 else if (!TARGET_80387)
1444 warning ("387 instruction set disabled, using SSE arithmetics");
1445 ix86_fpmath = FPMATH_SSE;
1447 else
1448 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1450 else
1451 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1454 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1455 on by -msse. */
1456 if (TARGET_SSE)
1458 target_flags |= MASK_MMX;
1459 x86_prefetch_sse = true;
1462 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1463 if (TARGET_3DNOW)
1465 target_flags |= MASK_MMX;
1466 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1467 extensions it adds. */
1468 if (x86_3dnow_a & (1 << ix86_arch))
1469 target_flags |= MASK_3DNOW_A;
1471 if ((x86_accumulate_outgoing_args & TUNEMASK)
1472 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1473 && !optimize_size)
1474 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1476 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1478 char *p;
1479 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1480 p = strchr (internal_label_prefix, 'X');
1481 internal_label_prefix_len = p - internal_label_prefix;
1482 *p = '\0';
1486 void
1487 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1489 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1490 make the problem with not enough registers even worse. */
1491 #ifdef INSN_SCHEDULING
1492 if (level > 1)
1493 flag_schedule_insns = 0;
1494 #endif
1496 /* The default values of these switches depend on TARGET_64BIT,
1497 which is not known at this point. Mark these values with 2 and
1498 let the user override them. If no command line option
1499 specifies them, we will set the defaults in override_options. */
1500 if (optimize >= 1)
1501 flag_omit_frame_pointer = 2;
1502 flag_pcc_struct_return = 2;
1503 flag_asynchronous_unwind_tables = 2;
1506 /* Table of valid machine attributes. */
1507 const struct attribute_spec ix86_attribute_table[] =
1509 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1510 /* Stdcall attribute says callee is responsible for popping arguments
1511 if they are not variable. */
1512 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1513 /* Fastcall attribute says callee is responsible for popping arguments
1514 if they are not variable. */
1515 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1516 /* Cdecl attribute says the callee is a normal C declaration */
1517 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1518 /* Regparm attribute specifies how many integer arguments are to be
1519 passed in registers. */
1520 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1521 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1522 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1523 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1524 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1525 #endif
1526 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1527 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1528 { NULL, 0, 0, false, false, false, NULL }
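/* Illustrative usage of the attributes registered above (a hedged example,
   not part of GCC itself); on a 32-bit target user code may write, e.g.:

     int __attribute__ ((stdcall)) wapi_like (int a, int b);
     int __attribute__ ((fastcall)) fast_sum (int a, int b);
     int __attribute__ ((regparm (3))) reg_args (int a, int b, int c);
     struct ms_layout { char c; int i; } __attribute__ ((ms_struct));

   Each declaration is validated by the handler named in its table entry.  */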
1531 /* Decide whether we can make a sibling call to a function. DECL is the
1532 declaration of the function being targeted by the call and EXP is the
1533 CALL_EXPR representing the call. */
1535 static bool
1536 ix86_function_ok_for_sibcall (tree decl, tree exp)
1538 /* If we are generating position-independent code, we cannot sibcall
1539 optimize any indirect call, or a direct call to a global function,
1540 as the PLT requires %ebx be live. */
1541 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1542 return false;
1544 /* If we are returning floats on the 80387 register stack, we cannot
1545 make a sibcall from a function that doesn't return a float to a
1546 function that does or, conversely, from a function that does return
1547 a float to a function that doesn't; the necessary stack adjustment
1548 would not be executed. */
1549 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1550 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1551 return false;
1553 /* If this call is indirect, we'll need to be able to use a call-clobbered
1554 register for the address of the target function. Make sure that all
1555 such registers are not used for passing parameters. */
1556 if (!decl && !TARGET_64BIT)
1558 tree type;
1560 /* We're looking at the CALL_EXPR, we need the type of the function. */
1561 type = TREE_OPERAND (exp, 0); /* pointer expression */
1562 type = TREE_TYPE (type); /* pointer type */
1563 type = TREE_TYPE (type); /* function type */
1565 if (ix86_function_regparm (type, NULL) >= 3)
1567 /* ??? Need to count the actual number of registers to be used,
1568 not the possible number of registers. Fix later. */
1569 return false;
1573 /* Otherwise okay. That also includes certain types of indirect calls. */
1574 return true;
1577 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1578 arguments as in struct attribute_spec.handler. */
1579 static tree
1580 ix86_handle_cdecl_attribute (tree *node, tree name,
1581 tree args ATTRIBUTE_UNUSED,
1582 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1584 if (TREE_CODE (*node) != FUNCTION_TYPE
1585 && TREE_CODE (*node) != METHOD_TYPE
1586 && TREE_CODE (*node) != FIELD_DECL
1587 && TREE_CODE (*node) != TYPE_DECL)
1589 warning ("`%s' attribute only applies to functions",
1590 IDENTIFIER_POINTER (name));
1591 *no_add_attrs = true;
1593 else
1595 if (is_attribute_p ("fastcall", name))
1597 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1599 error ("fastcall and stdcall attributes are not compatible");
1601 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1603 error ("fastcall and regparm attributes are not compatible");
1606 else if (is_attribute_p ("stdcall", name))
1608 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1610 error ("fastcall and stdcall attributes are not compatible");
1615 if (TARGET_64BIT)
1617 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1618 *no_add_attrs = true;
1621 return NULL_TREE;
1624 /* Handle a "regparm" attribute;
1625 arguments as in struct attribute_spec.handler. */
1626 static tree
1627 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1628 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1630 if (TREE_CODE (*node) != FUNCTION_TYPE
1631 && TREE_CODE (*node) != METHOD_TYPE
1632 && TREE_CODE (*node) != FIELD_DECL
1633 && TREE_CODE (*node) != TYPE_DECL)
1635 warning ("`%s' attribute only applies to functions",
1636 IDENTIFIER_POINTER (name));
1637 *no_add_attrs = true;
1639 else
1641 tree cst;
1643 cst = TREE_VALUE (args);
1644 if (TREE_CODE (cst) != INTEGER_CST)
1646 warning ("`%s' attribute requires an integer constant argument",
1647 IDENTIFIER_POINTER (name));
1648 *no_add_attrs = true;
1650 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1652 warning ("argument to `%s' attribute larger than %d",
1653 IDENTIFIER_POINTER (name), REGPARM_MAX);
1654 *no_add_attrs = true;
1657 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1659 error ("fastcall and regparm attributes are not compatible");
1663 return NULL_TREE;
1666 /* Return 0 if the attributes for two types are incompatible, 1 if they
1667 are compatible, and 2 if they are nearly compatible (which causes a
1668 warning to be generated). */
1670 static int
1671 ix86_comp_type_attributes (tree type1, tree type2)
1673 /* Check for mismatch of non-default calling convention. */
1674 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1676 if (TREE_CODE (type1) != FUNCTION_TYPE)
1677 return 1;
1679 /* Check for mismatched fastcall types */
1680 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1681 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1682 return 0;
1684 /* Check for mismatched return types (cdecl vs stdcall). */
1685 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1686 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1687 return 0;
1688 return 1;
1691 /* Return the regparm value for a function with the indicated TYPE and DECL.
1692 DECL may be NULL when calling function indirectly
1693 or considering a libcall. */
1695 static int
1696 ix86_function_regparm (tree type, tree decl)
1698 tree attr;
1699 int regparm = ix86_regparm;
1700 bool user_convention = false;
1702 if (!TARGET_64BIT)
1704 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1705 if (attr)
1707 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1708 user_convention = true;
1711 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1713 regparm = 2;
1714 user_convention = true;
1717 /* Use register calling convention for local functions when possible. */
1718 if (!TARGET_64BIT && !user_convention && decl
1719 && flag_unit_at_a_time && !profile_flag)
1721 struct cgraph_local_info *i = cgraph_local_info (decl);
1722 if (i && i->local)
1724 /* We can't use regparm(3) for nested functions, as these use the
1725 static chain pointer as the third argument. */
1726 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1727 regparm = 2;
1728 else
1729 regparm = 3;
1733 return regparm;
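/* Worked example (a sketch, not authoritative): given the declaration

     void __attribute__ ((regparm (3))) f (int a, int b, int c);

   ix86_function_regparm returns 3 and the first three integer arguments
   travel in registers instead of on the stack.  A fastcall function gets
   regparm == 2, and a function that cgraph knows to be local (i->local)
   is promoted to regparm 3, or 2 if it needs the static chain.  */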
1736 /* Return true if EAX is live at the start of the function. Used by
1737 ix86_expand_prologue to determine if we need special help before
1738 calling allocate_stack_worker. */
1740 static bool
1741 ix86_eax_live_at_start_p (void)
1743 /* Cheat. Don't bother working forward from ix86_function_regparm
1744 to the function type to whether an actual argument is located in
1745 eax. Instead just look at cfg info, which is still close enough
1746 to correct at this point. This gives false positives for broken
1747 functions that might use uninitialized data that happens to be
1748 allocated in eax, but who cares? */
1749 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1752 /* Value is the number of bytes of arguments automatically
1753 popped when returning from a subroutine call.
1754 FUNDECL is the declaration node of the function (as a tree),
1755 FUNTYPE is the data type of the function (as a tree),
1756 or for a library call it is an identifier node for the subroutine name.
1757 SIZE is the number of bytes of arguments passed on the stack.
1759 On the 80386, the RTD insn may be used to pop them if the number
1760 of args is fixed, but if the number is variable then the caller
1761 must pop them all. RTD can't be used for library calls now
1762 because the library is compiled with the Unix compiler.
1763 Use of RTD is a selectable option, since it is incompatible with
1764 standard Unix calling sequences. If the option is not selected,
1765 the caller must always pop the args.
1767 The attribute stdcall is equivalent to RTD on a per module basis. */
1770 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1772 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1774 /* Cdecl functions override -mrtd, and never pop the stack. */
1775 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1777 /* Stdcall and fastcall functions will pop the stack if they do not
1778 take variable args. */
1779 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1780 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1781 rtd = 1;
1783 if (rtd
1784 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1785 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1786 == void_type_node)))
1787 return size;
1790 /* Lose any fake structure return argument if it is passed on the stack. */
1791 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1792 && !TARGET_64BIT)
1794 int nregs = ix86_function_regparm (funtype, fundecl);
1796 if (!nregs)
1797 return GET_MODE_SIZE (Pmode);
1800 return 0;
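/* Example of the effect above (a hedged sketch): for

     int __attribute__ ((stdcall)) f (int a, int b);

   the argument list is fixed, so ix86_return_pops_args returns SIZE
   (8 bytes here) and the callee pops its own arguments on return; a
   variadic or cdecl function returns 0 and leaves popping to the caller.  */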
1803 /* Argument support functions. */
1805 /* Return true when register may be used to pass function parameters. */
1806 bool
1807 ix86_function_arg_regno_p (int regno)
1809 int i;
1810 if (!TARGET_64BIT)
1811 return (regno < REGPARM_MAX
1812 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1813 if (SSE_REGNO_P (regno) && TARGET_SSE)
1814 return true;
1815 /* RAX is used as a hidden argument to va_arg functions. */
1816 if (!regno)
1817 return true;
1818 for (i = 0; i < REGPARM_MAX; i++)
1819 if (regno == x86_64_int_parameter_registers[i])
1820 return true;
1821 return false;
1824 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1825 for a call to a function whose data type is FNTYPE.
1826 For a library call, FNTYPE is 0. */
1828 void
1829 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1830 tree fntype, /* tree ptr for function decl */
1831 rtx libname, /* SYMBOL_REF of library name or 0 */
1832 tree fndecl)
1834 static CUMULATIVE_ARGS zero_cum;
1835 tree param, next_param;
1837 if (TARGET_DEBUG_ARG)
1839 fprintf (stderr, "\ninit_cumulative_args (");
1840 if (fntype)
1841 fprintf (stderr, "fntype code = %s, ret code = %s",
1842 tree_code_name[(int) TREE_CODE (fntype)],
1843 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1844 else
1845 fprintf (stderr, "no fntype");
1847 if (libname)
1848 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1851 *cum = zero_cum;
1853 /* Set up the number of registers to use for passing arguments. */
1854 if (fntype)
1855 cum->nregs = ix86_function_regparm (fntype, fndecl);
1856 else
1857 cum->nregs = ix86_regparm;
1858 cum->sse_nregs = SSE_REGPARM_MAX;
1859 cum->mmx_nregs = MMX_REGPARM_MAX;
1860 cum->warn_sse = true;
1861 cum->warn_mmx = true;
1862 cum->maybe_vaarg = false;
1864 /* Use ecx and edx registers if function has fastcall attribute */
1865 if (fntype && !TARGET_64BIT)
1867 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1869 cum->nregs = 2;
1870 cum->fastcall = 1;
1875 /* Determine if this function has variable arguments. If there
1876 are no variable arguments, the last argument entry is
1877 'void_type_node'. If there are variable arguments, then
1878 we won't pass anything in registers. */
1880 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1882 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1883 param != 0; param = next_param)
1885 next_param = TREE_CHAIN (param);
1886 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1888 if (!TARGET_64BIT)
1890 cum->nregs = 0;
1891 cum->sse_nregs = 0;
1892 cum->mmx_nregs = 0;
1893 cum->warn_sse = 0;
1894 cum->warn_mmx = 0;
1895 cum->fastcall = 0;
1897 cum->maybe_vaarg = true;
1901 if ((!fntype && !libname)
1902 || (fntype && !TYPE_ARG_TYPES (fntype)))
1903 cum->maybe_vaarg = 1;
1905 if (TARGET_DEBUG_ARG)
1906 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1908 return;
1911 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1912 The goal of this code is to classify each 8 bytes of the incoming argument
1913 by register class and assign registers accordingly. */
1915 /* Return the union class of CLASS1 and CLASS2.
1916 See the x86-64 PS ABI for details. */
1918 static enum x86_64_reg_class
1919 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1921 /* Rule #1: If both classes are equal, this is the resulting class. */
1922 if (class1 == class2)
1923 return class1;
1925 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1926 the other class. */
1927 if (class1 == X86_64_NO_CLASS)
1928 return class2;
1929 if (class2 == X86_64_NO_CLASS)
1930 return class1;
1932 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1933 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1934 return X86_64_MEMORY_CLASS;
1936 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1937 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1938 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1939 return X86_64_INTEGERSI_CLASS;
1940 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1941 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1942 return X86_64_INTEGER_CLASS;
1944 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1945 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1946 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1947 return X86_64_MEMORY_CLASS;
1949 /* Rule #6: Otherwise class SSE is used. */
1950 return X86_64_SSE_CLASS;
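/* A few concrete merges under the rules above (informal, for illustration):
   NO_CLASS + SSESF -> SSESF (rule 2), INTEGERSI + SSESF -> INTEGERSI
   (rule 4), INTEGER + SSE -> INTEGER (rule 4), X87 + SSE -> MEMORY
   (rule 5), and SSE + SSEDF -> SSE (rule 6).  */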
1953 /* Classify the argument of type TYPE and mode MODE.
1954 CLASSES will be filled by the register class used to pass each word
1955 of the operand. The number of words is returned. In case the parameter
1956 should be passed in memory, 0 is returned. As a special case for zero
1957 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1959 BIT_OFFSET is used internally for handling records; it specifies the
1960 offset of the argument in bits modulo 256, to avoid overflow cases.
1962 See the x86-64 PS ABI for details.
1965 static int
1966 classify_argument (enum machine_mode mode, tree type,
1967 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1969 HOST_WIDE_INT bytes =
1970 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1971 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1973 /* Variable sized entities are always passed/returned in memory. */
1974 if (bytes < 0)
1975 return 0;
1977 if (mode != VOIDmode
1978 && MUST_PASS_IN_STACK (mode, type))
1979 return 0;
1981 if (type && AGGREGATE_TYPE_P (type))
1983 int i;
1984 tree field;
1985 enum x86_64_reg_class subclasses[MAX_CLASSES];
1987 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1988 if (bytes > 16)
1989 return 0;
1991 for (i = 0; i < words; i++)
1992 classes[i] = X86_64_NO_CLASS;
1994 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1995 signal the memory class, so handle it as a special case. */
1996 if (!words)
1998 classes[0] = X86_64_NO_CLASS;
1999 return 1;
2002 /* Classify each field of record and merge classes. */
2003 if (TREE_CODE (type) == RECORD_TYPE)
2005 /* For classes first merge in the field of the subclasses. */
2006 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2008 tree bases = TYPE_BINFO_BASETYPES (type);
2009 int n_bases = TREE_VEC_LENGTH (bases);
2010 int i;
2012 for (i = 0; i < n_bases; ++i)
2014 tree binfo = TREE_VEC_ELT (bases, i);
2015 int num;
2016 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2017 tree type = BINFO_TYPE (binfo);
2019 num = classify_argument (TYPE_MODE (type),
2020 type, subclasses,
2021 (offset + bit_offset) % 256);
2022 if (!num)
2023 return 0;
2024 for (i = 0; i < num; i++)
2026 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2027 classes[i + pos] =
2028 merge_classes (subclasses[i], classes[i + pos]);
2032 /* And now merge the fields of structure. */
2033 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2035 if (TREE_CODE (field) == FIELD_DECL)
2037 int num;
2039 /* Bitfields are always classified as integer. Handle them
2040 early, since later code would consider them to be
2041 misaligned integers. */
2042 if (DECL_BIT_FIELD (field))
2044 for (i = int_bit_position (field) / 8 / 8;
2045 i < (int_bit_position (field)
2046 + tree_low_cst (DECL_SIZE (field), 0)
2047 + 63) / 8 / 8; i++)
2048 classes[i] =
2049 merge_classes (X86_64_INTEGER_CLASS,
2050 classes[i]);
2052 else
2054 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2055 TREE_TYPE (field), subclasses,
2056 (int_bit_position (field)
2057 + bit_offset) % 256);
2058 if (!num)
2059 return 0;
2060 for (i = 0; i < num; i++)
2062 int pos =
2063 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2064 classes[i + pos] =
2065 merge_classes (subclasses[i], classes[i + pos]);
2071 /* Arrays are handled as small records. */
2072 else if (TREE_CODE (type) == ARRAY_TYPE)
2074 int num;
2075 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2076 TREE_TYPE (type), subclasses, bit_offset);
2077 if (!num)
2078 return 0;
2080 /* The partial classes are now full classes. */
2081 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2082 subclasses[0] = X86_64_SSE_CLASS;
2083 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2084 subclasses[0] = X86_64_INTEGER_CLASS;
2086 for (i = 0; i < words; i++)
2087 classes[i] = subclasses[i % num];
2089 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2090 else if (TREE_CODE (type) == UNION_TYPE
2091 || TREE_CODE (type) == QUAL_UNION_TYPE)
2093 /* For classes first merge in the field of the subclasses. */
2094 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2096 tree bases = TYPE_BINFO_BASETYPES (type);
2097 int n_bases = TREE_VEC_LENGTH (bases);
2098 int i;
2100 for (i = 0; i < n_bases; ++i)
2102 tree binfo = TREE_VEC_ELT (bases, i);
2103 int num;
2104 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2105 tree type = BINFO_TYPE (binfo);
2107 num = classify_argument (TYPE_MODE (type),
2108 type, subclasses,
2109 (offset + (bit_offset % 64)) % 256);
2110 if (!num)
2111 return 0;
2112 for (i = 0; i < num; i++)
2114 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2115 classes[i + pos] =
2116 merge_classes (subclasses[i], classes[i + pos]);
2120 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2122 if (TREE_CODE (field) == FIELD_DECL)
2124 int num;
2125 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2126 TREE_TYPE (field), subclasses,
2127 bit_offset);
2128 if (!num)
2129 return 0;
2130 for (i = 0; i < num; i++)
2131 classes[i] = merge_classes (subclasses[i], classes[i]);
2135 else if (TREE_CODE (type) == SET_TYPE)
2137 if (bytes <= 4)
2139 classes[0] = X86_64_INTEGERSI_CLASS;
2140 return 1;
2142 else if (bytes <= 8)
2144 classes[0] = X86_64_INTEGER_CLASS;
2145 return 1;
2147 else if (bytes <= 12)
2149 classes[0] = X86_64_INTEGER_CLASS;
2150 classes[1] = X86_64_INTEGERSI_CLASS;
2151 return 2;
2153 else
2155 classes[0] = X86_64_INTEGER_CLASS;
2156 classes[1] = X86_64_INTEGER_CLASS;
2157 return 2;
2160 else
2161 abort ();
2163 /* Final merger cleanup. */
2164 for (i = 0; i < words; i++)
2166 /* If one class is MEMORY, everything should be passed in
2167 memory. */
2168 if (classes[i] == X86_64_MEMORY_CLASS)
2169 return 0;
2171 /* The X86_64_SSEUP_CLASS should be always preceded by
2172 X86_64_SSE_CLASS. */
2173 if (classes[i] == X86_64_SSEUP_CLASS
2174 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2175 classes[i] = X86_64_SSE_CLASS;
2177 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2178 if (classes[i] == X86_64_X87UP_CLASS
2179 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2180 classes[i] = X86_64_SSE_CLASS;
2182 return words;
2185 /* Compute alignment needed. We align all types to natural boundaries with
2186 exception of XFmode that is aligned to 64bits. */
2187 if (mode != VOIDmode && mode != BLKmode)
2189 int mode_alignment = GET_MODE_BITSIZE (mode);
2191 if (mode == XFmode)
2192 mode_alignment = 128;
2193 else if (mode == XCmode)
2194 mode_alignment = 256;
2195 /* Misaligned fields are always returned in memory. */
2196 if (bit_offset % mode_alignment)
2197 return 0;
2200 /* Classification of atomic types. */
2201 switch (mode)
2203 case DImode:
2204 case SImode:
2205 case HImode:
2206 case QImode:
2207 case CSImode:
2208 case CHImode:
2209 case CQImode:
2210 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2211 classes[0] = X86_64_INTEGERSI_CLASS;
2212 else
2213 classes[0] = X86_64_INTEGER_CLASS;
2214 return 1;
2215 case CDImode:
2216 case TImode:
2217 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2218 return 2;
2219 case CTImode:
2220 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2221 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2222 return 4;
2223 case SFmode:
2224 if (!(bit_offset % 64))
2225 classes[0] = X86_64_SSESF_CLASS;
2226 else
2227 classes[0] = X86_64_SSE_CLASS;
2228 return 1;
2229 case DFmode:
2230 classes[0] = X86_64_SSEDF_CLASS;
2231 return 1;
2232 case XFmode:
2233 classes[0] = X86_64_X87_CLASS;
2234 classes[1] = X86_64_X87UP_CLASS;
2235 return 2;
2236 case TFmode:
2237 case TCmode:
2238 return 0;
2239 case XCmode:
2240 classes[0] = X86_64_X87_CLASS;
2241 classes[1] = X86_64_X87UP_CLASS;
2242 classes[2] = X86_64_X87_CLASS;
2243 classes[3] = X86_64_X87UP_CLASS;
2244 return 4;
2245 case DCmode:
2246 classes[0] = X86_64_SSEDF_CLASS;
2247 classes[1] = X86_64_SSEDF_CLASS;
2248 return 2;
2249 case SCmode:
2250 classes[0] = X86_64_SSE_CLASS;
2251 return 1;
2252 case V4SFmode:
2253 case V4SImode:
2254 case V16QImode:
2255 case V8HImode:
2256 case V2DFmode:
2257 case V2DImode:
2258 classes[0] = X86_64_SSE_CLASS;
2259 classes[1] = X86_64_SSEUP_CLASS;
2260 return 2;
2261 case V2SFmode:
2262 case V2SImode:
2263 case V4HImode:
2264 case V8QImode:
2265 return 0;
2266 case BLKmode:
2267 case VOIDmode:
2268 return 0;
2269 default:
2270 abort ();
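/* Worked example of the classification above (a sketch based on the x86-64
   psABI, not compiler output):

     struct s { double d; long l; };   16 bytes, i.e. two eightbytes

   The first eightbyte holds the double and is classified SSEDF; the second
   holds the long and is classified INTEGER, so classify_argument returns 2
   with classes = { SSEDF, INTEGER }.  A struct larger than 16 bytes, or one
   with a misaligned field, yields 0 and is passed in memory.  */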
2274 /* Examine the argument and set the number of registers required in each
2275 class. Return 0 iff the parameter should be passed in memory. */
2276 static int
2277 examine_argument (enum machine_mode mode, tree type, int in_return,
2278 int *int_nregs, int *sse_nregs)
2280 enum x86_64_reg_class class[MAX_CLASSES];
2281 int n = classify_argument (mode, type, class, 0);
2283 *int_nregs = 0;
2284 *sse_nregs = 0;
2285 if (!n)
2286 return 0;
2287 for (n--; n >= 0; n--)
2288 switch (class[n])
2290 case X86_64_INTEGER_CLASS:
2291 case X86_64_INTEGERSI_CLASS:
2292 (*int_nregs)++;
2293 break;
2294 case X86_64_SSE_CLASS:
2295 case X86_64_SSESF_CLASS:
2296 case X86_64_SSEDF_CLASS:
2297 (*sse_nregs)++;
2298 break;
2299 case X86_64_NO_CLASS:
2300 case X86_64_SSEUP_CLASS:
2301 break;
2302 case X86_64_X87_CLASS:
2303 case X86_64_X87UP_CLASS:
2304 if (!in_return)
2305 return 0;
2306 break;
2307 case X86_64_MEMORY_CLASS:
2308 abort ();
2310 return 1;
2312 /* Construct container for the argument used by GCC interface. See
2313 FUNCTION_ARG for the detailed description. */
2314 static rtx
2315 construct_container (enum machine_mode mode, tree type, int in_return,
2316 int nintregs, int nsseregs, const int * intreg,
2317 int sse_regno)
2319 enum machine_mode tmpmode;
2320 int bytes =
2321 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2322 enum x86_64_reg_class class[MAX_CLASSES];
2323 int n;
2324 int i;
2325 int nexps = 0;
2326 int needed_sseregs, needed_intregs;
2327 rtx exp[MAX_CLASSES];
2328 rtx ret;
2330 n = classify_argument (mode, type, class, 0);
2331 if (TARGET_DEBUG_ARG)
2333 if (!n)
2334 fprintf (stderr, "Memory class\n");
2335 else
2337 fprintf (stderr, "Classes:");
2338 for (i = 0; i < n; i++)
2340 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2342 fprintf (stderr, "\n");
2345 if (!n)
2346 return NULL;
2347 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2348 return NULL;
2349 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2350 return NULL;
2352 /* First construct simple cases. Avoid SCmode, since we want to use
2353 single register to pass this type. */
2354 if (n == 1 && mode != SCmode)
2355 switch (class[0])
2357 case X86_64_INTEGER_CLASS:
2358 case X86_64_INTEGERSI_CLASS:
2359 return gen_rtx_REG (mode, intreg[0]);
2360 case X86_64_SSE_CLASS:
2361 case X86_64_SSESF_CLASS:
2362 case X86_64_SSEDF_CLASS:
2363 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2364 case X86_64_X87_CLASS:
2365 return gen_rtx_REG (mode, FIRST_STACK_REG);
2366 case X86_64_NO_CLASS:
2367 /* Zero sized array, struct or class. */
2368 return NULL;
2369 default:
2370 abort ();
2372 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2373 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2374 if (n == 2
2375 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2376 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2377 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2378 && class[1] == X86_64_INTEGER_CLASS
2379 && (mode == CDImode || mode == TImode || mode == TFmode)
2380 && intreg[0] + 1 == intreg[1])
2381 return gen_rtx_REG (mode, intreg[0]);
2382 if (n == 4
2383 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2384 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2385 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2387 /* Otherwise figure out the entries of the PARALLEL. */
2388 for (i = 0; i < n; i++)
2390 switch (class[i])
2392 case X86_64_NO_CLASS:
2393 break;
2394 case X86_64_INTEGER_CLASS:
2395 case X86_64_INTEGERSI_CLASS:
2396 /* Merge TImodes on aligned occasions here too. */
2397 if (i * 8 + 8 > bytes)
2398 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2399 else if (class[i] == X86_64_INTEGERSI_CLASS)
2400 tmpmode = SImode;
2401 else
2402 tmpmode = DImode;
2403 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2404 if (tmpmode == BLKmode)
2405 tmpmode = DImode;
2406 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2407 gen_rtx_REG (tmpmode, *intreg),
2408 GEN_INT (i*8));
2409 intreg++;
2410 break;
2411 case X86_64_SSESF_CLASS:
2412 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2413 gen_rtx_REG (SFmode,
2414 SSE_REGNO (sse_regno)),
2415 GEN_INT (i*8));
2416 sse_regno++;
2417 break;
2418 case X86_64_SSEDF_CLASS:
2419 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2420 gen_rtx_REG (DFmode,
2421 SSE_REGNO (sse_regno)),
2422 GEN_INT (i*8));
2423 sse_regno++;
2424 break;
2425 case X86_64_SSE_CLASS:
2426 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2427 tmpmode = TImode;
2428 else
2429 tmpmode = DImode;
2430 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2431 gen_rtx_REG (tmpmode,
2432 SSE_REGNO (sse_regno)),
2433 GEN_INT (i*8));
2434 if (tmpmode == TImode)
2435 i++;
2436 sse_regno++;
2437 break;
2438 default:
2439 abort ();
2442 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2443 for (i = 0; i < nexps; i++)
2444 XVECEXP (ret, 0, i) = exp [i];
2445 return ret;
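/* For the struct { double d; long l; } example, construct_container builds
   (roughly, as a sketch) a PARALLEL such as

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi) (const_int 8))])

   i.e. the first eightbyte travels in an SSE register and the second in the
   next free integer register; the exact registers depend on how many of
   each class earlier arguments have already consumed.  */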
2448 /* Update the data in CUM to advance over an argument
2449 of mode MODE and data type TYPE.
2450 (TYPE is null for libcalls where that information may not be available.) */
2452 void
2453 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2454 enum machine_mode mode, /* current arg mode */
2455 tree type, /* type of the argument or 0 if lib support */
2456 int named) /* whether or not the argument was named */
2458 int bytes =
2459 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2460 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2462 if (TARGET_DEBUG_ARG)
2463 fprintf (stderr,
2464 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2465 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2466 if (TARGET_64BIT)
2468 int int_nregs, sse_nregs;
2469 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2470 cum->words += words;
2471 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2473 cum->nregs -= int_nregs;
2474 cum->sse_nregs -= sse_nregs;
2475 cum->regno += int_nregs;
2476 cum->sse_regno += sse_nregs;
2478 else
2479 cum->words += words;
2481 else
2483 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2484 && (!type || !AGGREGATE_TYPE_P (type)))
2486 cum->sse_words += words;
2487 cum->sse_nregs -= 1;
2488 cum->sse_regno += 1;
2489 if (cum->sse_nregs <= 0)
2491 cum->sse_nregs = 0;
2492 cum->sse_regno = 0;
2495 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2496 && (!type || !AGGREGATE_TYPE_P (type)))
2498 cum->mmx_words += words;
2499 cum->mmx_nregs -= 1;
2500 cum->mmx_regno += 1;
2501 if (cum->mmx_nregs <= 0)
2503 cum->mmx_nregs = 0;
2504 cum->mmx_regno = 0;
2507 else
2509 cum->words += words;
2510 cum->nregs -= words;
2511 cum->regno += words;
2513 if (cum->nregs <= 0)
2515 cum->nregs = 0;
2516 cum->regno = 0;
2520 return;
2523 /* Define where to put the arguments to a function.
2524 Value is zero to push the argument on the stack,
2525 or a hard register in which to store the argument.
2527 MODE is the argument's machine mode.
2528 TYPE is the data type of the argument (as a tree).
2529 This is null for libcalls where that information may
2530 not be available.
2531 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2532 the preceding args and about the function being called.
2533 NAMED is nonzero if this argument is a named parameter
2534 (otherwise it is an extra parameter matching an ellipsis). */
2537 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2538 enum machine_mode mode, /* current arg mode */
2539 tree type, /* type of the argument or 0 if lib support */
2540 int named) /* != 0 for normal args, == 0 for ... args */
2542 rtx ret = NULL_RTX;
2543 int bytes =
2544 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2545 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2546 static bool warnedsse, warnedmmx;
2548 /* Handle a hidden AL argument containing the number of registers for varargs
2549 x86-64 functions. For the i386 ABI, just return constm1_rtx to avoid
2550 any AL settings. */
2551 if (mode == VOIDmode)
2553 if (TARGET_64BIT)
2554 return GEN_INT (cum->maybe_vaarg
2555 ? (cum->sse_nregs < 0
2556 ? SSE_REGPARM_MAX
2557 : cum->sse_regno)
2558 : -1);
2559 else
2560 return constm1_rtx;
2562 if (TARGET_64BIT)
2563 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2564 &x86_64_int_parameter_registers [cum->regno],
2565 cum->sse_regno);
2566 else
2567 switch (mode)
2569 /* For now, pass fp/complex values on the stack. */
2570 default:
2571 break;
2573 case BLKmode:
2574 if (bytes < 0)
2575 break;
2576 /* Fall through. */
2577 case DImode:
2578 case SImode:
2579 case HImode:
2580 case QImode:
2581 if (words <= cum->nregs)
2583 int regno = cum->regno;
2585 /* Fastcall allocates the first two DWORD (SImode) or
2586 smaller arguments to ECX and EDX. */
2587 if (cum->fastcall)
2589 if (mode == BLKmode || mode == DImode)
2590 break;
2592 /* ECX, not EAX, is the first allocated register. */
2593 if (regno == 0)
2594 regno = 2;
2596 ret = gen_rtx_REG (mode, regno);
2598 break;
2599 case TImode:
2600 case V16QImode:
2601 case V8HImode:
2602 case V4SImode:
2603 case V2DImode:
2604 case V4SFmode:
2605 case V2DFmode:
2606 if (!type || !AGGREGATE_TYPE_P (type))
2608 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2610 warnedsse = true;
2611 warning ("SSE vector argument without SSE enabled "
2612 "changes the ABI");
2614 if (cum->sse_nregs)
2615 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2617 break;
2618 case V8QImode:
2619 case V4HImode:
2620 case V2SImode:
2621 case V2SFmode:
2622 if (!type || !AGGREGATE_TYPE_P (type))
2624 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2626 warnedmmx = true;
2627 warning ("MMX vector argument without MMX enabled "
2628 "changes the ABI");
2630 if (cum->mmx_nregs)
2631 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2633 break;
2636 if (TARGET_DEBUG_ARG)
2638 fprintf (stderr,
2639 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2640 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2642 if (ret)
2643 print_simple_rtl (stderr, ret);
2644 else
2645 fprintf (stderr, ", stack");
2647 fprintf (stderr, " )\n");
2650 return ret;
2653 /* A C expression that indicates when an argument must be passed by
2654 reference. If nonzero for an argument, a copy of that argument is
2655 made in memory and a pointer to the argument is passed instead of
2656 the argument itself. The pointer is passed in whatever way is
2657 appropriate for passing a pointer to that type. */
2660 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2661 enum machine_mode mode ATTRIBUTE_UNUSED,
2662 tree type, int named ATTRIBUTE_UNUSED)
2664 if (!TARGET_64BIT)
2665 return 0;
2667 if (type && int_size_in_bytes (type) == -1)
2669 if (TARGET_DEBUG_ARG)
2670 fprintf (stderr, "function_arg_pass_by_reference\n");
2671 return 1;
2674 return 0;
2677 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2678 passing ABI. */
2679 static bool
2680 contains_128bit_aligned_vector_p (tree type)
2682 enum machine_mode mode = TYPE_MODE (type);
2683 if (SSE_REG_MODE_P (mode)
2684 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2685 return true;
2686 if (TYPE_ALIGN (type) < 128)
2687 return false;
2689 if (AGGREGATE_TYPE_P (type))
2691 /* Walk the aggregates recursively. */
2692 if (TREE_CODE (type) == RECORD_TYPE
2693 || TREE_CODE (type) == UNION_TYPE
2694 || TREE_CODE (type) == QUAL_UNION_TYPE)
2696 tree field;
2698 if (TYPE_BINFO (type) != NULL
2699 && TYPE_BINFO_BASETYPES (type) != NULL)
2701 tree bases = TYPE_BINFO_BASETYPES (type);
2702 int n_bases = TREE_VEC_LENGTH (bases);
2703 int i;
2705 for (i = 0; i < n_bases; ++i)
2707 tree binfo = TREE_VEC_ELT (bases, i);
2708 tree type = BINFO_TYPE (binfo);
2710 if (contains_128bit_aligned_vector_p (type))
2711 return true;
2714 /* And now merge the fields of structure. */
2715 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2717 if (TREE_CODE (field) == FIELD_DECL
2718 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2719 return true;
2722 /* Just for use if some languages pass arrays by value. */
2723 else if (TREE_CODE (type) == ARRAY_TYPE)
2725 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2726 return true;
2728 else
2729 abort ();
2731 return false;
2734 /* Gives the alignment boundary, in bits, of an argument with the
2735 specified mode and type. */
2738 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2740 int align;
2741 if (type)
2742 align = TYPE_ALIGN (type);
2743 else
2744 align = GET_MODE_ALIGNMENT (mode);
2745 if (align < PARM_BOUNDARY)
2746 align = PARM_BOUNDARY;
2747 if (!TARGET_64BIT)
2749 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2750 make an exception for SSE modes since these require 128bit
2751 alignment.
2753 The handling here differs from field_alignment. ICC aligns MMX
2754 arguments to 4 byte boundaries, while structure fields are aligned
2755 to 8 byte boundaries. */
2756 if (!type)
2758 if (!SSE_REG_MODE_P (mode))
2759 align = PARM_BOUNDARY;
2761 else
2763 if (!contains_128bit_aligned_vector_p (type))
2764 align = PARM_BOUNDARY;
2767 if (align > 128)
2768 align = 128;
2769 return align;
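/* Example of the 32-bit rules above (illustrative): a plain int or double
   argument gets PARM_BOUNDARY alignment, while an __m128 argument, or a
   struct containing one, is aligned to 128 bits; nothing is ever aligned
   beyond 128 bits.  */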
2772 /* Return true if N is a possible register number of function value. */
2773 bool
2774 ix86_function_value_regno_p (int regno)
2776 if (!TARGET_64BIT)
2778 return ((regno) == 0
2779 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2780 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2782 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2783 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2784 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2787 /* Define how to find the value returned by a function.
2788 VALTYPE is the data type of the value (as a tree).
2789 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2790 otherwise, FUNC is 0. */
2792 ix86_function_value (tree valtype)
2794 if (TARGET_64BIT)
2796 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2797 REGPARM_MAX, SSE_REGPARM_MAX,
2798 x86_64_int_return_registers, 0);
2799 /* For zero sized structures, construct_container returns NULL, but we need
2800 to keep the rest of the compiler happy by returning a meaningful value. */
2801 if (!ret)
2802 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2803 return ret;
2805 else
2806 return gen_rtx_REG (TYPE_MODE (valtype),
2807 ix86_value_regno (TYPE_MODE (valtype)));
2810 /* Return false iff type is returned in memory. */
2812 ix86_return_in_memory (tree type)
2814 int needed_intregs, needed_sseregs, size;
2815 enum machine_mode mode = TYPE_MODE (type);
2817 if (TARGET_64BIT)
2818 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2820 if (mode == BLKmode)
2821 return 1;
2823 size = int_size_in_bytes (type);
2825 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2826 return 0;
2828 if (VECTOR_MODE_P (mode) || mode == TImode)
2830 /* User-created vectors small enough to fit in EAX. */
2831 if (size < 8)
2832 return 0;
2834 /* MMX/3dNow values are returned on the stack, since we've
2835 got to EMMS/FEMMS before returning. */
2836 if (size == 8)
2837 return 1;
2839 /* SSE values are returned in XMM0. */
2840 /* ??? Except when it doesn't exist? We have a choice of
2841 either (1) being abi incompatible with a -march switch,
2842 or (2) generating an error here. Given no good solution,
2843 I think the safest thing is one warning. The user won't
2844 be able to use -Werror, but.... */
2845 if (size == 16)
2847 static bool warned;
2849 if (TARGET_SSE)
2850 return 0;
2852 if (!warned)
2854 warned = true;
2855 warning ("SSE vector return without SSE enabled "
2856 "changes the ABI");
2858 return 1;
2862 if (mode == XFmode)
2863 return 0;
2865 if (size > 12)
2866 return 1;
2867 return 0;
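/* Illustrative outcomes on a 32-bit target (a sketch of the logic above):
   BLKmode values always go to memory; with MS_AGGREGATE_RETURN a non-BLKmode
   aggregate of at most 8 bytes stays in registers; an 8-byte MMX-sized
   vector goes to memory since we must EMMS before returning; a 16-byte
   vector returns in XMM0 when SSE is enabled, otherwise in memory with a
   one-time ABI warning.  */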
2870 /* Define how to find the value returned by a library function
2871 assuming the value has mode MODE. */
2873 ix86_libcall_value (enum machine_mode mode)
2875 if (TARGET_64BIT)
2877 switch (mode)
2879 case SFmode:
2880 case SCmode:
2881 case DFmode:
2882 case DCmode:
2883 return gen_rtx_REG (mode, FIRST_SSE_REG);
2884 case XFmode:
2885 case XCmode:
2886 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2887 case TFmode:
2888 case TCmode:
2889 return NULL;
2890 default:
2891 return gen_rtx_REG (mode, 0);
2894 else
2895 return gen_rtx_REG (mode, ix86_value_regno (mode));
2898 /* Given a mode, return the register to use for a return value. */
2900 static int
2901 ix86_value_regno (enum machine_mode mode)
2903 /* Floating point return values in %st(0). */
2904 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2905 return FIRST_FLOAT_REG;
2906 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2907 we prevent this case when sse is not available. */
2908 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2909 return FIRST_SSE_REG;
2910 /* Everything else in %eax. */
2911 return 0;
2914 /* Create the va_list data type. */
2916 static tree
2917 ix86_build_builtin_va_list (void)
2919 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2921 /* For i386 we use plain pointer to argument area. */
2922 if (!TARGET_64BIT)
2923 return build_pointer_type (char_type_node);
2925 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2926 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2928 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2929 unsigned_type_node);
2930 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2931 unsigned_type_node);
2932 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2933 ptr_type_node);
2934 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2935 ptr_type_node);
2937 DECL_FIELD_CONTEXT (f_gpr) = record;
2938 DECL_FIELD_CONTEXT (f_fpr) = record;
2939 DECL_FIELD_CONTEXT (f_ovf) = record;
2940 DECL_FIELD_CONTEXT (f_sav) = record;
2942 TREE_CHAIN (record) = type_decl;
2943 TYPE_NAME (record) = type_decl;
2944 TYPE_FIELDS (record) = f_gpr;
2945 TREE_CHAIN (f_gpr) = f_fpr;
2946 TREE_CHAIN (f_fpr) = f_ovf;
2947 TREE_CHAIN (f_ovf) = f_sav;
2949 layout_type (record);
2951 /* The correct type is an array type of one element. */
2952 return build_array_type (record, build_index_type (size_zero_node));
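/* For reference, the record built above matches the layout the x86-64 psABI
   describes for va_list (a C-level sketch, not code GCC emits):

     typedef struct {
       unsigned int gp_offset;        offset into reg_save_area for GPRs
       unsigned int fp_offset;        offset into reg_save_area for SSE regs
       void *overflow_arg_area;       next stack-passed argument
       void *reg_save_area;           start of the register save area
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];   array of one element  */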
2955 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
2957 static void
2958 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2959 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2960 int no_rtl)
2962 CUMULATIVE_ARGS next_cum;
2963 rtx save_area = NULL_RTX, mem;
2964 rtx label;
2965 rtx label_ref;
2966 rtx tmp_reg;
2967 rtx nsse_reg;
2968 int set;
2969 tree fntype;
2970 int stdarg_p;
2971 int i;
2973 if (!TARGET_64BIT)
2974 return;
2976 /* Indicate that space should be allocated on the stack for the varargs save area. */
2977 ix86_save_varrargs_registers = 1;
2979 cfun->stack_alignment_needed = 128;
2981 fntype = TREE_TYPE (current_function_decl);
2982 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2983 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2984 != void_type_node));
2986 /* For varargs, we do not want to skip the dummy va_dcl argument.
2987 For stdargs, we do want to skip the last named argument. */
2988 next_cum = *cum;
2989 if (stdarg_p)
2990 function_arg_advance (&next_cum, mode, type, 1);
2992 if (!no_rtl)
2993 save_area = frame_pointer_rtx;
2995 set = get_varargs_alias_set ();
2997 for (i = next_cum.regno; i < ix86_regparm; i++)
2999 mem = gen_rtx_MEM (Pmode,
3000 plus_constant (save_area, i * UNITS_PER_WORD));
3001 set_mem_alias_set (mem, set);
3002 emit_move_insn (mem, gen_rtx_REG (Pmode,
3003 x86_64_int_parameter_registers[i]));
3006 if (next_cum.sse_nregs)
3008 /* Now emit code to save SSE registers. The AX parameter contains the
3009 number of SSE parameter registers used to call this function. We use
3010 the sse_prologue_save insn template, which produces a computed jump
3011 across the SSE saves. We need some preparation work to get this working. */
3013 label = gen_label_rtx ();
3014 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3016 /* Compute address to jump to :
3017 label - 5*eax + nnamed_sse_arguments*5 */
3018 tmp_reg = gen_reg_rtx (Pmode);
3019 nsse_reg = gen_reg_rtx (Pmode);
3020 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3021 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3022 gen_rtx_MULT (Pmode, nsse_reg,
3023 GEN_INT (4))));
3024 if (next_cum.sse_regno)
3025 emit_move_insn
3026 (nsse_reg,
3027 gen_rtx_CONST (DImode,
3028 gen_rtx_PLUS (DImode,
3029 label_ref,
3030 GEN_INT (next_cum.sse_regno * 4))));
3031 else
3032 emit_move_insn (nsse_reg, label_ref);
3033 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3035 /* Compute the address of the memory block we save into. We always use a
3036 pointer pointing 127 bytes after the first byte to store - this is needed
3037 to keep the instruction size limited to 4 bytes. */
3038 tmp_reg = gen_reg_rtx (Pmode);
3039 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3040 plus_constant (save_area,
3041 8 * REGPARM_MAX + 127)));
3042 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3043 set_mem_alias_set (mem, set);
3044 set_mem_align (mem, BITS_PER_WORD);
3046 /* And finally do the dirty job! */
3047 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3048 GEN_INT (next_cum.sse_regno), label));
3053 /* Implement va_start. */
3055 void
3056 ix86_va_start (tree valist, rtx nextarg)
3058 HOST_WIDE_INT words, n_gpr, n_fpr;
3059 tree f_gpr, f_fpr, f_ovf, f_sav;
3060 tree gpr, fpr, ovf, sav, t;
3062 /* Only the 64-bit target needs something special. */
3063 if (!TARGET_64BIT)
3065 std_expand_builtin_va_start (valist, nextarg);
3066 return;
3069 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3070 f_fpr = TREE_CHAIN (f_gpr);
3071 f_ovf = TREE_CHAIN (f_fpr);
3072 f_sav = TREE_CHAIN (f_ovf);
3074 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3075 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3076 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3077 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3078 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3080 /* Count number of gp and fp argument registers used. */
3081 words = current_function_args_info.words;
3082 n_gpr = current_function_args_info.regno;
3083 n_fpr = current_function_args_info.sse_regno;
3085 if (TARGET_DEBUG_ARG)
3086 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3087 (int) words, (int) n_gpr, (int) n_fpr);
3089 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3090 build_int_2 (n_gpr * 8, 0));
3091 TREE_SIDE_EFFECTS (t) = 1;
3092 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3094 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3095 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3096 TREE_SIDE_EFFECTS (t) = 1;
3097 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3099 /* Find the overflow area. */
3100 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3101 if (words != 0)
3102 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3103 build_int_2 (words * UNITS_PER_WORD, 0));
3104 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3105 TREE_SIDE_EFFECTS (t) = 1;
3106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3108 /* Find the register save area.
3109 The function prologue saves it right above the stack frame. */
3110 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3111 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3112 TREE_SIDE_EFFECTS (t) = 1;
3113 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
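/* Example of the values stored above (a hedged sketch): for a function
   declared as  int f (const char *fmt, ...)  that used one integer register
   for FMT and no SSE registers, va_start sets gp_offset = 1 * 8 = 8,
   fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48, overflow_arg_area to the first
   stack-passed argument, and reg_save_area to the block saved in the
   prologue.  */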
3116 /* Implement va_arg. */
3118 ix86_va_arg (tree valist, tree type)
3120 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3121 tree f_gpr, f_fpr, f_ovf, f_sav;
3122 tree gpr, fpr, ovf, sav, t;
3123 int size, rsize;
3124 rtx lab_false, lab_over = NULL_RTX;
3125 rtx addr_rtx, r;
3126 rtx container;
3127 int indirect_p = 0;
3129 /* Only the 64-bit target needs something special. */
3130 if (!TARGET_64BIT)
3132 return std_expand_builtin_va_arg (valist, type);
3135 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3136 f_fpr = TREE_CHAIN (f_gpr);
3137 f_ovf = TREE_CHAIN (f_fpr);
3138 f_sav = TREE_CHAIN (f_ovf);
3140 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3141 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3142 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3143 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3144 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3146 size = int_size_in_bytes (type);
3147 if (size == -1)
3149 /* Passed by reference. */
3150 indirect_p = 1;
3151 type = build_pointer_type (type);
3152 size = int_size_in_bytes (type);
3154 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3156 container = construct_container (TYPE_MODE (type), type, 0,
3157 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3159 * Pull the value out of the saved registers ...
3162 addr_rtx = gen_reg_rtx (Pmode);
3164 if (container)
3166 rtx int_addr_rtx, sse_addr_rtx;
3167 int needed_intregs, needed_sseregs;
3168 int need_temp;
3170 lab_over = gen_label_rtx ();
3171 lab_false = gen_label_rtx ();
3173 examine_argument (TYPE_MODE (type), type, 0,
3174 &needed_intregs, &needed_sseregs);
3177 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3178 || TYPE_ALIGN (type) > 128);
3180 /* In case we are passing a structure, verify that it is a consecutive block
3181 in the register save area. If not, we need to do moves. */
3182 if (!need_temp && !REG_P (container))
3184 /* Verify that all registers are strictly consecutive */
3185 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3187 int i;
3189 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3191 rtx slot = XVECEXP (container, 0, i);
3192 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3193 || INTVAL (XEXP (slot, 1)) != i * 16)
3194 need_temp = 1;
3197 else
3199 int i;
3201 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3203 rtx slot = XVECEXP (container, 0, i);
3204 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3205 || INTVAL (XEXP (slot, 1)) != i * 8)
3206 need_temp = 1;
3210 if (!need_temp)
3212 int_addr_rtx = addr_rtx;
3213 sse_addr_rtx = addr_rtx;
3215 else
3217 int_addr_rtx = gen_reg_rtx (Pmode);
3218 sse_addr_rtx = gen_reg_rtx (Pmode);
3220 /* First ensure that we fit completely in registers. */
3221 if (needed_intregs)
3223 emit_cmp_and_jump_insns (expand_expr
3224 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3225 GEN_INT ((REGPARM_MAX - needed_intregs +
3226 1) * 8), GE, const1_rtx, SImode,
3227 1, lab_false);
3229 if (needed_sseregs)
3231 emit_cmp_and_jump_insns (expand_expr
3232 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3233 GEN_INT ((SSE_REGPARM_MAX -
3234 needed_sseregs + 1) * 16 +
3235 REGPARM_MAX * 8), GE, const1_rtx,
3236 SImode, 1, lab_false);
3239 /* Compute index to start of area used for integer regs. */
3240 if (needed_intregs)
3242 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3243 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3244 if (r != int_addr_rtx)
3245 emit_move_insn (int_addr_rtx, r);
3247 if (needed_sseregs)
3249 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3250 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3251 if (r != sse_addr_rtx)
3252 emit_move_insn (sse_addr_rtx, r);
3254 if (need_temp)
3256 int i;
3257 rtx mem;
3258 rtx x;
3260 /* Never use the memory itself, as it has the alias set. */
3261 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3262 mem = gen_rtx_MEM (BLKmode, x);
3263 force_operand (x, addr_rtx);
3264 set_mem_alias_set (mem, get_varargs_alias_set ());
3265 set_mem_align (mem, BITS_PER_UNIT);
3267 for (i = 0; i < XVECLEN (container, 0); i++)
3269 rtx slot = XVECEXP (container, 0, i);
3270 rtx reg = XEXP (slot, 0);
3271 enum machine_mode mode = GET_MODE (reg);
3272 rtx src_addr;
3273 rtx src_mem;
3274 int src_offset;
3275 rtx dest_mem;
3277 if (SSE_REGNO_P (REGNO (reg)))
3279 src_addr = sse_addr_rtx;
3280 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3282 else
3284 src_addr = int_addr_rtx;
3285 src_offset = REGNO (reg) * 8;
3287 src_mem = gen_rtx_MEM (mode, src_addr);
3288 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3289 src_mem = adjust_address (src_mem, mode, src_offset);
3290 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3291 emit_move_insn (dest_mem, src_mem);
3295 if (needed_intregs)
3298 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3299 build_int_2 (needed_intregs * 8, 0));
3300 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3301 TREE_SIDE_EFFECTS (t) = 1;
3302 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3304 if (needed_sseregs)
3307 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3308 build_int_2 (needed_sseregs * 16, 0));
3309 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3310 TREE_SIDE_EFFECTS (t) = 1;
3311 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3314 emit_jump_insn (gen_jump (lab_over));
3315 emit_barrier ();
3316 emit_label (lab_false);
3319 /* ... otherwise out of the overflow area. */
3321 /* Care for on-stack alignment if needed. */
3322 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3323 t = ovf;
3324 else
3326 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3327 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3328 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3330 t = save_expr (t);
3332 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3333 if (r != addr_rtx)
3334 emit_move_insn (addr_rtx, r);
3337 build (PLUS_EXPR, TREE_TYPE (t), t,
3338 build_int_2 (rsize * UNITS_PER_WORD, 0));
3339 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3340 TREE_SIDE_EFFECTS (t) = 1;
3341 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3343 if (container)
3344 emit_label (lab_over);
3346 if (indirect_p)
3348 r = gen_rtx_MEM (Pmode, addr_rtx);
3349 set_mem_alias_set (r, get_varargs_alias_set ());
3350 emit_move_insn (addr_rtx, r);
3353 return addr_rtx;
3356 /* Return nonzero if OP is either a i387 or SSE fp register. */
3358 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3360 return ANY_FP_REG_P (op);
3363 /* Return nonzero if OP is an i387 fp register. */
3365 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3367 return FP_REG_P (op);
3370 /* Return nonzero if OP is a non-fp register_operand. */
3372 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3374 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3377 /* Return nonzero if OP is a register operand other than an
3378 i387 fp register. */
3380 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3382 return register_operand (op, mode) && !FP_REG_P (op);
3385 /* Return nonzero if OP is general operand representable on x86_64. */
3388 x86_64_general_operand (rtx op, enum machine_mode mode)
3390 if (!TARGET_64BIT)
3391 return general_operand (op, mode);
3392 if (nonimmediate_operand (op, mode))
3393 return 1;
3394 return x86_64_sign_extended_value (op);
3397 /* Return nonzero if OP is general operand representable on x86_64
3398 as either sign extended or zero extended constant. */
3401 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3403 if (!TARGET_64BIT)
3404 return general_operand (op, mode);
3405 if (nonimmediate_operand (op, mode))
3406 return 1;
3407 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3410 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3413 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3415 if (!TARGET_64BIT)
3416 return nonmemory_operand (op, mode);
3417 if (register_operand (op, mode))
3418 return 1;
3419 return x86_64_sign_extended_value (op);
3422 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3425 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3427 if (!TARGET_64BIT || !flag_pic)
3428 return nonmemory_operand (op, mode);
3429 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3430 return 1;
3431 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3432 return 1;
3433 return 0;
3436 /* Return nonzero if OPNUM's MEM should be matched
3437 in movabs* patterns. */
3440 ix86_check_movabs (rtx insn, int opnum)
3442 rtx set, mem;
3444 set = PATTERN (insn);
3445 if (GET_CODE (set) == PARALLEL)
3446 set = XVECEXP (set, 0, 0);
3447 if (GET_CODE (set) != SET)
3448 abort ();
3449 mem = XEXP (set, opnum);
3450 while (GET_CODE (mem) == SUBREG)
3451 mem = SUBREG_REG (mem);
3452 if (GET_CODE (mem) != MEM)
3453 abort ();
3454 return (volatile_ok || !MEM_VOLATILE_P (mem));
3457 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3460 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3462 if (!TARGET_64BIT)
3463 return nonmemory_operand (op, mode);
3464 if (register_operand (op, mode))
3465 return 1;
3466 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3469 /* Return nonzero if OP is immediate operand representable on x86_64. */
3472 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3474 if (!TARGET_64BIT)
3475 return immediate_operand (op, mode);
3476 return x86_64_sign_extended_value (op);
3479 /* Return nonzero if OP is immediate operand representable on x86_64. */
3482 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3484 return x86_64_zero_extended_value (op);
3487 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3488 for shift & compare patterns, as shifting by 0 does not change flags),
3489 else return zero. */
3492 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3494 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3497 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3498 reference and a constant. */
3501 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3503 switch (GET_CODE (op))
3505 case SYMBOL_REF:
3506 case LABEL_REF:
3507 return 1;
3509 case CONST:
3510 op = XEXP (op, 0);
3511 if (GET_CODE (op) == SYMBOL_REF
3512 || GET_CODE (op) == LABEL_REF
3513 || (GET_CODE (op) == UNSPEC
3514 && (XINT (op, 1) == UNSPEC_GOT
3515 || XINT (op, 1) == UNSPEC_GOTOFF
3516 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3517 return 1;
3518 if (GET_CODE (op) != PLUS
3519 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3520 return 0;
3522 op = XEXP (op, 0);
3523 if (GET_CODE (op) == SYMBOL_REF
3524 || GET_CODE (op) == LABEL_REF)
3525 return 1;
3526 /* Only @GOTOFF gets offsets. */
3527 if (GET_CODE (op) != UNSPEC
3528 || XINT (op, 1) != UNSPEC_GOTOFF)
3529 return 0;
3531 op = XVECEXP (op, 0, 0);
3532 if (GET_CODE (op) == SYMBOL_REF
3533 || GET_CODE (op) == LABEL_REF)
3534 return 1;
3535 return 0;
3537 default:
3538 return 0;
3542 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3545 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3547 if (GET_CODE (op) != CONST)
3548 return 0;
3549 op = XEXP (op, 0);
3550 if (TARGET_64BIT)
3552 if (GET_CODE (op) == UNSPEC
3553 && XINT (op, 1) == UNSPEC_GOTPCREL)
3554 return 1;
3555 if (GET_CODE (op) == PLUS
3556 && GET_CODE (XEXP (op, 0)) == UNSPEC
3557 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3558 return 1;
3560 else
3562 if (GET_CODE (op) == UNSPEC)
3563 return 1;
3564 if (GET_CODE (op) != PLUS
3565 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3566 return 0;
3567 op = XEXP (op, 0);
3568 if (GET_CODE (op) == UNSPEC)
3569 return 1;
3571 return 0;
3574 /* Return true if OP is a symbolic operand that resolves locally. */
3576 static int
3577 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3579 if (GET_CODE (op) == CONST
3580 && GET_CODE (XEXP (op, 0)) == PLUS
3581 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3582 op = XEXP (XEXP (op, 0), 0);
3584 if (GET_CODE (op) == LABEL_REF)
3585 return 1;
3587 if (GET_CODE (op) != SYMBOL_REF)
3588 return 0;
3590 if (SYMBOL_REF_LOCAL_P (op))
3591 return 1;
3593 /* There is, however, a not insubstantial body of code in the rest of
3594 the compiler that assumes it can just stick the results of
3595 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3596 /* ??? This is a hack.  Should update the body of the compiler to
3597 always create a DECL and invoke targetm.encode_section_info. */
3598 if (strncmp (XSTR (op, 0), internal_label_prefix,
3599 internal_label_prefix_len) == 0)
3600 return 1;
3602 return 0;
3605 /* Test for various thread-local symbols. */
3608 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3610 if (GET_CODE (op) != SYMBOL_REF)
3611 return 0;
3612 return SYMBOL_REF_TLS_MODEL (op);
3615 static inline int
3616 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3618 if (GET_CODE (op) != SYMBOL_REF)
3619 return 0;
3620 return SYMBOL_REF_TLS_MODEL (op) == kind;
3624 global_dynamic_symbolic_operand (rtx op,
3625 enum machine_mode mode ATTRIBUTE_UNUSED)
3627 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3631 local_dynamic_symbolic_operand (rtx op,
3632 enum machine_mode mode ATTRIBUTE_UNUSED)
3634 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3638 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3640 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3644 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3646 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3649 /* Test for a valid operand for a call instruction. Don't allow the
3650 arg pointer register or virtual regs since they may decay into
3651 reg + const, which the patterns can't handle. */
3654 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3656 /* Disallow indirect through a virtual register. This leads to
3657 compiler aborts when trying to eliminate them. */
3658 if (GET_CODE (op) == REG
3659 && (op == arg_pointer_rtx
3660 || op == frame_pointer_rtx
3661 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3662 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3663 return 0;
3665 /* Disallow `call 1234'. Due to varying assembler lameness this
3666 gets either rejected or translated to `call .+1234'. */
3667 if (GET_CODE (op) == CONST_INT)
3668 return 0;
3670 /* Explicitly allow SYMBOL_REF even if pic. */
3671 if (GET_CODE (op) == SYMBOL_REF)
3672 return 1;
3674 /* Otherwise we can allow any general_operand in the address. */
3675 return general_operand (op, Pmode);
3678 /* Test for a valid operand for a call instruction. Don't allow the
3679 arg pointer register or virtual regs since they may decay into
3680 reg + const, which the patterns can't handle. */
3683 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3685 /* Disallow indirect through a virtual register. This leads to
3686 compiler aborts when trying to eliminate them. */
3687 if (GET_CODE (op) == REG
3688 && (op == arg_pointer_rtx
3689 || op == frame_pointer_rtx
3690 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3691 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3692 return 0;
3694 /* Explicitly allow SYMBOL_REF even if pic. */
3695 if (GET_CODE (op) == SYMBOL_REF)
3696 return 1;
3698 /* Otherwise we can only allow register operands. */
3699 return register_operand (op, Pmode);
3703 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3705 if (GET_CODE (op) == CONST
3706 && GET_CODE (XEXP (op, 0)) == PLUS
3707 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3708 op = XEXP (XEXP (op, 0), 0);
3709 return GET_CODE (op) == SYMBOL_REF;
3712 /* Match exactly zero and one. */
3715 const0_operand (rtx op, enum machine_mode mode)
3717 return op == CONST0_RTX (mode);
3721 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3723 return op == const1_rtx;
3726 /* Match 2, 4, or 8. Used for leal multiplicands. */
3729 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3731 return (GET_CODE (op) == CONST_INT
3732 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
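/* Illustrative standalone sketch (not part of GCC; the helper name is made up):
   the 2/4/8 restriction above exists because only those scale factors can be
   encoded in the SIB byte of an x86 effective address disp(base,index,scale).
   The same test over a plain integer: */
static int valid_lea_scale (long scale)
{
  /* A scale of 1 is expressed as plain base+index, so the leal multiplicand
     patterns only need to match 2, 4 and 8.  */
  return scale == 2 || scale == 4 || scale == 8;
}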
3736 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3738 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3742 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3744 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3748 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3750 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3754 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3756 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3760 /* True if this is a constant appropriate for an increment or decrement. */
3763 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3765 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3766 flags register, since the carry flag is not set. */
3767 if (TARGET_PENTIUM4 && !optimize_size)
3768 return 0;
3769 return op == const1_rtx || op == constm1_rtx;
3772 /* Return nonzero if OP is acceptable as operand of DImode shift
3773 expander. */
3776 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3778 if (TARGET_64BIT)
3779 return nonimmediate_operand (op, mode);
3780 else
3781 return register_operand (op, mode);
3784 /* Return false if this is the stack pointer, or any other fake
3785 register eliminable to the stack pointer. Otherwise, this is
3786 a register operand.
3788 This is used to prevent esp from being used as an index reg,
3789 which would only happen in pathological cases. */
3792 reg_no_sp_operand (rtx op, enum machine_mode mode)
3794 rtx t = op;
3795 if (GET_CODE (t) == SUBREG)
3796 t = SUBREG_REG (t);
3797 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3798 return 0;
3800 return register_operand (op, mode);
3804 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3806 return MMX_REG_P (op);
3809 /* Return false if this is any eliminable register. Otherwise
3810 general_operand. */
3813 general_no_elim_operand (rtx op, enum machine_mode mode)
3815 rtx t = op;
3816 if (GET_CODE (t) == SUBREG)
3817 t = SUBREG_REG (t);
3818 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3819 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3820 || t == virtual_stack_dynamic_rtx)
3821 return 0;
3822 if (REG_P (t)
3823 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3824 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3825 return 0;
3827 return general_operand (op, mode);
3830 /* Return false if this is any eliminable register. Otherwise
3831 register_operand or const_int. */
3834 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3836 rtx t = op;
3837 if (GET_CODE (t) == SUBREG)
3838 t = SUBREG_REG (t);
3839 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3840 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3841 || t == virtual_stack_dynamic_rtx)
3842 return 0;
3844 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3847 /* Return false if this is any eliminable register or stack register,
3848 otherwise work like register_operand. */
3851 index_register_operand (rtx op, enum machine_mode mode)
3853 rtx t = op;
3854 if (GET_CODE (t) == SUBREG)
3855 t = SUBREG_REG (t);
3856 if (!REG_P (t))
3857 return 0;
3858 if (t == arg_pointer_rtx
3859 || t == frame_pointer_rtx
3860 || t == virtual_incoming_args_rtx
3861 || t == virtual_stack_vars_rtx
3862 || t == virtual_stack_dynamic_rtx
3863 || REGNO (t) == STACK_POINTER_REGNUM)
3864 return 0;
3866 return general_operand (op, mode);
3869 /* Return true if op is a Q_REGS class register. */
3872 q_regs_operand (rtx op, enum machine_mode mode)
3874 if (mode != VOIDmode && GET_MODE (op) != mode)
3875 return 0;
3876 if (GET_CODE (op) == SUBREG)
3877 op = SUBREG_REG (op);
3878 return ANY_QI_REG_P (op);
3881 /* Return true if op is a flags register. */
3884 flags_reg_operand (rtx op, enum machine_mode mode)
3886 if (mode != VOIDmode && GET_MODE (op) != mode)
3887 return 0;
3888 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3891 /* Return true if op is a NON_Q_REGS class register. */
3894 non_q_regs_operand (rtx op, enum machine_mode mode)
3896 if (mode != VOIDmode && GET_MODE (op) != mode)
3897 return 0;
3898 if (GET_CODE (op) == SUBREG)
3899 op = SUBREG_REG (op);
3900 return NON_QI_REG_P (op);
3904 zero_extended_scalar_load_operand (rtx op,
3905 enum machine_mode mode ATTRIBUTE_UNUSED)
3907 unsigned n_elts;
3908 if (GET_CODE (op) != MEM)
3909 return 0;
3910 op = maybe_get_pool_constant (op);
3911 if (!op)
3912 return 0;
3913 if (GET_CODE (op) != CONST_VECTOR)
3914 return 0;
3915 n_elts =
3916 (GET_MODE_SIZE (GET_MODE (op)) /
3917 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3918 for (n_elts--; n_elts > 0; n_elts--)
3920 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3921 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3922 return 0;
3924 return 1;
3927 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3929 vector_move_operand (rtx op, enum machine_mode mode)
3931 if (nonimmediate_operand (op, mode))
3932 return 1;
3933 if (GET_MODE (op) != mode && mode != VOIDmode)
3934 return 0;
3935 return (op == CONST0_RTX (GET_MODE (op)));
3938 /* Return true if op is a valid address that does not contain
3939 a segment override. */
3942 no_seg_address_operand (rtx op, enum machine_mode mode)
3944 struct ix86_address parts;
3946 if (! address_operand (op, mode))
3947 return 0;
3949 if (! ix86_decompose_address (op, &parts))
3950 abort ();
3952 return parts.seg == SEG_DEFAULT;
3955 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3956 insns. */
3958 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3960 enum rtx_code code = GET_CODE (op);
3961 switch (code)
3963 /* Operations supported directly. */
3964 case EQ:
3965 case LT:
3966 case LE:
3967 case UNORDERED:
3968 case NE:
3969 case UNGE:
3970 case UNGT:
3971 case ORDERED:
3972 return 1;
3973 /* These are equivalent to ones above in non-IEEE comparisons. */
3974 case UNEQ:
3975 case UNLT:
3976 case UNLE:
3977 case LTGT:
3978 case GE:
3979 case GT:
3980 return !TARGET_IEEE_FP;
3981 default:
3982 return 0;
3985 /* Return 1 if OP is a valid comparison operator in valid mode. */
3987 ix86_comparison_operator (rtx op, enum machine_mode mode)
3989 enum machine_mode inmode;
3990 enum rtx_code code = GET_CODE (op);
3991 if (mode != VOIDmode && GET_MODE (op) != mode)
3992 return 0;
3993 if (GET_RTX_CLASS (code) != '<')
3994 return 0;
3995 inmode = GET_MODE (XEXP (op, 0));
3997 if (inmode == CCFPmode || inmode == CCFPUmode)
3999 enum rtx_code second_code, bypass_code;
4000 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4001 return (bypass_code == NIL && second_code == NIL);
4003 switch (code)
4005 case EQ: case NE:
4006 return 1;
4007 case LT: case GE:
4008 if (inmode == CCmode || inmode == CCGCmode
4009 || inmode == CCGOCmode || inmode == CCNOmode)
4010 return 1;
4011 return 0;
4012 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4013 if (inmode == CCmode)
4014 return 1;
4015 return 0;
4016 case GT: case LE:
4017 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4018 return 1;
4019 return 0;
4020 default:
4021 return 0;
4025 /* Return 1 if OP is a valid comparison operator testing whether the
4026 carry flag is set. */
4028 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4030 enum machine_mode inmode;
4031 enum rtx_code code = GET_CODE (op);
4033 if (mode != VOIDmode && GET_MODE (op) != mode)
4034 return 0;
4035 if (GET_RTX_CLASS (code) != '<')
4036 return 0;
4037 inmode = GET_MODE (XEXP (op, 0));
4038 if (GET_CODE (XEXP (op, 0)) != REG
4039 || REGNO (XEXP (op, 0)) != 17
4040 || XEXP (op, 1) != const0_rtx)
4041 return 0;
4043 if (inmode == CCFPmode || inmode == CCFPUmode)
4045 enum rtx_code second_code, bypass_code;
4047 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4048 if (bypass_code != NIL || second_code != NIL)
4049 return 0;
4050 code = ix86_fp_compare_code_to_integer (code);
4052 else if (inmode != CCmode)
4053 return 0;
4054 return code == LTU;
4057 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4060 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4062 enum machine_mode inmode;
4063 enum rtx_code code = GET_CODE (op);
4065 if (mode != VOIDmode && GET_MODE (op) != mode)
4066 return 0;
4067 if (GET_RTX_CLASS (code) != '<')
4068 return 0;
4069 inmode = GET_MODE (XEXP (op, 0));
4070 if (inmode == CCFPmode || inmode == CCFPUmode)
4072 enum rtx_code second_code, bypass_code;
4074 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4075 if (bypass_code != NIL || second_code != NIL)
4076 return 0;
4077 code = ix86_fp_compare_code_to_integer (code);
4079 /* The i387 supports only a limited set of condition codes. */
4080 switch (code)
4082 case LTU: case GTU: case LEU: case GEU:
4083 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4084 return 1;
4085 return 0;
4086 case ORDERED: case UNORDERED:
4087 case EQ: case NE:
4088 return 1;
4089 default:
4090 return 0;
4094 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4097 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4099 switch (GET_CODE (op))
4101 case MULT:
4102 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4103 but the 386 and 486 do HImode multiplies faster. */
4104 return ix86_tune > PROCESSOR_I486;
4105 case PLUS:
4106 case AND:
4107 case IOR:
4108 case XOR:
4109 case ASHIFT:
4110 return 1;
4111 default:
4112 return 0;
4116 /* Nearly general operand, but accept any const_double, since we wish
4117 to be able to drop them into memory rather than have them get pulled
4118 into registers. */
4121 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4123 if (mode != VOIDmode && mode != GET_MODE (op))
4124 return 0;
4125 if (GET_CODE (op) == CONST_DOUBLE)
4126 return 1;
4127 return general_operand (op, mode);
4130 /* Match an SI or HImode register for a zero_extract. */
4133 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4135 int regno;
4136 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4137 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4138 return 0;
4140 if (!register_operand (op, VOIDmode))
4141 return 0;
4143 /* Be careful to accept only registers having upper parts. */
4144 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4145 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4148 /* Return 1 if this is a valid binary floating-point operation.
4149 OP is the expression matched, and MODE is its mode. */
4152 binary_fp_operator (rtx op, enum machine_mode mode)
4154 if (mode != VOIDmode && mode != GET_MODE (op))
4155 return 0;
4157 switch (GET_CODE (op))
4159 case PLUS:
4160 case MINUS:
4161 case MULT:
4162 case DIV:
4163 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4165 default:
4166 return 0;
4171 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4173 return GET_CODE (op) == MULT;
4177 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4179 return GET_CODE (op) == DIV;
4183 arith_or_logical_operator (rtx op, enum machine_mode mode)
4185 return ((mode == VOIDmode || GET_MODE (op) == mode)
4186 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4187 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4190 /* Returns 1 if OP is memory operand with a displacement. */
4193 memory_displacement_operand (rtx op, enum machine_mode mode)
4195 struct ix86_address parts;
4197 if (! memory_operand (op, mode))
4198 return 0;
4200 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4201 abort ();
4203 return parts.disp != NULL_RTX;
4206 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4207 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4209 ??? It seems likely that this will only work because cmpsi is an
4210 expander, and no actual insns use this. */
4213 cmpsi_operand (rtx op, enum machine_mode mode)
4215 if (nonimmediate_operand (op, mode))
4216 return 1;
4218 if (GET_CODE (op) == AND
4219 && GET_MODE (op) == SImode
4220 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4221 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4222 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4223 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4224 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4225 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4226 return 1;
4228 return 0;
4231 /* Returns 1 if OP is a memory operand that cannot be represented by the
4232 modRM array. */
4235 long_memory_operand (rtx op, enum machine_mode mode)
4237 if (! memory_operand (op, mode))
4238 return 0;
4240 return memory_address_length (op) != 0;
4243 /* Return nonzero if the rtx is known aligned. */
4246 aligned_operand (rtx op, enum machine_mode mode)
4248 struct ix86_address parts;
4250 if (!general_operand (op, mode))
4251 return 0;
4253 /* Registers and immediate operands are always "aligned". */
4254 if (GET_CODE (op) != MEM)
4255 return 1;
4257 /* Don't even try to do any aligned optimizations with volatiles. */
4258 if (MEM_VOLATILE_P (op))
4259 return 0;
4261 op = XEXP (op, 0);
4263 /* Pushes and pops are only valid on the stack pointer. */
4264 if (GET_CODE (op) == PRE_DEC
4265 || GET_CODE (op) == POST_INC)
4266 return 1;
4268 /* Decode the address. */
4269 if (! ix86_decompose_address (op, &parts))
4270 abort ();
4272 /* Look for some component that isn't known to be aligned. */
4273 if (parts.index)
4275 if (parts.scale < 4
4276 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4277 return 0;
4279 if (parts.base)
4281 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4282 return 0;
4284 if (parts.disp)
4286 if (GET_CODE (parts.disp) != CONST_INT
4287 || (INTVAL (parts.disp) & 3) != 0)
4288 return 0;
4291 /* Didn't find one -- this must be an aligned address. */
4292 return 1;
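/* Illustrative standalone sketch (not part of GCC; the helper name is made up):
   the displacement test above simply checks the low two bits, i.e. whether a
   constant offset is a multiple of 4.  In plain C: */
static int disp_word_aligned (long disp)
{
  return (disp & 3) == 0;   /* e.g. 8 is aligned, 6 is not */
}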
4295 /* Initialize the table of extra 80387 mathematical constants. */
4297 static void
4298 init_ext_80387_constants (void)
4300 static const char * cst[5] =
4302 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4303 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4304 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4305 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4306 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4308 int i;
4310 for (i = 0; i < 5; i++)
4312 real_from_string (&ext_80387_constants_table[i], cst[i]);
4313 /* Ensure each constant is rounded to XFmode precision. */
4314 real_convert (&ext_80387_constants_table[i],
4315 XFmode, &ext_80387_constants_table[i]);
4318 ext_80387_constants_init = 1;
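/* Illustrative standalone sketch (not part of GCC): the five strings above
   are, in order, log10(2), ln(2), log2(e), log2(10) and pi -- the values
   loaded by fldlg2, fldln2, fldl2e, fldl2t and fldpi.  A quick host-side
   check in plain C (double precision only, unlike the XFmode table): */
#include <math.h>
#include <stdio.h>

int main (void)
{
  const double cst[5] = { log10 (2.0), log (2.0), 1.0 / log (2.0),
                          log (10.0) / log (2.0), acos (-1.0) };
  int i;

  for (i = 0; i < 5; i++)
    printf ("%.16f\n", cst[i]);   /* matches the leading digits of cst[] above */
  return 0;
}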
4321 /* Return true if the constant is something that can be loaded with
4322 a special instruction. */
4325 standard_80387_constant_p (rtx x)
4327 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4328 return -1;
4330 if (x == CONST0_RTX (GET_MODE (x)))
4331 return 1;
4332 if (x == CONST1_RTX (GET_MODE (x)))
4333 return 2;
4335 /* For XFmode constants, try to find a special 80387 instruction on
4336 those CPUs that benefit from them. */
4337 if (GET_MODE (x) == XFmode
4338 && x86_ext_80387_constants & TUNEMASK)
4340 REAL_VALUE_TYPE r;
4341 int i;
4343 if (! ext_80387_constants_init)
4344 init_ext_80387_constants ();
4346 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4347 for (i = 0; i < 5; i++)
4348 if (real_identical (&r, &ext_80387_constants_table[i]))
4349 return i + 3;
4352 return 0;
4355 /* Return the opcode of the special instruction to be used to load
4356 the constant X. */
4358 const char *
4359 standard_80387_constant_opcode (rtx x)
4361 switch (standard_80387_constant_p (x))
4363 case 1:
4364 return "fldz";
4365 case 2:
4366 return "fld1";
4367 case 3:
4368 return "fldlg2";
4369 case 4:
4370 return "fldln2";
4371 case 5:
4372 return "fldl2e";
4373 case 6:
4374 return "fldl2t";
4375 case 7:
4376 return "fldpi";
4378 abort ();
4381 /* Return the CONST_DOUBLE representing the 80387 constant that is
4382 loaded by the specified special instruction. The argument IDX
4383 matches the return value from standard_80387_constant_p. */
4386 standard_80387_constant_rtx (int idx)
4388 int i;
4390 if (! ext_80387_constants_init)
4391 init_ext_80387_constants ();
4393 switch (idx)
4395 case 3:
4396 case 4:
4397 case 5:
4398 case 6:
4399 case 7:
4400 i = idx - 3;
4401 break;
4403 default:
4404 abort ();
4407 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4408 XFmode);
4411 /* Return 1 if X is an FP constant we can load into an SSE register without using memory.
4414 standard_sse_constant_p (rtx x)
4416 if (x == const0_rtx)
4417 return 1;
4418 return (x == CONST0_RTX (GET_MODE (x)));
4421 /* Returns 1 if OP contains a symbol reference */
4424 symbolic_reference_mentioned_p (rtx op)
4426 const char *fmt;
4427 int i;
4429 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4430 return 1;
4432 fmt = GET_RTX_FORMAT (GET_CODE (op));
4433 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4435 if (fmt[i] == 'E')
4437 int j;
4439 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4440 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4441 return 1;
4444 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4445 return 1;
4448 return 0;
4451 /* Return 1 if it is appropriate to emit `ret' instructions in the
4452 body of a function. Do this only if the epilogue is simple, needing a
4453 couple of insns. Prior to reloading, we can't tell how many registers
4454 must be saved, so return 0 then. Return 0 if there is no frame
4455 marker to de-allocate.
4457 If NON_SAVING_SETJMP is defined and true, then it is not possible
4458 for the epilogue to be simple, so return 0. This is a special case
4459 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4460 until final, but jump_optimize may need to know sooner if a
4461 `return' is OK. */
4464 ix86_can_use_return_insn_p (void)
4466 struct ix86_frame frame;
4468 #ifdef NON_SAVING_SETJMP
4469 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4470 return 0;
4471 #endif
4473 if (! reload_completed || frame_pointer_needed)
4474 return 0;
4476 /* Don't allow more than 32k bytes of arguments to be popped, since
4477 that's all we can do with one instruction. */
4478 if (current_function_pops_args
4479 && current_function_args_size >= 32768)
4480 return 0;
4482 ix86_compute_frame_layout (&frame);
4483 return frame.to_allocate == 0 && frame.nregs == 0;
4486 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4488 x86_64_sign_extended_value (rtx value)
4490 switch (GET_CODE (value))
4492 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4493 to be at least 32, so all acceptable constants are
4494 represented as CONST_INTs. */
4495 case CONST_INT:
4496 if (HOST_BITS_PER_WIDE_INT == 32)
4497 return 1;
4498 else
4500 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4501 return trunc_int_for_mode (val, SImode) == val;
4503 break;
4505 /* For certain code models, the symbolic references are known to fit;
4506 in the CM_SMALL_PIC model we know a reference fits if it is local to
4507 the shared library. Don't count TLS SYMBOL_REFs here, since they
4508 should fit only inside of an UNSPEC handled below. */
4509 case SYMBOL_REF:
4510 /* TLS symbols are not constant. */
4511 if (tls_symbolic_operand (value, Pmode))
4512 return false;
4513 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4515 /* For certain code models, the code is near as well. */
4516 case LABEL_REF:
4517 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4518 || ix86_cmodel == CM_KERNEL);
4520 /* We also may accept the offsetted memory references in certain special
4521 cases. */
4522 case CONST:
4523 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4524 switch (XINT (XEXP (value, 0), 1))
4526 case UNSPEC_GOTPCREL:
4527 case UNSPEC_DTPOFF:
4528 case UNSPEC_GOTNTPOFF:
4529 case UNSPEC_NTPOFF:
4530 return 1;
4531 default:
4532 break;
4534 if (GET_CODE (XEXP (value, 0)) == PLUS)
4536 rtx op1 = XEXP (XEXP (value, 0), 0);
4537 rtx op2 = XEXP (XEXP (value, 0), 1);
4538 HOST_WIDE_INT offset;
4540 if (ix86_cmodel == CM_LARGE)
4541 return 0;
4542 if (GET_CODE (op2) != CONST_INT)
4543 return 0;
4544 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4545 switch (GET_CODE (op1))
4547 case SYMBOL_REF:
4548 /* For CM_SMALL assume that the latest object is 16MB below
4549 the end of the 31-bit boundary. We may also accept pretty
4550 large negative constants, knowing that all objects are
4551 in the positive half of the address space. */
4552 if (ix86_cmodel == CM_SMALL
4553 && offset < 16*1024*1024
4554 && trunc_int_for_mode (offset, SImode) == offset)
4555 return 1;
4556 /* For CM_KERNEL we know that all objects reside in the
4557 negative half of the 32-bit address space. We must not
4558 accept negative offsets, since they may fall just past the
4559 boundary, but we may accept pretty large positive ones. */
4560 if (ix86_cmodel == CM_KERNEL
4561 && offset > 0
4562 && trunc_int_for_mode (offset, SImode) == offset)
4563 return 1;
4564 break;
4565 case LABEL_REF:
4566 /* These conditions are similar to SYMBOL_REF ones, just the
4567 constraints for code models differ. */
4568 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4569 && offset < 16*1024*1024
4570 && trunc_int_for_mode (offset, SImode) == offset)
4571 return 1;
4572 if (ix86_cmodel == CM_KERNEL
4573 && offset > 0
4574 && trunc_int_for_mode (offset, SImode) == offset)
4575 return 1;
4576 break;
4577 case UNSPEC:
4578 switch (XINT (op1, 1))
4580 case UNSPEC_DTPOFF:
4581 case UNSPEC_NTPOFF:
4582 if (offset > 0
4583 && trunc_int_for_mode (offset, SImode) == offset)
4584 return 1;
4586 break;
4587 default:
4588 return 0;
4591 return 0;
4592 default:
4593 return 0;
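/* Illustrative standalone sketch (not part of GCC; the helper name is made up):
   for the CONST_INT case above, "fits in the sign extended immediate field"
   just means the 64-bit value is unchanged by truncation to 32 bits followed
   by sign extension.  In plain C: */
#include <stdint.h>

static int fits_sign_extended_32 (int64_t val)
{
  return (int64_t) (int32_t) val == val;   /* e.g. -1 fits, 0x80000000 does not */
}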
4597 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4599 x86_64_zero_extended_value (rtx value)
4601 switch (GET_CODE (value))
4603 case CONST_DOUBLE:
4604 if (HOST_BITS_PER_WIDE_INT == 32)
4605 return (GET_MODE (value) == VOIDmode
4606 && !CONST_DOUBLE_HIGH (value));
4607 else
4608 return 0;
4609 case CONST_INT:
4610 if (HOST_BITS_PER_WIDE_INT == 32)
4611 return INTVAL (value) >= 0;
4612 else
4613 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4614 break;
4616 /* For certain code models, the symbolic references are known to fit. */
4617 case SYMBOL_REF:
4618 /* TLS symbols are not constant. */
4619 if (tls_symbolic_operand (value, Pmode))
4620 return false;
4621 return ix86_cmodel == CM_SMALL;
4623 /* For certain code models, the code is near as well. */
4624 case LABEL_REF:
4625 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4627 /* We also may accept the offsetted memory references in certain special
4628 cases. */
4629 case CONST:
4630 if (GET_CODE (XEXP (value, 0)) == PLUS)
4632 rtx op1 = XEXP (XEXP (value, 0), 0);
4633 rtx op2 = XEXP (XEXP (value, 0), 1);
4635 if (ix86_cmodel == CM_LARGE)
4636 return 0;
4637 switch (GET_CODE (op1))
4639 case SYMBOL_REF:
4640 return 0;
4641 /* For small code model we may accept pretty large positive
4642 offsets, since one bit is available for free. Negative
4643 offsets are limited by the size of NULL pointer area
4644 specified by the ABI. */
4645 if (ix86_cmodel == CM_SMALL
4646 && GET_CODE (op2) == CONST_INT
4647 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4648 && (trunc_int_for_mode (INTVAL (op2), SImode)
4649 == INTVAL (op2)))
4650 return 1;
4651 /* ??? For the kernel, we may accept adjustment of
4652 -0x10000000, since we know that it will just convert
4653 negative address space to positive, but perhaps this
4654 is not worthwhile. */
4655 break;
4656 case LABEL_REF:
4657 /* These conditions are similar to SYMBOL_REF ones, just the
4658 constraints for code models differ. */
4659 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4660 && GET_CODE (op2) == CONST_INT
4661 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4662 && (trunc_int_for_mode (INTVAL (op2), SImode)
4663 == INTVAL (op2)))
4664 return 1;
4665 break;
4666 default:
4667 return 0;
4670 return 0;
4671 default:
4672 return 0;
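/* Illustrative standalone sketch (not part of GCC; the helper name is made up):
   the zero extended variant accepts exactly the values whose upper 32 bits
   are clear: */
#include <stdint.h>

static int fits_zero_extended_32 (uint64_t val)
{
  return (val >> 32) == 0;   /* e.g. 0xffffffff fits, 0x100000000 does not */
}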
4676 /* Value should be nonzero if functions must have frame pointers.
4677 Zero means the frame pointer need not be set up (and parms may
4678 be accessed via the stack pointer) in functions that seem suitable. */
4681 ix86_frame_pointer_required (void)
4683 /* If we accessed previous frames, then the generated code expects
4684 to be able to access the saved ebp value in our frame. */
4685 if (cfun->machine->accesses_prev_frame)
4686 return 1;
4688 /* Several x86 OSes need a frame pointer for other reasons,
4689 usually pertaining to setjmp. */
4690 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4691 return 1;
4693 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4694 the frame pointer by default. Turn it back on now if we've not
4695 got a leaf function. */
4696 if (TARGET_OMIT_LEAF_FRAME_POINTER
4697 && (!current_function_is_leaf))
4698 return 1;
4700 if (current_function_profile)
4701 return 1;
4703 return 0;
4706 /* Record that the current function accesses previous call frames. */
4708 void
4709 ix86_setup_frame_addresses (void)
4711 cfun->machine->accesses_prev_frame = 1;
4714 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4715 # define USE_HIDDEN_LINKONCE 1
4716 #else
4717 # define USE_HIDDEN_LINKONCE 0
4718 #endif
4720 static int pic_labels_used;
4722 /* Fills in the label name that should be used for a pc thunk for
4723 the given register. */
4725 static void
4726 get_pc_thunk_name (char name[32], unsigned int regno)
4728 if (USE_HIDDEN_LINKONCE)
4729 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4730 else
4731 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
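/* Illustrative standalone sketch (not part of GCC; the helper name is made up):
   with USE_HIDDEN_LINKONCE the name built above is "__i686.get_pc_thunk."
   followed by the register name, e.g. "__i686.get_pc_thunk.bx" for %ebx.
   A standalone equivalent of the sprintf call, with the register name
   supplied by the caller: */
#include <stdio.h>

static void make_thunk_name (char name[32], const char *reg_name)
{
  snprintf (name, 32, "__i686.get_pc_thunk.%s", reg_name);
}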
4735 /* This function generates code for -fpic that loads %ebx with
4736 the return address of the caller and then returns. */
4738 void
4739 ix86_file_end (void)
4741 rtx xops[2];
4742 int regno;
4744 for (regno = 0; regno < 8; ++regno)
4746 char name[32];
4748 if (! ((pic_labels_used >> regno) & 1))
4749 continue;
4751 get_pc_thunk_name (name, regno);
4753 if (USE_HIDDEN_LINKONCE)
4755 tree decl;
4757 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4758 error_mark_node);
4759 TREE_PUBLIC (decl) = 1;
4760 TREE_STATIC (decl) = 1;
4761 DECL_ONE_ONLY (decl) = 1;
4763 (*targetm.asm_out.unique_section) (decl, 0);
4764 named_section (decl, NULL, 0);
4766 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4767 fputs ("\t.hidden\t", asm_out_file);
4768 assemble_name (asm_out_file, name);
4769 fputc ('\n', asm_out_file);
4770 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4772 else
4774 text_section ();
4775 ASM_OUTPUT_LABEL (asm_out_file, name);
4778 xops[0] = gen_rtx_REG (SImode, regno);
4779 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4780 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4781 output_asm_insn ("ret", xops);
4784 if (NEED_INDICATE_EXEC_STACK)
4785 file_end_indicate_exec_stack ();
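/* Illustrative sketch (not part of GCC): for each register whose bit is set
   in pic_labels_used, the loop above emits a thunk of roughly the following
   form (AT&T syntax, shown here for %ebx):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the return address -- the address of the instruction
   following the call -- into the PIC register and returns.  */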
4788 /* Emit code for the SET_GOT patterns. */
4790 const char *
4791 output_set_got (rtx dest)
4793 rtx xops[3];
4795 xops[0] = dest;
4796 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4798 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4800 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4802 if (!flag_pic)
4803 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4804 else
4805 output_asm_insn ("call\t%a2", xops);
4807 #if TARGET_MACHO
4808 /* Output the "canonical" label name ("Lxx$pb") here too. This
4809 is what will be referred to by the Mach-O PIC subsystem. */
4810 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4811 #endif
4812 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4813 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4815 if (flag_pic)
4816 output_asm_insn ("pop{l}\t%0", xops);
4818 else
4820 char name[32];
4821 get_pc_thunk_name (name, REGNO (dest));
4822 pic_labels_used |= 1 << REGNO (dest);
4824 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4825 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4826 output_asm_insn ("call\t%X2", xops);
4829 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4830 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4831 else if (!TARGET_MACHO)
4832 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4834 return "";
4837 /* Generate an "push" pattern for input ARG. */
4839 static rtx
4840 gen_push (rtx arg)
4842 return gen_rtx_SET (VOIDmode,
4843 gen_rtx_MEM (Pmode,
4844 gen_rtx_PRE_DEC (Pmode,
4845 stack_pointer_rtx)),
4846 arg);
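/* Illustrative sketch (not part of GCC): for ARG = %ebp on a 32-bit target
   the RTL built above prints roughly as

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   which the i386 move patterns recognize and emit as a single push insn.  */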
4849 /* Return >= 0 if there is an unused call-clobbered register available
4850 for the entire function. */
4852 static unsigned int
4853 ix86_select_alt_pic_regnum (void)
4855 if (current_function_is_leaf && !current_function_profile)
4857 int i;
4858 for (i = 2; i >= 0; --i)
4859 if (!regs_ever_live[i])
4860 return i;
4863 return INVALID_REGNUM;
4866 /* Return 1 if we need to save REGNO. */
4867 static int
4868 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4870 if (pic_offset_table_rtx
4871 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4872 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4873 || current_function_profile
4874 || current_function_calls_eh_return
4875 || current_function_uses_const_pool))
4877 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4878 return 0;
4879 return 1;
4882 if (current_function_calls_eh_return && maybe_eh_return)
4884 unsigned i;
4885 for (i = 0; ; i++)
4887 unsigned test = EH_RETURN_DATA_REGNO (i);
4888 if (test == INVALID_REGNUM)
4889 break;
4890 if (test == regno)
4891 return 1;
4895 return (regs_ever_live[regno]
4896 && !call_used_regs[regno]
4897 && !fixed_regs[regno]
4898 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4901 /* Return number of registers to be saved on the stack. */
4903 static int
4904 ix86_nsaved_regs (void)
4906 int nregs = 0;
4907 int regno;
4909 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4910 if (ix86_save_reg (regno, true))
4911 nregs++;
4912 return nregs;
4915 /* Return the offset between two registers, one to be eliminated, and the other
4916 its replacement, at the start of a routine. */
4918 HOST_WIDE_INT
4919 ix86_initial_elimination_offset (int from, int to)
4921 struct ix86_frame frame;
4922 ix86_compute_frame_layout (&frame);
4924 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4925 return frame.hard_frame_pointer_offset;
4926 else if (from == FRAME_POINTER_REGNUM
4927 && to == HARD_FRAME_POINTER_REGNUM)
4928 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4929 else
4931 if (to != STACK_POINTER_REGNUM)
4932 abort ();
4933 else if (from == ARG_POINTER_REGNUM)
4934 return frame.stack_pointer_offset;
4935 else if (from != FRAME_POINTER_REGNUM)
4936 abort ();
4937 else
4938 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4942 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
4944 static void
4945 ix86_compute_frame_layout (struct ix86_frame *frame)
4947 HOST_WIDE_INT total_size;
4948 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4949 HOST_WIDE_INT offset;
4950 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4951 HOST_WIDE_INT size = get_frame_size ();
4953 frame->nregs = ix86_nsaved_regs ();
4954 total_size = size;
4956 /* During a reload iteration the number of registers saved can change.
4957 Recompute the value as needed. Do not recompute when the number of registers
4958 didn't change, as reload makes multiple calls to this function and does not
4959 expect the decision to change within a single iteration. */
4960 if (!optimize_size
4961 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4963 int count = frame->nregs;
4965 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4966 /* The fast prologue uses moves instead of pushes to save registers. This
4967 is significantly longer, but it also executes faster, as modern hardware
4968 can execute the moves in parallel but can't do that for push/pop.
4970 Be careful about choosing which prologue to emit: when the function takes
4971 many instructions to execute we may use the slow version, as well as when
4972 the function is known to be outside a hot spot (this is known only with
4973 profile feedback). Weight the size of the function by the number of registers
4974 to save, as it is cheap to use one or two push instructions but very
4975 slow to use many of them. */
4976 if (count)
4977 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4978 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4979 || (flag_branch_probabilities
4980 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4981 cfun->machine->use_fast_prologue_epilogue = false;
4982 else
4983 cfun->machine->use_fast_prologue_epilogue
4984 = !expensive_function_p (count);
4986 if (TARGET_PROLOGUE_USING_MOVE
4987 && cfun->machine->use_fast_prologue_epilogue)
4988 frame->save_regs_using_mov = true;
4989 else
4990 frame->save_regs_using_mov = false;
4993 /* Skip return address and saved base pointer. */
4994 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4996 frame->hard_frame_pointer_offset = offset;
4998 /* Do some sanity checking of stack_alignment_needed and
4999 preferred_alignment, since the i386 port is the only one using these
5000 features and they may break easily. */
5002 if (size && !stack_alignment_needed)
5003 abort ();
5004 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5005 abort ();
5006 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5007 abort ();
5008 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5009 abort ();
5011 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5012 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5014 /* Register save area */
5015 offset += frame->nregs * UNITS_PER_WORD;
5017 /* Va-arg area */
5018 if (ix86_save_varrargs_registers)
5020 offset += X86_64_VARARGS_SIZE;
5021 frame->va_arg_size = X86_64_VARARGS_SIZE;
5023 else
5024 frame->va_arg_size = 0;
5026 /* Align start of frame for local function. */
5027 frame->padding1 = ((offset + stack_alignment_needed - 1)
5028 & -stack_alignment_needed) - offset;
5030 offset += frame->padding1;
5032 /* Frame pointer points here. */
5033 frame->frame_pointer_offset = offset;
5035 offset += size;
5037 /* Add the outgoing arguments area. It can be skipped if we eliminated
5038 all the function calls as dead code.
5039 Skipping is however impossible when the function calls alloca: the alloca
5040 expander assumes that the last current_function_outgoing_args_size bytes
5041 of the stack frame are unused. */
5042 if (ACCUMULATE_OUTGOING_ARGS
5043 && (!current_function_is_leaf || current_function_calls_alloca))
5045 offset += current_function_outgoing_args_size;
5046 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5048 else
5049 frame->outgoing_arguments_size = 0;
5051 /* Align stack boundary. Only needed if we're calling another function
5052 or using alloca. */
5053 if (!current_function_is_leaf || current_function_calls_alloca)
5054 frame->padding2 = ((offset + preferred_alignment - 1)
5055 & -preferred_alignment) - offset;
5056 else
5057 frame->padding2 = 0;
5059 offset += frame->padding2;
5061 /* We've reached end of stack frame. */
5062 frame->stack_pointer_offset = offset;
5064 /* Size prologue needs to allocate. */
5065 frame->to_allocate =
5066 (size + frame->padding1 + frame->padding2
5067 + frame->outgoing_arguments_size + frame->va_arg_size);
5069 if ((!frame->to_allocate && frame->nregs <= 1)
5070 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5071 frame->save_regs_using_mov = false;
5073 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5074 && current_function_is_leaf)
5076 frame->red_zone_size = frame->to_allocate;
5077 if (frame->save_regs_using_mov)
5078 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5079 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5080 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5082 else
5083 frame->red_zone_size = 0;
5084 frame->to_allocate -= frame->red_zone_size;
5085 frame->stack_pointer_offset -= frame->red_zone_size;
5086 #if 0
5087 fprintf (stderr, "nregs: %i\n", frame->nregs);
5088 fprintf (stderr, "size: %i\n", size);
5089 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5090 fprintf (stderr, "padding1: %i\n", frame->padding1);
5091 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5092 fprintf (stderr, "padding2: %i\n", frame->padding2);
5093 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5094 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5095 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5096 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5097 frame->hard_frame_pointer_offset);
5098 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5099 #endif
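/* Illustrative standalone sketch (not part of GCC; the helper name is made up):
   both padding fields above use the usual round-up-to-power-of-two idiom
   (offset + align - 1) & -align, with the padding being the difference from
   the original offset.  In plain C: */
static long round_up (long offset, long align)
{
  /* ALIGN must be a power of two; e.g. round_up (13, 16) == 16 and
     round_up (32, 16) == 32, so padding = round_up (offset, align) - offset.  */
  return (offset + align - 1) & -align;
}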
5102 /* Emit code to save registers in the prologue. */
5104 static void
5105 ix86_emit_save_regs (void)
5107 int regno;
5108 rtx insn;
5110 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5111 if (ix86_save_reg (regno, true))
5113 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5114 RTX_FRAME_RELATED_P (insn) = 1;
5118 /* Emit code to save registers using MOV insns. First register
5119 is restored from POINTER + OFFSET. */
5120 static void
5121 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5123 int regno;
5124 rtx insn;
5126 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5127 if (ix86_save_reg (regno, true))
5129 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5130 Pmode, offset),
5131 gen_rtx_REG (Pmode, regno));
5132 RTX_FRAME_RELATED_P (insn) = 1;
5133 offset += UNITS_PER_WORD;
5137 /* Expand a prologue or epilogue stack adjustment.
5138 The pattern exists to put a dependency on all ebp-based memory accesses.
5139 STYLE should be negative if instructions should be marked as frame related,
5140 zero if the %r11 register is live and cannot be freely used, and positive
5141 otherwise. */
5143 static void
5144 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5146 rtx insn;
5148 if (! TARGET_64BIT)
5149 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5150 else if (x86_64_immediate_operand (offset, DImode))
5151 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5152 else
5154 rtx r11;
5155 /* r11 is used by indirect sibcall return as well, set before the
5156 epilogue and used after the epilogue. ATM indirect sibcall
5157 shouldn't be used together with huge frame sizes in one
5158 function because of the frame_size check in sibcall.c. */
5159 if (style == 0)
5160 abort ();
5161 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5162 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5163 if (style < 0)
5164 RTX_FRAME_RELATED_P (insn) = 1;
5165 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5166 offset));
5168 if (style < 0)
5169 RTX_FRAME_RELATED_P (insn) = 1;
5172 /* Expand the prologue into a bunch of separate insns. */
5174 void
5175 ix86_expand_prologue (void)
5177 rtx insn;
5178 bool pic_reg_used;
5179 struct ix86_frame frame;
5180 HOST_WIDE_INT allocate;
5182 ix86_compute_frame_layout (&frame);
5184 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5185 slower on all targets. Also sdb doesn't like it. */
5187 if (frame_pointer_needed)
5189 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5190 RTX_FRAME_RELATED_P (insn) = 1;
5192 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5193 RTX_FRAME_RELATED_P (insn) = 1;
5196 allocate = frame.to_allocate;
5198 if (!frame.save_regs_using_mov)
5199 ix86_emit_save_regs ();
5200 else
5201 allocate += frame.nregs * UNITS_PER_WORD;
5203 /* When using the red zone we may start saving registers before allocating
5204 the stack frame, saving one cycle of the prologue. */
5205 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5206 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5207 : stack_pointer_rtx,
5208 -frame.nregs * UNITS_PER_WORD);
5210 if (allocate == 0)
5212 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5213 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5214 GEN_INT (-allocate), -1);
5215 else
5217 /* Only valid for Win32. */
5218 rtx eax = gen_rtx_REG (SImode, 0);
5219 bool eax_live = ix86_eax_live_at_start_p ();
5221 if (TARGET_64BIT)
5222 abort ();
5224 if (eax_live)
5226 emit_insn (gen_push (eax));
5227 allocate -= 4;
5230 insn = emit_move_insn (eax, GEN_INT (allocate));
5231 RTX_FRAME_RELATED_P (insn) = 1;
5233 insn = emit_insn (gen_allocate_stack_worker (eax));
5234 RTX_FRAME_RELATED_P (insn) = 1;
5236 if (eax_live)
5238 rtx t = plus_constant (stack_pointer_rtx, allocate);
5239 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5243 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5245 if (!frame_pointer_needed || !frame.to_allocate)
5246 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5247 else
5248 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5249 -frame.nregs * UNITS_PER_WORD);
5252 pic_reg_used = false;
5253 if (pic_offset_table_rtx
5254 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5255 || current_function_profile))
5257 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5259 if (alt_pic_reg_used != INVALID_REGNUM)
5260 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5262 pic_reg_used = true;
5265 if (pic_reg_used)
5267 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5269 /* Even with accurate pre-reload life analysis, we can wind up
5270 deleting all references to the pic register after reload.
5271 Consider if cross-jumping unifies two sides of a branch
5272 controlled by a comparison vs the only read from a global;
5273 in that case, allow the set_got to be deleted, though we're
5274 too late to do anything about the ebx save in the prologue. */
5275 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5278 /* Prevent function calls from being scheduled before the call to mcount.
5279 In the pic_reg_used case, make sure that the got load isn't deleted. */
5280 if (current_function_profile)
5281 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
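/* Illustrative sketch (not part of GCC): for a plain 32-bit function that
   needs a frame pointer and takes the push-based register save path, the
   expander above produces code along the lines of (AT&T syntax):

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx            # one push per call-saved register to save
       subl    $N, %esp        # N = frame.to_allocate
       ...                     # plus the set_got sequence when PIC is needed
   */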
5284 /* Emit code to restore saved registers using MOV insns. First register
5285 is restored from POINTER + OFFSET. */
5286 static void
5287 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5288 int maybe_eh_return)
5290 int regno;
5291 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5293 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5294 if (ix86_save_reg (regno, maybe_eh_return))
5296 /* Ensure that adjust_address won't be forced to produce a pointer
5297 outside the range allowed by the x86-64 instruction set. */
5298 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5300 rtx r11;
5302 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5303 emit_move_insn (r11, GEN_INT (offset));
5304 emit_insn (gen_adddi3 (r11, r11, pointer));
5305 base_address = gen_rtx_MEM (Pmode, r11);
5306 offset = 0;
5308 emit_move_insn (gen_rtx_REG (Pmode, regno),
5309 adjust_address (base_address, Pmode, offset));
5310 offset += UNITS_PER_WORD;
5314 /* Restore function stack, frame, and registers. */
5316 void
5317 ix86_expand_epilogue (int style)
5319 int regno;
5320 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5321 struct ix86_frame frame;
5322 HOST_WIDE_INT offset;
5324 ix86_compute_frame_layout (&frame);
5326 /* Calculate the start of the saved registers relative to ebp. Special care
5327 must be taken for the normal return case of a function using
5328 eh_return: the eax and edx registers are marked as saved, but not
5329 restored along this path. */
5330 offset = frame.nregs;
5331 if (current_function_calls_eh_return && style != 2)
5332 offset -= 2;
5333 offset *= -UNITS_PER_WORD;
5335 /* If we're only restoring one register and sp is not valid, then
5336 use a move instruction to restore the register, since it's
5337 less work than reloading sp and popping the register.
5339 The default code results in a stack adjustment using an add/lea instruction,
5340 while this code results in a LEAVE instruction (or discrete equivalent),
5341 so it is profitable in some other cases as well, especially when there
5342 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5343 is set and there is exactly one register to pop. This heuristic may need
5344 some tuning in the future. */
5345 if ((!sp_valid && frame.nregs <= 1)
5346 || (TARGET_EPILOGUE_USING_MOVE
5347 && cfun->machine->use_fast_prologue_epilogue
5348 && (frame.nregs > 1 || frame.to_allocate))
5349 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5350 || (frame_pointer_needed && TARGET_USE_LEAVE
5351 && cfun->machine->use_fast_prologue_epilogue
5352 && frame.nregs == 1)
5353 || current_function_calls_eh_return)
5355 /* Restore registers. We can use ebp or esp to address the memory
5356 locations. If both are available, default to ebp, since offsets
5357 are known to be small. The only exception is esp pointing directly to the
5358 end of the block of saved registers, where we may simplify the addressing
5359 mode. */
5361 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5362 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5363 frame.to_allocate, style == 2);
5364 else
5365 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5366 offset, style == 2);
5368 /* eh_return epilogues need %ecx added to the stack pointer. */
5369 if (style == 2)
5371 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5373 if (frame_pointer_needed)
5375 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5376 tmp = plus_constant (tmp, UNITS_PER_WORD);
5377 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5379 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5380 emit_move_insn (hard_frame_pointer_rtx, tmp);
5382 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5383 const0_rtx, style);
5385 else
5387 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5388 tmp = plus_constant (tmp, (frame.to_allocate
5389 + frame.nregs * UNITS_PER_WORD));
5390 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5393 else if (!frame_pointer_needed)
5394 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5395 GEN_INT (frame.to_allocate
5396 + frame.nregs * UNITS_PER_WORD),
5397 style);
5398 /* If not an i386, mov & pop is faster than "leave". */
5399 else if (TARGET_USE_LEAVE || optimize_size
5400 || !cfun->machine->use_fast_prologue_epilogue)
5401 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5402 else
5404 pro_epilogue_adjust_stack (stack_pointer_rtx,
5405 hard_frame_pointer_rtx,
5406 const0_rtx, style);
5407 if (TARGET_64BIT)
5408 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5409 else
5410 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5413 else
5415 /* First step is to deallocate the stack frame so that we can
5416 pop the registers. */
5417 if (!sp_valid)
5419 if (!frame_pointer_needed)
5420 abort ();
5421 pro_epilogue_adjust_stack (stack_pointer_rtx,
5422 hard_frame_pointer_rtx,
5423 GEN_INT (offset), style);
5425 else if (frame.to_allocate)
5426 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5427 GEN_INT (frame.to_allocate), style);
5429 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5430 if (ix86_save_reg (regno, false))
5432 if (TARGET_64BIT)
5433 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5434 else
5435 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5437 if (frame_pointer_needed)
5439 /* Leave results in shorter dependency chains on CPUs that are
5440 able to grok it fast. */
5441 if (TARGET_USE_LEAVE)
5442 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5443 else if (TARGET_64BIT)
5444 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5445 else
5446 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5450 /* Sibcall epilogues don't want a return instruction. */
5451 if (style == 0)
5452 return;
5454 if (current_function_pops_args && current_function_args_size)
5456 rtx popc = GEN_INT (current_function_pops_args);
5458 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
5459 return address, do an explicit add, and jump indirectly to the
5460 caller. */
5462 if (current_function_pops_args >= 65536)
5464 rtx ecx = gen_rtx_REG (SImode, 2);
5466 /* There is no "pascal" calling convention in 64bit ABI. */
5467 if (TARGET_64BIT)
5468 abort ();
5470 emit_insn (gen_popsi1 (ecx));
5471 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5472 emit_jump_insn (gen_return_indirect_internal (ecx));
5474 else
5475 emit_jump_insn (gen_return_pop_internal (popc));
5477 else
5478 emit_jump_insn (gen_return_internal ());
5481 /* Reset from the function's potential modifications. */
5483 static void
5484 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5485 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5487 if (pic_offset_table_rtx)
5488 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5491 /* Extract the parts of an RTL expression that is a valid memory address
5492 for an instruction. Return 0 if the structure of the address is
5493 grossly off. Return -1 if the address contains ASHIFT, so it is not
5494 strictly valid, but is still used for computing the length of a lea instruction. */
5496 static int
5497 ix86_decompose_address (rtx addr, struct ix86_address *out)
5499 rtx base = NULL_RTX;
5500 rtx index = NULL_RTX;
5501 rtx disp = NULL_RTX;
5502 HOST_WIDE_INT scale = 1;
5503 rtx scale_rtx = NULL_RTX;
5504 int retval = 1;
5505 enum ix86_address_seg seg = SEG_DEFAULT;
5507 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5508 base = addr;
5509 else if (GET_CODE (addr) == PLUS)
5511 rtx addends[4], op;
5512 int n = 0, i;
5514 op = addr;
5517 if (n >= 4)
5518 return 0;
5519 addends[n++] = XEXP (op, 1);
5520 op = XEXP (op, 0);
5522 while (GET_CODE (op) == PLUS);
5523 if (n >= 4)
5524 return 0;
5525 addends[n] = op;
5527 for (i = n; i >= 0; --i)
5529 op = addends[i];
5530 switch (GET_CODE (op))
5532 case MULT:
5533 if (index)
5534 return 0;
5535 index = XEXP (op, 0);
5536 scale_rtx = XEXP (op, 1);
5537 break;
5539 case UNSPEC:
5540 if (XINT (op, 1) == UNSPEC_TP
5541 && TARGET_TLS_DIRECT_SEG_REFS
5542 && seg == SEG_DEFAULT)
5543 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5544 else
5545 return 0;
5546 break;
5548 case REG:
5549 case SUBREG:
5550 if (!base)
5551 base = op;
5552 else if (!index)
5553 index = op;
5554 else
5555 return 0;
5556 break;
5558 case CONST:
5559 case CONST_INT:
5560 case SYMBOL_REF:
5561 case LABEL_REF:
5562 if (disp)
5563 return 0;
5564 disp = op;
5565 break;
5567 default:
5568 return 0;
5572 else if (GET_CODE (addr) == MULT)
5574 index = XEXP (addr, 0); /* index*scale */
5575 scale_rtx = XEXP (addr, 1);
5577 else if (GET_CODE (addr) == ASHIFT)
5579 rtx tmp;
5581 /* We're called for lea too, which implements ashift on occasion. */
5582 index = XEXP (addr, 0);
5583 tmp = XEXP (addr, 1);
5584 if (GET_CODE (tmp) != CONST_INT)
5585 return 0;
5586 scale = INTVAL (tmp);
5587 if ((unsigned HOST_WIDE_INT) scale > 3)
5588 return 0;
5589 scale = 1 << scale;
5590 retval = -1;
5592 else
5593 disp = addr; /* displacement */
5595 /* Extract the integral value of scale. */
5596 if (scale_rtx)
5598 if (GET_CODE (scale_rtx) != CONST_INT)
5599 return 0;
5600 scale = INTVAL (scale_rtx);
5603 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5604 if (base && index && scale == 1
5605 && (index == arg_pointer_rtx
5606 || index == frame_pointer_rtx
5607 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5609 rtx tmp = base;
5610 base = index;
5611 index = tmp;
5614 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5615 if ((base == hard_frame_pointer_rtx
5616 || base == frame_pointer_rtx
5617 || base == arg_pointer_rtx) && !disp)
5618 disp = const0_rtx;
5620 /* Special case: on the K6, [%esi] forces the instruction to be vector decoded.
5621 Avoid this by transforming it to [%esi+0]. */
5622 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5623 && base && !index && !disp
5624 && REG_P (base)
5625 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5626 disp = const0_rtx;
5628 /* Special case: encode reg+reg instead of reg*2. */
5629 if (!base && index && scale && scale == 2)
5630 base = index, scale = 1;
5632 /* Special case: scaling cannot be encoded without base or displacement. */
5633 if (!base && !disp && index && scale != 1)
5634 disp = const0_rtx;
5636 out->base = base;
5637 out->index = index;
5638 out->disp = disp;
5639 out->scale = scale;
5640 out->seg = seg;
5642 return retval;
5645 /* Return cost of the memory address x.
5646 For i386, it is better to use a complex address than let gcc copy
5647 the address into a reg and make a new pseudo. But not if the address
5648 requires two regs - that would mean more pseudos with longer
5649 lifetimes. */
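/* For instance, with hard registers the address 4(%ebx) costs 0 (the
   displacement makes it "more complex"), (%ebx,%eax,2) costs 1, and an
   address combining two distinct pseudo registers costs 3, which steers
   gcc away from addresses that tie up two pseudos.  The K6 penalty below
   can add another 10 to some of these forms.  */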
5650 static int
5651 ix86_address_cost (rtx x)
5653 struct ix86_address parts;
5654 int cost = 1;
5656 if (!ix86_decompose_address (x, &parts))
5657 abort ();
5659 /* More complex memory references are better. */
5660 if (parts.disp && parts.disp != const0_rtx)
5661 cost--;
5662 if (parts.seg != SEG_DEFAULT)
5663 cost--;
5665 /* Attempt to minimize number of registers in the address. */
5666 if ((parts.base
5667 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5668 || (parts.index
5669 && (!REG_P (parts.index)
5670 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5671 cost++;
5673 if (parts.base
5674 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5675 && parts.index
5676 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5677 && parts.base != parts.index)
5678 cost++;
5680 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5681 since its predecode logic can't detect the length of instructions
5682 and decoding degenerates to the vector decoder. Increase the cost of such
5683 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5684 to split such addresses or even refuse them altogether.
5686 The following addressing modes are affected:
5687 [base+scale*index]
5688 [scale*index+disp]
5689 [base+index]
5691 The first and last cases may be avoidable by explicitly coding a zero
5692 displacement in the memory address, but I don't have an AMD-K6 machine
5693 handy to check this theory. */
5695 if (TARGET_K6
5696 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5697 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5698 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5699 cost += 10;
5701 return cost;
5704 /* If X is a machine specific address (i.e. a symbol or label being
5705 referenced as a displacement from the GOT implemented using an
5706 UNSPEC), then return the base term. Otherwise return X. */
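/* For example, in 64-bit mode the PIC reference
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)), possibly with a
   constant offset added, is reduced to the underlying SYMBOL_REF "foo";
   anything else is returned unchanged.  */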
5709 ix86_find_base_term (rtx x)
5711 rtx term;
5713 if (TARGET_64BIT)
5715 if (GET_CODE (x) != CONST)
5716 return x;
5717 term = XEXP (x, 0);
5718 if (GET_CODE (term) == PLUS
5719 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5720 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5721 term = XEXP (term, 0);
5722 if (GET_CODE (term) != UNSPEC
5723 || XINT (term, 1) != UNSPEC_GOTPCREL)
5724 return x;
5726 term = XVECEXP (term, 0, 0);
5728 if (GET_CODE (term) != SYMBOL_REF
5729 && GET_CODE (term) != LABEL_REF)
5730 return x;
5732 return term;
5735 term = ix86_delegitimize_address (x);
5737 if (GET_CODE (term) != SYMBOL_REF
5738 && GET_CODE (term) != LABEL_REF)
5739 return x;
5741 return term;
5744 /* Determine if a given RTX is a valid constant. We already know this
5745 satisfies CONSTANT_P. */
5747 bool
5748 legitimate_constant_p (rtx x)
5750 rtx inner;
5752 switch (GET_CODE (x))
5754 case SYMBOL_REF:
5755 /* TLS symbols are not constant. */
5756 if (tls_symbolic_operand (x, Pmode))
5757 return false;
5758 break;
5760 case CONST:
5761 inner = XEXP (x, 0);
5763 /* Offsets of TLS symbols are never valid.
5764 Discourage CSE from creating them. */
5765 if (GET_CODE (inner) == PLUS
5766 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5767 return false;
5769 if (GET_CODE (inner) == PLUS)
5771 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5772 return false;
5773 inner = XEXP (inner, 0);
5776 /* Only some unspecs are valid as "constants". */
5777 if (GET_CODE (inner) == UNSPEC)
5778 switch (XINT (inner, 1))
5780 case UNSPEC_TPOFF:
5781 case UNSPEC_NTPOFF:
5782 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5783 case UNSPEC_DTPOFF:
5784 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5785 default:
5786 return false;
5788 break;
5790 default:
5791 break;
5794 /* Otherwise we handle everything else in the move patterns. */
5795 return true;
5798 /* Determine if it's legal to put X into the constant pool. This
5799 is not possible for the address of thread-local symbols, which
5800 is checked above. */
5802 static bool
5803 ix86_cannot_force_const_mem (rtx x)
5805 return !legitimate_constant_p (x);
5808 /* Determine if a given RTX is a valid constant address. */
5810 bool
5811 constant_address_p (rtx x)
5813 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5816 /* Nonzero if the constant value X is a legitimate general operand
5817 when generating PIC code. It is given that flag_pic is on and
5818 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5820 bool
5821 legitimate_pic_operand_p (rtx x)
5823 rtx inner;
5825 switch (GET_CODE (x))
5827 case CONST:
5828 inner = XEXP (x, 0);
5830 /* Only some unspecs are valid as "constants". */
5831 if (GET_CODE (inner) == UNSPEC)
5832 switch (XINT (inner, 1))
5834 case UNSPEC_TPOFF:
5835 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5836 default:
5837 return false;
5839 /* Fall through. */
5841 case SYMBOL_REF:
5842 case LABEL_REF:
5843 return legitimate_pic_address_disp_p (x);
5845 default:
5846 return true;
5850 /* Determine if a given CONST RTX is a valid memory displacement
5851 in PIC mode. */
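/* For example, on 32-bit targets (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF)) is accepted when "foo" is a local symbol, while a bare
   SYMBOL_REF is rejected here (global data is instead reached through an
   @GOT load, see legitimize_pic_address below).  */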
5854 legitimate_pic_address_disp_p (rtx disp)
5856 bool saw_plus;
5858 /* In 64bit mode we can allow direct addresses of symbols and labels
5859 when they are not dynamic symbols. */
5860 if (TARGET_64BIT)
5862 /* TLS references should always be enclosed in UNSPEC. */
5863 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5864 return 0;
5865 if (GET_CODE (disp) == SYMBOL_REF
5866 && ix86_cmodel == CM_SMALL_PIC
5867 && SYMBOL_REF_LOCAL_P (disp))
5868 return 1;
5869 if (GET_CODE (disp) == LABEL_REF)
5870 return 1;
5871 if (GET_CODE (disp) == CONST
5872 && GET_CODE (XEXP (disp, 0)) == PLUS)
5874 rtx op0 = XEXP (XEXP (disp, 0), 0);
5875 rtx op1 = XEXP (XEXP (disp, 0), 1);
5877 /* TLS references should always be enclosed in UNSPEC. */
5878 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5879 return 0;
5880 if (((GET_CODE (op0) == SYMBOL_REF
5881 && ix86_cmodel == CM_SMALL_PIC
5882 && SYMBOL_REF_LOCAL_P (op0))
5883 || GET_CODE (op0) == LABEL_REF)
5884 && GET_CODE (op1) == CONST_INT
5885 && INTVAL (op1) < 16*1024*1024
5886 && INTVAL (op1) >= -16*1024*1024)
5887 return 1;
5890 if (GET_CODE (disp) != CONST)
5891 return 0;
5892 disp = XEXP (disp, 0);
5894 if (TARGET_64BIT)
5896 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
5897 of GOT table references. We should not need these anyway. */
5898 if (GET_CODE (disp) != UNSPEC
5899 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5900 return 0;
5902 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5903 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5904 return 0;
5905 return 1;
5908 saw_plus = false;
5909 if (GET_CODE (disp) == PLUS)
5911 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5912 return 0;
5913 disp = XEXP (disp, 0);
5914 saw_plus = true;
5917 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5918 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5920 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5921 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5922 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5924 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5925 if (! strcmp (sym_name, "<pic base>"))
5926 return 1;
5930 if (GET_CODE (disp) != UNSPEC)
5931 return 0;
5933 switch (XINT (disp, 1))
5935 case UNSPEC_GOT:
5936 if (saw_plus)
5937 return false;
5938 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5939 case UNSPEC_GOTOFF:
5940 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5941 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5942 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5943 return false;
5944 case UNSPEC_GOTTPOFF:
5945 case UNSPEC_GOTNTPOFF:
5946 case UNSPEC_INDNTPOFF:
5947 if (saw_plus)
5948 return false;
5949 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5950 case UNSPEC_NTPOFF:
5951 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5952 case UNSPEC_DTPOFF:
5953 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5956 return 0;
5959 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5960 memory address for an instruction. The MODE argument is the machine mode
5961 for the MEM expression that wants to use this address.
5963 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5964 convert common non-canonical forms to canonical form so that they will
5965 be recognized. */
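/* For example, (plus (reg %ebx) (mult (reg %eax) (const_int 4))) is
   accepted, while the same address with (const_int 3) is rejected with
   "scale is not a valid multiplier", and a SUBREG base is rejected with
   "base is not a register".  */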
5968 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5970 struct ix86_address parts;
5971 rtx base, index, disp;
5972 HOST_WIDE_INT scale;
5973 const char *reason = NULL;
5974 rtx reason_rtx = NULL_RTX;
5976 if (TARGET_DEBUG_ADDR)
5978 fprintf (stderr,
5979 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5980 GET_MODE_NAME (mode), strict);
5981 debug_rtx (addr);
5984 if (ix86_decompose_address (addr, &parts) <= 0)
5986 reason = "decomposition failed";
5987 goto report_error;
5990 base = parts.base;
5991 index = parts.index;
5992 disp = parts.disp;
5993 scale = parts.scale;
5995 /* Validate base register.
5997 Don't allow SUBREGs here; they can lead to spill failures when the base
5998 is one word out of a two-word structure, which is represented internally
5999 as a DImode int. */
6001 if (base)
6003 reason_rtx = base;
6005 if (GET_CODE (base) != REG)
6007 reason = "base is not a register";
6008 goto report_error;
6011 if (GET_MODE (base) != Pmode)
6013 reason = "base is not in Pmode";
6014 goto report_error;
6017 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6018 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6020 reason = "base is not valid";
6021 goto report_error;
6025 /* Validate index register.
6027 Don't allow SUBREGs here; they can lead to spill failures when the index
6028 is one word out of a two-word structure, which is represented internally
6029 as a DImode int. */
6031 if (index)
6033 reason_rtx = index;
6035 if (GET_CODE (index) != REG)
6037 reason = "index is not a register";
6038 goto report_error;
6041 if (GET_MODE (index) != Pmode)
6043 reason = "index is not in Pmode";
6044 goto report_error;
6047 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6048 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6050 reason = "index is not valid";
6051 goto report_error;
6055 /* Validate scale factor. */
6056 if (scale != 1)
6058 reason_rtx = GEN_INT (scale);
6059 if (!index)
6061 reason = "scale without index";
6062 goto report_error;
6065 if (scale != 2 && scale != 4 && scale != 8)
6067 reason = "scale is not a valid multiplier";
6068 goto report_error;
6072 /* Validate displacement. */
6073 if (disp)
6075 reason_rtx = disp;
6077 if (GET_CODE (disp) == CONST
6078 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6079 switch (XINT (XEXP (disp, 0), 1))
6081 case UNSPEC_GOT:
6082 case UNSPEC_GOTOFF:
6083 case UNSPEC_GOTPCREL:
6084 if (!flag_pic)
6085 abort ();
6086 goto is_legitimate_pic;
6088 case UNSPEC_GOTTPOFF:
6089 case UNSPEC_GOTNTPOFF:
6090 case UNSPEC_INDNTPOFF:
6091 case UNSPEC_NTPOFF:
6092 case UNSPEC_DTPOFF:
6093 break;
6095 default:
6096 reason = "invalid address unspec";
6097 goto report_error;
6100 else if (flag_pic && (SYMBOLIC_CONST (disp)
6101 #if TARGET_MACHO
6102 && !machopic_operand_p (disp)
6103 #endif
6106 is_legitimate_pic:
6107 if (TARGET_64BIT && (index || base))
6109 /* foo@dtpoff(%rX) is ok. */
6110 if (GET_CODE (disp) != CONST
6111 || GET_CODE (XEXP (disp, 0)) != PLUS
6112 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6113 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6114 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6115 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6117 reason = "non-constant pic memory reference";
6118 goto report_error;
6121 else if (! legitimate_pic_address_disp_p (disp))
6123 reason = "displacement is an invalid pic construct";
6124 goto report_error;
6127 /* This code used to verify that a symbolic pic displacement
6128 includes the pic_offset_table_rtx register.
6130 While this is a good idea, unfortunately these constructs may
6131 be created by the "adds using lea" optimization for incorrect
6132 code like:
6134 int a;
6135 int foo(int i)
6137 return *(&a+i);
6140 This code is nonsensical, but results in addressing the
6141 GOT table with a pic_offset_table_rtx base. We can't
6142 just refuse it easily, since it gets matched by the
6143 "addsi3" pattern, which later gets split to lea when the
6144 output register differs from the input. While this
6145 could be handled by a separate addsi pattern for this case
6146 that never results in lea, disabling this test seems to be
6147 the easier and correct fix for the crash. */
6149 else if (GET_CODE (disp) != LABEL_REF
6150 && GET_CODE (disp) != CONST_INT
6151 && (GET_CODE (disp) != CONST
6152 || !legitimate_constant_p (disp))
6153 && (GET_CODE (disp) != SYMBOL_REF
6154 || !legitimate_constant_p (disp)))
6156 reason = "displacement is not constant";
6157 goto report_error;
6159 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6161 reason = "displacement is out of range";
6162 goto report_error;
6166 /* Everything looks valid. */
6167 if (TARGET_DEBUG_ADDR)
6168 fprintf (stderr, "Success.\n");
6169 return TRUE;
6171 report_error:
6172 if (TARGET_DEBUG_ADDR)
6174 fprintf (stderr, "Error: %s\n", reason);
6175 debug_rtx (reason_rtx);
6177 return FALSE;
6180 /* Return a unique alias set for the GOT. */
6182 static HOST_WIDE_INT
6183 ix86_GOT_alias_set (void)
6185 static HOST_WIDE_INT set = -1;
6186 if (set == -1)
6187 set = new_alias_set ();
6188 return set;
6191 /* Return a legitimate reference for ORIG (an address) using the
6192 register REG. If REG is 0, a new pseudo is generated.
6194 There are two types of references that must be handled:
6196 1. Global data references must load the address from the GOT, via
6197 the PIC reg. An insn is emitted to do this load, and the reg is
6198 returned.
6200 2. Static data references, constant pool addresses, and code labels
6201 compute the address as an offset from the GOT, whose base is in
6202 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6203 differentiate them from global data objects. The returned
6204 address is the PIC reg + an unspec constant.
6206 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6207 reg also appears in the address. */
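/* For example, on 32-bit targets a locally bound symbol becomes
   (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF))),
   whereas a global symbol is loaded from the GOT:
   (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOT))))
   is moved into REG (or a fresh pseudo) and that register is returned.  */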
6210 legitimize_pic_address (rtx orig, rtx reg)
6212 rtx addr = orig;
6213 rtx new = orig;
6214 rtx base;
6216 #if TARGET_MACHO
6217 if (reg == 0)
6218 reg = gen_reg_rtx (Pmode);
6219 /* Use the generic Mach-O PIC machinery. */
6220 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6221 #endif
6223 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6224 new = addr;
6225 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6227 /* This symbol may be referenced via a displacement from the PIC
6228 base address (@GOTOFF). */
6230 if (reload_in_progress)
6231 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6232 if (GET_CODE (addr) == CONST)
6233 addr = XEXP (addr, 0);
6234 if (GET_CODE (addr) == PLUS)
6236 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6237 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6239 else
6240 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6241 new = gen_rtx_CONST (Pmode, new);
6242 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6244 if (reg != 0)
6246 emit_move_insn (reg, new);
6247 new = reg;
6250 else if (GET_CODE (addr) == SYMBOL_REF)
6252 if (TARGET_64BIT)
6254 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6255 new = gen_rtx_CONST (Pmode, new);
6256 new = gen_rtx_MEM (Pmode, new);
6257 RTX_UNCHANGING_P (new) = 1;
6258 set_mem_alias_set (new, ix86_GOT_alias_set ());
6260 if (reg == 0)
6261 reg = gen_reg_rtx (Pmode);
6262 /* Use gen_movsi directly, otherwise the address is loaded
6263 into a register for CSE. We don't want to CSE these addresses;
6264 instead we CSE addresses from the GOT table, so skip this. */
6265 emit_insn (gen_movsi (reg, new));
6266 new = reg;
6268 else
6270 /* This symbol must be referenced via a load from the
6271 Global Offset Table (@GOT). */
6273 if (reload_in_progress)
6274 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6275 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6276 new = gen_rtx_CONST (Pmode, new);
6277 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6278 new = gen_rtx_MEM (Pmode, new);
6279 RTX_UNCHANGING_P (new) = 1;
6280 set_mem_alias_set (new, ix86_GOT_alias_set ());
6282 if (reg == 0)
6283 reg = gen_reg_rtx (Pmode);
6284 emit_move_insn (reg, new);
6285 new = reg;
6288 else
6290 if (GET_CODE (addr) == CONST)
6292 addr = XEXP (addr, 0);
6294 /* We must match stuff we generated earlier. Assume the only
6295 unspecs that can get here are ours. Not that we could do
6296 anything with them anyway.... */
6297 if (GET_CODE (addr) == UNSPEC
6298 || (GET_CODE (addr) == PLUS
6299 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6300 return orig;
6301 if (GET_CODE (addr) != PLUS)
6302 abort ();
6304 if (GET_CODE (addr) == PLUS)
6306 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6308 /* Check first to see if this is a constant offset from a @GOTOFF
6309 symbol reference. */
6310 if (local_symbolic_operand (op0, Pmode)
6311 && GET_CODE (op1) == CONST_INT)
6313 if (!TARGET_64BIT)
6315 if (reload_in_progress)
6316 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6317 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6318 UNSPEC_GOTOFF);
6319 new = gen_rtx_PLUS (Pmode, new, op1);
6320 new = gen_rtx_CONST (Pmode, new);
6321 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6323 if (reg != 0)
6325 emit_move_insn (reg, new);
6326 new = reg;
6329 else
6331 if (INTVAL (op1) < -16*1024*1024
6332 || INTVAL (op1) >= 16*1024*1024)
6333 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6336 else
6338 base = legitimize_pic_address (XEXP (addr, 0), reg);
6339 new = legitimize_pic_address (XEXP (addr, 1),
6340 base == reg ? NULL_RTX : reg);
6342 if (GET_CODE (new) == CONST_INT)
6343 new = plus_constant (base, INTVAL (new));
6344 else
6346 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6348 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6349 new = XEXP (new, 1);
6351 new = gen_rtx_PLUS (Pmode, base, new);
6356 return new;
6359 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6361 static rtx
6362 get_thread_pointer (int to_reg)
6364 rtx tp, reg, insn;
6366 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6367 if (!to_reg)
6368 return tp;
6370 reg = gen_reg_rtx (Pmode);
6371 insn = gen_rtx_SET (VOIDmode, reg, tp);
6372 insn = emit_insn (insn);
6374 return reg;
6377 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6378 false if we expect this to be used for a memory address and true if
6379 we expect to load the address into a register. */
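/* For example, for TLS_MODEL_LOCAL_EXEC with GNU TLS the result is
   (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF)));
   without GNU TLS the @TPOFF offset is instead subtracted from the
   thread pointer in a fresh register.  */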
6381 static rtx
6382 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6384 rtx dest, base, off, pic;
6385 int type;
6387 switch (model)
6389 case TLS_MODEL_GLOBAL_DYNAMIC:
6390 dest = gen_reg_rtx (Pmode);
6391 if (TARGET_64BIT)
6393 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6395 start_sequence ();
6396 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6397 insns = get_insns ();
6398 end_sequence ();
6400 emit_libcall_block (insns, dest, rax, x);
6402 else
6403 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6404 break;
6406 case TLS_MODEL_LOCAL_DYNAMIC:
6407 base = gen_reg_rtx (Pmode);
6408 if (TARGET_64BIT)
6410 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6412 start_sequence ();
6413 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6414 insns = get_insns ();
6415 end_sequence ();
6417 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6418 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6419 emit_libcall_block (insns, base, rax, note);
6421 else
6422 emit_insn (gen_tls_local_dynamic_base_32 (base));
6424 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6425 off = gen_rtx_CONST (Pmode, off);
6427 return gen_rtx_PLUS (Pmode, base, off);
6429 case TLS_MODEL_INITIAL_EXEC:
6430 if (TARGET_64BIT)
6432 pic = NULL;
6433 type = UNSPEC_GOTNTPOFF;
6435 else if (flag_pic)
6437 if (reload_in_progress)
6438 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6439 pic = pic_offset_table_rtx;
6440 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6442 else if (!TARGET_GNU_TLS)
6444 pic = gen_reg_rtx (Pmode);
6445 emit_insn (gen_set_got (pic));
6446 type = UNSPEC_GOTTPOFF;
6448 else
6450 pic = NULL;
6451 type = UNSPEC_INDNTPOFF;
6454 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6455 off = gen_rtx_CONST (Pmode, off);
6456 if (pic)
6457 off = gen_rtx_PLUS (Pmode, pic, off);
6458 off = gen_rtx_MEM (Pmode, off);
6459 RTX_UNCHANGING_P (off) = 1;
6460 set_mem_alias_set (off, ix86_GOT_alias_set ());
6462 if (TARGET_64BIT || TARGET_GNU_TLS)
6464 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6465 off = force_reg (Pmode, off);
6466 return gen_rtx_PLUS (Pmode, base, off);
6468 else
6470 base = get_thread_pointer (true);
6471 dest = gen_reg_rtx (Pmode);
6472 emit_insn (gen_subsi3 (dest, base, off));
6474 break;
6476 case TLS_MODEL_LOCAL_EXEC:
6477 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6478 (TARGET_64BIT || TARGET_GNU_TLS)
6479 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6480 off = gen_rtx_CONST (Pmode, off);
6482 if (TARGET_64BIT || TARGET_GNU_TLS)
6484 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6485 return gen_rtx_PLUS (Pmode, base, off);
6487 else
6489 base = get_thread_pointer (true);
6490 dest = gen_reg_rtx (Pmode);
6491 emit_insn (gen_subsi3 (dest, base, off));
6493 break;
6495 default:
6496 abort ();
6499 return dest;
6502 /* Try machine-dependent ways of modifying an illegitimate address
6503 to be legitimate. If we find one, return the new, valid address.
6504 This macro is used in only one place: `memory_address' in explow.c.
6506 OLDX is the address as it was before break_out_memory_refs was called.
6507 In some cases it is useful to look at this to decide what needs to be done.
6509 MODE and WIN are passed so that this macro can use
6510 GO_IF_LEGITIMATE_ADDRESS.
6512 It is always safe for this macro to do nothing. It exists to recognize
6513 opportunities to optimize the output.
6515 For the 80386, we handle X+REG by loading X into a register R and
6516 using R+REG. R will go in a general reg and indexing will be used.
6517 However, if REG is a broken-out memory address or multiplication,
6518 nothing needs to be done because REG can certainly go in a general reg.
6520 When -fpic is used, special handling is needed for symbolic references.
6521 See comments by legitimize_pic_address in i386.c for details. */
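/* For example, (plus (reg) (ashift (reg) (const_int 2))) is rewritten
   here into (plus (mult (reg) (const_int 4)) (reg)) (the shifted register
   may first be copied into a fresh pseudo), the canonical form expected
   downstream.  */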
6524 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6526 int changed = 0;
6527 unsigned log;
6529 if (TARGET_DEBUG_ADDR)
6531 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6532 GET_MODE_NAME (mode));
6533 debug_rtx (x);
6536 log = tls_symbolic_operand (x, mode);
6537 if (log)
6538 return legitimize_tls_address (x, log, false);
6540 if (flag_pic && SYMBOLIC_CONST (x))
6541 return legitimize_pic_address (x, 0);
6543 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6544 if (GET_CODE (x) == ASHIFT
6545 && GET_CODE (XEXP (x, 1)) == CONST_INT
6546 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6548 changed = 1;
6549 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6550 GEN_INT (1 << log));
6553 if (GET_CODE (x) == PLUS)
6555 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6557 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6558 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6559 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6561 changed = 1;
6562 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6563 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6564 GEN_INT (1 << log));
6567 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6568 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6569 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6571 changed = 1;
6572 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6573 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6574 GEN_INT (1 << log));
6577 /* Put multiply first if it isn't already. */
6578 if (GET_CODE (XEXP (x, 1)) == MULT)
6580 rtx tmp = XEXP (x, 0);
6581 XEXP (x, 0) = XEXP (x, 1);
6582 XEXP (x, 1) = tmp;
6583 changed = 1;
6586 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6587 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6588 created by virtual register instantiation, register elimination, and
6589 similar optimizations. */
6590 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6592 changed = 1;
6593 x = gen_rtx_PLUS (Pmode,
6594 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6595 XEXP (XEXP (x, 1), 0)),
6596 XEXP (XEXP (x, 1), 1));
6599 /* Canonicalize
6600 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6601 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6602 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6603 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6604 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6605 && CONSTANT_P (XEXP (x, 1)))
6607 rtx constant;
6608 rtx other = NULL_RTX;
6610 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6612 constant = XEXP (x, 1);
6613 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6615 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6617 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6618 other = XEXP (x, 1);
6620 else
6621 constant = 0;
6623 if (constant)
6625 changed = 1;
6626 x = gen_rtx_PLUS (Pmode,
6627 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6628 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6629 plus_constant (other, INTVAL (constant)));
6633 if (changed && legitimate_address_p (mode, x, FALSE))
6634 return x;
6636 if (GET_CODE (XEXP (x, 0)) == MULT)
6638 changed = 1;
6639 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6642 if (GET_CODE (XEXP (x, 1)) == MULT)
6644 changed = 1;
6645 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6648 if (changed
6649 && GET_CODE (XEXP (x, 1)) == REG
6650 && GET_CODE (XEXP (x, 0)) == REG)
6651 return x;
6653 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6655 changed = 1;
6656 x = legitimize_pic_address (x, 0);
6659 if (changed && legitimate_address_p (mode, x, FALSE))
6660 return x;
6662 if (GET_CODE (XEXP (x, 0)) == REG)
6664 rtx temp = gen_reg_rtx (Pmode);
6665 rtx val = force_operand (XEXP (x, 1), temp);
6666 if (val != temp)
6667 emit_move_insn (temp, val);
6669 XEXP (x, 1) = temp;
6670 return x;
6673 else if (GET_CODE (XEXP (x, 1)) == REG)
6675 rtx temp = gen_reg_rtx (Pmode);
6676 rtx val = force_operand (XEXP (x, 0), temp);
6677 if (val != temp)
6678 emit_move_insn (temp, val);
6680 XEXP (x, 0) = temp;
6681 return x;
6685 return x;
6688 /* Print an integer constant expression in assembler syntax. Addition
6689 and subtraction are the only arithmetic that may appear in these
6690 expressions. FILE is the stdio stream to write to, X is the rtx, and
6691 CODE is the operand print code from the output string. */
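/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF", and an UNSPEC_GOTPCREL reference is printed
   as "foo@GOTPCREL(%rip)".  */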
6693 static void
6694 output_pic_addr_const (FILE *file, rtx x, int code)
6696 char buf[256];
6698 switch (GET_CODE (x))
6700 case PC:
6701 if (flag_pic)
6702 putc ('.', file);
6703 else
6704 abort ();
6705 break;
6707 case SYMBOL_REF:
6708 assemble_name (file, XSTR (x, 0));
6709 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6710 fputs ("@PLT", file);
6711 break;
6713 case LABEL_REF:
6714 x = XEXP (x, 0);
6715 /* Fall through. */
6716 case CODE_LABEL:
6717 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6718 assemble_name (asm_out_file, buf);
6719 break;
6721 case CONST_INT:
6722 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6723 break;
6725 case CONST:
6726 /* This used to output parentheses around the expression,
6727 but that does not work on the 386 (either ATT or BSD assembler). */
6728 output_pic_addr_const (file, XEXP (x, 0), code);
6729 break;
6731 case CONST_DOUBLE:
6732 if (GET_MODE (x) == VOIDmode)
6734 /* We can use %d if the number is <32 bits and positive. */
6735 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6736 fprintf (file, "0x%lx%08lx",
6737 (unsigned long) CONST_DOUBLE_HIGH (x),
6738 (unsigned long) CONST_DOUBLE_LOW (x));
6739 else
6740 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6742 else
6743 /* We can't handle floating point constants;
6744 PRINT_OPERAND must handle them. */
6745 output_operand_lossage ("floating constant misused");
6746 break;
6748 case PLUS:
6749 /* Some assemblers need integer constants to appear first. */
6750 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6752 output_pic_addr_const (file, XEXP (x, 0), code);
6753 putc ('+', file);
6754 output_pic_addr_const (file, XEXP (x, 1), code);
6756 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6758 output_pic_addr_const (file, XEXP (x, 1), code);
6759 putc ('+', file);
6760 output_pic_addr_const (file, XEXP (x, 0), code);
6762 else
6763 abort ();
6764 break;
6766 case MINUS:
6767 if (!TARGET_MACHO)
6768 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6769 output_pic_addr_const (file, XEXP (x, 0), code);
6770 putc ('-', file);
6771 output_pic_addr_const (file, XEXP (x, 1), code);
6772 if (!TARGET_MACHO)
6773 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6774 break;
6776 case UNSPEC:
6777 if (XVECLEN (x, 0) != 1)
6778 abort ();
6779 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6780 switch (XINT (x, 1))
6782 case UNSPEC_GOT:
6783 fputs ("@GOT", file);
6784 break;
6785 case UNSPEC_GOTOFF:
6786 fputs ("@GOTOFF", file);
6787 break;
6788 case UNSPEC_GOTPCREL:
6789 fputs ("@GOTPCREL(%rip)", file);
6790 break;
6791 case UNSPEC_GOTTPOFF:
6792 /* FIXME: This might be @TPOFF in Sun ld too. */
6793 fputs ("@GOTTPOFF", file);
6794 break;
6795 case UNSPEC_TPOFF:
6796 fputs ("@TPOFF", file);
6797 break;
6798 case UNSPEC_NTPOFF:
6799 if (TARGET_64BIT)
6800 fputs ("@TPOFF", file);
6801 else
6802 fputs ("@NTPOFF", file);
6803 break;
6804 case UNSPEC_DTPOFF:
6805 fputs ("@DTPOFF", file);
6806 break;
6807 case UNSPEC_GOTNTPOFF:
6808 if (TARGET_64BIT)
6809 fputs ("@GOTTPOFF(%rip)", file);
6810 else
6811 fputs ("@GOTNTPOFF", file);
6812 break;
6813 case UNSPEC_INDNTPOFF:
6814 fputs ("@INDNTPOFF", file);
6815 break;
6816 default:
6817 output_operand_lossage ("invalid UNSPEC as operand");
6818 break;
6820 break;
6822 default:
6823 output_operand_lossage ("invalid expression as operand");
6827 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6828 We need to handle our special PIC relocations. */
6830 void
6831 i386_dwarf_output_addr_const (FILE *file, rtx x)
6833 #ifdef ASM_QUAD
6834 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6835 #else
6836 if (TARGET_64BIT)
6837 abort ();
6838 fprintf (file, "%s", ASM_LONG);
6839 #endif
6840 if (flag_pic)
6841 output_pic_addr_const (file, x, '\0');
6842 else
6843 output_addr_const (file, x);
6844 fputc ('\n', file);
6847 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6848 We need to emit DTP-relative relocations. */
6850 void
6851 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6853 fputs (ASM_LONG, file);
6854 output_addr_const (file, x);
6855 fputs ("@DTPOFF", file);
6856 switch (size)
6858 case 4:
6859 break;
6860 case 8:
6861 fputs (", 0", file);
6862 break;
6863 default:
6864 abort ();
6868 /* In the name of slightly smaller debug output, and to cater to
6869 general assembler lossage, recognize PIC+GOTOFF and turn it back
6870 into a direct symbol reference. */
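/* For example, (plus pic_offset_table_rtx
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))) is turned back into
   the plain SYMBOL_REF "foo" (when ORIG_X is not a MEM); an @GOT reference
   is only delegitimized when ORIG_X is the containing MEM.  */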
6872 static rtx
6873 ix86_delegitimize_address (rtx orig_x)
6875 rtx x = orig_x, y;
6877 if (GET_CODE (x) == MEM)
6878 x = XEXP (x, 0);
6880 if (TARGET_64BIT)
6882 if (GET_CODE (x) != CONST
6883 || GET_CODE (XEXP (x, 0)) != UNSPEC
6884 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6885 || GET_CODE (orig_x) != MEM)
6886 return orig_x;
6887 return XVECEXP (XEXP (x, 0), 0, 0);
6890 if (GET_CODE (x) != PLUS
6891 || GET_CODE (XEXP (x, 1)) != CONST)
6892 return orig_x;
6894 if (GET_CODE (XEXP (x, 0)) == REG
6895 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6896 /* %ebx + GOT/GOTOFF */
6897 y = NULL;
6898 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6900 /* %ebx + %reg * scale + GOT/GOTOFF */
6901 y = XEXP (x, 0);
6902 if (GET_CODE (XEXP (y, 0)) == REG
6903 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6904 y = XEXP (y, 1);
6905 else if (GET_CODE (XEXP (y, 1)) == REG
6906 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6907 y = XEXP (y, 0);
6908 else
6909 return orig_x;
6910 if (GET_CODE (y) != REG
6911 && GET_CODE (y) != MULT
6912 && GET_CODE (y) != ASHIFT)
6913 return orig_x;
6915 else
6916 return orig_x;
6918 x = XEXP (XEXP (x, 1), 0);
6919 if (GET_CODE (x) == UNSPEC
6920 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6921 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6923 if (y)
6924 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6925 return XVECEXP (x, 0, 0);
6928 if (GET_CODE (x) == PLUS
6929 && GET_CODE (XEXP (x, 0)) == UNSPEC
6930 && GET_CODE (XEXP (x, 1)) == CONST_INT
6931 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6932 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6933 && GET_CODE (orig_x) != MEM)))
6935 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6936 if (y)
6937 return gen_rtx_PLUS (Pmode, y, x);
6938 return x;
6941 return orig_x;
6944 static void
6945 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6946 int fp, FILE *file)
6948 const char *suffix;
6950 if (mode == CCFPmode || mode == CCFPUmode)
6952 enum rtx_code second_code, bypass_code;
6953 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6954 if (bypass_code != NIL || second_code != NIL)
6955 abort ();
6956 code = ix86_fp_compare_code_to_integer (code);
6957 mode = CCmode;
6959 if (reverse)
6960 code = reverse_condition (code);
6962 switch (code)
6964 case EQ:
6965 suffix = "e";
6966 break;
6967 case NE:
6968 suffix = "ne";
6969 break;
6970 case GT:
6971 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6972 abort ();
6973 suffix = "g";
6974 break;
6975 case GTU:
6976 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6977 Those same assemblers have the same but opposite lossage on cmov. */
6978 if (mode != CCmode)
6979 abort ();
6980 suffix = fp ? "nbe" : "a";
6981 break;
6982 case LT:
6983 if (mode == CCNOmode || mode == CCGOCmode)
6984 suffix = "s";
6985 else if (mode == CCmode || mode == CCGCmode)
6986 suffix = "l";
6987 else
6988 abort ();
6989 break;
6990 case LTU:
6991 if (mode != CCmode)
6992 abort ();
6993 suffix = "b";
6994 break;
6995 case GE:
6996 if (mode == CCNOmode || mode == CCGOCmode)
6997 suffix = "ns";
6998 else if (mode == CCmode || mode == CCGCmode)
6999 suffix = "ge";
7000 else
7001 abort ();
7002 break;
7003 case GEU:
7004 /* ??? As above. */
7005 if (mode != CCmode)
7006 abort ();
7007 suffix = fp ? "nb" : "ae";
7008 break;
7009 case LE:
7010 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7011 abort ();
7012 suffix = "le";
7013 break;
7014 case LEU:
7015 if (mode != CCmode)
7016 abort ();
7017 suffix = "be";
7018 break;
7019 case UNORDERED:
7020 suffix = fp ? "u" : "p";
7021 break;
7022 case ORDERED:
7023 suffix = fp ? "nu" : "np";
7024 break;
7025 default:
7026 abort ();
7028 fputs (suffix, file);
7031 /* Print the name of register X to FILE based on its machine mode and number.
7032 If CODE is 'w', pretend the mode is HImode.
7033 If CODE is 'b', pretend the mode is QImode.
7034 If CODE is 'k', pretend the mode is SImode.
7035 If CODE is 'q', pretend the mode is DImode.
7036 If CODE is 'h', pretend the reg is the `high' byte register.
7037 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
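/* For example, for hard register 0 this prints "%al" for code 'b',
   "%ax" for 'w', "%eax" for 'k' and "%ah" for 'h' (in AT&T syntax);
   the REX registers come out as "r8"..."r15", with "b"/"w"/"d" suffixes
   for the narrower sizes.  */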
7039 void
7040 print_reg (rtx x, int code, FILE *file)
7042 if (REGNO (x) == ARG_POINTER_REGNUM
7043 || REGNO (x) == FRAME_POINTER_REGNUM
7044 || REGNO (x) == FLAGS_REG
7045 || REGNO (x) == FPSR_REG)
7046 abort ();
7048 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7049 putc ('%', file);
7051 if (code == 'w' || MMX_REG_P (x))
7052 code = 2;
7053 else if (code == 'b')
7054 code = 1;
7055 else if (code == 'k')
7056 code = 4;
7057 else if (code == 'q')
7058 code = 8;
7059 else if (code == 'y')
7060 code = 3;
7061 else if (code == 'h')
7062 code = 0;
7063 else
7064 code = GET_MODE_SIZE (GET_MODE (x));
7066 /* Irritatingly, the AMD extended registers use a different naming convention
7067 from the normal registers. */
7068 if (REX_INT_REG_P (x))
7070 if (!TARGET_64BIT)
7071 abort ();
7072 switch (code)
7074 case 0:
7075 error ("extended registers have no high halves");
7076 break;
7077 case 1:
7078 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7079 break;
7080 case 2:
7081 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7082 break;
7083 case 4:
7084 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7085 break;
7086 case 8:
7087 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7088 break;
7089 default:
7090 error ("unsupported operand size for extended register");
7091 break;
7093 return;
7095 switch (code)
7097 case 3:
7098 if (STACK_TOP_P (x))
7100 fputs ("st(0)", file);
7101 break;
7103 /* Fall through. */
7104 case 8:
7105 case 4:
7106 case 12:
7107 if (! ANY_FP_REG_P (x))
7108 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7109 /* Fall through. */
7110 case 16:
7111 case 2:
7112 normal:
7113 fputs (hi_reg_name[REGNO (x)], file);
7114 break;
7115 case 1:
7116 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7117 goto normal;
7118 fputs (qi_reg_name[REGNO (x)], file);
7119 break;
7120 case 0:
7121 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7122 goto normal;
7123 fputs (qi_high_reg_name[REGNO (x)], file);
7124 break;
7125 default:
7126 abort ();
7130 /* Locate some local-dynamic symbol still in use by this function
7131 so that we can print its name in some tls_local_dynamic_base
7132 pattern. */
7134 static const char *
7135 get_some_local_dynamic_name (void)
7137 rtx insn;
7139 if (cfun->machine->some_ld_name)
7140 return cfun->machine->some_ld_name;
7142 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7143 if (INSN_P (insn)
7144 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7145 return cfun->machine->some_ld_name;
7147 abort ();
7150 static int
7151 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7153 rtx x = *px;
7155 if (GET_CODE (x) == SYMBOL_REF
7156 && local_dynamic_symbolic_operand (x, Pmode))
7158 cfun->machine->some_ld_name = XSTR (x, 0);
7159 return 1;
7162 return 0;
7165 /* Meaning of CODE:
7166 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7167 C -- print opcode suffix for set/cmov insn.
7168 c -- like C, but print reversed condition
7169 F,f -- likewise, but for floating-point.
7170 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7171 otherwise nothing
7172 R -- print the prefix for register names.
7173 z -- print the opcode suffix for the size of the current operand.
7174 * -- print a star (in certain assembler syntax)
7175 A -- print an absolute memory reference.
7176 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7177 s -- print a shift double count, followed by the assembler's argument
7178 delimiter.
7179 b -- print the QImode name of the register for the indicated operand.
7180 %b0 would print %al if operands[0] is reg 0.
7181 w -- likewise, print the HImode name of the register.
7182 k -- likewise, print the SImode name of the register.
7183 q -- likewise, print the DImode name of the register.
7184 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7185 y -- print "st(0)" instead of "st" as a register.
7186 D -- print condition for SSE cmp instruction.
7187 P -- if PIC, print an @PLT suffix.
7188 X -- don't print any sort of PIC '@' suffix for a symbol.
7189 & -- print some in-use local-dynamic symbol name.
7192 void
7193 print_operand (FILE *file, rtx x, int code)
7195 if (code)
7197 switch (code)
7199 case '*':
7200 if (ASSEMBLER_DIALECT == ASM_ATT)
7201 putc ('*', file);
7202 return;
7204 case '&':
7205 assemble_name (file, get_some_local_dynamic_name ());
7206 return;
7208 case 'A':
7209 if (ASSEMBLER_DIALECT == ASM_ATT)
7210 putc ('*', file);
7211 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7213 /* Intel syntax. For absolute addresses, registers should not
7214 be surrounded by braces. */
7215 if (GET_CODE (x) != REG)
7217 putc ('[', file);
7218 PRINT_OPERAND (file, x, 0);
7219 putc (']', file);
7220 return;
7223 else
7224 abort ();
7226 PRINT_OPERAND (file, x, 0);
7227 return;
7230 case 'L':
7231 if (ASSEMBLER_DIALECT == ASM_ATT)
7232 putc ('l', file);
7233 return;
7235 case 'W':
7236 if (ASSEMBLER_DIALECT == ASM_ATT)
7237 putc ('w', file);
7238 return;
7240 case 'B':
7241 if (ASSEMBLER_DIALECT == ASM_ATT)
7242 putc ('b', file);
7243 return;
7245 case 'Q':
7246 if (ASSEMBLER_DIALECT == ASM_ATT)
7247 putc ('l', file);
7248 return;
7250 case 'S':
7251 if (ASSEMBLER_DIALECT == ASM_ATT)
7252 putc ('s', file);
7253 return;
7255 case 'T':
7256 if (ASSEMBLER_DIALECT == ASM_ATT)
7257 putc ('t', file);
7258 return;
7260 case 'z':
7261 /* 387 opcodes don't get size suffixes if the operands are
7262 registers. */
7263 if (STACK_REG_P (x))
7264 return;
7266 /* Likewise if using Intel opcodes. */
7267 if (ASSEMBLER_DIALECT == ASM_INTEL)
7268 return;
7270 /* Derive the opcode suffix from the size of the operand. */
7271 switch (GET_MODE_SIZE (GET_MODE (x)))
7273 case 2:
7274 #ifdef HAVE_GAS_FILDS_FISTS
7275 putc ('s', file);
7276 #endif
7277 return;
7279 case 4:
7280 if (GET_MODE (x) == SFmode)
7282 putc ('s', file);
7283 return;
7285 else
7286 putc ('l', file);
7287 return;
7289 case 12:
7290 case 16:
7291 putc ('t', file);
7292 return;
7294 case 8:
7295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7297 #ifdef GAS_MNEMONICS
7298 putc ('q', file);
7299 #else
7300 putc ('l', file);
7301 putc ('l', file);
7302 #endif
7304 else
7305 putc ('l', file);
7306 return;
7308 default:
7309 abort ();
7312 case 'b':
7313 case 'w':
7314 case 'k':
7315 case 'q':
7316 case 'h':
7317 case 'y':
7318 case 'X':
7319 case 'P':
7320 break;
7322 case 's':
7323 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7325 PRINT_OPERAND (file, x, 0);
7326 putc (',', file);
7328 return;
7330 case 'D':
7331 /* A little bit of brain damage here. The SSE compare instructions
7332 use completely different names for the comparisons than the
7333 fp conditional moves do. */
7334 switch (GET_CODE (x))
7336 case EQ:
7337 case UNEQ:
7338 fputs ("eq", file);
7339 break;
7340 case LT:
7341 case UNLT:
7342 fputs ("lt", file);
7343 break;
7344 case LE:
7345 case UNLE:
7346 fputs ("le", file);
7347 break;
7348 case UNORDERED:
7349 fputs ("unord", file);
7350 break;
7351 case NE:
7352 case LTGT:
7353 fputs ("neq", file);
7354 break;
7355 case UNGE:
7356 case GE:
7357 fputs ("nlt", file);
7358 break;
7359 case UNGT:
7360 case GT:
7361 fputs ("nle", file);
7362 break;
7363 case ORDERED:
7364 fputs ("ord", file);
7365 break;
7366 default:
7367 abort ();
7368 break;
7370 return;
7371 case 'O':
7372 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7373 if (ASSEMBLER_DIALECT == ASM_ATT)
7375 switch (GET_MODE (x))
7377 case HImode: putc ('w', file); break;
7378 case SImode:
7379 case SFmode: putc ('l', file); break;
7380 case DImode:
7381 case DFmode: putc ('q', file); break;
7382 default: abort ();
7384 putc ('.', file);
7386 #endif
7387 return;
7388 case 'C':
7389 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7390 return;
7391 case 'F':
7392 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7393 if (ASSEMBLER_DIALECT == ASM_ATT)
7394 putc ('.', file);
7395 #endif
7396 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7397 return;
7399 /* Like above, but reverse condition */
7400 case 'c':
7401 /* Check to see if argument to %c is really a constant
7402 and not a condition code which needs to be reversed. */
7403 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7405 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7406 return;
7408 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7409 return;
7410 case 'f':
7411 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7412 if (ASSEMBLER_DIALECT == ASM_ATT)
7413 putc ('.', file);
7414 #endif
7415 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7416 return;
7417 case '+':
7419 rtx x;
7421 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7422 return;
7424 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7425 if (x)
7427 int pred_val = INTVAL (XEXP (x, 0));
7429 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7430 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7432 int taken = pred_val > REG_BR_PROB_BASE / 2;
7433 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7435 /* Emit hints only in the case where the default branch prediction
7436 heuristics would fail. */
7437 if (taken != cputaken)
7439 /* We use 3e (DS) prefix for taken branches and
7440 2e (CS) prefix for not taken branches. */
7441 if (taken)
7442 fputs ("ds ; ", file);
7443 else
7444 fputs ("cs ; ", file);
7448 return;
7450 default:
7451 output_operand_lossage ("invalid operand code `%c'", code);
7455 if (GET_CODE (x) == REG)
7456 print_reg (x, code, file);
7458 else if (GET_CODE (x) == MEM)
7460 /* No `byte ptr' prefix for call instructions. */
7461 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7463 const char * size;
7464 switch (GET_MODE_SIZE (GET_MODE (x)))
7466 case 1: size = "BYTE"; break;
7467 case 2: size = "WORD"; break;
7468 case 4: size = "DWORD"; break;
7469 case 8: size = "QWORD"; break;
7470 case 12: size = "XWORD"; break;
7471 case 16: size = "XMMWORD"; break;
7472 default:
7473 abort ();
7476 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7477 if (code == 'b')
7478 size = "BYTE";
7479 else if (code == 'w')
7480 size = "WORD";
7481 else if (code == 'k')
7482 size = "DWORD";
7484 fputs (size, file);
7485 fputs (" PTR ", file);
7488 x = XEXP (x, 0);
7489 /* Avoid (%rip) for call operands. */
7490 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7491 && GET_CODE (x) != CONST_INT)
7492 output_addr_const (file, x);
7493 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7494 output_operand_lossage ("invalid constraints for operand");
7495 else
7496 output_address (x);
7499 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7501 REAL_VALUE_TYPE r;
7502 long l;
7504 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7505 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7507 if (ASSEMBLER_DIALECT == ASM_ATT)
7508 putc ('$', file);
7509 fprintf (file, "0x%08lx", l);
7512 /* These float cases don't actually occur as immediate operands. */
7513 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7515 char dstr[30];
7517 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7518 fprintf (file, "%s", dstr);
7521 else if (GET_CODE (x) == CONST_DOUBLE
7522 && GET_MODE (x) == XFmode)
7524 char dstr[30];
7526 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7527 fprintf (file, "%s", dstr);
7530 else
7532 if (code != 'P')
7534 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7536 if (ASSEMBLER_DIALECT == ASM_ATT)
7537 putc ('$', file);
7539 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7540 || GET_CODE (x) == LABEL_REF)
7542 if (ASSEMBLER_DIALECT == ASM_ATT)
7543 putc ('$', file);
7544 else
7545 fputs ("OFFSET FLAT:", file);
7548 if (GET_CODE (x) == CONST_INT)
7549 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7550 else if (flag_pic)
7551 output_pic_addr_const (file, x, code);
7552 else
7553 output_addr_const (file, x);
7557 /* Print a memory operand whose address is ADDR. */
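/* For example, base %ebx, index %eax, scale 4 and displacement 8 print
   as "8(%ebx,%eax,4)" in AT&T syntax and (roughly, modulo the register
   prefix) as "[ebx+8+eax*4]" in Intel syntax.  */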
7559 void
7560 print_operand_address (FILE *file, rtx addr)
7562 struct ix86_address parts;
7563 rtx base, index, disp;
7564 int scale;
7566 if (! ix86_decompose_address (addr, &parts))
7567 abort ();
7569 base = parts.base;
7570 index = parts.index;
7571 disp = parts.disp;
7572 scale = parts.scale;
7574 switch (parts.seg)
7576 case SEG_DEFAULT:
7577 break;
7578 case SEG_FS:
7579 case SEG_GS:
7580 if (USER_LABEL_PREFIX[0] == 0)
7581 putc ('%', file);
7582 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7583 break;
7584 default:
7585 abort ();
7588 if (!base && !index)
7590 /* A displacement-only address requires special attention. */
7592 if (GET_CODE (disp) == CONST_INT)
7594 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7596 if (USER_LABEL_PREFIX[0] == 0)
7597 putc ('%', file);
7598 fputs ("ds:", file);
7600 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7602 else if (flag_pic)
7603 output_pic_addr_const (file, disp, 0);
7604 else
7605 output_addr_const (file, disp);
7607 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
7608 if (TARGET_64BIT
7609 && ((GET_CODE (disp) == SYMBOL_REF
7610 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7611 || GET_CODE (disp) == LABEL_REF
7612 || (GET_CODE (disp) == CONST
7613 && GET_CODE (XEXP (disp, 0)) == PLUS
7614 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7615 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7616 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7617 fputs ("(%rip)", file);
7619 else
7621 if (ASSEMBLER_DIALECT == ASM_ATT)
7623 if (disp)
7625 if (flag_pic)
7626 output_pic_addr_const (file, disp, 0);
7627 else if (GET_CODE (disp) == LABEL_REF)
7628 output_asm_label (disp);
7629 else
7630 output_addr_const (file, disp);
7633 putc ('(', file);
7634 if (base)
7635 print_reg (base, 0, file);
7636 if (index)
7638 putc (',', file);
7639 print_reg (index, 0, file);
7640 if (scale != 1)
7641 fprintf (file, ",%d", scale);
7643 putc (')', file);
7645 else
7647 rtx offset = NULL_RTX;
7649 if (disp)
7651 /* Pull out the offset of a symbol; print any symbol itself. */
7652 if (GET_CODE (disp) == CONST
7653 && GET_CODE (XEXP (disp, 0)) == PLUS
7654 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7656 offset = XEXP (XEXP (disp, 0), 1);
7657 disp = gen_rtx_CONST (VOIDmode,
7658 XEXP (XEXP (disp, 0), 0));
7661 if (flag_pic)
7662 output_pic_addr_const (file, disp, 0);
7663 else if (GET_CODE (disp) == LABEL_REF)
7664 output_asm_label (disp);
7665 else if (GET_CODE (disp) == CONST_INT)
7666 offset = disp;
7667 else
7668 output_addr_const (file, disp);
7671 putc ('[', file);
7672 if (base)
7674 print_reg (base, 0, file);
7675 if (offset)
7677 if (INTVAL (offset) >= 0)
7678 putc ('+', file);
7679 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7682 else if (offset)
7683 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7684 else
7685 putc ('0', file);
7687 if (index)
7689 putc ('+', file);
7690 print_reg (index, 0, file);
7691 if (scale != 1)
7692 fprintf (file, "*%d", scale);
7694 putc (']', file);
7699 bool
7700 output_addr_const_extra (FILE *file, rtx x)
7702 rtx op;
7704 if (GET_CODE (x) != UNSPEC)
7705 return false;
7707 op = XVECEXP (x, 0, 0);
7708 switch (XINT (x, 1))
7710 case UNSPEC_GOTTPOFF:
7711 output_addr_const (file, op);
7712 /* FIXME: This might be @TPOFF in Sun ld. */
7713 fputs ("@GOTTPOFF", file);
7714 break;
7715 case UNSPEC_TPOFF:
7716 output_addr_const (file, op);
7717 fputs ("@TPOFF", file);
7718 break;
7719 case UNSPEC_NTPOFF:
7720 output_addr_const (file, op);
7721 if (TARGET_64BIT)
7722 fputs ("@TPOFF", file);
7723 else
7724 fputs ("@NTPOFF", file);
7725 break;
7726 case UNSPEC_DTPOFF:
7727 output_addr_const (file, op);
7728 fputs ("@DTPOFF", file);
7729 break;
7730 case UNSPEC_GOTNTPOFF:
7731 output_addr_const (file, op);
7732 if (TARGET_64BIT)
7733 fputs ("@GOTTPOFF(%rip)", file);
7734 else
7735 fputs ("@GOTNTPOFF", file);
7736 break;
7737 case UNSPEC_INDNTPOFF:
7738 output_addr_const (file, op);
7739 fputs ("@INDNTPOFF", file);
7740 break;
7742 default:
7743 return false;
7746 return true;
7749 /* Split one or more DImode RTL references into pairs of SImode
7750 references. The RTL can be REG, offsettable MEM, integer constant, or
7751 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7752 split and "num" is its length. lo_half and hi_half are output arrays
7753 that parallel "operands". */
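/* For example, a (mem:DI addr) operand yields lo_half = (mem:SI addr)
   and hi_half = (mem:SI addr+4), while registers and constants are split
   with simplify_gen_subreg at byte offsets 0 and 4.  */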
7755 void
7756 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7758 while (num--)
7760 rtx op = operands[num];
7762 /* simplify_subreg refuses to split volatile memory addresses,
7763 but we still have to handle them. */
7764 if (GET_CODE (op) == MEM)
7766 lo_half[num] = adjust_address (op, SImode, 0);
7767 hi_half[num] = adjust_address (op, SImode, 4);
7769 else
7771 lo_half[num] = simplify_gen_subreg (SImode, op,
7772 GET_MODE (op) == VOIDmode
7773 ? DImode : GET_MODE (op), 0);
7774 hi_half[num] = simplify_gen_subreg (SImode, op,
7775 GET_MODE (op) == VOIDmode
7776 ? DImode : GET_MODE (op), 4);
7780 /* Split one or more TImode RTL references into pairs of SImode
7781 references. The RTL can be REG, offsettable MEM, integer constant, or
7783 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7783 split and "num" is its length. lo_half and hi_half are output arrays
7784 that parallel "operands". */
7786 void
7787 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7789 while (num--)
7791 rtx op = operands[num];
7793 /* simplify_subreg refuses to split volatile memory addresses, but we
7794 still have to handle them. */
7795 if (GET_CODE (op) == MEM)
7797 lo_half[num] = adjust_address (op, DImode, 0);
7798 hi_half[num] = adjust_address (op, DImode, 8);
7800 else
7802 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7803 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7808 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7809 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7810 is the expression of the binary operation. The output may either be
7811 emitted here, or returned to the caller, like all output_* functions.
7813 There is no guarantee that the operands are the same mode, as they
7814 might be within FLOAT or FLOAT_EXTEND expressions. */
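/* For example, an SSE SFmode addition returns the template
   "addss\t{%2, %0|%0, %2}", while a 387 addition whose operands[2] is a
   memory operand returns "fadd%z2\t%2" (or "fiadd%z2\t%2" for an integer
   memory operand).  */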
7816 #ifndef SYSV386_COMPAT
7817 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7818 wants to fix the assemblers because that causes incompatibility
7819 with gcc. No-one wants to fix gcc because that causes
7820 incompatibility with assemblers... You can use the option of
7821 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7822 #define SYSV386_COMPAT 1
7823 #endif
7825 const char *
7826 output_387_binary_op (rtx insn, rtx *operands)
7828 static char buf[30];
7829 const char *p;
7830 const char *ssep;
7831 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7833 #ifdef ENABLE_CHECKING
7834 /* Even if we do not want to check the inputs, this documents the input
7835 constraints, which helps in understanding the following code. */
7836 if (STACK_REG_P (operands[0])
7837 && ((REG_P (operands[1])
7838 && REGNO (operands[0]) == REGNO (operands[1])
7839 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7840 || (REG_P (operands[2])
7841 && REGNO (operands[0]) == REGNO (operands[2])
7842 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7843 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7844 ; /* ok */
7845 else if (!is_sse)
7846 abort ();
7847 #endif
7849 switch (GET_CODE (operands[3]))
7851 case PLUS:
7852 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7853 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7854 p = "fiadd";
7855 else
7856 p = "fadd";
7857 ssep = "add";
7858 break;
7860 case MINUS:
7861 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7862 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7863 p = "fisub";
7864 else
7865 p = "fsub";
7866 ssep = "sub";
7867 break;
7869 case MULT:
7870 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7871 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7872 p = "fimul";
7873 else
7874 p = "fmul";
7875 ssep = "mul";
7876 break;
7878 case DIV:
7879 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7880 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7881 p = "fidiv";
7882 else
7883 p = "fdiv";
7884 ssep = "div";
7885 break;
7887 default:
7888 abort ();
7891 if (is_sse)
7893 strcpy (buf, ssep);
7894 if (GET_MODE (operands[0]) == SFmode)
7895 strcat (buf, "ss\t{%2, %0|%0, %2}");
7896 else
7897 strcat (buf, "sd\t{%2, %0|%0, %2}");
7898 return buf;
7900 strcpy (buf, p);
7902 switch (GET_CODE (operands[3]))
7904 case MULT:
7905 case PLUS:
7906 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7908 rtx temp = operands[2];
7909 operands[2] = operands[1];
7910 operands[1] = temp;
7913 /* We know operands[0] == operands[1]. */
7915 if (GET_CODE (operands[2]) == MEM)
7917 p = "%z2\t%2";
7918 break;
7921 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7923 if (STACK_TOP_P (operands[0]))
7924 /* How is it that we are storing to a dead operand[2]?
7925 Well, presumably operands[1] is dead too. We can't
7926 store the result to st(0) as st(0) gets popped on this
7927 instruction. Instead store to operands[2] (which I
7928 think has to be st(1)). st(1) will be popped later.
7929 gcc <= 2.8.1 didn't have this check and generated
7930 assembly code that the Unixware assembler rejected. */
7931 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7932 else
7933 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7934 break;
7937 if (STACK_TOP_P (operands[0]))
7938 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7939 else
7940 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7941 break;
7943 case MINUS:
7944 case DIV:
7945 if (GET_CODE (operands[1]) == MEM)
7947 p = "r%z1\t%1";
7948 break;
7951 if (GET_CODE (operands[2]) == MEM)
7953 p = "%z2\t%2";
7954 break;
7957 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7959 #if SYSV386_COMPAT
7960 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7961 derived assemblers, confusingly reverse the direction of
7962 the operation for fsub{r} and fdiv{r} when the
7963 destination register is not st(0). The Intel assembler
7964 doesn't have this brain damage. Read !SYSV386_COMPAT to
7965 figure out what the hardware really does. */
7966 if (STACK_TOP_P (operands[0]))
7967 p = "{p\t%0, %2|rp\t%2, %0}";
7968 else
7969 p = "{rp\t%2, %0|p\t%0, %2}";
7970 #else
7971 if (STACK_TOP_P (operands[0]))
7972 /* As above for fmul/fadd, we can't store to st(0). */
7973 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7974 else
7975 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7976 #endif
7977 break;
7980 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7982 #if SYSV386_COMPAT
7983 if (STACK_TOP_P (operands[0]))
7984 p = "{rp\t%0, %1|p\t%1, %0}";
7985 else
7986 p = "{p\t%1, %0|rp\t%0, %1}";
7987 #else
7988 if (STACK_TOP_P (operands[0]))
7989 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7990 else
7991 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7992 #endif
7993 break;
7996 if (STACK_TOP_P (operands[0]))
7998 if (STACK_TOP_P (operands[1]))
7999 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8000 else
8001 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8002 break;
8004 else if (STACK_TOP_P (operands[1]))
8006 #if SYSV386_COMPAT
8007 p = "{\t%1, %0|r\t%0, %1}";
8008 #else
8009 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8010 #endif
8012 else
8014 #if SYSV386_COMPAT
8015 p = "{r\t%2, %0|\t%0, %2}";
8016 #else
8017 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8018 #endif
8020 break;
8022 default:
8023 abort ();
8026 strcat (buf, p);
8027 return buf;
8030 /* Output code to initialize control word copies used by
8031 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8032 is set to control word rounding downwards. */
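/* Note that the value or-ed in below (0xc00, or 0xc inserted into the
   rounding-control bits) sets the x87 RC field (bits 10-11) to 11b,
   i.e. chop/round toward zero, which is what the trunc?f?i patterns
   need for C-style float-to-integer conversion despite the "round
   down" naming.  */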
8033 void
8034 emit_i387_cw_initialization (rtx normal, rtx round_down)
8036 rtx reg = gen_reg_rtx (HImode);
8038 emit_insn (gen_x86_fnstcw_1 (normal));
8039 emit_move_insn (reg, normal);
8040 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8041 && !TARGET_64BIT)
8042 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8043 else
8044 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8045 emit_move_insn (round_down, reg);
8048 /* Output code for INSN to convert a float to a signed int. OPERANDS
8049 are the insn operands. The output may be [HSD]Imode and the input
8050 operand may be [SDX]Fmode. */
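/* Schematically, the sequence emitted below is

     fldcw  %3             ; switch to the truncating control word
     fistp  (or fist) %0   ; store the integer result
     fldcw  %2             ; restore the original control word

   preceded by "fld %y1" to duplicate st(0) when a DImode result forces
   the popping form but the input value must stay on the stack.  */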
8052 const char *
8053 output_fix_trunc (rtx insn, rtx *operands)
8055 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8056 int dimode_p = GET_MODE (operands[0]) == DImode;
8058 /* Jump through a hoop or two for DImode, since the hardware has no
8059 non-popping instruction. We used to do this a different way, but
8060 that was somewhat fragile and broke with post-reload splitters. */
8061 if (dimode_p && !stack_top_dies)
8062 output_asm_insn ("fld\t%y1", operands);
8064 if (!STACK_TOP_P (operands[1]))
8065 abort ();
8067 if (GET_CODE (operands[0]) != MEM)
8068 abort ();
8070 output_asm_insn ("fldcw\t%3", operands);
8071 if (stack_top_dies || dimode_p)
8072 output_asm_insn ("fistp%z0\t%0", operands);
8073 else
8074 output_asm_insn ("fist%z0\t%0", operands);
8075 output_asm_insn ("fldcw\t%2", operands);
8077 return "";
8080 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8081 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8082 when fucom should be used. */
8084 const char *
8085 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8087 int stack_top_dies;
8088 rtx cmp_op0 = operands[0];
8089 rtx cmp_op1 = operands[1];
8090 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8092 if (eflags_p == 2)
8094 cmp_op0 = cmp_op1;
8095 cmp_op1 = operands[2];
8097 if (is_sse)
8099 if (GET_MODE (operands[0]) == SFmode)
8100 if (unordered_p)
8101 return "ucomiss\t{%1, %0|%0, %1}";
8102 else
8103 return "comiss\t{%1, %0|%0, %1}";
8104 else
8105 if (unordered_p)
8106 return "ucomisd\t{%1, %0|%0, %1}";
8107 else
8108 return "comisd\t{%1, %0|%0, %1}";
8111 if (! STACK_TOP_P (cmp_op0))
8112 abort ();
8114 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8116 if (STACK_REG_P (cmp_op1)
8117 && stack_top_dies
8118 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8119 && REGNO (cmp_op1) != FIRST_STACK_REG)
8121 /* If the top of the 387 stack dies, and the other operand
8122 is also a stack register that dies, then this must be a
8123 `fcompp' float compare. */
8125 if (eflags_p == 1)
8127 /* There is no double popping fcomi variant. Fortunately,
8128 eflags is immune from the fstp's cc clobbering. */
8129 if (unordered_p)
8130 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8131 else
8132 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8133 return "fstp\t%y0";
8135 else
8137 if (eflags_p == 2)
8139 if (unordered_p)
8140 return "fucompp\n\tfnstsw\t%0";
8141 else
8142 return "fcompp\n\tfnstsw\t%0";
8144 else
8146 if (unordered_p)
8147 return "fucompp";
8148 else
8149 return "fcompp";
8153 else
8155 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
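/* For example, eflags_p == 1 (fcomi), a floating point operand,
   unordered_p == 1 and a dying stack top give
   mask == (1 << 3) | 0 | (1 << 1) | 1 == 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" below.  */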
8157 static const char * const alt[24] =
8159 "fcom%z1\t%y1",
8160 "fcomp%z1\t%y1",
8161 "fucom%z1\t%y1",
8162 "fucomp%z1\t%y1",
8164 "ficom%z1\t%y1",
8165 "ficomp%z1\t%y1",
8166 NULL,
8167 NULL,
8169 "fcomi\t{%y1, %0|%0, %y1}",
8170 "fcomip\t{%y1, %0|%0, %y1}",
8171 "fucomi\t{%y1, %0|%0, %y1}",
8172 "fucomip\t{%y1, %0|%0, %y1}",
8174 NULL,
8175 NULL,
8176 NULL,
8177 NULL,
8179 "fcom%z2\t%y2\n\tfnstsw\t%0",
8180 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8181 "fucom%z2\t%y2\n\tfnstsw\t%0",
8182 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8184 "ficom%z2\t%y2\n\tfnstsw\t%0",
8185 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8186 NULL,
8187 NULL
8190 int mask;
8191 const char *ret;
8193 mask = eflags_p << 3;
8194 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8195 mask |= unordered_p << 1;
8196 mask |= stack_top_dies;
8198 if (mask >= 24)
8199 abort ();
8200 ret = alt[mask];
8201 if (ret == NULL)
8202 abort ();
8204 return ret;
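/* Output to FILE one element of an address vector (jump table) whose
   entries are labels; the result is a single directive such as
   "\t.long\t.L<value>", or ".quad" on 64-bit targets where ASM_QUAD is
   available.  The exact spelling of ASM_LONG and LPREFIX is target
   dependent.  */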
8208 void
8209 ix86_output_addr_vec_elt (FILE *file, int value)
8211 const char *directive = ASM_LONG;
8213 if (TARGET_64BIT)
8215 #ifdef ASM_QUAD
8216 directive = ASM_QUAD;
8217 #else
8218 abort ();
8219 #endif
8222 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8225 void
8226 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8228 if (TARGET_64BIT)
8229 fprintf (file, "%s%s%d-%s%d\n",
8230 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8231 else if (HAVE_AS_GOTOFF_IN_DATA)
8232 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8233 #if TARGET_MACHO
8234 else if (TARGET_MACHO)
8236 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8237 machopic_output_function_base_name (file);
8238 fprintf(file, "\n");
8240 #endif
8241 else
8242 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8243 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8246 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8247 for the target. */
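/* A rough sketch of the RTL generated when the xor form is chosen:

     (parallel [(set (reg:SI dest) (const_int 0))
                (clobber (reg:CC 17))])

   which the movsi_xor pattern mentioned below outputs as
   "xor %dest, %dest".  */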
8249 void
8250 ix86_expand_clear (rtx dest)
8252 rtx tmp;
8254 /* We play register width games, which are only valid after reload. */
8255 if (!reload_completed)
8256 abort ();
8258 /* Avoid HImode and its attendant prefix byte. */
8259 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8260 dest = gen_rtx_REG (SImode, REGNO (dest));
8262 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8264 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8265 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8267 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8268 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8271 emit_insn (tmp);
8274 /* X is an unchanging MEM. If it is a constant pool reference, return
8275 the constant pool rtx, else NULL. */
8277 static rtx
8278 maybe_get_pool_constant (rtx x)
8280 x = ix86_delegitimize_address (XEXP (x, 0));
8282 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8283 return get_pool_constant (x);
8285 return NULL_RTX;
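/* Expand a move of MODE between OPERANDS[0] and OPERANDS[1], taking
   care of TLS and PIC symbol references, push operands, large 64-bit
   immediates and floating point constants before emitting the final
   SET.  */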
8288 void
8289 ix86_expand_move (enum machine_mode mode, rtx operands[])
8291 int strict = (reload_in_progress || reload_completed);
8292 rtx op0, op1;
8293 enum tls_model model;
8295 op0 = operands[0];
8296 op1 = operands[1];
8298 model = tls_symbolic_operand (op1, Pmode);
8299 if (model)
8301 op1 = legitimize_tls_address (op1, model, true);
8302 op1 = force_operand (op1, op0);
8303 if (op1 == op0)
8304 return;
8307 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8309 #if TARGET_MACHO
8310 if (MACHOPIC_PURE)
8312 rtx temp = ((reload_in_progress
8313 || ((op0 && GET_CODE (op0) == REG)
8314 && mode == Pmode))
8315 ? op0 : gen_reg_rtx (Pmode));
8316 op1 = machopic_indirect_data_reference (op1, temp);
8317 op1 = machopic_legitimize_pic_address (op1, mode,
8318 temp == op1 ? 0 : temp);
8320 else if (MACHOPIC_INDIRECT)
8321 op1 = machopic_indirect_data_reference (op1, 0);
8322 if (op0 == op1)
8323 return;
8324 #else
8325 if (GET_CODE (op0) == MEM)
8326 op1 = force_reg (Pmode, op1);
8327 else
8329 rtx temp = op0;
8330 if (GET_CODE (temp) != REG)
8331 temp = gen_reg_rtx (Pmode);
8332 temp = legitimize_pic_address (op1, temp);
8333 if (temp == op0)
8334 return;
8335 op1 = temp;
8337 #endif /* TARGET_MACHO */
8339 else
8341 if (GET_CODE (op0) == MEM
8342 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8343 || !push_operand (op0, mode))
8344 && GET_CODE (op1) == MEM)
8345 op1 = force_reg (mode, op1);
8347 if (push_operand (op0, mode)
8348 && ! general_no_elim_operand (op1, mode))
8349 op1 = copy_to_mode_reg (mode, op1);
8351 /* Force large constants in 64-bit compilation into a register
8352 so that they get CSEed. */
8353 if (TARGET_64BIT && mode == DImode
8354 && immediate_operand (op1, mode)
8355 && !x86_64_zero_extended_value (op1)
8356 && !register_operand (op0, mode)
8357 && optimize && !reload_completed && !reload_in_progress)
8358 op1 = copy_to_mode_reg (mode, op1);
8360 if (FLOAT_MODE_P (mode))
8362 /* If we are loading a floating point constant to a register,
8363 force the value to memory now, since we'll get better code
8364 out of the back end. */
8366 if (strict)
8368 else if (GET_CODE (op1) == CONST_DOUBLE)
8370 op1 = validize_mem (force_const_mem (mode, op1));
8371 if (!register_operand (op0, mode))
8373 rtx temp = gen_reg_rtx (mode);
8374 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8375 emit_move_insn (op0, temp);
8376 return;
8382 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8385 void
8386 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8388 /* Force constants other than zero into memory. We do not know how
8389 the instructions used to build constants modify the upper 64 bits
8390 of the register; once we have that information we may be able
8391 to handle some of them more efficiently. */
8392 if ((reload_in_progress | reload_completed) == 0
8393 && register_operand (operands[0], mode)
8394 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8395 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8397 /* Make operand1 a register if it isn't already. */
8398 if (!no_new_pseudos
8399 && !register_operand (operands[0], mode)
8400 && !register_operand (operands[1], mode))
8402 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8403 emit_move_insn (operands[0], temp);
8404 return;
8407 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8410 /* Attempt to expand a binary operator. Make the expansion closer to the
8411 actual machine than just general_operand would, which would allow 3 separate
8412 memory references (one output, two input) in a single insn. */
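/* A typical caller is a define_expand in i386.md, roughly (shown here
   only as an illustration):

     ix86_expand_binary_operator (PLUS, SImode, operands);
     DONE;

   leaving this function to massage the operands into a shape the
   corresponding named insn patterns accept.  */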
8414 void
8415 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8416 rtx operands[])
8418 int matching_memory;
8419 rtx src1, src2, dst, op, clob;
8421 dst = operands[0];
8422 src1 = operands[1];
8423 src2 = operands[2];
8425 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8426 if (GET_RTX_CLASS (code) == 'c'
8427 && (rtx_equal_p (dst, src2)
8428 || immediate_operand (src1, mode)))
8430 rtx temp = src1;
8431 src1 = src2;
8432 src2 = temp;
8435 /* If the destination is memory, and we do not have matching source
8436 operands, do things in registers. */
8437 matching_memory = 0;
8438 if (GET_CODE (dst) == MEM)
8440 if (rtx_equal_p (dst, src1))
8441 matching_memory = 1;
8442 else if (GET_RTX_CLASS (code) == 'c'
8443 && rtx_equal_p (dst, src2))
8444 matching_memory = 2;
8445 else
8446 dst = gen_reg_rtx (mode);
8449 /* Both source operands cannot be in memory. */
8450 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8452 if (matching_memory != 2)
8453 src2 = force_reg (mode, src2);
8454 else
8455 src1 = force_reg (mode, src1);
8458 /* If the operation is not commutable, source 1 cannot be a constant
8459 or non-matching memory. */
8460 if ((CONSTANT_P (src1)
8461 || (!matching_memory && GET_CODE (src1) == MEM))
8462 && GET_RTX_CLASS (code) != 'c')
8463 src1 = force_reg (mode, src1);
8465 /* If optimizing, copy to regs to improve CSE */
8466 if (optimize && ! no_new_pseudos)
8468 if (GET_CODE (dst) == MEM)
8469 dst = gen_reg_rtx (mode);
8470 if (GET_CODE (src1) == MEM)
8471 src1 = force_reg (mode, src1);
8472 if (GET_CODE (src2) == MEM)
8473 src2 = force_reg (mode, src2);
8476 /* Emit the instruction. */
8478 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8479 if (reload_in_progress)
8481 /* Reload doesn't know about the flags register, and doesn't know that
8482 it doesn't want to clobber it. We can only do this with PLUS. */
8483 if (code != PLUS)
8484 abort ();
8485 emit_insn (op);
8487 else
8489 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8490 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8493 /* Fix up the destination if needed. */
8494 if (dst != operands[0])
8495 emit_move_insn (operands[0], dst);
8498 /* Return TRUE or FALSE depending on whether the binary operator meets the
8499 appropriate constraints. */
8502 ix86_binary_operator_ok (enum rtx_code code,
8503 enum machine_mode mode ATTRIBUTE_UNUSED,
8504 rtx operands[3])
8506 /* Both source operands cannot be in memory. */
8507 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8508 return 0;
8509 /* If the operation is not commutable, source 1 cannot be a constant. */
8510 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8511 return 0;
8512 /* If the destination is memory, we must have a matching source operand. */
8513 if (GET_CODE (operands[0]) == MEM
8514 && ! (rtx_equal_p (operands[0], operands[1])
8515 || (GET_RTX_CLASS (code) == 'c'
8516 && rtx_equal_p (operands[0], operands[2]))))
8517 return 0;
8518 /* If the operation is not commutable and the source 1 is memory, we must
8519 have a matching destination. */
8520 if (GET_CODE (operands[1]) == MEM
8521 && GET_RTX_CLASS (code) != 'c'
8522 && ! rtx_equal_p (operands[0], operands[1]))
8523 return 0;
8524 return 1;
8527 /* Attempt to expand a unary operator. Make the expansion closer to the
8528 actual machine than just general_operand would, which would allow 2 separate
8529 memory references (one output, one input) in a single insn. */
8531 void
8532 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8533 rtx operands[])
8535 int matching_memory;
8536 rtx src, dst, op, clob;
8538 dst = operands[0];
8539 src = operands[1];
8541 /* If the destination is memory, and we do not have matching source
8542 operands, do things in registers. */
8543 matching_memory = 0;
8544 if (GET_CODE (dst) == MEM)
8546 if (rtx_equal_p (dst, src))
8547 matching_memory = 1;
8548 else
8549 dst = gen_reg_rtx (mode);
8552 /* When source operand is memory, destination must match. */
8553 if (!matching_memory && GET_CODE (src) == MEM)
8554 src = force_reg (mode, src);
8556 /* If optimizing, copy to regs to improve CSE */
8557 if (optimize && ! no_new_pseudos)
8559 if (GET_CODE (dst) == MEM)
8560 dst = gen_reg_rtx (mode);
8561 if (GET_CODE (src) == MEM)
8562 src = force_reg (mode, src);
8565 /* Emit the instruction. */
8567 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8568 if (reload_in_progress || code == NOT)
8570 /* Reload doesn't know about the flags register, and doesn't know that
8571 it doesn't want to clobber it. */
8572 if (code != NOT)
8573 abort ();
8574 emit_insn (op);
8576 else
8578 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8579 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8582 /* Fix up the destination if needed. */
8583 if (dst != operands[0])
8584 emit_move_insn (operands[0], dst);
8587 /* Return TRUE or FALSE depending on whether the unary operator meets the
8588 appropriate constraints. */
8591 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8592 enum machine_mode mode ATTRIBUTE_UNUSED,
8593 rtx operands[2] ATTRIBUTE_UNUSED)
8595 /* If one of operands is memory, source and destination must match. */
8596 if ((GET_CODE (operands[0]) == MEM
8597 || GET_CODE (operands[1]) == MEM)
8598 && ! rtx_equal_p (operands[0], operands[1]))
8599 return FALSE;
8600 return TRUE;
8603 /* Return TRUE or FALSE depending on whether the first SET in INSN
8604 has source and destination with matching CC modes, and that the
8605 CC mode is at least as constrained as REQ_MODE. */
8608 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8610 rtx set;
8611 enum machine_mode set_mode;
8613 set = PATTERN (insn);
8614 if (GET_CODE (set) == PARALLEL)
8615 set = XVECEXP (set, 0, 0);
8616 if (GET_CODE (set) != SET)
8617 abort ();
8618 if (GET_CODE (SET_SRC (set)) != COMPARE)
8619 abort ();
8621 set_mode = GET_MODE (SET_DEST (set));
8622 switch (set_mode)
8624 case CCNOmode:
8625 if (req_mode != CCNOmode
8626 && (req_mode != CCmode
8627 || XEXP (SET_SRC (set), 1) != const0_rtx))
8628 return 0;
8629 break;
8630 case CCmode:
8631 if (req_mode == CCGCmode)
8632 return 0;
8633 /* Fall through. */
8634 case CCGCmode:
8635 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8636 return 0;
8637 /* Fall through. */
8638 case CCGOCmode:
8639 if (req_mode == CCZmode)
8640 return 0;
8641 /* Fall through. */
8642 case CCZmode:
8643 break;
8645 default:
8646 abort ();
8649 return (GET_MODE (SET_SRC (set)) == set_mode);
8652 /* Generate insn patterns to do an integer compare of OPERANDS. */
8654 static rtx
8655 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8657 enum machine_mode cmpmode;
8658 rtx tmp, flags;
8660 cmpmode = SELECT_CC_MODE (code, op0, op1);
8661 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8663 /* This is very simple, but making the interface the same as in the
8664 FP case makes the rest of the code easier. */
8665 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8666 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8668 /* Return the test that should be put into the flags user, i.e.
8669 the bcc, scc, or cmov instruction. */
8670 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8673 /* Figure out whether to use ordered or unordered fp comparisons.
8674 Return the appropriate mode to use. */
8676 enum machine_mode
8677 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8679 /* ??? In order to make all comparisons reversible, we do all comparisons
8680 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8681 between trapping and nontrapping forms of comparisons, we can make inequality
8682 comparisons trapping again, since it results in better code when using
8683 FCOM based compares. */
8684 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8687 enum machine_mode
8688 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8690 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8691 return ix86_fp_compare_mode (code);
8692 switch (code)
8694 /* Only zero flag is needed. */
8695 case EQ: /* ZF=0 */
8696 case NE: /* ZF!=0 */
8697 return CCZmode;
8698 /* Codes needing carry flag. */
8699 case GEU: /* CF=0 */
8700 case GTU: /* CF=0 & ZF=0 */
8701 case LTU: /* CF=1 */
8702 case LEU: /* CF=1 | ZF=1 */
8703 return CCmode;
8704 /* Codes possibly doable only with sign flag when
8705 comparing against zero. */
8706 case GE: /* SF=OF or SF=0 */
8707 case LT: /* SF<>OF or SF=1 */
8708 if (op1 == const0_rtx)
8709 return CCGOCmode;
8710 else
8711 /* For other cases Carry flag is not required. */
8712 return CCGCmode;
8713 /* Codes doable only with the sign flag when comparing
8714 against zero, but we lack a jump instruction for that,
8715 so we need to use relational tests against overflow,
8716 which thus needs to be zero. */
8717 case GT: /* ZF=0 & SF=OF */
8718 case LE: /* ZF=1 | SF<>OF */
8719 if (op1 == const0_rtx)
8720 return CCNOmode;
8721 else
8722 return CCGCmode;
8723 /* The strcmp pattern does (use flags), and combine may ask us for the
8724 proper mode. */
8725 case USE:
8726 return CCmode;
8727 default:
8728 abort ();
8732 /* Return the fixed registers used for condition codes. */
8734 static bool
8735 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8737 *p1 = FLAGS_REG;
8738 *p2 = FPSR_REG;
8739 return true;
8742 /* If two condition code modes are compatible, return a condition code
8743 mode which is compatible with both. Otherwise, return
8744 VOIDmode. */
8746 static enum machine_mode
8747 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8749 if (m1 == m2)
8750 return m1;
8752 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8753 return VOIDmode;
8755 if ((m1 == CCGCmode && m2 == CCGOCmode)
8756 || (m1 == CCGOCmode && m2 == CCGCmode))
8757 return CCGCmode;
8759 switch (m1)
8761 default:
8762 abort ();
8764 case CCmode:
8765 case CCGCmode:
8766 case CCGOCmode:
8767 case CCNOmode:
8768 case CCZmode:
8769 switch (m2)
8771 default:
8772 return VOIDmode;
8774 case CCmode:
8775 case CCGCmode:
8776 case CCGOCmode:
8777 case CCNOmode:
8778 case CCZmode:
8779 return CCmode;
8782 case CCFPmode:
8783 case CCFPUmode:
8784 /* These are only compatible with themselves, which we already
8785 checked above. */
8786 return VOIDmode;
8790 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8793 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8795 enum rtx_code swapped_code = swap_condition (code);
8796 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8797 || (ix86_fp_comparison_cost (swapped_code)
8798 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8801 /* Swap, force into registers, or otherwise massage the two operands
8802 to a fp comparison. The operands are updated in place; the new
8803 comparison code is returned. */
8805 static enum rtx_code
8806 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8808 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8809 rtx op0 = *pop0, op1 = *pop1;
8810 enum machine_mode op_mode = GET_MODE (op0);
8811 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8813 /* All of the unordered compare instructions only work on registers.
8814 The same is true of the XFmode compare instructions. The same is
8815 true of the fcomi compare instructions. */
8817 if (!is_sse
8818 && (fpcmp_mode == CCFPUmode
8819 || op_mode == XFmode
8820 || ix86_use_fcomi_compare (code)))
8822 op0 = force_reg (op_mode, op0);
8823 op1 = force_reg (op_mode, op1);
8825 else
8827 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8828 things around if they appear profitable, otherwise force op0
8829 into a register. */
8831 if (standard_80387_constant_p (op0) == 0
8832 || (GET_CODE (op0) == MEM
8833 && ! (standard_80387_constant_p (op1) == 0
8834 || GET_CODE (op1) == MEM)))
8836 rtx tmp;
8837 tmp = op0, op0 = op1, op1 = tmp;
8838 code = swap_condition (code);
8841 if (GET_CODE (op0) != REG)
8842 op0 = force_reg (op_mode, op0);
8844 if (CONSTANT_P (op1))
8846 if (standard_80387_constant_p (op1))
8847 op1 = force_reg (op_mode, op1);
8848 else
8849 op1 = validize_mem (force_const_mem (op_mode, op1));
8853 /* Try to rearrange the comparison to make it cheaper. */
8854 if (ix86_fp_comparison_cost (code)
8855 > ix86_fp_comparison_cost (swap_condition (code))
8856 && (GET_CODE (op1) == REG || !no_new_pseudos))
8858 rtx tmp;
8859 tmp = op0, op0 = op1, op1 = tmp;
8860 code = swap_condition (code);
8861 if (GET_CODE (op0) != REG)
8862 op0 = force_reg (op_mode, op0);
8865 *pop0 = op0;
8866 *pop1 = op1;
8867 return code;
8870 /* Convert a comparison code we use to represent an FP comparison to the
8871 integer code that will result in a proper branch. Return UNKNOWN if no such code
8872 is available. */
8873 static enum rtx_code
8874 ix86_fp_compare_code_to_integer (enum rtx_code code)
8876 switch (code)
8878 case GT:
8879 return GTU;
8880 case GE:
8881 return GEU;
8882 case ORDERED:
8883 case UNORDERED:
8884 return code;
8885 break;
8886 case UNEQ:
8887 return EQ;
8888 break;
8889 case UNLT:
8890 return LTU;
8891 break;
8892 case UNLE:
8893 return LEU;
8894 break;
8895 case LTGT:
8896 return NE;
8897 break;
8898 default:
8899 return UNKNOWN;
8903 /* Split comparison code CODE into comparisons we can do using branch
8904 instructions. BYPASS_CODE is the comparison code for the branch that will
8905 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8906 is not required, its code is set to NIL.
8907 We never require more than two branches. */
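/* For example, with TARGET_IEEE_FP an EQ compare is split into
   *first_code = UNEQ with *bypass_code = UNORDERED: jump around the
   UNEQ branch when the operands compare unordered.  */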
8908 static void
8909 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8910 enum rtx_code *first_code,
8911 enum rtx_code *second_code)
8913 *first_code = code;
8914 *bypass_code = NIL;
8915 *second_code = NIL;
8917 /* The fcomi comparison sets flags as follows:
8919 cmp ZF PF CF
8920 > 0 0 0
8921 < 0 0 1
8922 = 1 0 0
8923 un 1 1 1 */
8925 switch (code)
8927 case GT: /* GTU - CF=0 & ZF=0 */
8928 case GE: /* GEU - CF=0 */
8929 case ORDERED: /* PF=0 */
8930 case UNORDERED: /* PF=1 */
8931 case UNEQ: /* EQ - ZF=1 */
8932 case UNLT: /* LTU - CF=1 */
8933 case UNLE: /* LEU - CF=1 | ZF=1 */
8934 case LTGT: /* EQ - ZF=0 */
8935 break;
8936 case LT: /* LTU - CF=1 - fails on unordered */
8937 *first_code = UNLT;
8938 *bypass_code = UNORDERED;
8939 break;
8940 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8941 *first_code = UNLE;
8942 *bypass_code = UNORDERED;
8943 break;
8944 case EQ: /* EQ - ZF=1 - fails on unordered */
8945 *first_code = UNEQ;
8946 *bypass_code = UNORDERED;
8947 break;
8948 case NE: /* NE - ZF=0 - fails on unordered */
8949 *first_code = LTGT;
8950 *second_code = UNORDERED;
8951 break;
8952 case UNGE: /* GEU - CF=0 - fails on unordered */
8953 *first_code = GE;
8954 *second_code = UNORDERED;
8955 break;
8956 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8957 *first_code = GT;
8958 *second_code = UNORDERED;
8959 break;
8960 default:
8961 abort ();
8963 if (!TARGET_IEEE_FP)
8965 *second_code = NIL;
8966 *bypass_code = NIL;
8970 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8971 All of the following functions use the number of instructions as the cost metric.
8972 In the future this should be tweaked to compute bytes for optimize_size and
8973 to take into account the performance of various instructions on various CPUs. */
8974 static int
8975 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8977 if (!TARGET_IEEE_FP)
8978 return 4;
8979 /* The cost of code output by ix86_expand_fp_compare. */
8980 switch (code)
8982 case UNLE:
8983 case UNLT:
8984 case LTGT:
8985 case GT:
8986 case GE:
8987 case UNORDERED:
8988 case ORDERED:
8989 case UNEQ:
8990 return 4;
8991 break;
8992 case LT:
8993 case NE:
8994 case EQ:
8995 case UNGE:
8996 return 5;
8997 break;
8998 case LE:
8999 case UNGT:
9000 return 6;
9001 break;
9002 default:
9003 abort ();
9007 /* Return cost of comparison done using fcomi operation.
9008 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9009 static int
9010 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9012 enum rtx_code bypass_code, first_code, second_code;
9013 /* Return an arbitrarily high cost when the instruction is not supported - this
9014 prevents gcc from using it. */
9015 if (!TARGET_CMOVE)
9016 return 1024;
9017 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9018 return (bypass_code != NIL || second_code != NIL) + 2;
9021 /* Return cost of comparison done using sahf operation.
9022 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9023 static int
9024 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9026 enum rtx_code bypass_code, first_code, second_code;
9027 /* Return an arbitrarily high cost when the instruction is not preferred - this
9028 keeps gcc from using it. */
9029 if (!TARGET_USE_SAHF && !optimize_size)
9030 return 1024;
9031 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9032 return (bypass_code != NIL || second_code != NIL) + 3;
9035 /* Compute cost of the comparison done using any method.
9036 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9037 static int
9038 ix86_fp_comparison_cost (enum rtx_code code)
9040 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9041 int min;
9043 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9044 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9046 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9047 if (min > sahf_cost)
9048 min = sahf_cost;
9049 if (min > fcomi_cost)
9050 min = fcomi_cost;
9051 return min;
9054 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9056 static rtx
9057 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9058 rtx *second_test, rtx *bypass_test)
9060 enum machine_mode fpcmp_mode, intcmp_mode;
9061 rtx tmp, tmp2;
9062 int cost = ix86_fp_comparison_cost (code);
9063 enum rtx_code bypass_code, first_code, second_code;
9065 fpcmp_mode = ix86_fp_compare_mode (code);
9066 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9068 if (second_test)
9069 *second_test = NULL_RTX;
9070 if (bypass_test)
9071 *bypass_test = NULL_RTX;
9073 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9075 /* Do fcomi/sahf based test when profitable. */
9076 if ((bypass_code == NIL || bypass_test)
9077 && (second_code == NIL || second_test)
9078 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9080 if (TARGET_CMOVE)
9082 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9083 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9084 tmp);
9085 emit_insn (tmp);
9087 else
9089 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9090 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9091 if (!scratch)
9092 scratch = gen_reg_rtx (HImode);
9093 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9094 emit_insn (gen_x86_sahf_1 (scratch));
9097 /* The FP codes work out to act like unsigned. */
9098 intcmp_mode = fpcmp_mode;
9099 code = first_code;
9100 if (bypass_code != NIL)
9101 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9102 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9103 const0_rtx);
9104 if (second_code != NIL)
9105 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9106 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9107 const0_rtx);
9109 else
9111 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9112 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9113 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9114 if (!scratch)
9115 scratch = gen_reg_rtx (HImode);
9116 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9118 /* In the unordered case, we have to check C2 for NaN's, which
9119 doesn't happen to work out to anything nice combination-wise.
9120 So do some bit twiddling on the value we've got in AH to come
9121 up with an appropriate set of condition codes. */
9123 intcmp_mode = CCNOmode;
9124 switch (code)
9126 case GT:
9127 case UNGT:
9128 if (code == GT || !TARGET_IEEE_FP)
9130 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9131 code = EQ;
9133 else
9135 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9136 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9137 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9138 intcmp_mode = CCmode;
9139 code = GEU;
9141 break;
9142 case LT:
9143 case UNLT:
9144 if (code == LT && TARGET_IEEE_FP)
9146 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9147 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9148 intcmp_mode = CCmode;
9149 code = EQ;
9151 else
9153 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9154 code = NE;
9156 break;
9157 case GE:
9158 case UNGE:
9159 if (code == GE || !TARGET_IEEE_FP)
9161 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9162 code = EQ;
9164 else
9166 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9167 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9168 GEN_INT (0x01)));
9169 code = NE;
9171 break;
9172 case LE:
9173 case UNLE:
9174 if (code == LE && TARGET_IEEE_FP)
9176 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9177 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9178 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9179 intcmp_mode = CCmode;
9180 code = LTU;
9182 else
9184 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9185 code = NE;
9187 break;
9188 case EQ:
9189 case UNEQ:
9190 if (code == EQ && TARGET_IEEE_FP)
9192 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9193 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9194 intcmp_mode = CCmode;
9195 code = EQ;
9197 else
9199 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9200 code = NE;
9201 break;
9203 break;
9204 case NE:
9205 case LTGT:
9206 if (code == NE && TARGET_IEEE_FP)
9208 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9209 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9210 GEN_INT (0x40)));
9211 code = NE;
9213 else
9215 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9216 code = EQ;
9218 break;
9220 case UNORDERED:
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9222 code = NE;
9223 break;
9224 case ORDERED:
9225 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9226 code = EQ;
9227 break;
9229 default:
9230 abort ();
9234 /* Return the test that should be put into the flags user, i.e.
9235 the bcc, scc, or cmov instruction. */
9236 return gen_rtx_fmt_ee (code, VOIDmode,
9237 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9238 const0_rtx);
9242 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9244 rtx op0, op1, ret;
9245 op0 = ix86_compare_op0;
9246 op1 = ix86_compare_op1;
9248 if (second_test)
9249 *second_test = NULL_RTX;
9250 if (bypass_test)
9251 *bypass_test = NULL_RTX;
9253 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9254 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9255 second_test, bypass_test);
9256 else
9257 ret = ix86_expand_int_compare (code, op0, op1);
9259 return ret;
9262 /* Return true if the CODE will result in a nontrivial jump sequence. */
9263 bool
9264 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9266 enum rtx_code bypass_code, first_code, second_code;
9267 if (!TARGET_CMOVE)
9268 return true;
9269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9270 return bypass_code != NIL || second_code != NIL;
9273 void
9274 ix86_expand_branch (enum rtx_code code, rtx label)
9276 rtx tmp;
9278 switch (GET_MODE (ix86_compare_op0))
9280 case QImode:
9281 case HImode:
9282 case SImode:
9283 simple:
9284 tmp = ix86_expand_compare (code, NULL, NULL);
9285 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9286 gen_rtx_LABEL_REF (VOIDmode, label),
9287 pc_rtx);
9288 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9289 return;
9291 case SFmode:
9292 case DFmode:
9293 case XFmode:
9295 rtvec vec;
9296 int use_fcomi;
9297 enum rtx_code bypass_code, first_code, second_code;
9299 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9300 &ix86_compare_op1);
9302 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9304 /* Check whether we will use the natural sequence with one jump. If
9305 so, we can expand the jump early. Otherwise delay expansion by
9306 creating a compound insn so as not to confuse the optimizers. */
9307 if (bypass_code == NIL && second_code == NIL
9308 && TARGET_CMOVE)
9310 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9311 gen_rtx_LABEL_REF (VOIDmode, label),
9312 pc_rtx, NULL_RTX);
9314 else
9316 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9317 ix86_compare_op0, ix86_compare_op1);
9318 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9319 gen_rtx_LABEL_REF (VOIDmode, label),
9320 pc_rtx);
9321 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9323 use_fcomi = ix86_use_fcomi_compare (code);
9324 vec = rtvec_alloc (3 + !use_fcomi);
9325 RTVEC_ELT (vec, 0) = tmp;
9326 RTVEC_ELT (vec, 1)
9327 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9328 RTVEC_ELT (vec, 2)
9329 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9330 if (! use_fcomi)
9331 RTVEC_ELT (vec, 3)
9332 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9334 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9336 return;
9339 case DImode:
9340 if (TARGET_64BIT)
9341 goto simple;
9342 /* Expand DImode branch into multiple compare+branch. */
9344 rtx lo[2], hi[2], label2;
9345 enum rtx_code code1, code2, code3;
9347 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9349 tmp = ix86_compare_op0;
9350 ix86_compare_op0 = ix86_compare_op1;
9351 ix86_compare_op1 = tmp;
9352 code = swap_condition (code);
9354 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9355 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9357 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9358 avoid two branches. This costs one extra insn, so disable when
9359 optimizing for size. */
9361 if ((code == EQ || code == NE)
9362 && (!optimize_size
9363 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9365 rtx xor0, xor1;
9367 xor1 = hi[0];
9368 if (hi[1] != const0_rtx)
9369 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9370 NULL_RTX, 0, OPTAB_WIDEN);
9372 xor0 = lo[0];
9373 if (lo[1] != const0_rtx)
9374 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9375 NULL_RTX, 0, OPTAB_WIDEN);
9377 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9378 NULL_RTX, 0, OPTAB_WIDEN);
9380 ix86_compare_op0 = tmp;
9381 ix86_compare_op1 = const0_rtx;
9382 ix86_expand_branch (code, label);
9383 return;
9386 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9387 and op1 is a constant whose low word is zero, then we can just
9388 examine the high word. */
9390 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9391 switch (code)
9393 case LT: case LTU: case GE: case GEU:
9394 ix86_compare_op0 = hi[0];
9395 ix86_compare_op1 = hi[1];
9396 ix86_expand_branch (code, label);
9397 return;
9398 default:
9399 break;
9402 /* Otherwise, we need two or three jumps. */
9404 label2 = gen_label_rtx ();
9406 code1 = code;
9407 code2 = swap_condition (code);
9408 code3 = unsigned_condition (code);
9410 switch (code)
9412 case LT: case GT: case LTU: case GTU:
9413 break;
9415 case LE: code1 = LT; code2 = GT; break;
9416 case GE: code1 = GT; code2 = LT; break;
9417 case LEU: code1 = LTU; code2 = GTU; break;
9418 case GEU: code1 = GTU; code2 = LTU; break;
9420 case EQ: code1 = NIL; code2 = NE; break;
9421 case NE: code2 = NIL; break;
9423 default:
9424 abort ();
9428 * a < b =>
9429 * if (hi(a) < hi(b)) goto true;
9430 * if (hi(a) > hi(b)) goto false;
9431 * if (lo(a) < lo(b)) goto true;
9432 * false:
9435 ix86_compare_op0 = hi[0];
9436 ix86_compare_op1 = hi[1];
9438 if (code1 != NIL)
9439 ix86_expand_branch (code1, label);
9440 if (code2 != NIL)
9441 ix86_expand_branch (code2, label2);
9443 ix86_compare_op0 = lo[0];
9444 ix86_compare_op1 = lo[1];
9445 ix86_expand_branch (code3, label);
9447 if (code2 != NIL)
9448 emit_label (label2);
9449 return;
9452 default:
9453 abort ();
9457 /* Split branch based on floating point condition. */
9458 void
9459 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9460 rtx target1, rtx target2, rtx tmp)
9462 rtx second, bypass;
9463 rtx label = NULL_RTX;
9464 rtx condition;
9465 int bypass_probability = -1, second_probability = -1, probability = -1;
9466 rtx i;
9468 if (target2 != pc_rtx)
9470 rtx tmp = target2;
9471 code = reverse_condition_maybe_unordered (code);
9472 target2 = target1;
9473 target1 = tmp;
9476 condition = ix86_expand_fp_compare (code, op1, op2,
9477 tmp, &second, &bypass);
9479 if (split_branch_probability >= 0)
9481 /* Distribute the probabilities across the jumps.
9482 Assume that BYPASS and SECOND always test
9483 for UNORDERED. */
9484 probability = split_branch_probability;
9486 /* A value of 1 is low enough that the probability does not need
9487 to be updated. Later we may run some experiments and see
9488 if unordered values are more frequent in practice. */
9489 if (bypass)
9490 bypass_probability = 1;
9491 if (second)
9492 second_probability = 1;
9494 if (bypass != NULL_RTX)
9496 label = gen_label_rtx ();
9497 i = emit_jump_insn (gen_rtx_SET
9498 (VOIDmode, pc_rtx,
9499 gen_rtx_IF_THEN_ELSE (VOIDmode,
9500 bypass,
9501 gen_rtx_LABEL_REF (VOIDmode,
9502 label),
9503 pc_rtx)));
9504 if (bypass_probability >= 0)
9505 REG_NOTES (i)
9506 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9507 GEN_INT (bypass_probability),
9508 REG_NOTES (i));
9510 i = emit_jump_insn (gen_rtx_SET
9511 (VOIDmode, pc_rtx,
9512 gen_rtx_IF_THEN_ELSE (VOIDmode,
9513 condition, target1, target2)));
9514 if (probability >= 0)
9515 REG_NOTES (i)
9516 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9517 GEN_INT (probability),
9518 REG_NOTES (i));
9519 if (second != NULL_RTX)
9521 i = emit_jump_insn (gen_rtx_SET
9522 (VOIDmode, pc_rtx,
9523 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9524 target2)));
9525 if (second_probability >= 0)
9526 REG_NOTES (i)
9527 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9528 GEN_INT (second_probability),
9529 REG_NOTES (i));
9531 if (label != NULL_RTX)
9532 emit_label (label);
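/* Expand a store-flag operation: set the QImode register DEST to 0 or 1
   according to comparison CODE of ix86_compare_op0 and ix86_compare_op1.
   Returns 1 when the expansion succeeded (DONE) and 0 when the caller
   must FAIL (DImode compares on 32-bit targets).  */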
9536 ix86_expand_setcc (enum rtx_code code, rtx dest)
9538 rtx ret, tmp, tmpreg, equiv;
9539 rtx second_test, bypass_test;
9541 if (GET_MODE (ix86_compare_op0) == DImode
9542 && !TARGET_64BIT)
9543 return 0; /* FAIL */
9545 if (GET_MODE (dest) != QImode)
9546 abort ();
9548 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9549 PUT_MODE (ret, QImode);
9551 tmp = dest;
9552 tmpreg = dest;
9554 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9555 if (bypass_test || second_test)
9557 rtx test = second_test;
9558 int bypass = 0;
9559 rtx tmp2 = gen_reg_rtx (QImode);
9560 if (bypass_test)
9562 if (second_test)
9563 abort ();
9564 test = bypass_test;
9565 bypass = 1;
9566 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9568 PUT_MODE (test, QImode);
9569 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9571 if (bypass)
9572 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9573 else
9574 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9577 /* Attach a REG_EQUAL note describing the comparison result. */
9578 equiv = simplify_gen_relational (code, QImode,
9579 GET_MODE (ix86_compare_op0),
9580 ix86_compare_op0, ix86_compare_op1);
9581 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9583 return 1; /* DONE */
9586 /* Expand comparison setting or clearing carry flag. Return true when
9587 successful and set pop for the operation. */
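/* For instance, an unsigned "a <= 42" (LEU against a CONST_INT) is
   rewritten below as "a < 43" (LTU), so that the result can be
   materialized from the carry flag alone, e.g. with sbb.  */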
9588 static bool
9589 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9591 enum machine_mode mode =
9592 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9594 /* Do not handle DImode compares that go through the special path. Also we can't
9595 deal with FP compares yet. This is possible to add. */
9596 if ((mode == DImode && !TARGET_64BIT))
9597 return false;
9598 if (FLOAT_MODE_P (mode))
9600 rtx second_test = NULL, bypass_test = NULL;
9601 rtx compare_op, compare_seq;
9603 /* Shortcut: the following common codes never translate into carry flag compares. */
9604 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9605 || code == ORDERED || code == UNORDERED)
9606 return false;
9608 /* These comparisons require zero flag; swap operands so they won't. */
9609 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9610 && !TARGET_IEEE_FP)
9612 rtx tmp = op0;
9613 op0 = op1;
9614 op1 = tmp;
9615 code = swap_condition (code);
9618 /* Try to expand the comparison and verify that we end up with a carry flag
9619 based comparison. This fails to be true only when we decide to expand the
9620 comparison using arithmetic, which is not a common scenario. */
9621 start_sequence ();
9622 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9623 &second_test, &bypass_test);
9624 compare_seq = get_insns ();
9625 end_sequence ();
9627 if (second_test || bypass_test)
9628 return false;
9629 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9630 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9631 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9632 else
9633 code = GET_CODE (compare_op);
9634 if (code != LTU && code != GEU)
9635 return false;
9636 emit_insn (compare_seq);
9637 *pop = compare_op;
9638 return true;
9640 if (!INTEGRAL_MODE_P (mode))
9641 return false;
9642 switch (code)
9644 case LTU:
9645 case GEU:
9646 break;
9648 /* Convert a==0 into (unsigned)a<1. */
9649 case EQ:
9650 case NE:
9651 if (op1 != const0_rtx)
9652 return false;
9653 op1 = const1_rtx;
9654 code = (code == EQ ? LTU : GEU);
9655 break;
9657 /* Convert a>b into b<a or a>=b+1. */
9658 case GTU:
9659 case LEU:
9660 if (GET_CODE (op1) == CONST_INT)
9662 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9663 /* Bail out on overflow. We still can swap operands but that
9664 would force loading of the constant into register. */
9665 if (op1 == const0_rtx
9666 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9667 return false;
9668 code = (code == GTU ? GEU : LTU);
9670 else
9672 rtx tmp = op1;
9673 op1 = op0;
9674 op0 = tmp;
9675 code = (code == GTU ? LTU : GEU);
9677 break;
9679 /* Convert a>=0 into (unsigned)a<0x80000000. */
9680 case LT:
9681 case GE:
9682 if (mode == DImode || op1 != const0_rtx)
9683 return false;
9684 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9685 code = (code == LT ? GEU : LTU);
9686 break;
9687 case LE:
9688 case GT:
9689 if (mode == DImode || op1 != constm1_rtx)
9690 return false;
9691 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9692 code = (code == LE ? GEU : LTU);
9693 break;
9695 default:
9696 return false;
9698 /* Swapping operands may cause constant to appear as first operand. */
9699 if (!nonimmediate_operand (op0, VOIDmode))
9701 if (no_new_pseudos)
9702 return false;
9703 op0 = force_reg (mode, op0);
9705 ix86_compare_op0 = op0;
9706 ix86_compare_op1 = op1;
9707 *pop = ix86_expand_compare (code, NULL, NULL);
9708 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9709 abort ();
9710 return true;
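/* Expand an integer conditional move described by OPERANDS (destination,
   comparison, value if true, value if false).  When both arms are
   constants the code below tries branch-free sbb/setcc arithmetic;
   otherwise it falls back to cmov or a jump sequence.  Returns 1 for
   DONE and 0 for FAIL.  */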
9714 ix86_expand_int_movcc (rtx operands[])
9716 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9717 rtx compare_seq, compare_op;
9718 rtx second_test, bypass_test;
9719 enum machine_mode mode = GET_MODE (operands[0]);
9720 bool sign_bit_compare_p = false;
9722 start_sequence ();
9723 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9724 compare_seq = get_insns ();
9725 end_sequence ();
9727 compare_code = GET_CODE (compare_op);
9729 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9730 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9731 sign_bit_compare_p = true;
9733 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9734 HImode insns, we'd be swallowed in word prefix ops. */
9736 if ((mode != HImode || TARGET_FAST_PREFIX)
9737 && (mode != DImode || TARGET_64BIT)
9738 && GET_CODE (operands[2]) == CONST_INT
9739 && GET_CODE (operands[3]) == CONST_INT)
9741 rtx out = operands[0];
9742 HOST_WIDE_INT ct = INTVAL (operands[2]);
9743 HOST_WIDE_INT cf = INTVAL (operands[3]);
9744 HOST_WIDE_INT diff;
9746 diff = ct - cf;
9747 /* Sign bit compares are better done using shifts than by using
9748 sbb. */
9749 if (sign_bit_compare_p
9750 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9751 ix86_compare_op1, &compare_op))
9753 /* Detect overlap between destination and compare sources. */
9754 rtx tmp = out;
9756 if (!sign_bit_compare_p)
9758 bool fpcmp = false;
9760 compare_code = GET_CODE (compare_op);
9762 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9763 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9765 fpcmp = true;
9766 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9769 /* To simplify rest of code, restrict to the GEU case. */
9770 if (compare_code == LTU)
9772 HOST_WIDE_INT tmp = ct;
9773 ct = cf;
9774 cf = tmp;
9775 compare_code = reverse_condition (compare_code);
9776 code = reverse_condition (code);
9778 else
9780 if (fpcmp)
9781 PUT_CODE (compare_op,
9782 reverse_condition_maybe_unordered
9783 (GET_CODE (compare_op)));
9784 else
9785 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9787 diff = ct - cf;
9789 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9790 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9791 tmp = gen_reg_rtx (mode);
9793 if (mode == DImode)
9794 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9795 else
9796 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9798 else
9800 if (code == GT || code == GE)
9801 code = reverse_condition (code);
9802 else
9804 HOST_WIDE_INT tmp = ct;
9805 ct = cf;
9806 cf = tmp;
9807 diff = ct - cf;
9809 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9810 ix86_compare_op1, VOIDmode, 0, -1);
9813 if (diff == 1)
9816 * cmpl op0,op1
9817 * sbbl dest,dest
9818 * [addl dest, ct]
9820 * Size 5 - 8.
9822 if (ct)
9823 tmp = expand_simple_binop (mode, PLUS,
9824 tmp, GEN_INT (ct),
9825 copy_rtx (tmp), 1, OPTAB_DIRECT);
9827 else if (cf == -1)
9830 * cmpl op0,op1
9831 * sbbl dest,dest
9832 * orl $ct, dest
9834 * Size 8.
9836 tmp = expand_simple_binop (mode, IOR,
9837 tmp, GEN_INT (ct),
9838 copy_rtx (tmp), 1, OPTAB_DIRECT);
9840 else if (diff == -1 && ct)
9843 * cmpl op0,op1
9844 * sbbl dest,dest
9845 * notl dest
9846 * [addl dest, cf]
9848 * Size 8 - 11.
9850 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9851 if (cf)
9852 tmp = expand_simple_binop (mode, PLUS,
9853 copy_rtx (tmp), GEN_INT (cf),
9854 copy_rtx (tmp), 1, OPTAB_DIRECT);
9856 else
9859 * cmpl op0,op1
9860 * sbbl dest,dest
9861 * [notl dest]
9862 * andl cf - ct, dest
9863 * [addl dest, ct]
9865 * Size 8 - 11.
9868 if (cf == 0)
9870 cf = ct;
9871 ct = 0;
9872 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9875 tmp = expand_simple_binop (mode, AND,
9876 copy_rtx (tmp),
9877 gen_int_mode (cf - ct, mode),
9878 copy_rtx (tmp), 1, OPTAB_DIRECT);
9879 if (ct)
9880 tmp = expand_simple_binop (mode, PLUS,
9881 copy_rtx (tmp), GEN_INT (ct),
9882 copy_rtx (tmp), 1, OPTAB_DIRECT);
9885 if (!rtx_equal_p (tmp, out))
9886 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9888 return 1; /* DONE */
9891 if (diff < 0)
9893 HOST_WIDE_INT tmp;
9894 tmp = ct, ct = cf, cf = tmp;
9895 diff = -diff;
9896 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9898 /* We may be reversing an unordered compare to a normal compare, which
9899 is not valid in general (we may convert a non-trapping condition
9900 to a trapping one); however, on i386 we currently emit all
9901 comparisons unordered. */
9902 compare_code = reverse_condition_maybe_unordered (compare_code);
9903 code = reverse_condition_maybe_unordered (code);
9905 else
9907 compare_code = reverse_condition (compare_code);
9908 code = reverse_condition (code);
9912 compare_code = NIL;
9913 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9914 && GET_CODE (ix86_compare_op1) == CONST_INT)
9916 if (ix86_compare_op1 == const0_rtx
9917 && (code == LT || code == GE))
9918 compare_code = code;
9919 else if (ix86_compare_op1 == constm1_rtx)
9921 if (code == LE)
9922 compare_code = LT;
9923 else if (code == GT)
9924 compare_code = GE;
9928 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9929 if (compare_code != NIL
9930 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9931 && (cf == -1 || ct == -1))
9933 /* If lea code below could be used, only optimize
9934 if it results in a 2 insn sequence. */
9936 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9937 || diff == 3 || diff == 5 || diff == 9)
9938 || (compare_code == LT && ct == -1)
9939 || (compare_code == GE && cf == -1))
9942 * notl op1 (if necessary)
9943 * sarl $31, op1
9944 * orl cf, op1
9946 if (ct != -1)
9948 cf = ct;
9949 ct = -1;
9950 code = reverse_condition (code);
9953 out = emit_store_flag (out, code, ix86_compare_op0,
9954 ix86_compare_op1, VOIDmode, 0, -1);
9956 out = expand_simple_binop (mode, IOR,
9957 out, GEN_INT (cf),
9958 out, 1, OPTAB_DIRECT);
9959 if (out != operands[0])
9960 emit_move_insn (operands[0], out);
9962 return 1; /* DONE */
9967 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9968 || diff == 3 || diff == 5 || diff == 9)
9969 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9970 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9973 * xorl dest,dest
9974 * cmpl op1,op2
9975 * setcc dest
9976 * lea cf(dest*(ct-cf)),dest
9978 * Size 14.
9980 * This also catches the degenerate setcc-only case.
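/* Illustration: with cf = 2 and ct = 5 (so diff = 3), the 0/1 value left in
   dest by setcc is turned into either 2 or 5 by a single address
   calculation, lea 2(dest,dest,2), i.e. dest*2 + dest + 2; the MULT/PLUS
   rtx constructed below describes exactly that computation.  */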
9983 rtx tmp;
9984 int nops;
9986 out = emit_store_flag (out, code, ix86_compare_op0,
9987 ix86_compare_op1, VOIDmode, 0, 1);
9989 nops = 0;
9990 /* On x86_64 the lea instruction operates on Pmode, so we need
9991 to get the arithmetic done in the proper mode to match. */
9992 if (diff == 1)
9993 tmp = copy_rtx (out);
9994 else
9996 rtx out1;
9997 out1 = copy_rtx (out);
9998 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9999 nops++;
10000 if (diff & 1)
10002 tmp = gen_rtx_PLUS (mode, tmp, out1);
10003 nops++;
10006 if (cf != 0)
10008 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10009 nops++;
10011 if (!rtx_equal_p (tmp, out))
10013 if (nops == 1)
10014 out = force_operand (tmp, copy_rtx (out));
10015 else
10016 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10018 if (!rtx_equal_p (out, operands[0]))
10019 emit_move_insn (operands[0], copy_rtx (out));
10021 return 1; /* DONE */
10025 * General case: Jumpful:
10026 * xorl dest,dest cmpl op1, op2
10027 * cmpl op1, op2 movl ct, dest
10028 * setcc dest jcc 1f
10029 * decl dest movl cf, dest
10030 * andl (cf-ct),dest 1:
10031 * addl ct,dest
10033 * Size 20. Size 14.
10035 * This is reasonably steep, but branch mispredict costs are
10036 * high on modern cpus, so consider failing only if optimizing
10037 * for space.
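/* After the decl the register holds either 0 (setcc produced 1) or -1
   (setcc produced 0), and (0 & (cf - ct)) + ct == ct while
   (-1 & (cf - ct)) + ct == cf, so the and/add pair picks the proper
   constant without a conditional jump.  */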
10040 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10041 && BRANCH_COST >= 2)
10043 if (cf == 0)
10045 cf = ct;
10046 ct = 0;
10047 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10048 /* We may be reversing an unordered compare to a normal compare,
10049 which is not valid in general (we may convert a non-trapping
10050 condition to a trapping one); however, on i386 we currently
10051 emit all comparisons unordered. */
10052 code = reverse_condition_maybe_unordered (code);
10053 else
10055 code = reverse_condition (code);
10056 if (compare_code != NIL)
10057 compare_code = reverse_condition (compare_code);
10061 if (compare_code != NIL)
10063 /* notl op1 (if needed)
10064 sarl $31, op1
10065 andl (cf-ct), op1
10066 addl ct, op1
10068 For x < 0 (resp. x <= -1) there will be no notl,
10069 so if possible swap the constants to get rid of the
10070 complement.
10071 True/false will be -1/0 while code below (store flag
10072 followed by decrement) is 0/-1, so the constants need
10073 to be exchanged once more. */
10075 if (compare_code == GE || !cf)
10077 code = reverse_condition (code);
10078 compare_code = LT;
10080 else
10082 HOST_WIDE_INT tmp = cf;
10083 cf = ct;
10084 ct = tmp;
10087 out = emit_store_flag (out, code, ix86_compare_op0,
10088 ix86_compare_op1, VOIDmode, 0, -1);
10090 else
10092 out = emit_store_flag (out, code, ix86_compare_op0,
10093 ix86_compare_op1, VOIDmode, 0, 1);
10095 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10096 copy_rtx (out), 1, OPTAB_DIRECT);
10099 out = expand_simple_binop (mode, AND, copy_rtx (out),
10100 gen_int_mode (cf - ct, mode),
10101 copy_rtx (out), 1, OPTAB_DIRECT);
10102 if (ct)
10103 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10104 copy_rtx (out), 1, OPTAB_DIRECT);
10105 if (!rtx_equal_p (out, operands[0]))
10106 emit_move_insn (operands[0], copy_rtx (out));
10108 return 1; /* DONE */
10112 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10114 /* Try a few things more with specific constants and a variable. */
10116 optab op;
10117 rtx var, orig_out, out, tmp;
10119 if (BRANCH_COST <= 2)
10120 return 0; /* FAIL */
10122 /* If one of the two operands is an interesting constant, load a
10123 constant with the above and mask it in with a logical operation. */
10125 if (GET_CODE (operands[2]) == CONST_INT)
10127 var = operands[3];
10128 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10129 operands[3] = constm1_rtx, op = and_optab;
10130 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10131 operands[3] = const0_rtx, op = ior_optab;
10132 else
10133 return 0; /* FAIL */
10135 else if (GET_CODE (operands[3]) == CONST_INT)
10137 var = operands[2];
10138 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10139 operands[2] = constm1_rtx, op = and_optab;
10140 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10141 operands[2] = const0_rtx, op = ior_optab;
10142 else
10143 return 0; /* FAIL */
10145 else
10146 return 0; /* FAIL */
10148 orig_out = operands[0];
10149 tmp = gen_reg_rtx (mode);
10150 operands[0] = tmp;
10152 /* Recurse to get the constant loaded. */
10153 if (ix86_expand_int_movcc (operands) == 0)
10154 return 0; /* FAIL */
10156 /* Mask in the interesting variable. */
10157 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10158 OPTAB_WIDEN);
10159 if (!rtx_equal_p (out, orig_out))
10160 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10162 return 1; /* DONE */
10166 * For comparison with above,
10168 * movl cf,dest
10169 * movl ct,tmp
10170 * cmpl op1,op2
10171 * cmovcc tmp,dest
10173 * Size 15.
10176 if (! nonimmediate_operand (operands[2], mode))
10177 operands[2] = force_reg (mode, operands[2]);
10178 if (! nonimmediate_operand (operands[3], mode))
10179 operands[3] = force_reg (mode, operands[3]);
10181 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10183 rtx tmp = gen_reg_rtx (mode);
10184 emit_move_insn (tmp, operands[3]);
10185 operands[3] = tmp;
10187 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10189 rtx tmp = gen_reg_rtx (mode);
10190 emit_move_insn (tmp, operands[2]);
10191 operands[2] = tmp;
10194 if (! register_operand (operands[2], VOIDmode)
10195 && (mode == QImode
10196 || ! register_operand (operands[3], VOIDmode)))
10197 operands[2] = force_reg (mode, operands[2]);
10199 if (mode == QImode
10200 && ! register_operand (operands[3], VOIDmode))
10201 operands[3] = force_reg (mode, operands[3]);
10203 emit_insn (compare_seq);
10204 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10205 gen_rtx_IF_THEN_ELSE (mode,
10206 compare_op, operands[2],
10207 operands[3])));
10208 if (bypass_test)
10209 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10210 gen_rtx_IF_THEN_ELSE (mode,
10211 bypass_test,
10212 copy_rtx (operands[3]),
10213 copy_rtx (operands[0]))));
10214 if (second_test)
10215 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10216 gen_rtx_IF_THEN_ELSE (mode,
10217 second_test,
10218 copy_rtx (operands[2]),
10219 copy_rtx (operands[0]))));
10221 return 1; /* DONE */
10225 ix86_expand_fp_movcc (rtx operands[])
10227 enum rtx_code code;
10228 rtx tmp;
10229 rtx compare_op, second_test, bypass_test;
10231 /* For SF/DFmode conditional moves based on comparisons
10232 in same mode, we may want to use SSE min/max instructions. */
10233 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10234 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10235 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10236 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10237 && (!TARGET_IEEE_FP
10238 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10239 /* We may be called from the post-reload splitter. */
10240 && (!REG_P (operands[0])
10241 || SSE_REG_P (operands[0])
10242 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10244 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10245 code = GET_CODE (operands[1]);
10247 /* See if we have (cross) match between comparison operands and
10248 conditional move operands. */
10249 if (rtx_equal_p (operands[2], op1))
10251 rtx tmp = op0;
10252 op0 = op1;
10253 op1 = tmp;
10254 code = reverse_condition_maybe_unordered (code);
10256 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10258 /* Check for min operation. */
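/* (op0 < op1 ? op0 : op1) corresponds to the SSE min instruction; for UNLE
   the comparison operands are swapped first so that the same instruction
   can be used.  */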
10259 if (code == LT || code == UNLE)
10261 if (code == UNLE)
10263 rtx tmp = op0;
10264 op0 = op1;
10265 op1 = tmp;
10267 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10268 if (memory_operand (op0, VOIDmode))
10269 op0 = force_reg (GET_MODE (operands[0]), op0);
10270 if (GET_MODE (operands[0]) == SFmode)
10271 emit_insn (gen_minsf3 (operands[0], op0, op1));
10272 else
10273 emit_insn (gen_mindf3 (operands[0], op0, op1));
10274 return 1;
10276 /* Check for max operation. */
10277 if (code == GT || code == UNGE)
10279 if (code == UNGE)
10281 rtx tmp = op0;
10282 op0 = op1;
10283 op1 = tmp;
10285 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10286 if (memory_operand (op0, VOIDmode))
10287 op0 = force_reg (GET_MODE (operands[0]), op0);
10288 if (GET_MODE (operands[0]) == SFmode)
10289 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10290 else
10291 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10292 return 1;
10295 /* Manage condition to be sse_comparison_operator. In case we are
10296 in non-ieee mode, try to canonicalize the destination operand
10297 to be first in the comparison - this helps reload to avoid extra
10298 moves. */
10299 if (!sse_comparison_operator (operands[1], VOIDmode)
10300 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10302 rtx tmp = ix86_compare_op0;
10303 ix86_compare_op0 = ix86_compare_op1;
10304 ix86_compare_op1 = tmp;
10305 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10306 VOIDmode, ix86_compare_op0,
10307 ix86_compare_op1);
10309 /* Similarly, try to arrange for the result to be the first operand of the
10310 conditional move. We also don't support the NE comparison on SSE, so
10311 try to avoid it. */
10312 if ((rtx_equal_p (operands[0], operands[3])
10313 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10314 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10316 rtx tmp = operands[2];
10317 operands[2] = operands[3];
10318 operands[3] = tmp;
10319 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10320 (GET_CODE (operands[1])),
10321 VOIDmode, ix86_compare_op0,
10322 ix86_compare_op1);
10324 if (GET_MODE (operands[0]) == SFmode)
10325 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10326 operands[2], operands[3],
10327 ix86_compare_op0, ix86_compare_op1));
10328 else
10329 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10330 operands[2], operands[3],
10331 ix86_compare_op0, ix86_compare_op1));
10332 return 1;
10335 /* The floating point conditional move instructions don't directly
10336 support conditions resulting from a signed integer comparison. */
10338 code = GET_CODE (operands[1]);
10339 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10344 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10346 if (second_test != NULL || bypass_test != NULL)
10347 abort ();
10348 tmp = gen_reg_rtx (QImode);
10349 ix86_expand_setcc (code, tmp);
10350 code = NE;
10351 ix86_compare_op0 = tmp;
10352 ix86_compare_op1 = const0_rtx;
10353 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10355 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10357 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10358 emit_move_insn (tmp, operands[3]);
10359 operands[3] = tmp;
10361 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10363 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10364 emit_move_insn (tmp, operands[2]);
10365 operands[2] = tmp;
10368 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10369 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10370 compare_op,
10371 operands[2],
10372 operands[3])));
10373 if (bypass_test)
10374 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10375 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10376 bypass_test,
10377 operands[3],
10378 operands[0])));
10379 if (second_test)
10380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10382 second_test,
10383 operands[2],
10384 operands[0])));
10386 return 1;
10389 /* Expand conditional increment or decrement using adc/sbb instructions.
10390 The default case using setcc followed by the conditional move can be
10391 done by generic code. */
10393 ix86_expand_int_addcc (rtx operands[])
10395 enum rtx_code code = GET_CODE (operands[1]);
10396 rtx compare_op;
10397 rtx val = const0_rtx;
10398 bool fpcmp = false;
10399 enum machine_mode mode = GET_MODE (operands[0]);
10401 if (operands[3] != const1_rtx
10402 && operands[3] != constm1_rtx)
10403 return 0;
10404 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10405 ix86_compare_op1, &compare_op))
10406 return 0;
10407 code = GET_CODE (compare_op);
10409 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10410 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10412 fpcmp = true;
10413 code = ix86_fp_compare_code_to_integer (code);
10416 if (code != LTU)
10418 val = constm1_rtx;
10419 if (fpcmp)
10420 PUT_CODE (compare_op,
10421 reverse_condition_maybe_unordered
10422 (GET_CODE (compare_op)));
10423 else
10424 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10426 PUT_MODE (compare_op, mode);
10428 /* Construct either adc or sbb insn. */
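/* For example, an unsigned  r = x + (a < b ? 1 : 0)  becomes (roughly) a
   compare that sets the carry flag followed by  adcl $0, r ; the sbb forms
   handle the conditional decrement and the reversed conditions the same
   way.  */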
10429 if ((code == LTU) == (operands[3] == constm1_rtx))
10431 switch (GET_MODE (operands[0]))
10433 case QImode:
10434 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10435 break;
10436 case HImode:
10437 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10438 break;
10439 case SImode:
10440 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10441 break;
10442 case DImode:
10443 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10444 break;
10445 default:
10446 abort ();
10449 else
10451 switch (GET_MODE (operands[0]))
10453 case QImode:
10454 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10455 break;
10456 case HImode:
10457 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10458 break;
10459 case SImode:
10460 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10461 break;
10462 case DImode:
10463 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10464 break;
10465 default:
10466 abort ();
10469 return 1; /* DONE */
10473 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10474 works for floating point parameters and non-offsettable memories.
10475 For pushes, it returns just stack offsets; the values will be saved
10476 in the right order. At most three parts are generated. */
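/* Concretely: on a 32-bit target DImode and DFmode values split into two
   SImode parts and XFmode into three; on a 64-bit target TImode splits into
   two DImode parts, while XFmode and TFmode split into a DImode low part
   plus an SImode or DImode upper part respectively.  */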
10478 static int
10479 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10481 int size;
10483 if (!TARGET_64BIT)
10484 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10485 else
10486 size = (GET_MODE_SIZE (mode) + 4) / 8;
10488 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10489 abort ();
10490 if (size < 2 || size > 3)
10491 abort ();
10493 /* Optimize constant pool reference to immediates. This is used by fp
10494 moves, that force all constants to memory to allow combining. */
10495 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10497 rtx tmp = maybe_get_pool_constant (operand);
10498 if (tmp)
10499 operand = tmp;
10502 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10504 /* The only non-offsettable memories we handle are pushes. */
10505 if (! push_operand (operand, VOIDmode))
10506 abort ();
10508 operand = copy_rtx (operand);
10509 PUT_MODE (operand, Pmode);
10510 parts[0] = parts[1] = parts[2] = operand;
10512 else if (!TARGET_64BIT)
10514 if (mode == DImode)
10515 split_di (&operand, 1, &parts[0], &parts[1]);
10516 else
10518 if (REG_P (operand))
10520 if (!reload_completed)
10521 abort ();
10522 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10523 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10524 if (size == 3)
10525 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10527 else if (offsettable_memref_p (operand))
10529 operand = adjust_address (operand, SImode, 0);
10530 parts[0] = operand;
10531 parts[1] = adjust_address (operand, SImode, 4);
10532 if (size == 3)
10533 parts[2] = adjust_address (operand, SImode, 8);
10535 else if (GET_CODE (operand) == CONST_DOUBLE)
10537 REAL_VALUE_TYPE r;
10538 long l[4];
10540 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10541 switch (mode)
10543 case XFmode:
10544 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10545 parts[2] = gen_int_mode (l[2], SImode);
10546 break;
10547 case DFmode:
10548 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10549 break;
10550 default:
10551 abort ();
10553 parts[1] = gen_int_mode (l[1], SImode);
10554 parts[0] = gen_int_mode (l[0], SImode);
10556 else
10557 abort ();
10560 else
10562 if (mode == TImode)
10563 split_ti (&operand, 1, &parts[0], &parts[1]);
10564 if (mode == XFmode || mode == TFmode)
10566 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10567 if (REG_P (operand))
10569 if (!reload_completed)
10570 abort ();
10571 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10572 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10574 else if (offsettable_memref_p (operand))
10576 operand = adjust_address (operand, DImode, 0);
10577 parts[0] = operand;
10578 parts[1] = adjust_address (operand, upper_mode, 8);
10580 else if (GET_CODE (operand) == CONST_DOUBLE)
10582 REAL_VALUE_TYPE r;
10583 long l[3];
10585 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10586 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10587 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10588 if (HOST_BITS_PER_WIDE_INT >= 64)
10589 parts[0]
10590 = gen_int_mode
10591 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10592 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10593 DImode);
10594 else
10595 parts[0] = immed_double_const (l[0], l[1], DImode);
10596 if (upper_mode == SImode)
10597 parts[1] = gen_int_mode (l[2], SImode);
10598 else if (HOST_BITS_PER_WIDE_INT >= 64)
10599 parts[1]
10600 = gen_int_mode
10601 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10602 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10603 DImode);
10604 else
10605 parts[1] = immed_double_const (l[2], l[3], DImode);
10607 else
10608 abort ();
10612 return size;
10615 /* Emit insns to perform a move or push of DI, DF, and XF values;
10616 all required insns are emitted directly. Operands 2-4 contain
10617 the input values in the correct order; operands 5-7 contain
10618 the output values. */
10620 void
10621 ix86_split_long_move (rtx operands[])
10623 rtx part[2][3];
10624 int nparts;
10625 int push = 0;
10626 int collisions = 0;
10627 enum machine_mode mode = GET_MODE (operands[0]);
10629 /* The DFmode expanders may ask us to move a double.
10630 For a 64-bit target this is a single move. By hiding that fact
10631 here we simplify the i386.md splitters. */
10632 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10634 /* Optimize constant pool reference to immediates. This is used by
10635 fp moves, that force all constants to memory to allow combining. */
10637 if (GET_CODE (operands[1]) == MEM
10638 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10639 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10640 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10641 if (push_operand (operands[0], VOIDmode))
10643 operands[0] = copy_rtx (operands[0]);
10644 PUT_MODE (operands[0], Pmode);
10646 else
10647 operands[0] = gen_lowpart (DImode, operands[0]);
10648 operands[1] = gen_lowpart (DImode, operands[1]);
10649 emit_move_insn (operands[0], operands[1]);
10650 return;
10653 /* The only non-offsettable memory we handle is push. */
10654 if (push_operand (operands[0], VOIDmode))
10655 push = 1;
10656 else if (GET_CODE (operands[0]) == MEM
10657 && ! offsettable_memref_p (operands[0]))
10658 abort ();
10660 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10661 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10663 /* When emitting a push, take care of source operands on the stack. */
10664 if (push && GET_CODE (operands[1]) == MEM
10665 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10667 if (nparts == 3)
10668 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10669 XEXP (part[1][2], 0));
10670 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10671 XEXP (part[1][1], 0));
10674 /* We need to do the copy in the right order in case an address register
10675 of the source overlaps the destination. */
10676 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10678 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10679 collisions++;
10680 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10681 collisions++;
10682 if (nparts == 3
10683 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10684 collisions++;
10686 /* Collision in the middle part can be handled by reordering. */
10687 if (collisions == 1 && nparts == 3
10688 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10690 rtx tmp;
10691 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10692 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10695 /* If there are more collisions, we can't handle it by reordering.
10696 Do an lea to the last part and use only one colliding move. */
10697 else if (collisions > 1)
10699 rtx base;
10701 collisions = 1;
10703 base = part[0][nparts - 1];
10705 /* Handle the case when the last part isn't valid for lea.
10706 Happens in 64-bit mode storing the 12-byte XFmode. */
10707 if (GET_MODE (base) != Pmode)
10708 base = gen_rtx_REG (Pmode, REGNO (base));
10710 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10711 part[1][0] = replace_equiv_address (part[1][0], base);
10712 part[1][1] = replace_equiv_address (part[1][1],
10713 plus_constant (base, UNITS_PER_WORD));
10714 if (nparts == 3)
10715 part[1][2] = replace_equiv_address (part[1][2],
10716 plus_constant (base, 8));
10720 if (push)
10722 if (!TARGET_64BIT)
10724 if (nparts == 3)
10726 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10727 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10728 emit_move_insn (part[0][2], part[1][2]);
10731 else
10733 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10734 register, that is OK - we will just use the larger counterpart. We also
10735 retype the memory - this comes from the attempt to avoid a REX prefix
10736 on moving the second half of a TFmode value. */
10737 if (GET_MODE (part[1][1]) == SImode)
10739 if (GET_CODE (part[1][1]) == MEM)
10740 part[1][1] = adjust_address (part[1][1], DImode, 0);
10741 else if (REG_P (part[1][1]))
10742 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10743 else
10744 abort ();
10745 if (GET_MODE (part[1][0]) == SImode)
10746 part[1][0] = part[1][1];
10749 emit_move_insn (part[0][1], part[1][1]);
10750 emit_move_insn (part[0][0], part[1][0]);
10751 return;
10754 /* Choose the correct order so as not to overwrite the source before it is copied. */
10755 if ((REG_P (part[0][0])
10756 && REG_P (part[1][1])
10757 && (REGNO (part[0][0]) == REGNO (part[1][1])
10758 || (nparts == 3
10759 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10760 || (collisions > 0
10761 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10763 if (nparts == 3)
10765 operands[2] = part[0][2];
10766 operands[3] = part[0][1];
10767 operands[4] = part[0][0];
10768 operands[5] = part[1][2];
10769 operands[6] = part[1][1];
10770 operands[7] = part[1][0];
10772 else
10774 operands[2] = part[0][1];
10775 operands[3] = part[0][0];
10776 operands[5] = part[1][1];
10777 operands[6] = part[1][0];
10780 else
10782 if (nparts == 3)
10784 operands[2] = part[0][0];
10785 operands[3] = part[0][1];
10786 operands[4] = part[0][2];
10787 operands[5] = part[1][0];
10788 operands[6] = part[1][1];
10789 operands[7] = part[1][2];
10791 else
10793 operands[2] = part[0][0];
10794 operands[3] = part[0][1];
10795 operands[5] = part[1][0];
10796 operands[6] = part[1][1];
10799 emit_move_insn (operands[2], operands[5]);
10800 emit_move_insn (operands[3], operands[6]);
10801 if (nparts == 3)
10802 emit_move_insn (operands[4], operands[7]);
10804 return;
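/* Split a DImode left shift into SImode operations.  For a constant count of
   32 or more the low word is simply moved into the high word (followed by a
   residual shift) and the low word is cleared; smaller constant counts use
   an shld/sall pair.  For a variable count the shld/sall pair is emitted and
   the count >= 32 case is fixed up at run time through the x86_shift_adj
   patterns.  */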
10807 void
10808 ix86_split_ashldi (rtx *operands, rtx scratch)
10810 rtx low[2], high[2];
10811 int count;
10813 if (GET_CODE (operands[2]) == CONST_INT)
10815 split_di (operands, 2, low, high);
10816 count = INTVAL (operands[2]) & 63;
10818 if (count >= 32)
10820 emit_move_insn (high[0], low[1]);
10821 emit_move_insn (low[0], const0_rtx);
10823 if (count > 32)
10824 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10826 else
10828 if (!rtx_equal_p (operands[0], operands[1]))
10829 emit_move_insn (operands[0], operands[1]);
10830 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10831 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10834 else
10836 if (!rtx_equal_p (operands[0], operands[1]))
10837 emit_move_insn (operands[0], operands[1]);
10839 split_di (operands, 1, low, high);
10841 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10842 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10844 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10846 if (! no_new_pseudos)
10847 scratch = force_reg (SImode, const0_rtx);
10848 else
10849 emit_move_insn (scratch, const0_rtx);
10851 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10852 scratch));
10854 else
10855 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
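/* Split a DImode arithmetic right shift into SImode operations.  The
   structure mirrors ix86_split_ashldi above, except that the vacated high
   word is filled with copies of the sign bit (sarl $31) instead of zeros.  */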
10859 void
10860 ix86_split_ashrdi (rtx *operands, rtx scratch)
10862 rtx low[2], high[2];
10863 int count;
10865 if (GET_CODE (operands[2]) == CONST_INT)
10867 split_di (operands, 2, low, high);
10868 count = INTVAL (operands[2]) & 63;
10870 if (count >= 32)
10872 emit_move_insn (low[0], high[1]);
10874 if (! reload_completed)
10875 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10876 else
10878 emit_move_insn (high[0], low[0]);
10879 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10882 if (count > 32)
10883 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10885 else
10887 if (!rtx_equal_p (operands[0], operands[1]))
10888 emit_move_insn (operands[0], operands[1]);
10889 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10890 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10893 else
10895 if (!rtx_equal_p (operands[0], operands[1]))
10896 emit_move_insn (operands[0], operands[1]);
10898 split_di (operands, 1, low, high);
10900 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10901 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10903 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10905 if (! no_new_pseudos)
10906 scratch = gen_reg_rtx (SImode);
10907 emit_move_insn (scratch, high[0]);
10908 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10909 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10910 scratch));
10912 else
10913 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10917 void
10918 ix86_split_lshrdi (rtx *operands, rtx scratch)
10920 rtx low[2], high[2];
10921 int count;
10923 if (GET_CODE (operands[2]) == CONST_INT)
10925 split_di (operands, 2, low, high);
10926 count = INTVAL (operands[2]) & 63;
10928 if (count >= 32)
10930 emit_move_insn (low[0], high[1]);
10931 emit_move_insn (high[0], const0_rtx);
10933 if (count > 32)
10934 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10936 else
10938 if (!rtx_equal_p (operands[0], operands[1]))
10939 emit_move_insn (operands[0], operands[1]);
10940 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10941 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10944 else
10946 if (!rtx_equal_p (operands[0], operands[1]))
10947 emit_move_insn (operands[0], operands[1]);
10949 split_di (operands, 1, low, high);
10951 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10952 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10954 /* Heh. By reversing the arguments, we can reuse this pattern. */
10955 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10957 if (! no_new_pseudos)
10958 scratch = force_reg (SImode, const0_rtx);
10959 else
10960 emit_move_insn (scratch, const0_rtx);
10962 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10963 scratch));
10965 else
10966 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10970 /* Helper function for the string operations below. Emit code that tests
10971 whether the bits of VARIABLE selected by VALUE are zero (i.e. the address
is suitably aligned); if so, jump to the label that is returned. */
10972 static rtx
10973 ix86_expand_aligntest (rtx variable, int value)
10975 rtx label = gen_label_rtx ();
10976 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10977 if (GET_MODE (variable) == DImode)
10978 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10979 else
10980 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10981 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10982 1, label);
10983 return label;
10986 /* Decrease COUNTREG by VALUE. */
10987 static void
10988 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10990 if (GET_MODE (countreg) == DImode)
10991 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10992 else
10993 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10996 /* Zero extend possibly SImode EXP to Pmode register. */
10998 ix86_zero_extend_to_Pmode (rtx exp)
11000 rtx r;
11001 if (GET_MODE (exp) == VOIDmode)
11002 return force_reg (Pmode, exp);
11003 if (GET_MODE (exp) == Pmode)
11004 return copy_to_mode_reg (Pmode, exp);
11005 r = gen_reg_rtx (Pmode);
11006 emit_insn (gen_zero_extendsidi2 (r, exp));
11007 return r;
11010 /* Expand string move (memcpy) operation. Use i386 string operations when
11011 profitable. expand_clrstr contains similar code. */
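/* Three strategies are used below: a plain "rep movsb" when optimizing for
   size and the count is unknown or not a multiple of 4; "rep movsl" (or
   movsq on 64-bit) plus an unrolled tail for small or suitably aligned
   constant counts; and, for the general case, explicit alignment of the
   destination followed by "rep movsl" and a tail copy, giving up in favor
   of the library call when nothing useful is known about the alignment.  */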
11013 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11015 rtx srcreg, destreg, countreg, srcexp, destexp;
11016 enum machine_mode counter_mode;
11017 HOST_WIDE_INT align = 0;
11018 unsigned HOST_WIDE_INT count = 0;
11020 if (GET_CODE (align_exp) == CONST_INT)
11021 align = INTVAL (align_exp);
11023 /* Can't use any of this if the user has appropriated esi or edi. */
11024 if (global_regs[4] || global_regs[5])
11025 return 0;
11027 /* This simple hack avoids all inlining code and simplifies code below. */
11028 if (!TARGET_ALIGN_STRINGOPS)
11029 align = 64;
11031 if (GET_CODE (count_exp) == CONST_INT)
11033 count = INTVAL (count_exp);
11034 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11035 return 0;
11038 /* Figure out proper mode for counter. For 32bits it is always SImode,
11039 for 64bits use SImode when possible, otherwise DImode.
11040 Set count to number of bytes copied when known at compile time. */
11041 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11042 || x86_64_zero_extended_value (count_exp))
11043 counter_mode = SImode;
11044 else
11045 counter_mode = DImode;
11047 if (counter_mode != SImode && counter_mode != DImode)
11048 abort ();
11050 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11051 if (destreg != XEXP (dst, 0))
11052 dst = replace_equiv_address_nv (dst, destreg);
11053 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11054 if (srcreg != XEXP (src, 0))
11055 src = replace_equiv_address_nv (src, srcreg);
11057 /* When optimizing for size emit simple rep ; movsb instruction for
11058 counts not divisible by 4. */
11060 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11062 emit_insn (gen_cld ());
11063 countreg = ix86_zero_extend_to_Pmode (count_exp);
11064 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11065 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11066 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11067 destexp, srcexp));
11070 /* For constant aligned (or small unaligned) copies use rep movsl
11071 followed by code copying the rest. For PentiumPro ensure 8 byte
11072 alignment to allow rep movsl acceleration. */
11074 else if (count != 0
11075 && (align >= 8
11076 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11077 || optimize_size || count < (unsigned int) 64))
11079 unsigned HOST_WIDE_INT offset = 0;
11080 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11081 rtx srcmem, dstmem;
11083 emit_insn (gen_cld ());
11084 if (count & ~(size - 1))
11086 countreg = copy_to_mode_reg (counter_mode,
11087 GEN_INT ((count >> (size == 4 ? 2 : 3))
11088 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11089 countreg = ix86_zero_extend_to_Pmode (countreg);
11091 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11092 GEN_INT (size == 4 ? 2 : 3));
11093 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11094 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11096 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11097 countreg, destexp, srcexp));
11098 offset = count & ~(size - 1);
11100 if (size == 8 && (count & 0x04))
11102 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11103 offset);
11104 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11105 offset);
11106 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11107 offset += 4;
11109 if (count & 0x02)
11111 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11112 offset);
11113 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11114 offset);
11115 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11116 offset += 2;
11118 if (count & 0x01)
11120 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11121 offset);
11122 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11123 offset);
11124 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11127 /* The generic code based on the glibc implementation:
11128 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11129 allowing accelerated copying there)
11130 - copy the data using rep movsl
11131 - copy the rest. */
11132 else
11134 rtx countreg2;
11135 rtx label = NULL;
11136 rtx srcmem, dstmem;
11137 int desired_alignment = (TARGET_PENTIUMPRO
11138 && (count == 0 || count >= (unsigned int) 260)
11139 ? 8 : UNITS_PER_WORD);
11140 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11141 dst = change_address (dst, BLKmode, destreg);
11142 src = change_address (src, BLKmode, srcreg);
11144 /* In case we don't know anything about the alignment, default to the
11145 library version, since it is usually equally fast and results in
11146 shorter code.
11148 Also emit a call when we know that the count is large and the call
11149 overhead will not be important. */
11150 if (!TARGET_INLINE_ALL_STRINGOPS
11151 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11152 return 0;
11154 if (TARGET_SINGLE_STRINGOP)
11155 emit_insn (gen_cld ());
11157 countreg2 = gen_reg_rtx (Pmode);
11158 countreg = copy_to_mode_reg (counter_mode, count_exp);
11160 /* We don't use loops to align destination and to copy parts smaller
11161 than 4 bytes, because gcc is able to optimize such code better (in
11162 the case the destination or the count really is aligned, gcc is often
11163 able to predict the branches) and also it is friendlier to the
11164 hardware branch prediction.
11166 Using loops is beneficial for generic case, because we can
11167 handle small counts using the loops. Many CPUs (such as Athlon)
11168 have large REP prefix setup costs.
11170 This is quite costly. Maybe we can revisit this decision later or
11171 add some customizability to this code. */
11173 if (count == 0 && align < desired_alignment)
11175 label = gen_label_rtx ();
11176 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11177 LEU, 0, counter_mode, 1, label);
11179 if (align <= 1)
11181 rtx label = ix86_expand_aligntest (destreg, 1);
11182 srcmem = change_address (src, QImode, srcreg);
11183 dstmem = change_address (dst, QImode, destreg);
11184 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11185 ix86_adjust_counter (countreg, 1);
11186 emit_label (label);
11187 LABEL_NUSES (label) = 1;
11189 if (align <= 2)
11191 rtx label = ix86_expand_aligntest (destreg, 2);
11192 srcmem = change_address (src, HImode, srcreg);
11193 dstmem = change_address (dst, HImode, destreg);
11194 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11195 ix86_adjust_counter (countreg, 2);
11196 emit_label (label);
11197 LABEL_NUSES (label) = 1;
11199 if (align <= 4 && desired_alignment > 4)
11201 rtx label = ix86_expand_aligntest (destreg, 4);
11202 srcmem = change_address (src, SImode, srcreg);
11203 dstmem = change_address (dst, SImode, destreg);
11204 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11205 ix86_adjust_counter (countreg, 4);
11206 emit_label (label);
11207 LABEL_NUSES (label) = 1;
11210 if (label && desired_alignment > 4 && !TARGET_64BIT)
11212 emit_label (label);
11213 LABEL_NUSES (label) = 1;
11214 label = NULL_RTX;
11216 if (!TARGET_SINGLE_STRINGOP)
11217 emit_insn (gen_cld ());
11218 if (TARGET_64BIT)
11220 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11221 GEN_INT (3)));
11222 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11224 else
11226 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11227 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11229 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11230 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11231 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11232 countreg2, destexp, srcexp));
11234 if (label)
11236 emit_label (label);
11237 LABEL_NUSES (label) = 1;
11239 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11241 srcmem = change_address (src, SImode, srcreg);
11242 dstmem = change_address (dst, SImode, destreg);
11243 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11245 if ((align <= 4 || count == 0) && TARGET_64BIT)
11247 rtx label = ix86_expand_aligntest (countreg, 4);
11248 srcmem = change_address (src, SImode, srcreg);
11249 dstmem = change_address (dst, SImode, destreg);
11250 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11251 emit_label (label);
11252 LABEL_NUSES (label) = 1;
11254 if (align > 2 && count != 0 && (count & 2))
11256 srcmem = change_address (src, HImode, srcreg);
11257 dstmem = change_address (dst, HImode, destreg);
11258 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11260 if (align <= 2 || count == 0)
11262 rtx label = ix86_expand_aligntest (countreg, 2);
11263 srcmem = change_address (src, HImode, srcreg);
11264 dstmem = change_address (dst, HImode, destreg);
11265 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11266 emit_label (label);
11267 LABEL_NUSES (label) = 1;
11269 if (align > 1 && count != 0 && (count & 1))
11271 srcmem = change_address (src, QImode, srcreg);
11272 dstmem = change_address (dst, QImode, destreg);
11273 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11275 if (align <= 1 || count == 0)
11277 rtx label = ix86_expand_aligntest (countreg, 1);
11278 srcmem = change_address (src, QImode, srcreg);
11279 dstmem = change_address (dst, QImode, destreg);
11280 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11281 emit_label (label);
11282 LABEL_NUSES (label) = 1;
11286 return 1;
11289 /* Expand string clear operation (bzero). Use i386 string operations when
11290 profitable. expand_movstr contains similar code. */
11292 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11294 rtx destreg, zeroreg, countreg, destexp;
11295 enum machine_mode counter_mode;
11296 HOST_WIDE_INT align = 0;
11297 unsigned HOST_WIDE_INT count = 0;
11299 if (GET_CODE (align_exp) == CONST_INT)
11300 align = INTVAL (align_exp);
11302 /* Can't use any of this if the user has appropriated edi. */
11303 if (global_regs[5])
11304 return 0;
11306 /* This simple hack avoids all inlining code and simplifies code below. */
11307 if (!TARGET_ALIGN_STRINGOPS)
11308 align = 32;
11310 if (GET_CODE (count_exp) == CONST_INT)
11312 count = INTVAL (count_exp);
11313 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11314 return 0;
11316 /* Figure out proper mode for counter. For 32bits it is always SImode,
11317 for 64bits use SImode when possible, otherwise DImode.
11318 Set count to number of bytes copied when known at compile time. */
11319 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11320 || x86_64_zero_extended_value (count_exp))
11321 counter_mode = SImode;
11322 else
11323 counter_mode = DImode;
11325 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11326 if (destreg != XEXP (dst, 0))
11327 dst = replace_equiv_address_nv (dst, destreg);
11329 emit_insn (gen_cld ());
11331 /* When optimizing for size emit simple rep ; movsb instruction for
11332 counts not divisible by 4. */
11334 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11336 countreg = ix86_zero_extend_to_Pmode (count_exp);
11337 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11338 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11339 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11341 else if (count != 0
11342 && (align >= 8
11343 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11344 || optimize_size || count < (unsigned int) 64))
11346 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11347 unsigned HOST_WIDE_INT offset = 0;
11349 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11350 if (count & ~(size - 1))
11352 countreg = copy_to_mode_reg (counter_mode,
11353 GEN_INT ((count >> (size == 4 ? 2 : 3))
11354 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11355 countreg = ix86_zero_extend_to_Pmode (countreg);
11356 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11357 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11358 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11359 offset = count & ~(size - 1);
11361 if (size == 8 && (count & 0x04))
11363 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11364 offset);
11365 emit_insn (gen_strset (destreg, mem,
11366 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11367 offset += 4;
11369 if (count & 0x02)
11371 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11372 offset);
11373 emit_insn (gen_strset (destreg, mem,
11374 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11375 offset += 2;
11377 if (count & 0x01)
11379 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11380 offset);
11381 emit_insn (gen_strset (destreg, mem,
11382 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11385 else
11387 rtx countreg2;
11388 rtx label = NULL;
11389 /* Compute desired alignment of the string operation. */
11390 int desired_alignment = (TARGET_PENTIUMPRO
11391 && (count == 0 || count >= (unsigned int) 260)
11392 ? 8 : UNITS_PER_WORD);
11394 /* In case we don't know anything about the alignment, default to the
11395 library version, since it is usually equally fast and results in
11396 shorter code.
11398 Also emit a call when we know that the count is large and the call
11399 overhead will not be important. */
11400 if (!TARGET_INLINE_ALL_STRINGOPS
11401 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11402 return 0;
11404 if (TARGET_SINGLE_STRINGOP)
11405 emit_insn (gen_cld ());
11407 countreg2 = gen_reg_rtx (Pmode);
11408 countreg = copy_to_mode_reg (counter_mode, count_exp);
11409 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11410 /* Get rid of MEM_OFFSET, it won't be accurate. */
11411 dst = change_address (dst, BLKmode, destreg);
11413 if (count == 0 && align < desired_alignment)
11415 label = gen_label_rtx ();
11416 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11417 LEU, 0, counter_mode, 1, label);
11419 if (align <= 1)
11421 rtx label = ix86_expand_aligntest (destreg, 1);
11422 emit_insn (gen_strset (destreg, dst,
11423 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11424 ix86_adjust_counter (countreg, 1);
11425 emit_label (label);
11426 LABEL_NUSES (label) = 1;
11428 if (align <= 2)
11430 rtx label = ix86_expand_aligntest (destreg, 2);
11431 emit_insn (gen_strset (destreg, dst,
11432 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11433 ix86_adjust_counter (countreg, 2);
11434 emit_label (label);
11435 LABEL_NUSES (label) = 1;
11437 if (align <= 4 && desired_alignment > 4)
11439 rtx label = ix86_expand_aligntest (destreg, 4);
11440 emit_insn (gen_strset (destreg, dst,
11441 (TARGET_64BIT
11442 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11443 : zeroreg)));
11444 ix86_adjust_counter (countreg, 4);
11445 emit_label (label);
11446 LABEL_NUSES (label) = 1;
11449 if (label && desired_alignment > 4 && !TARGET_64BIT)
11451 emit_label (label);
11452 LABEL_NUSES (label) = 1;
11453 label = NULL_RTX;
11456 if (!TARGET_SINGLE_STRINGOP)
11457 emit_insn (gen_cld ());
11458 if (TARGET_64BIT)
11460 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11461 GEN_INT (3)));
11462 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11464 else
11466 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11467 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11469 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11470 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11472 if (label)
11474 emit_label (label);
11475 LABEL_NUSES (label) = 1;
11478 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11479 emit_insn (gen_strset (destreg, dst,
11480 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11481 if (TARGET_64BIT && (align <= 4 || count == 0))
11483 rtx label = ix86_expand_aligntest (countreg, 4);
11484 emit_insn (gen_strset (destreg, dst,
11485 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11486 emit_label (label);
11487 LABEL_NUSES (label) = 1;
11489 if (align > 2 && count != 0 && (count & 2))
11490 emit_insn (gen_strset (destreg, dst,
11491 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11492 if (align <= 2 || count == 0)
11494 rtx label = ix86_expand_aligntest (countreg, 2);
11495 emit_insn (gen_strset (destreg, dst,
11496 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11497 emit_label (label);
11498 LABEL_NUSES (label) = 1;
11500 if (align > 1 && count != 0 && (count & 1))
11501 emit_insn (gen_strset (destreg, dst,
11502 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11503 if (align <= 1 || count == 0)
11505 rtx label = ix86_expand_aligntest (countreg, 1);
11506 emit_insn (gen_strset (destreg, dst,
11507 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11508 emit_label (label);
11509 LABEL_NUSES (label) = 1;
11512 return 1;
11515 /* Expand strlen. */
11517 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11519 rtx addr, scratch1, scratch2, scratch3, scratch4;
11521 /* The generic case of the strlen expander is long. Avoid expanding it
11522 unless TARGET_INLINE_ALL_STRINGOPS. */
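/* Two expansions are provided: an unrolled scan examining a word at a time
   (ix86_expand_strlensi_unroll_1) when TARGET_UNROLL_STRLEN applies, and
   otherwise a "repnz scasb" loop whose byte count is recovered from the
   decremented counter register.  */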
11524 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11525 && !TARGET_INLINE_ALL_STRINGOPS
11526 && !optimize_size
11527 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11528 return 0;
11530 addr = force_reg (Pmode, XEXP (src, 0));
11531 scratch1 = gen_reg_rtx (Pmode);
11533 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11534 && !optimize_size)
11536 /* Well it seems that some optimizer does not combine a call like
11537 foo(strlen(bar), strlen(bar));
11538 when the move and the subtraction are done here. It does calculate
11539 the length just once when these instructions are done inside of
11540 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11541 often used and I use one fewer register for the lifetime of
11542 output_strlen_unroll() this is better. */
11544 emit_move_insn (out, addr);
11546 ix86_expand_strlensi_unroll_1 (out, src, align);
11548 /* strlensi_unroll_1 returns the address of the zero at the end of
11549 the string, like memchr(), so compute the length by subtracting
11550 the start address. */
11551 if (TARGET_64BIT)
11552 emit_insn (gen_subdi3 (out, out, addr));
11553 else
11554 emit_insn (gen_subsi3 (out, out, addr));
11556 else
11558 rtx unspec;
11559 scratch2 = gen_reg_rtx (Pmode);
11560 scratch3 = gen_reg_rtx (Pmode);
11561 scratch4 = force_reg (Pmode, constm1_rtx);
11563 emit_move_insn (scratch3, addr);
11564 eoschar = force_reg (QImode, eoschar);
11566 emit_insn (gen_cld ());
11567 src = replace_equiv_address_nv (src, scratch3);
11569 /* If .md starts supporting :P, this can be done in .md. */
11570 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11571 scratch4), UNSPEC_SCAS);
11572 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
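/* The counter starts at -1 and repnz scasb decrements it once for every
   byte examined, including the terminating zero, leaving -(len + 2).  The
   complement and add of -1 below therefore recover len = ~counter - 1.  */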
11573 if (TARGET_64BIT)
11575 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11576 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11578 else
11580 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11581 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11584 return 1;
11587 /* Expand the appropriate insns for doing strlen if not just doing
11588 repnz; scasb
11590 out = result, initialized with the start address
11591 align_rtx = alignment of the address.
11592 scratch = scratch register, initialized with the start address when
11593 not aligned, otherwise undefined
11595 This is just the body. It needs the initializations mentioned above and
11596 some address computing at the end. These things are done in i386.md. */
11598 static void
11599 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11601 int align;
11602 rtx tmp;
11603 rtx align_2_label = NULL_RTX;
11604 rtx align_3_label = NULL_RTX;
11605 rtx align_4_label = gen_label_rtx ();
11606 rtx end_0_label = gen_label_rtx ();
11607 rtx mem;
11608 rtx tmpreg = gen_reg_rtx (SImode);
11609 rtx scratch = gen_reg_rtx (SImode);
11610 rtx cmp;
11612 align = 0;
11613 if (GET_CODE (align_rtx) == CONST_INT)
11614 align = INTVAL (align_rtx);
11616 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11618 /* Is there a known alignment and is it less than 4? */
11619 if (align < 4)
11621 rtx scratch1 = gen_reg_rtx (Pmode);
11622 emit_move_insn (scratch1, out);
11623 /* Is there a known alignment and is it not 2? */
11624 if (align != 2)
11626 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11627 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11629 /* Leave just the 3 lower bits. */
11630 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11631 NULL_RTX, 0, OPTAB_WIDEN);
11633 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11634 Pmode, 1, align_4_label);
11635 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11636 Pmode, 1, align_2_label);
11637 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11638 Pmode, 1, align_3_label);
11640 else
11642 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11643 check whether it is aligned to a 4-byte boundary. */
11645 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11646 NULL_RTX, 0, OPTAB_WIDEN);
11648 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11649 Pmode, 1, align_4_label);
11652 mem = change_address (src, QImode, out);
11654 /* Now compare the bytes. */
11656 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11657 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11658 QImode, 1, end_0_label);
11660 /* Increment the address. */
11661 if (TARGET_64BIT)
11662 emit_insn (gen_adddi3 (out, out, const1_rtx));
11663 else
11664 emit_insn (gen_addsi3 (out, out, const1_rtx));
11666 /* Not needed with an alignment of 2 */
11667 if (align != 2)
11669 emit_label (align_2_label);
11671 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11672 end_0_label);
11674 if (TARGET_64BIT)
11675 emit_insn (gen_adddi3 (out, out, const1_rtx));
11676 else
11677 emit_insn (gen_addsi3 (out, out, const1_rtx));
11679 emit_label (align_3_label);
11682 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11683 end_0_label);
11685 if (TARGET_64BIT)
11686 emit_insn (gen_adddi3 (out, out, const1_rtx));
11687 else
11688 emit_insn (gen_addsi3 (out, out, const1_rtx));
11691 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11692 align this loop - doing so only enlarges the code and does not speed
11693 it up. */
11694 emit_label (align_4_label);
11696 mem = change_address (src, SImode, out);
11697 emit_move_insn (scratch, mem);
11698 if (TARGET_64BIT)
11699 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11700 else
11701 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11703 /* This formula yields a nonzero result iff one of the bytes is zero.
11704 This saves three branches inside the loop and many cycles. */
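/* The value computed below is (x - 0x01010101) & ~x & 0x80808080.  A zero
   byte has no borrow coming into it, so subtracting 1 turns it into 0xff
   and ~x keeps its high bit set; a nonzero byte either stays below 0x80
   after the subtraction or is masked off by ~x.  Borrow propagation can set
   bits above the first zero byte, but never below it, so the lowest set bit
   identifies the first zero byte, which the fixup code after the loop
   relies on.  */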
11706 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11707 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11708 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11709 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11710 gen_int_mode (0x80808080, SImode)));
11711 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11712 align_4_label);
11714 if (TARGET_CMOVE)
11716 rtx reg = gen_reg_rtx (SImode);
11717 rtx reg2 = gen_reg_rtx (Pmode);
11718 emit_move_insn (reg, tmpreg);
11719 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11721 /* If zero is not in the first two bytes, move two bytes forward. */
11722 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11723 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11724 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11725 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11726 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11727 reg,
11728 tmpreg)));
11729 /* Emit lea manually to avoid clobbering of flags. */
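/* Both conditional moves consume the flags produced by the testsi above;
   an addl would clobber them, whereas lea performs the addition without
   touching EFLAGS.  */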
11730 emit_insn (gen_rtx_SET (SImode, reg2,
11731 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11733 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11734 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11735 emit_insn (gen_rtx_SET (VOIDmode, out,
11736 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11737 reg2,
11738 out)));
11741 else
11743 rtx end_2_label = gen_label_rtx ();
11744 /* Is zero in the first two bytes? */
11746 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11747 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11748 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11749 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11750 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11751 pc_rtx);
11752 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11753 JUMP_LABEL (tmp) = end_2_label;
11755 /* Not in the first two. Move two bytes forward. */
11756 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11757 if (TARGET_64BIT)
11758 emit_insn (gen_adddi3 (out, out, const2_rtx));
11759 else
11760 emit_insn (gen_addsi3 (out, out, const2_rtx));
11762 emit_label (end_2_label);
11766 /* Avoid branch in fixing the byte. */
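/* At this point bit 7 of tmpreg is set exactly when the zero byte is the
   earlier of the two remaining candidates.  Adding the byte to itself moves
   that bit into the carry flag, and the sbb below then subtracts either 4
   (carry set) or 3 (carry clear) from OUT, leaving it pointing at the
   terminating zero without a branch.  */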
11767 tmpreg = gen_lowpart (QImode, tmpreg);
11768 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11769 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11770 if (TARGET_64BIT)
11771 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11772 else
11773 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11775 emit_label (end_0_label);
11778 void
11779 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11780 rtx callarg2 ATTRIBUTE_UNUSED,
11781 rtx pop, int sibcall)
11783 rtx use = NULL, call;
11785 if (pop == const0_rtx)
11786 pop = NULL;
11787 if (TARGET_64BIT && pop)
11788 abort ();
11790 #if TARGET_MACHO
11791 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11792 fnaddr = machopic_indirect_call_target (fnaddr);
11793 #else
11794 /* Static functions and indirect calls don't need the pic register. */
11795 if (! TARGET_64BIT && flag_pic
11796 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11797 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11798 use_reg (&use, pic_offset_table_rtx);
11800 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11802 rtx al = gen_rtx_REG (QImode, 0);
11803 emit_move_insn (al, callarg2);
11804 use_reg (&use, al);
11806 #endif /* TARGET_MACHO */
11808 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11810 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11811 fnaddr = gen_rtx_MEM (QImode, fnaddr);
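/* For a 64-bit sibling call an indirect target must live in a register that
   is neither callee-saved nor used for argument passing, since the outgoing
   arguments and the register restores are already in place; R11 is a
   call-clobbered scratch register that satisfies this.  */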
11813 if (sibcall && TARGET_64BIT
11814 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11816 rtx addr;
11817 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11818 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11819 emit_move_insn (fnaddr, addr);
11820 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11823 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11824 if (retval)
11825 call = gen_rtx_SET (VOIDmode, retval, call);
11826 if (pop)
11828 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11829 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11830 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11833 call = emit_call_insn (call);
11834 if (use)
11835 CALL_INSN_FUNCTION_USAGE (call) = use;
11839 /* Clear stack slot assignments remembered from previous functions.
11840 This is called from INIT_EXPANDERS once before RTL is emitted for each
11841 function. */
11843 static struct machine_function *
11844 ix86_init_machine_status (void)
11846 struct machine_function *f;
11848 f = ggc_alloc_cleared (sizeof (struct machine_function));
11849 f->use_fast_prologue_epilogue_nregs = -1;
11851 return f;
11854 /* Return a MEM corresponding to a stack slot with mode MODE.
11855 Allocate a new slot if necessary.
11857 The RTL for a function can have several slots available: N is
11858 which slot to use. */
11861 assign_386_stack_local (enum machine_mode mode, int n)
11863 struct stack_local_entry *s;
11865 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11866 abort ();
11868 for (s = ix86_stack_locals; s; s = s->next)
11869 if (s->mode == mode && s->n == n)
11870 return s->rtl;
11872 s = (struct stack_local_entry *)
11873 ggc_alloc (sizeof (struct stack_local_entry));
11874 s->n = n;
11875 s->mode = mode;
11876 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11878 s->next = ix86_stack_locals;
11879 ix86_stack_locals = s;
11880 return s->rtl;
11883 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11885 static GTY(()) rtx ix86_tls_symbol;
11887 ix86_tls_get_addr (void)
11890 if (!ix86_tls_symbol)
11892 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11893 (TARGET_GNU_TLS && !TARGET_64BIT)
11894 ? "___tls_get_addr"
11895 : "__tls_get_addr");
11898 return ix86_tls_symbol;
11901 /* Calculate the length of the memory address in the instruction
11902 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11904 static int
11905 memory_address_length (rtx addr)
11907 struct ix86_address parts;
11908 rtx base, index, disp;
11909 int len;
11911 if (GET_CODE (addr) == PRE_DEC
11912 || GET_CODE (addr) == POST_INC
11913 || GET_CODE (addr) == PRE_MODIFY
11914 || GET_CODE (addr) == POST_MODIFY)
11915 return 0;
11917 if (! ix86_decompose_address (addr, &parts))
11918 abort ();
11920 base = parts.base;
11921 index = parts.index;
11922 disp = parts.disp;
11923 len = 0;
11925 /* Rule of thumb:
11926 - esp as the base always wants an index,
11927 - ebp as the base always wants a displacement. */
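/* Illustrative examples (editorial note), counting only the bytes this
   function reports, i.e. SIB and displacement bytes beyond the opcode
   and modrm:
     (%eax)         -> 0   plain register indirect
     (%esp)         -> 1   needs a SIB byte
     (%ebp)         -> 1   needs an 8-bit displacement of zero
     8(%eax)        -> 1   8-bit displacement
     foo(%eax)      -> 4   32-bit displacement
     (%eax,%ebx,4)  -> 1   SIB byte for the index
     foo(,%ebx,4)   -> 5   32-bit displacement plus SIB byte  */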
11929 /* Register Indirect. */
11930 if (base && !index && !disp)
11932 /* esp (for its index) and ebp (for its displacement) need
11933 the two-byte modrm form. */
11934 if (addr == stack_pointer_rtx
11935 || addr == arg_pointer_rtx
11936 || addr == frame_pointer_rtx
11937 || addr == hard_frame_pointer_rtx)
11938 len = 1;
11941 /* Direct Addressing. */
11942 else if (disp && !base && !index)
11943 len = 4;
11945 else
11947 /* Find the length of the displacement constant. */
11948 if (disp)
11950 if (GET_CODE (disp) == CONST_INT
11951 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11952 && base)
11953 len = 1;
11954 else
11955 len = 4;
11957 /* ebp always wants a displacement. */
11958 else if (base == hard_frame_pointer_rtx)
11959 len = 1;
11961 /* An index requires the two-byte modrm form.... */
11962 if (index
11963 /* ...like esp, which always wants an index. */
11964 || base == stack_pointer_rtx
11965 || base == arg_pointer_rtx
11966 || base == frame_pointer_rtx)
11967 len += 1;
11970 return len;
11973 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11974 is set, expect that the insn has an 8-bit immediate alternative. */
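/* Editorial example: with SHORTFORM set, "add $8, %eax" can use the
   sign-extended 8-bit immediate form and contributes 1 byte, while
   "add $100000, %eax" needs a full 32-bit immediate and contributes 4.
   DImode instructions are assumed to use the 32-bit sign-extended
   immediate form, hence the 4 bytes for MODE_DI below.  */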
11976 ix86_attr_length_immediate_default (rtx insn, int shortform)
11978 int len = 0;
11979 int i;
11980 extract_insn_cached (insn);
11981 for (i = recog_data.n_operands - 1; i >= 0; --i)
11982 if (CONSTANT_P (recog_data.operand[i]))
11984 if (len)
11985 abort ();
11986 if (shortform
11987 && GET_CODE (recog_data.operand[i]) == CONST_INT
11988 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11989 len = 1;
11990 else
11992 switch (get_attr_mode (insn))
11994 case MODE_QI:
11995 len += 1;
11996 break;
11997 case MODE_HI:
11998 len += 2;
11999 break;
12000 case MODE_SI:
12001 len += 4;
12002 break;
12003 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12004 case MODE_DI:
12005 len += 4;
12006 break;
12007 default:
12008 fatal_insn ("unknown insn mode", insn);
12012 return len;
12014 /* Compute default value for "length_address" attribute. */
12016 ix86_attr_length_address_default (rtx insn)
12018 int i;
12020 if (get_attr_type (insn) == TYPE_LEA)
12022 rtx set = PATTERN (insn);
12023 if (GET_CODE (set) == SET)
12025 else if (GET_CODE (set) == PARALLEL
12026 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12027 set = XVECEXP (set, 0, 0);
12028 else
12030 #ifdef ENABLE_CHECKING
12031 abort ();
12032 #endif
12033 return 0;
12036 return memory_address_length (SET_SRC (set));
12039 extract_insn_cached (insn);
12040 for (i = recog_data.n_operands - 1; i >= 0; --i)
12041 if (GET_CODE (recog_data.operand[i]) == MEM)
12043 return memory_address_length (XEXP (recog_data.operand[i], 0));
12044 break;
12046 return 0;
12049 /* Return the maximum number of instructions a cpu can issue. */
12051 static int
12052 ix86_issue_rate (void)
12054 switch (ix86_tune)
12056 case PROCESSOR_PENTIUM:
12057 case PROCESSOR_K6:
12058 return 2;
12060 case PROCESSOR_PENTIUMPRO:
12061 case PROCESSOR_PENTIUM4:
12062 case PROCESSOR_ATHLON:
12063 case PROCESSOR_K8:
12064 return 3;
12066 default:
12067 return 1;
12071 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12072 by DEP_INSN and nothing else set by DEP_INSN. */
12074 static int
12075 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12077 rtx set, set2;
12079 /* Simplify the test for uninteresting insns. */
12080 if (insn_type != TYPE_SETCC
12081 && insn_type != TYPE_ICMOV
12082 && insn_type != TYPE_FCMOV
12083 && insn_type != TYPE_IBR)
12084 return 0;
12086 if ((set = single_set (dep_insn)) != 0)
12088 set = SET_DEST (set);
12089 set2 = NULL_RTX;
12091 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12092 && XVECLEN (PATTERN (dep_insn), 0) == 2
12093 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12094 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12096 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12097 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12099 else
12100 return 0;
12102 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12103 return 0;
12105 /* This test is true if the dependent insn reads the flags but
12106 not any other potentially set register. */
12107 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12108 return 0;
12110 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12111 return 0;
12113 return 1;
12116 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12117 address with operands set by DEP_INSN. */
12119 static int
12120 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12122 rtx addr;
12124 if (insn_type == TYPE_LEA
12125 && TARGET_PENTIUM)
12127 addr = PATTERN (insn);
12128 if (GET_CODE (addr) == SET)
12130 else if (GET_CODE (addr) == PARALLEL
12131 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12132 addr = XVECEXP (addr, 0, 0);
12133 else
12134 abort ();
12135 addr = SET_SRC (addr);
12137 else
12139 int i;
12140 extract_insn_cached (insn);
12141 for (i = recog_data.n_operands - 1; i >= 0; --i)
12142 if (GET_CODE (recog_data.operand[i]) == MEM)
12144 addr = XEXP (recog_data.operand[i], 0);
12145 goto found;
12147 return 0;
12148 found:;
12151 return modified_in_p (addr, dep_insn);
12154 static int
12155 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12157 enum attr_type insn_type, dep_insn_type;
12158 enum attr_memory memory, dep_memory;
12159 rtx set, set2;
12160 int dep_insn_code_number;
12162 /* Anti and output dependencies have zero cost on all CPUs. */
12163 if (REG_NOTE_KIND (link) != 0)
12164 return 0;
12166 dep_insn_code_number = recog_memoized (dep_insn);
12168 /* If we can't recognize the insns, we can't really do anything. */
12169 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12170 return cost;
12172 insn_type = get_attr_type (insn);
12173 dep_insn_type = get_attr_type (dep_insn);
12175 switch (ix86_tune)
12177 case PROCESSOR_PENTIUM:
12178 /* Address Generation Interlock adds a cycle of latency. */
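/* Illustrative example (editorial note): on the original Pentium,
   "addl $4, %ebx" immediately followed by "movl (%ebx), %eax" pays one
   extra cycle, because the load's address uses the %ebx value computed
   in the preceding cycle.  */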
12179 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12180 cost += 1;
12182 /* ??? Compares pair with jump/setcc. */
12183 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12184 cost = 0;
12186 /* Floating point stores require value to be ready one cycle earlier. */
12187 if (insn_type == TYPE_FMOV
12188 && get_attr_memory (insn) == MEMORY_STORE
12189 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12190 cost += 1;
12191 break;
12193 case PROCESSOR_PENTIUMPRO:
12194 memory = get_attr_memory (insn);
12195 dep_memory = get_attr_memory (dep_insn);
12197 /* Since we can't represent delayed latencies of load+operation,
12198 increase the cost here for non-imov insns. */
12199 if (dep_insn_type != TYPE_IMOV
12200 && dep_insn_type != TYPE_FMOV
12201 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12202 cost += 1;
12204 /* INT->FP conversion is expensive. */
12205 if (get_attr_fp_int_src (dep_insn))
12206 cost += 5;
12208 /* There is one cycle extra latency between an FP op and a store. */
12209 if (insn_type == TYPE_FMOV
12210 && (set = single_set (dep_insn)) != NULL_RTX
12211 && (set2 = single_set (insn)) != NULL_RTX
12212 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12213 && GET_CODE (SET_DEST (set2)) == MEM)
12214 cost += 1;
12216 /* Show the ability of the reorder buffer to hide the latency of a load
12217 by executing it in parallel with the previous instruction when the
12218 previous instruction is not needed to compute the address. */
12219 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12220 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12222 /* Claim moves to take one cycle, as the core can issue one load
12223 at a time and the next load can start a cycle later. */
12224 if (dep_insn_type == TYPE_IMOV
12225 || dep_insn_type == TYPE_FMOV)
12226 cost = 1;
12227 else if (cost > 1)
12228 cost--;
12230 break;
12232 case PROCESSOR_K6:
12233 memory = get_attr_memory (insn);
12234 dep_memory = get_attr_memory (dep_insn);
12235 /* The esp dependency is resolved before the instruction is really
12236 finished. */
12237 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12238 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12239 return 1;
12241 /* Since we can't represent delayed latencies of load+operation,
12242 increase the cost here for non-imov insns. */
12243 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12244 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12246 /* INT->FP conversion is expensive. */
12247 if (get_attr_fp_int_src (dep_insn))
12248 cost += 5;
12250 /* Show the ability of the reorder buffer to hide the latency of a load
12251 by executing it in parallel with the previous instruction when the
12252 previous instruction is not needed to compute the address. */
12253 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12254 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12256 /* Claim moves to take one cycle, as the core can issue one load
12257 at a time and the next load can start a cycle later. */
12258 if (dep_insn_type == TYPE_IMOV
12259 || dep_insn_type == TYPE_FMOV)
12260 cost = 1;
12261 else if (cost > 2)
12262 cost -= 2;
12263 else
12264 cost = 1;
12266 break;
12268 case PROCESSOR_ATHLON:
12269 case PROCESSOR_K8:
12270 memory = get_attr_memory (insn);
12271 dep_memory = get_attr_memory (dep_insn);
12273 /* Show the ability of the reorder buffer to hide the latency of a load
12274 by executing it in parallel with the previous instruction when the
12275 previous instruction is not needed to compute the address. */
12276 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12277 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12279 enum attr_unit unit = get_attr_unit (insn);
12280 int loadcost = 3;
12282 /* Because of the difference between the length of the integer and
12283 floating point unit pipeline preparation stages, the memory operands
12284 for floating point are cheaper.
12286 ??? For Athlon the difference is most probably 2. */
12287 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12288 loadcost = 3;
12289 else
12290 loadcost = TARGET_ATHLON ? 2 : 0;
12292 if (cost >= loadcost)
12293 cost -= loadcost;
12294 else
12295 cost = 0;
12298 default:
12299 break;
12302 return cost;
12305 static union
12307 struct ppro_sched_data
12309 rtx decode[3];
12310 int issued_this_cycle;
12311 } ppro;
12312 } ix86_sched_data;
12314 static enum attr_ppro_uops
12315 ix86_safe_ppro_uops (rtx insn)
12317 if (recog_memoized (insn) >= 0)
12318 return get_attr_ppro_uops (insn);
12319 else
12320 return PPRO_UOPS_MANY;
12323 static void
12324 ix86_dump_ppro_packet (FILE *dump)
12326 if (ix86_sched_data.ppro.decode[0])
12328 fprintf (dump, "PPRO packet: %d",
12329 INSN_UID (ix86_sched_data.ppro.decode[0]));
12330 if (ix86_sched_data.ppro.decode[1])
12331 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12332 if (ix86_sched_data.ppro.decode[2])
12333 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12334 fputc ('\n', dump);
12338 /* We're beginning a new block. Initialize data structures as necessary. */
12340 static void
12341 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12342 int sched_verbose ATTRIBUTE_UNUSED,
12343 int veclen ATTRIBUTE_UNUSED)
12345 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12348 /* Shift INSN to SLOT, and shift everything else down. */
12350 static void
12351 ix86_reorder_insn (rtx *insnp, rtx *slot)
12353 if (insnp != slot)
12355 rtx insn = *insnp;
12357 insnp[0] = insnp[1];
12358 while (++insnp != slot);
12359 *insnp = insn;
12363 static void
12364 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12366 rtx decode[3];
12367 enum attr_ppro_uops cur_uops;
12368 int issued_this_cycle;
12369 rtx *insnp;
12370 int i;
12372 /* At this point .ppro.decode contains the state of the three
12373 decoders from last "cycle". That is, those insns that were
12374 actually independent. But here we're scheduling for the
12375 decoder, and we may find things that are decodable in the
12376 same cycle. */
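/* Background (editorial note): the Pentium Pro front end has one
   "complex" decoder able to handle multi-uop instructions plus two
   "simple" decoders limited to single-uop instructions (the classic
   4-1-1 template).  decode[0] below models the complex decoder, and
   slots 1 and 2 are only ever filled with PPRO_UOPS_ONE insns.  */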
12378 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12379 issued_this_cycle = 0;
12381 insnp = e_ready;
12382 cur_uops = ix86_safe_ppro_uops (*insnp);
12384 /* If the decoders are empty, and we've a complex insn at the
12385 head of the priority queue, let it issue without complaint. */
12386 if (decode[0] == NULL)
12388 if (cur_uops == PPRO_UOPS_MANY)
12390 decode[0] = *insnp;
12391 goto ppro_done;
12394 /* Otherwise, search for a 2-4 uop insn to issue. */
12395 while (cur_uops != PPRO_UOPS_FEW)
12397 if (insnp == ready)
12398 break;
12399 cur_uops = ix86_safe_ppro_uops (*--insnp);
12402 /* If so, move it to the head of the line. */
12403 if (cur_uops == PPRO_UOPS_FEW)
12404 ix86_reorder_insn (insnp, e_ready);
12406 /* Issue the head of the queue. */
12407 issued_this_cycle = 1;
12408 decode[0] = *e_ready--;
12411 /* Look for simple insns to fill in the other two slots. */
12412 for (i = 1; i < 3; ++i)
12413 if (decode[i] == NULL)
12415 if (ready > e_ready)
12416 goto ppro_done;
12418 insnp = e_ready;
12419 cur_uops = ix86_safe_ppro_uops (*insnp);
12420 while (cur_uops != PPRO_UOPS_ONE)
12422 if (insnp == ready)
12423 break;
12424 cur_uops = ix86_safe_ppro_uops (*--insnp);
12427 /* Found one. Move it to the head of the queue and issue it. */
12428 if (cur_uops == PPRO_UOPS_ONE)
12430 ix86_reorder_insn (insnp, e_ready);
12431 decode[i] = *e_ready--;
12432 issued_this_cycle++;
12433 continue;
12436 /* ??? Didn't find one. Ideally, here we would do a lazy split
12437 of 2-uop insns, issue one and queue the other. */
12440 ppro_done:
12441 if (issued_this_cycle == 0)
12442 issued_this_cycle = 1;
12443 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12446 /* We are about to begin issuing insns for this clock cycle.
12447 Override the default sort algorithm to better slot instructions. */
12448 static int
12449 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12450 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12451 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12453 int n_ready = *n_readyp;
12454 rtx *e_ready = ready + n_ready - 1;
12456 /* Make sure to go ahead and initialize key items in
12457 ix86_sched_data if we are not going to bother trying to
12458 reorder the ready queue. */
12459 if (n_ready < 2)
12461 ix86_sched_data.ppro.issued_this_cycle = 1;
12462 goto out;
12465 switch (ix86_tune)
12467 default:
12468 break;
12470 case PROCESSOR_PENTIUMPRO:
12471 ix86_sched_reorder_ppro (ready, e_ready);
12472 break;
12475 out:
12476 return ix86_issue_rate ();
12479 /* We are about to issue INSN. Return the number of insns left on the
12480 ready queue that can be issued this cycle. */
12482 static int
12483 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12484 int can_issue_more)
12486 int i;
12487 switch (ix86_tune)
12489 default:
12490 return can_issue_more - 1;
12492 case PROCESSOR_PENTIUMPRO:
12494 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12496 if (uops == PPRO_UOPS_MANY)
12498 if (sched_verbose)
12499 ix86_dump_ppro_packet (dump);
12500 ix86_sched_data.ppro.decode[0] = insn;
12501 ix86_sched_data.ppro.decode[1] = NULL;
12502 ix86_sched_data.ppro.decode[2] = NULL;
12503 if (sched_verbose)
12504 ix86_dump_ppro_packet (dump);
12505 ix86_sched_data.ppro.decode[0] = NULL;
12507 else if (uops == PPRO_UOPS_FEW)
12509 if (sched_verbose)
12510 ix86_dump_ppro_packet (dump);
12511 ix86_sched_data.ppro.decode[0] = insn;
12512 ix86_sched_data.ppro.decode[1] = NULL;
12513 ix86_sched_data.ppro.decode[2] = NULL;
12515 else
12517 for (i = 0; i < 3; ++i)
12518 if (ix86_sched_data.ppro.decode[i] == NULL)
12520 ix86_sched_data.ppro.decode[i] = insn;
12521 break;
12523 if (i == 3)
12524 abort ();
12525 if (i == 2)
12527 if (sched_verbose)
12528 ix86_dump_ppro_packet (dump);
12529 ix86_sched_data.ppro.decode[0] = NULL;
12530 ix86_sched_data.ppro.decode[1] = NULL;
12531 ix86_sched_data.ppro.decode[2] = NULL;
12535 return --ix86_sched_data.ppro.issued_this_cycle;
12539 static int
12540 ia32_use_dfa_pipeline_interface (void)
12542 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12543 return 1;
12544 return 0;
12547 /* How many alternative schedules to try. This should be as wide as the
12548 scheduling freedom in the DFA, but no wider. Making this value too
12549 large results in extra work for the scheduler. */
12551 static int
12552 ia32_multipass_dfa_lookahead (void)
12554 if (ix86_tune == PROCESSOR_PENTIUM)
12555 return 2;
12556 else
12557 return 0;
12561 /* Compute the alignment given to a constant that is being placed in memory.
12562 EXP is the constant and ALIGN is the alignment that the object would
12563 ordinarily have.
12564 The value of this function is used instead of that alignment to align
12565 the object. */
12568 ix86_constant_alignment (tree exp, int align)
12570 if (TREE_CODE (exp) == REAL_CST)
12572 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12573 return 64;
12574 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12575 return 128;
12577 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12578 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12579 return BITS_PER_WORD;
12581 return align;
12584 /* Compute the alignment for a static variable.
12585 TYPE is the data type, and ALIGN is the alignment that
12586 the object would ordinarily have. The value of this function is used
12587 instead of that alignment to align the object. */
12590 ix86_data_alignment (tree type, int align)
12592 if (AGGREGATE_TYPE_P (type)
12593 && TYPE_SIZE (type)
12594 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12595 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12596 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12597 return 256;
12599 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12600 to a 16-byte boundary. */
12601 if (TARGET_64BIT)
12603 if (AGGREGATE_TYPE_P (type)
12604 && TYPE_SIZE (type)
12605 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12606 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12607 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12608 return 128;
12611 if (TREE_CODE (type) == ARRAY_TYPE)
12613 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12614 return 64;
12615 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12616 return 128;
12618 else if (TREE_CODE (type) == COMPLEX_TYPE)
12621 if (TYPE_MODE (type) == DCmode && align < 64)
12622 return 64;
12623 if (TYPE_MODE (type) == XCmode && align < 128)
12624 return 128;
12626 else if ((TREE_CODE (type) == RECORD_TYPE
12627 || TREE_CODE (type) == UNION_TYPE
12628 || TREE_CODE (type) == QUAL_UNION_TYPE)
12629 && TYPE_FIELDS (type))
12631 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12632 return 64;
12633 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12634 return 128;
12636 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12637 || TREE_CODE (type) == INTEGER_TYPE)
12639 if (TYPE_MODE (type) == DFmode && align < 64)
12640 return 64;
12641 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12642 return 128;
12645 return align;
12648 /* Compute the alignment for a local variable.
12649 TYPE is the data type, and ALIGN is the alignment that
12650 the object would ordinarily have. The value of this macro is used
12651 instead of that alignment to align the object. */
12654 ix86_local_alignment (tree type, int align)
12656 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12657 to a 16-byte boundary. */
12658 if (TARGET_64BIT)
12660 if (AGGREGATE_TYPE_P (type)
12661 && TYPE_SIZE (type)
12662 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12663 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12664 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12665 return 128;
12667 if (TREE_CODE (type) == ARRAY_TYPE)
12669 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12670 return 64;
12671 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12672 return 128;
12674 else if (TREE_CODE (type) == COMPLEX_TYPE)
12676 if (TYPE_MODE (type) == DCmode && align < 64)
12677 return 64;
12678 if (TYPE_MODE (type) == XCmode && align < 128)
12679 return 128;
12681 else if ((TREE_CODE (type) == RECORD_TYPE
12682 || TREE_CODE (type) == UNION_TYPE
12683 || TREE_CODE (type) == QUAL_UNION_TYPE)
12684 && TYPE_FIELDS (type))
12686 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12687 return 64;
12688 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12689 return 128;
12691 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12692 || TREE_CODE (type) == INTEGER_TYPE)
12695 if (TYPE_MODE (type) == DFmode && align < 64)
12696 return 64;
12697 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12698 return 128;
12700 return align;
12703 /* Emit RTL insns to initialize the variable parts of a trampoline.
12704 FNADDR is an RTX for the address of the function's pure code.
12705 CXT is an RTX for the static chain value for the function. */
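/* Editorial sketch of the emitted trampoline, decoded from the byte
   constants stored below (little-endian, so e.g. 0xbb41 is the byte
   sequence 41 bb):
     32-bit:  b9 <cxt:4>         movl   $CXT, %ecx
              e9 <disp:4>        jmp    FNADDR      (rel32 from tramp+10)
     64-bit:  41 bb <fnaddr:4>   movl   $FNADDR, %r11d   (short form)
          or  49 bb <fnaddr:8>   movabs $FNADDR, %r11
              49 ba <cxt:8>      movabs $CXT, %r10
              49 ff e3           jmp    *%r11  */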
12706 void
12707 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12709 if (!TARGET_64BIT)
12711 /* Compute offset from the end of the jmp to the target function. */
12712 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12713 plus_constant (tramp, 10),
12714 NULL_RTX, 1, OPTAB_DIRECT);
12715 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12716 gen_int_mode (0xb9, QImode));
12717 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12718 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12719 gen_int_mode (0xe9, QImode));
12720 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12722 else
12724 int offset = 0;
12725 /* Try to load address using shorter movl instead of movabs.
12726 We may want to support movq for kernel mode, but the kernel does not use
12727 trampolines at the moment. */
12728 if (x86_64_zero_extended_value (fnaddr))
12730 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12731 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12732 gen_int_mode (0xbb41, HImode));
12733 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12734 gen_lowpart (SImode, fnaddr));
12735 offset += 6;
12737 else
12739 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12740 gen_int_mode (0xbb49, HImode));
12741 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12742 fnaddr);
12743 offset += 10;
12745 /* Load static chain using movabs to r10. */
12746 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12747 gen_int_mode (0xba49, HImode));
12748 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12749 cxt);
12750 offset += 10;
12751 /* Jump to r11. */
12752 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12753 gen_int_mode (0xff49, HImode));
12754 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12755 gen_int_mode (0xe3, QImode));
12756 offset += 3;
12757 if (offset > TRAMPOLINE_SIZE)
12758 abort ();
12761 #ifdef TRANSFER_FROM_TRAMPOLINE
12762 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12763 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12764 #endif
12767 #define def_builtin(MASK, NAME, TYPE, CODE) \
12768 do { \
12769 if ((MASK) & target_flags \
12770 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12771 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12772 NULL, NULL_TREE); \
12773 } while (0)
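/* Editorial note: a representative (hypothetical) invocation would be
     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
   The bdesc_* tables that follow are registered through this macro, so a
   builtin only comes into existence when the required ISA bits are set in
   target_flags (and, with MASK_64BIT, only when compiling for x86-64).  */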
12775 struct builtin_description
12777 const unsigned int mask;
12778 const enum insn_code icode;
12779 const char *const name;
12780 const enum ix86_builtins code;
12781 const enum rtx_code comparison;
12782 const unsigned int flag;
12785 static const struct builtin_description bdesc_comi[] =
12787 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12792 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12793 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12798 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
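/* Editorial gloss on struct builtin_description: MASK is the set of
   target_flags bits required for the builtin to exist, ICODE the insn
   pattern used to expand it, NAME the builtin's name (a null name means
   the entry is registered separately with an explicit def_builtin call),
   and CODE the IX86_BUILTIN_* identifier.  COMPARISON and FLAG matter only
   for the comparison builtins; FLAG appears to mark entries expanded with
   swapped operands (e.g. cmpgtps implemented as LT with FLAG == 1).  */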
12813 static const struct builtin_description bdesc_2arg[] =
12815 /* SSE */
12816 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12817 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12821 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12823 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12825 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12828 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12829 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12830 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12831 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12834 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12835 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12836 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12837 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12842 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12843 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12844 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12846 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12847 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12849 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12851 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12852 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12856 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12860 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12862 /* MMX */
12863 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12864 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12865 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12866 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12867 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12868 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12869 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12870 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12872 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12873 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12874 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12875 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12876 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12877 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12878 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12879 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12881 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12882 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12883 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12886 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12888 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12890 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12891 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12893 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12894 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12895 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12896 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12897 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12898 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12900 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12903 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12912 /* Special. */
12913 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12915 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12917 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12918 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12919 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12921 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12922 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12923 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12924 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12925 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12926 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12928 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12929 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12930 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12931 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12933 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12935 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12936 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12937 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12938 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12940 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12941 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12943 /* SSE2 */
12944 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12956 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12957 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12958 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12959 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12962 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12963 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12964 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12965 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12969 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12970 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12971 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12972 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12974 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12988 /* SSE2 MMX */
12989 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12998 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12999 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13000 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13001 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13002 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13003 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13004 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13005 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13065 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13070 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13072 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13074 /* PNI MMX */
13075 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13076 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13077 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13078 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13079 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13080 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13083 static const struct builtin_description bdesc_1arg[] =
13085 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13086 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13088 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13089 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13090 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13092 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13093 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13094 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13095 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13096 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13097 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13102 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13104 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13113 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13117 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13118 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13119 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13120 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13124 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13126 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13128 /* PNI */
13129 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13130 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13131 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13134 void
13135 ix86_init_builtins (void)
13137 if (TARGET_MMX)
13138 ix86_init_mmx_sse_builtins ();
13141 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13142 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13143 builtins. */
13144 static void
13145 ix86_init_mmx_sse_builtins (void)
13147 const struct builtin_description * d;
13148 size_t i;
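/* Editorial note on the local type nodes built below: the naming follows
   <return>_ftype_<arg1>_<arg2>..., so e.g. v4sf_ftype_v4sf_v4sf is the
   type of a function taking two V4SF vectors and returning a V4SF, and
   pcfloat_type_node is "pointer to const float".  */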
13150 tree pchar_type_node = build_pointer_type (char_type_node);
13151 tree pcchar_type_node = build_pointer_type (
13152 build_type_variant (char_type_node, 1, 0));
13153 tree pfloat_type_node = build_pointer_type (float_type_node);
13154 tree pcfloat_type_node = build_pointer_type (
13155 build_type_variant (float_type_node, 1, 0));
13156 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13157 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13158 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13160 /* Comparisons. */
13161 tree int_ftype_v4sf_v4sf
13162 = build_function_type_list (integer_type_node,
13163 V4SF_type_node, V4SF_type_node, NULL_TREE);
13164 tree v4si_ftype_v4sf_v4sf
13165 = build_function_type_list (V4SI_type_node,
13166 V4SF_type_node, V4SF_type_node, NULL_TREE);
13167 /* MMX/SSE/integer conversions. */
13168 tree int_ftype_v4sf
13169 = build_function_type_list (integer_type_node,
13170 V4SF_type_node, NULL_TREE);
13171 tree int64_ftype_v4sf
13172 = build_function_type_list (long_long_integer_type_node,
13173 V4SF_type_node, NULL_TREE);
13174 tree int_ftype_v8qi
13175 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13176 tree v4sf_ftype_v4sf_int
13177 = build_function_type_list (V4SF_type_node,
13178 V4SF_type_node, integer_type_node, NULL_TREE);
13179 tree v4sf_ftype_v4sf_int64
13180 = build_function_type_list (V4SF_type_node,
13181 V4SF_type_node, long_long_integer_type_node,
13182 NULL_TREE);
13183 tree v4sf_ftype_v4sf_v2si
13184 = build_function_type_list (V4SF_type_node,
13185 V4SF_type_node, V2SI_type_node, NULL_TREE);
13186 tree int_ftype_v4hi_int
13187 = build_function_type_list (integer_type_node,
13188 V4HI_type_node, integer_type_node, NULL_TREE);
13189 tree v4hi_ftype_v4hi_int_int
13190 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13191 integer_type_node, integer_type_node,
13192 NULL_TREE);
13193 /* Miscellaneous. */
13194 tree v8qi_ftype_v4hi_v4hi
13195 = build_function_type_list (V8QI_type_node,
13196 V4HI_type_node, V4HI_type_node, NULL_TREE);
13197 tree v4hi_ftype_v2si_v2si
13198 = build_function_type_list (V4HI_type_node,
13199 V2SI_type_node, V2SI_type_node, NULL_TREE);
13200 tree v4sf_ftype_v4sf_v4sf_int
13201 = build_function_type_list (V4SF_type_node,
13202 V4SF_type_node, V4SF_type_node,
13203 integer_type_node, NULL_TREE);
13204 tree v2si_ftype_v4hi_v4hi
13205 = build_function_type_list (V2SI_type_node,
13206 V4HI_type_node, V4HI_type_node, NULL_TREE);
13207 tree v4hi_ftype_v4hi_int
13208 = build_function_type_list (V4HI_type_node,
13209 V4HI_type_node, integer_type_node, NULL_TREE);
13210 tree v4hi_ftype_v4hi_di
13211 = build_function_type_list (V4HI_type_node,
13212 V4HI_type_node, long_long_unsigned_type_node,
13213 NULL_TREE);
13214 tree v2si_ftype_v2si_di
13215 = build_function_type_list (V2SI_type_node,
13216 V2SI_type_node, long_long_unsigned_type_node,
13217 NULL_TREE);
13218 tree void_ftype_void
13219 = build_function_type (void_type_node, void_list_node);
13220 tree void_ftype_unsigned
13221 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13222 tree void_ftype_unsigned_unsigned
13223 = build_function_type_list (void_type_node, unsigned_type_node,
13224 unsigned_type_node, NULL_TREE);
13225 tree void_ftype_pcvoid_unsigned_unsigned
13226 = build_function_type_list (void_type_node, const_ptr_type_node,
13227 unsigned_type_node, unsigned_type_node,
13228 NULL_TREE);
13229 tree unsigned_ftype_void
13230 = build_function_type (unsigned_type_node, void_list_node);
13231 tree di_ftype_void
13232 = build_function_type (long_long_unsigned_type_node, void_list_node);
13233 tree v4sf_ftype_void
13234 = build_function_type (V4SF_type_node, void_list_node);
13235 tree v2si_ftype_v4sf
13236 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13237 /* Loads/stores. */
13238 tree void_ftype_v8qi_v8qi_pchar
13239 = build_function_type_list (void_type_node,
13240 V8QI_type_node, V8QI_type_node,
13241 pchar_type_node, NULL_TREE);
13242 tree v4sf_ftype_pcfloat
13243 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13244 /* @@@ the type is bogus */
13245 tree v4sf_ftype_v4sf_pv2si
13246 = build_function_type_list (V4SF_type_node,
13247 V4SF_type_node, pv2si_type_node, NULL_TREE);
13248 tree void_ftype_pv2si_v4sf
13249 = build_function_type_list (void_type_node,
13250 pv2si_type_node, V4SF_type_node, NULL_TREE);
13251 tree void_ftype_pfloat_v4sf
13252 = build_function_type_list (void_type_node,
13253 pfloat_type_node, V4SF_type_node, NULL_TREE);
13254 tree void_ftype_pdi_di
13255 = build_function_type_list (void_type_node,
13256 pdi_type_node, long_long_unsigned_type_node,
13257 NULL_TREE);
13258 tree void_ftype_pv2di_v2di
13259 = build_function_type_list (void_type_node,
13260 pv2di_type_node, V2DI_type_node, NULL_TREE);
13261 /* Normal vector unops. */
13262 tree v4sf_ftype_v4sf
13263 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13265 /* Normal vector binops. */
13266 tree v4sf_ftype_v4sf_v4sf
13267 = build_function_type_list (V4SF_type_node,
13268 V4SF_type_node, V4SF_type_node, NULL_TREE);
13269 tree v8qi_ftype_v8qi_v8qi
13270 = build_function_type_list (V8QI_type_node,
13271 V8QI_type_node, V8QI_type_node, NULL_TREE);
13272 tree v4hi_ftype_v4hi_v4hi
13273 = build_function_type_list (V4HI_type_node,
13274 V4HI_type_node, V4HI_type_node, NULL_TREE);
13275 tree v2si_ftype_v2si_v2si
13276 = build_function_type_list (V2SI_type_node,
13277 V2SI_type_node, V2SI_type_node, NULL_TREE);
13278 tree di_ftype_di_di
13279 = build_function_type_list (long_long_unsigned_type_node,
13280 long_long_unsigned_type_node,
13281 long_long_unsigned_type_node, NULL_TREE);
13283 tree v2si_ftype_v2sf
13284 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13285 tree v2sf_ftype_v2si
13286 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13287 tree v2si_ftype_v2si
13288 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13289 tree v2sf_ftype_v2sf
13290 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13291 tree v2sf_ftype_v2sf_v2sf
13292 = build_function_type_list (V2SF_type_node,
13293 V2SF_type_node, V2SF_type_node, NULL_TREE);
13294 tree v2si_ftype_v2sf_v2sf
13295 = build_function_type_list (V2SI_type_node,
13296 V2SF_type_node, V2SF_type_node, NULL_TREE);
13297 tree pint_type_node = build_pointer_type (integer_type_node);
13298 tree pcint_type_node = build_pointer_type (
13299 build_type_variant (integer_type_node, 1, 0));
13300 tree pdouble_type_node = build_pointer_type (double_type_node);
13301 tree pcdouble_type_node = build_pointer_type (
13302 build_type_variant (double_type_node, 1, 0));
13303 tree int_ftype_v2df_v2df
13304 = build_function_type_list (integer_type_node,
13305 V2DF_type_node, V2DF_type_node, NULL_TREE);
13307 tree ti_ftype_void
13308 = build_function_type (intTI_type_node, void_list_node);
13309 tree v2di_ftype_void
13310 = build_function_type (V2DI_type_node, void_list_node);
13311 tree ti_ftype_ti_ti
13312 = build_function_type_list (intTI_type_node,
13313 intTI_type_node, intTI_type_node, NULL_TREE);
13314 tree void_ftype_pcvoid
13315 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13316 tree v2di_ftype_di
13317 = build_function_type_list (V2DI_type_node,
13318 long_long_unsigned_type_node, NULL_TREE);
13319 tree di_ftype_v2di
13320 = build_function_type_list (long_long_unsigned_type_node,
13321 V2DI_type_node, NULL_TREE);
13322 tree v4sf_ftype_v4si
13323 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13324 tree v4si_ftype_v4sf
13325 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13326 tree v2df_ftype_v4si
13327 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13328 tree v4si_ftype_v2df
13329 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13330 tree v2si_ftype_v2df
13331 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13332 tree v4sf_ftype_v2df
13333 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13334 tree v2df_ftype_v2si
13335 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13336 tree v2df_ftype_v4sf
13337 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13338 tree int_ftype_v2df
13339 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13340 tree int64_ftype_v2df
13341 = build_function_type_list (long_long_integer_type_node,
13342 V2DF_type_node, NULL_TREE);
13343 tree v2df_ftype_v2df_int
13344 = build_function_type_list (V2DF_type_node,
13345 V2DF_type_node, integer_type_node, NULL_TREE);
13346 tree v2df_ftype_v2df_int64
13347 = build_function_type_list (V2DF_type_node,
13348 V2DF_type_node, long_long_integer_type_node,
13349 NULL_TREE);
13350 tree v4sf_ftype_v4sf_v2df
13351 = build_function_type_list (V4SF_type_node,
13352 V4SF_type_node, V2DF_type_node, NULL_TREE);
13353 tree v2df_ftype_v2df_v4sf
13354 = build_function_type_list (V2DF_type_node,
13355 V2DF_type_node, V4SF_type_node, NULL_TREE);
13356 tree v2df_ftype_v2df_v2df_int
13357 = build_function_type_list (V2DF_type_node,
13358 V2DF_type_node, V2DF_type_node,
13359 integer_type_node,
13360 NULL_TREE);
13361 tree v2df_ftype_v2df_pv2si
13362 = build_function_type_list (V2DF_type_node,
13363 V2DF_type_node, pv2si_type_node, NULL_TREE);
13364 tree void_ftype_pv2si_v2df
13365 = build_function_type_list (void_type_node,
13366 pv2si_type_node, V2DF_type_node, NULL_TREE);
13367 tree void_ftype_pdouble_v2df
13368 = build_function_type_list (void_type_node,
13369 pdouble_type_node, V2DF_type_node, NULL_TREE);
13370 tree void_ftype_pint_int
13371 = build_function_type_list (void_type_node,
13372 pint_type_node, integer_type_node, NULL_TREE);
13373 tree void_ftype_v16qi_v16qi_pchar
13374 = build_function_type_list (void_type_node,
13375 V16QI_type_node, V16QI_type_node,
13376 pchar_type_node, NULL_TREE);
13377 tree v2df_ftype_pcdouble
13378 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13379 tree v2df_ftype_v2df_v2df
13380 = build_function_type_list (V2DF_type_node,
13381 V2DF_type_node, V2DF_type_node, NULL_TREE);
13382 tree v16qi_ftype_v16qi_v16qi
13383 = build_function_type_list (V16QI_type_node,
13384 V16QI_type_node, V16QI_type_node, NULL_TREE);
13385 tree v8hi_ftype_v8hi_v8hi
13386 = build_function_type_list (V8HI_type_node,
13387 V8HI_type_node, V8HI_type_node, NULL_TREE);
13388 tree v4si_ftype_v4si_v4si
13389 = build_function_type_list (V4SI_type_node,
13390 V4SI_type_node, V4SI_type_node, NULL_TREE);
13391 tree v2di_ftype_v2di_v2di
13392 = build_function_type_list (V2DI_type_node,
13393 V2DI_type_node, V2DI_type_node, NULL_TREE);
13394 tree v2di_ftype_v2df_v2df
13395 = build_function_type_list (V2DI_type_node,
13396 V2DF_type_node, V2DF_type_node, NULL_TREE);
13397 tree v2df_ftype_v2df
13398 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13399 tree v2df_ftype_double
13400 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13401 tree v2df_ftype_double_double
13402 = build_function_type_list (V2DF_type_node,
13403 double_type_node, double_type_node, NULL_TREE);
13404 tree int_ftype_v8hi_int
13405 = build_function_type_list (integer_type_node,
13406 V8HI_type_node, integer_type_node, NULL_TREE);
13407 tree v8hi_ftype_v8hi_int_int
13408 = build_function_type_list (V8HI_type_node,
13409 V8HI_type_node, integer_type_node,
13410 integer_type_node, NULL_TREE);
13411 tree v2di_ftype_v2di_int
13412 = build_function_type_list (V2DI_type_node,
13413 V2DI_type_node, integer_type_node, NULL_TREE);
13414 tree v4si_ftype_v4si_int
13415 = build_function_type_list (V4SI_type_node,
13416 V4SI_type_node, integer_type_node, NULL_TREE);
13417 tree v8hi_ftype_v8hi_int
13418 = build_function_type_list (V8HI_type_node,
13419 V8HI_type_node, integer_type_node, NULL_TREE);
13420 tree v8hi_ftype_v8hi_v2di
13421 = build_function_type_list (V8HI_type_node,
13422 V8HI_type_node, V2DI_type_node, NULL_TREE);
13423 tree v4si_ftype_v4si_v2di
13424 = build_function_type_list (V4SI_type_node,
13425 V4SI_type_node, V2DI_type_node, NULL_TREE);
13426 tree v4si_ftype_v8hi_v8hi
13427 = build_function_type_list (V4SI_type_node,
13428 V8HI_type_node, V8HI_type_node, NULL_TREE);
13429 tree di_ftype_v8qi_v8qi
13430 = build_function_type_list (long_long_unsigned_type_node,
13431 V8QI_type_node, V8QI_type_node, NULL_TREE);
13432 tree v2di_ftype_v16qi_v16qi
13433 = build_function_type_list (V2DI_type_node,
13434 V16QI_type_node, V16QI_type_node, NULL_TREE);
13435 tree int_ftype_v16qi
13436 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13437 tree v16qi_ftype_pcchar
13438 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13439 tree void_ftype_pchar_v16qi
13440 = build_function_type_list (void_type_node,
13441 pchar_type_node, V16QI_type_node, NULL_TREE);
13442 tree v4si_ftype_pcint
13443 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13444 tree void_ftype_pcint_v4si
13445 = build_function_type_list (void_type_node,
13446 pcint_type_node, V4SI_type_node, NULL_TREE);
13447 tree v2di_ftype_v2di
13448 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13450 tree float80_type;
13451 tree float128_type;
13453 /* The __float80 type. */
13454 if (TYPE_MODE (long_double_type_node) == XFmode)
13455 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13456 "__float80");
13457 else
13459 /* long double does not have XFmode here, so create a separate type for __float80. */
13460 float80_type = make_node (REAL_TYPE);
13461 TYPE_PRECISION (float80_type) = 96;
13462 layout_type (float80_type);
13463 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13466 float128_type = make_node (REAL_TYPE);
13467 TYPE_PRECISION (float128_type) = 128;
13468 layout_type (float128_type);
13469 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
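/* Unlike __float80, which simply reuses long double when that type is already XFmode, __float128 is always created here as a distinct 128-bit type. */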
13471 /* Add all builtins that are more or less simple operations on two
13472 operands. */
13473 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13475 /* Use one of the operands; the target can have a different mode for
13476 mask-generating compares. */
13477 enum machine_mode mode;
13478 tree type;
13480 if (d->name == 0)
13481 continue;
13482 mode = insn_data[d->icode].operand[1].mode;
13484 switch (mode)
13486 case V16QImode:
13487 type = v16qi_ftype_v16qi_v16qi;
13488 break;
13489 case V8HImode:
13490 type = v8hi_ftype_v8hi_v8hi;
13491 break;
13492 case V4SImode:
13493 type = v4si_ftype_v4si_v4si;
13494 break;
13495 case V2DImode:
13496 type = v2di_ftype_v2di_v2di;
13497 break;
13498 case V2DFmode:
13499 type = v2df_ftype_v2df_v2df;
13500 break;
13501 case TImode:
13502 type = ti_ftype_ti_ti;
13503 break;
13504 case V4SFmode:
13505 type = v4sf_ftype_v4sf_v4sf;
13506 break;
13507 case V8QImode:
13508 type = v8qi_ftype_v8qi_v8qi;
13509 break;
13510 case V4HImode:
13511 type = v4hi_ftype_v4hi_v4hi;
13512 break;
13513 case V2SImode:
13514 type = v2si_ftype_v2si_v2si;
13515 break;
13516 case DImode:
13517 type = di_ftype_di_di;
13518 break;
13520 default:
13521 abort ();
13524 /* Override for comparisons: the mask-generating compare patterns produce an all-ones/all-zeros element mask, so give them an integer-vector result type. */
13525 if (d->icode == CODE_FOR_maskcmpv4sf3
13526 || d->icode == CODE_FOR_maskncmpv4sf3
13527 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13528 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13529 type = v4si_ftype_v4sf_v4sf;
13531 if (d->icode == CODE_FOR_maskcmpv2df3
13532 || d->icode == CODE_FOR_maskncmpv2df3
13533 || d->icode == CODE_FOR_vmmaskcmpv2df3
13534 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13535 type = v2di_ftype_v2df_v2df;
13537 def_builtin (d->mask, d->name, type, d->code);
13540 /* Add the remaining MMX insns with somewhat more complicated types. */
13541 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13542 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13543 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13544 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13545 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13547 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13548 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13549 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13551 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13552 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13554 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13555 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13557 /* comi/ucomi insns. */
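/* Each comi/ucomi builtin returns int; only the vector operand type differs between the SSE and SSE2 entries. */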
13558 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13559 if (d->mask == MASK_SSE2)
13560 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13561 else
13562 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13564 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13565 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13566 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13568 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13569 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13570 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13571 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13572 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13573 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13574 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13575 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13576 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13577 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13578 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
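/* The MASK_SSE | MASK_3DNOW_A builtins below are the MMX-register extensions (pextrw, pinsrw, maskmovq, pmovmskb, movntq, sfence, psadbw) that the Athlon also provides through 3DNow!, so either flag enables them. */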
13580 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13581 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13583 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13585 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13586 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13588 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13590 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13592 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13593 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13594 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13595 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13597 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13598 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13599 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13600 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13602 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13604 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13606 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13607 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13608 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13609 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13610 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13611 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13613 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13615 /* Original 3DNow! */
13616 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13617 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13618 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13619 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13620 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13621 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13622 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13623 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13624 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13625 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13626 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13634 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13635 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13637 /* 3DNow! extension as used in the Athlon CPU. */
13638 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13639 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13640 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13641 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13642 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13643 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13645 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13647 /* SSE2 */
13648 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13669 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13696 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13697 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13703 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13704 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13728 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13731 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13734 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13735 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13736 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13738 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13739 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13741 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13742 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13743 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13744 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13746 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13747 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13748 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13749 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13751 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13752 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13754 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13756 /* Prescott New Instructions. */
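/* (PNI was later marketed as SSE3.) */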
13757 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13758 void_ftype_pcvoid_unsigned_unsigned,
13759 IX86_BUILTIN_MONITOR);
13760 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13761 void_ftype_unsigned_unsigned,
13762 IX86_BUILTIN_MWAIT);
13763 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13764 v4sf_ftype_v4sf,
13765 IX86_BUILTIN_MOVSHDUP);
13766 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13767 v4sf_ftype_v4sf,
13768 IX86_BUILTIN_MOVSLDUP);
13769 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13770 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13771 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13772 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13773 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13774 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13777 /* Errors in the source file can cause expand_expr to return const0_rtx
13778 where we expect a vector. To avoid crashing, use one of the vector
13779 clear instructions. */
13780 static rtx
13781 safe_vector_operand (rtx x, enum machine_mode mode)
13783 if (x != const0_rtx)
13784 return x;
13785 x = gen_reg_rtx (mode);
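/* Emit whichever clear pattern matches MODE (MMX or SSE) so the fresh register reads as zero. */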
13787 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13788 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13789 : gen_rtx_SUBREG (DImode, x, 0)));
13790 else
13791 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13792 : gen_rtx_SUBREG (V4SFmode, x, 0),
13793 CONST0_RTX (V4SFmode)));
13794 return x;
13797 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13799 static rtx
13800 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13802 rtx pat;
13803 tree arg0 = TREE_VALUE (arglist);
13804 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13805 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13806 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13807 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13808 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13809 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13811 if (VECTOR_MODE_P (mode0))
13812 op0 = safe_vector_operand (op0, mode0);
13813 if (VECTOR_MODE_P (mode1))
13814 op1 = safe_vector_operand (op1, mode1);
13816 if (! target
13817 || GET_MODE (target) != tmode
13818 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13819 target = gen_reg_rtx (tmode);
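/* If the pattern wants a TImode operand but the argument expanded to an SImode value, load it into a V4SI register and use that register's TImode lowpart. */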
13821 if (GET_MODE (op1) == SImode && mode1 == TImode)
13823 rtx x = gen_reg_rtx (V4SImode);
13824 emit_insn (gen_sse2_loadd (x, op1));
13825 op1 = gen_lowpart (TImode, x);
13828 /* In case the insn wants input operands in modes different from
13829 the result, abort. */
13830 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13831 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13832 abort ();
13834 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13835 op0 = copy_to_mode_reg (mode0, op0);
13836 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13837 op1 = copy_to_mode_reg (mode1, op1);
13839 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13840 yet one of the two must not be a memory. This is normally enforced
13841 by expanders, but we didn't bother to create one here. */
13842 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13843 op0 = copy_to_mode_reg (mode0, op0);
13845 pat = GEN_FCN (icode) (target, op0, op1);
13846 if (! pat)
13847 return 0;
13848 emit_insn (pat);
13849 return target;
13852 /* Subroutine of ix86_expand_builtin to take care of stores. */
13854 static rtx
13855 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13857 rtx pat;
13858 tree arg0 = TREE_VALUE (arglist);
13859 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13860 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13861 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13862 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13863 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
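/* For a store pattern, operand 0 is the destination memory built from the pointer argument and operand 1 is the value being stored. */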
13865 if (VECTOR_MODE_P (mode1))
13866 op1 = safe_vector_operand (op1, mode1);
13868 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13869 op1 = copy_to_mode_reg (mode1, op1);
13871 pat = GEN_FCN (icode) (op0, op1);
13872 if (pat)
13873 emit_insn (pat);
13874 return 0;
13877 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13879 static rtx
13880 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13881 rtx target, int do_load)
13883 rtx pat;
13884 tree arg0 = TREE_VALUE (arglist);
13885 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13886 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13887 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13889 if (! target
13890 || GET_MODE (target) != tmode
13891 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13892 target = gen_reg_rtx (tmode);
13893 if (do_load)
13894 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13895 else
13897 if (VECTOR_MODE_P (mode0))
13898 op0 = safe_vector_operand (op0, mode0);
13900 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13901 op0 = copy_to_mode_reg (mode0, op0);
13904 pat = GEN_FCN (icode) (target, op0);
13905 if (! pat)
13906 return 0;
13907 emit_insn (pat);
13908 return target;
13911 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13912 sqrtss, rsqrtss, rcpss. */
13914 static rtx
13915 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13917 rtx pat;
13918 tree arg0 = TREE_VALUE (arglist);
13919 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13920 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13921 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13923 if (! target
13924 || GET_MODE (target) != tmode
13925 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13926 target = gen_reg_rtx (tmode);
13928 if (VECTOR_MODE_P (mode0))
13929 op0 = safe_vector_operand (op0, mode0);
13931 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13932 op0 = copy_to_mode_reg (mode0, op0);
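/* These scalar patterns take a second vector input that supplies the untouched upper elements of the result, so the single builtin argument is used for both operands. */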
13934 op1 = op0;
13935 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13936 op1 = copy_to_mode_reg (mode0, op1);
13938 pat = GEN_FCN (icode) (target, op0, op1);
13939 if (! pat)
13940 return 0;
13941 emit_insn (pat);
13942 return target;
13945 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13947 static rtx
13948 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13949 rtx target)
13951 rtx pat;
13952 tree arg0 = TREE_VALUE (arglist);
13953 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13954 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13955 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13956 rtx op2;
13957 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13958 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13959 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13960 enum rtx_code comparison = d->comparison;
13962 if (VECTOR_MODE_P (mode0))
13963 op0 = safe_vector_operand (op0, mode0);
13964 if (VECTOR_MODE_P (mode1))
13965 op1 = safe_vector_operand (op1, mode1);
13967 /* Swap operands if we have a comparison that isn't available in
13968 hardware. */
13969 if (d->flag)
13971 rtx tmp = gen_reg_rtx (mode1);
13972 emit_move_insn (tmp, op1);
13973 op1 = op0;
13974 op0 = tmp;
13977 if (! target
13978 || GET_MODE (target) != tmode
13979 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13980 target = gen_reg_rtx (tmode);
13982 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13983 op0 = copy_to_mode_reg (mode0, op0);
13984 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13985 op1 = copy_to_mode_reg (mode1, op1);
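/* The comparison code itself is passed as the final operand of the pattern and selects which compare variant is emitted. */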
13987 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13988 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13989 if (! pat)
13990 return 0;
13991 emit_insn (pat);
13992 return target;
13995 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13997 static rtx
13998 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13999 rtx target)
14001 rtx pat;
14002 tree arg0 = TREE_VALUE (arglist);
14003 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14004 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14005 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14006 rtx op2;
14007 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14008 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14009 enum rtx_code comparison = d->comparison;
14011 if (VECTOR_MODE_P (mode0))
14012 op0 = safe_vector_operand (op0, mode0);
14013 if (VECTOR_MODE_P (mode1))
14014 op1 = safe_vector_operand (op1, mode1);
14016 /* Swap operands if we have a comparison that isn't available in
14017 hardware. */
14018 if (d->flag)
14020 rtx tmp = op1;
14021 op1 = op0;
14022 op0 = tmp;
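/* comi/ucomi only set the flags; build the integer result by zeroing an SImode register and then setting its low byte from the comparison. */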
14025 target = gen_reg_rtx (SImode);
14026 emit_move_insn (target, const0_rtx);
14027 target = gen_rtx_SUBREG (QImode, target, 0);
14029 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14030 op0 = copy_to_mode_reg (mode0, op0);
14031 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14032 op1 = copy_to_mode_reg (mode1, op1);
14034 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14035 pat = GEN_FCN (d->icode) (op0, op1);
14036 if (! pat)
14037 return 0;
14038 emit_insn (pat);
14039 emit_insn (gen_rtx_SET (VOIDmode,
14040 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14041 gen_rtx_fmt_ee (comparison, QImode,
14042 SET_DEST (pat),
14043 const0_rtx)));
14045 return SUBREG_REG (target);
14048 /* Expand an expression EXP that calls a built-in function,
14049 with result going to TARGET if that's convenient
14050 (and in mode MODE if that's convenient).
14051 SUBTARGET may be used as the target for computing one of EXP's operands.
14052 IGNORE is nonzero if the value is to be ignored. */
14054 rtx
14055 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14056 enum machine_mode mode ATTRIBUTE_UNUSED,
14057 int ignore ATTRIBUTE_UNUSED)
14059 const struct builtin_description *d;
14060 size_t i;
14061 enum insn_code icode;
14062 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14063 tree arglist = TREE_OPERAND (exp, 1);
14064 tree arg0, arg1, arg2;
14065 rtx op0, op1, op2, pat;
14066 enum machine_mode tmode, mode0, mode1, mode2;
14067 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14069 switch (fcode)
14071 case IX86_BUILTIN_EMMS:
14072 emit_insn (gen_emms ());
14073 return 0;
14075 case IX86_BUILTIN_SFENCE:
14076 emit_insn (gen_sfence ());
14077 return 0;
14079 case IX86_BUILTIN_PEXTRW:
14080 case IX86_BUILTIN_PEXTRW128:
14081 icode = (fcode == IX86_BUILTIN_PEXTRW
14082 ? CODE_FOR_mmx_pextrw
14083 : CODE_FOR_sse2_pextrw);
14084 arg0 = TREE_VALUE (arglist);
14085 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14086 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14087 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14088 tmode = insn_data[icode].operand[0].mode;
14089 mode0 = insn_data[icode].operand[1].mode;
14090 mode1 = insn_data[icode].operand[2].mode;
14092 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14093 op0 = copy_to_mode_reg (mode0, op0);
14094 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14096 error ("selector must be an integer constant in the range 0..%i",
14097 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14098 return gen_reg_rtx (tmode);
14100 if (target == 0
14101 || GET_MODE (target) != tmode
14102 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14103 target = gen_reg_rtx (tmode);
14104 pat = GEN_FCN (icode) (target, op0, op1);
14105 if (! pat)
14106 return 0;
14107 emit_insn (pat);
14108 return target;
14110 case IX86_BUILTIN_PINSRW:
14111 case IX86_BUILTIN_PINSRW128:
14112 icode = (fcode == IX86_BUILTIN_PINSRW
14113 ? CODE_FOR_mmx_pinsrw
14114 : CODE_FOR_sse2_pinsrw);
14115 arg0 = TREE_VALUE (arglist);
14116 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14117 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14118 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14119 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14120 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14121 tmode = insn_data[icode].operand[0].mode;
14122 mode0 = insn_data[icode].operand[1].mode;
14123 mode1 = insn_data[icode].operand[2].mode;
14124 mode2 = insn_data[icode].operand[3].mode;
14126 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14127 op0 = copy_to_mode_reg (mode0, op0);
14128 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14129 op1 = copy_to_mode_reg (mode1, op1);
14130 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14132 error ("selector must be an integer constant in the range 0..%i",
14133 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14134 return const0_rtx;
14136 if (target == 0
14137 || GET_MODE (target) != tmode
14138 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14139 target = gen_reg_rtx (tmode);
14140 pat = GEN_FCN (icode) (target, op0, op1, op2);
14141 if (! pat)
14142 return 0;
14143 emit_insn (pat);
14144 return target;
14146 case IX86_BUILTIN_MASKMOVQ:
14147 case IX86_BUILTIN_MASKMOVDQU:
14148 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14149 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14150 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14151 : CODE_FOR_sse2_maskmovdqu));
14152 /* Note the arg order is different from the operand order. */
14153 arg1 = TREE_VALUE (arglist);
14154 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14155 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14156 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14157 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14158 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14159 mode0 = insn_data[icode].operand[0].mode;
14160 mode1 = insn_data[icode].operand[1].mode;
14161 mode2 = insn_data[icode].operand[2].mode;
14163 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14164 op0 = copy_to_mode_reg (mode0, op0);
14165 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14166 op1 = copy_to_mode_reg (mode1, op1);
14167 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14168 op2 = copy_to_mode_reg (mode2, op2);
14169 pat = GEN_FCN (icode) (op0, op1, op2);
14170 if (! pat)
14171 return 0;
14172 emit_insn (pat);
14173 return 0;
14175 case IX86_BUILTIN_SQRTSS:
14176 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14177 case IX86_BUILTIN_RSQRTSS:
14178 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14179 case IX86_BUILTIN_RCPSS:
14180 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14182 case IX86_BUILTIN_LOADAPS:
14183 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14185 case IX86_BUILTIN_LOADUPS:
14186 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14188 case IX86_BUILTIN_STOREAPS:
14189 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14191 case IX86_BUILTIN_STOREUPS:
14192 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14194 case IX86_BUILTIN_LOADSS:
14195 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14197 case IX86_BUILTIN_STORESS:
14198 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14200 case IX86_BUILTIN_LOADHPS:
14201 case IX86_BUILTIN_LOADLPS:
14202 case IX86_BUILTIN_LOADHPD:
14203 case IX86_BUILTIN_LOADLPD:
14204 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14205 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14206 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14207 : CODE_FOR_sse2_movsd);
14208 arg0 = TREE_VALUE (arglist);
14209 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14210 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14211 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14212 tmode = insn_data[icode].operand[0].mode;
14213 mode0 = insn_data[icode].operand[1].mode;
14214 mode1 = insn_data[icode].operand[2].mode;
14216 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14217 op0 = copy_to_mode_reg (mode0, op0);
14218 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14219 if (target == 0
14220 || GET_MODE (target) != tmode
14221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14222 target = gen_reg_rtx (tmode);
14223 pat = GEN_FCN (icode) (target, op0, op1);
14224 if (! pat)
14225 return 0;
14226 emit_insn (pat);
14227 return target;
14229 case IX86_BUILTIN_STOREHPS:
14230 case IX86_BUILTIN_STORELPS:
14231 case IX86_BUILTIN_STOREHPD:
14232 case IX86_BUILTIN_STORELPD:
14233 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14234 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14235 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14236 : CODE_FOR_sse2_movsd);
14237 arg0 = TREE_VALUE (arglist);
14238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14239 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14240 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14241 mode0 = insn_data[icode].operand[1].mode;
14242 mode1 = insn_data[icode].operand[2].mode;
14244 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14245 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14246 op1 = copy_to_mode_reg (mode1, op1);
14248 pat = GEN_FCN (icode) (op0, op0, op1);
14249 if (! pat)
14250 return 0;
14251 emit_insn (pat);
14252 return 0;
14254 case IX86_BUILTIN_MOVNTPS:
14255 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14256 case IX86_BUILTIN_MOVNTQ:
14257 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
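/* ldmxcsr and stmxcsr operate on a 32-bit memory operand, so the value goes through a stack temporary. */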
14259 case IX86_BUILTIN_LDMXCSR:
14260 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14261 target = assign_386_stack_local (SImode, 0);
14262 emit_move_insn (target, op0);
14263 emit_insn (gen_ldmxcsr (target));
14264 return 0;
14266 case IX86_BUILTIN_STMXCSR:
14267 target = assign_386_stack_local (SImode, 0);
14268 emit_insn (gen_stmxcsr (target));
14269 return copy_to_mode_reg (SImode, target);
14271 case IX86_BUILTIN_SHUFPS:
14272 case IX86_BUILTIN_SHUFPD:
14273 icode = (fcode == IX86_BUILTIN_SHUFPS
14274 ? CODE_FOR_sse_shufps
14275 : CODE_FOR_sse2_shufpd);
14276 arg0 = TREE_VALUE (arglist);
14277 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14278 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14279 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14280 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14281 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14282 tmode = insn_data[icode].operand[0].mode;
14283 mode0 = insn_data[icode].operand[1].mode;
14284 mode1 = insn_data[icode].operand[2].mode;
14285 mode2 = insn_data[icode].operand[3].mode;
14287 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14288 op0 = copy_to_mode_reg (mode0, op0);
14289 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14290 op1 = copy_to_mode_reg (mode1, op1);
14291 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14293 /* @@@ better error message */
14294 error ("mask must be an immediate");
14295 return gen_reg_rtx (tmode);
14297 if (target == 0
14298 || GET_MODE (target) != tmode
14299 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14300 target = gen_reg_rtx (tmode);
14301 pat = GEN_FCN (icode) (target, op0, op1, op2);
14302 if (! pat)
14303 return 0;
14304 emit_insn (pat);
14305 return target;
14307 case IX86_BUILTIN_PSHUFW:
14308 case IX86_BUILTIN_PSHUFD:
14309 case IX86_BUILTIN_PSHUFHW:
14310 case IX86_BUILTIN_PSHUFLW:
14311 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14312 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14313 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14314 : CODE_FOR_mmx_pshufw);
14315 arg0 = TREE_VALUE (arglist);
14316 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14317 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14318 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14319 tmode = insn_data[icode].operand[0].mode;
14320 mode1 = insn_data[icode].operand[1].mode;
14321 mode2 = insn_data[icode].operand[2].mode;
14323 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14324 op0 = copy_to_mode_reg (mode1, op0);
14325 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14327 /* @@@ better error message */
14328 error ("mask must be an immediate");
14329 return const0_rtx;
14331 if (target == 0
14332 || GET_MODE (target) != tmode
14333 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14334 target = gen_reg_rtx (tmode);
14335 pat = GEN_FCN (icode) (target, op0, op1);
14336 if (! pat)
14337 return 0;
14338 emit_insn (pat);
14339 return target;
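/* The 128-bit byte-shift builtins expand to full-width TImode shift patterns; the shift count must be an immediate. */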
14341 case IX86_BUILTIN_PSLLDQI128:
14342 case IX86_BUILTIN_PSRLDQI128:
14343 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14344 : CODE_FOR_sse2_lshrti3);
14345 arg0 = TREE_VALUE (arglist);
14346 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14347 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14348 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14349 tmode = insn_data[icode].operand[0].mode;
14350 mode1 = insn_data[icode].operand[1].mode;
14351 mode2 = insn_data[icode].operand[2].mode;
14353 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14355 op0 = copy_to_reg (op0);
14356 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14358 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14360 error ("shift must be an immediate");
14361 return const0_rtx;
14363 target = gen_reg_rtx (V2DImode);
14364 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14365 if (! pat)
14366 return 0;
14367 emit_insn (pat);
14368 return target;
14370 case IX86_BUILTIN_FEMMS:
14371 emit_insn (gen_femms ());
14372 return NULL_RTX;
14374 case IX86_BUILTIN_PAVGUSB:
14375 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14377 case IX86_BUILTIN_PF2ID:
14378 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14380 case IX86_BUILTIN_PFACC:
14381 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14383 case IX86_BUILTIN_PFADD:
14384 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14386 case IX86_BUILTIN_PFCMPEQ:
14387 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14389 case IX86_BUILTIN_PFCMPGE:
14390 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14392 case IX86_BUILTIN_PFCMPGT:
14393 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14395 case IX86_BUILTIN_PFMAX:
14396 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14398 case IX86_BUILTIN_PFMIN:
14399 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14401 case IX86_BUILTIN_PFMUL:
14402 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14404 case IX86_BUILTIN_PFRCP:
14405 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14407 case IX86_BUILTIN_PFRCPIT1:
14408 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14410 case IX86_BUILTIN_PFRCPIT2:
14411 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14413 case IX86_BUILTIN_PFRSQIT1:
14414 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14416 case IX86_BUILTIN_PFRSQRT:
14417 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14419 case IX86_BUILTIN_PFSUB:
14420 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14422 case IX86_BUILTIN_PFSUBR:
14423 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14425 case IX86_BUILTIN_PI2FD:
14426 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14428 case IX86_BUILTIN_PMULHRW:
14429 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14431 case IX86_BUILTIN_PF2IW:
14432 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14434 case IX86_BUILTIN_PFNACC:
14435 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14437 case IX86_BUILTIN_PFPNACC:
14438 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14440 case IX86_BUILTIN_PI2FW:
14441 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14443 case IX86_BUILTIN_PSWAPDSI:
14444 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14446 case IX86_BUILTIN_PSWAPDSF:
14447 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14449 case IX86_BUILTIN_SSE_ZERO:
14450 target = gen_reg_rtx (V4SFmode);
14451 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14452 return target;
14454 case IX86_BUILTIN_MMX_ZERO:
14455 target = gen_reg_rtx (DImode);
14456 emit_insn (gen_mmx_clrdi (target));
14457 return target;
14459 case IX86_BUILTIN_CLRTI:
14460 target = gen_reg_rtx (V2DImode);
14461 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14462 return target;
14465 case IX86_BUILTIN_SQRTSD:
14466 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14467 case IX86_BUILTIN_LOADAPD:
14468 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14469 case IX86_BUILTIN_LOADUPD:
14470 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14472 case IX86_BUILTIN_STOREAPD:
14473 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14474 case IX86_BUILTIN_STOREUPD:
14475 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14477 case IX86_BUILTIN_LOADSD:
14478 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14480 case IX86_BUILTIN_STORESD:
14481 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
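/* setpd1 and setpd build a V2DF value by spilling the scalar argument(s) to a stack slot and loading the result back as a vector. */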
14483 case IX86_BUILTIN_SETPD1:
14484 target = assign_386_stack_local (DFmode, 0);
14485 arg0 = TREE_VALUE (arglist);
14486 emit_move_insn (adjust_address (target, DFmode, 0),
14487 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14488 op0 = gen_reg_rtx (V2DFmode);
14489 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14490 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14491 return op0;
14493 case IX86_BUILTIN_SETPD:
14494 target = assign_386_stack_local (V2DFmode, 0);
14495 arg0 = TREE_VALUE (arglist);
14496 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14497 emit_move_insn (adjust_address (target, DFmode, 0),
14498 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14499 emit_move_insn (adjust_address (target, DFmode, 8),
14500 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14501 op0 = gen_reg_rtx (V2DFmode);
14502 emit_insn (gen_sse2_movapd (op0, target));
14503 return op0;
14505 case IX86_BUILTIN_LOADRPD:
14506 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14507 gen_reg_rtx (V2DFmode), 1);
14508 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14509 return target;
14511 case IX86_BUILTIN_LOADPD1:
14512 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14513 gen_reg_rtx (V2DFmode), 1);
14514 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14515 return target;
14517 case IX86_BUILTIN_STOREPD1:
14518 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14519 case IX86_BUILTIN_STORERPD:
14520 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14522 case IX86_BUILTIN_CLRPD:
14523 target = gen_reg_rtx (V2DFmode);
14524 emit_insn (gen_sse_clrv2df (target));
14525 return target;
14527 case IX86_BUILTIN_MFENCE:
14528 emit_insn (gen_sse2_mfence ());
14529 return 0;
14530 case IX86_BUILTIN_LFENCE:
14531 emit_insn (gen_sse2_lfence ());
14532 return 0;
14534 case IX86_BUILTIN_CLFLUSH:
14535 arg0 = TREE_VALUE (arglist);
14536 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14537 icode = CODE_FOR_sse2_clflush;
14538 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14539 op0 = copy_to_mode_reg (Pmode, op0);
14541 emit_insn (gen_sse2_clflush (op0));
14542 return 0;
14544 case IX86_BUILTIN_MOVNTPD:
14545 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14546 case IX86_BUILTIN_MOVNTDQ:
14547 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14548 case IX86_BUILTIN_MOVNTI:
14549 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14551 case IX86_BUILTIN_LOADDQA:
14552 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14553 case IX86_BUILTIN_LOADDQU:
14554 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14555 case IX86_BUILTIN_LOADD:
14556 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14558 case IX86_BUILTIN_STOREDQA:
14559 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14560 case IX86_BUILTIN_STOREDQU:
14561 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14562 case IX86_BUILTIN_STORED:
14563 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14565 case IX86_BUILTIN_MONITOR:
14566 arg0 = TREE_VALUE (arglist);
14567 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14568 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14569 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14570 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14571 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14572 if (!REG_P (op0))
14573 op0 = copy_to_mode_reg (SImode, op0);
14574 if (!REG_P (op1))
14575 op1 = copy_to_mode_reg (SImode, op1);
14576 if (!REG_P (op2))
14577 op2 = copy_to_mode_reg (SImode, op2);
14578 emit_insn (gen_monitor (op0, op1, op2));
14579 return 0;
14581 case IX86_BUILTIN_MWAIT:
14582 arg0 = TREE_VALUE (arglist);
14583 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14584 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14585 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14586 if (!REG_P (op0))
14587 op0 = copy_to_mode_reg (SImode, op0);
14588 if (!REG_P (op1))
14589 op1 = copy_to_mode_reg (SImode, op1);
14590 emit_insn (gen_mwait (op0, op1));
14591 return 0;
14593 case IX86_BUILTIN_LOADDDUP:
14594 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14596 case IX86_BUILTIN_LDDQU:
14597 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14600 default:
14601 break;
14604 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14605 if (d->code == fcode)
14607 /* Compares are treated specially. */
14608 if (d->icode == CODE_FOR_maskcmpv4sf3
14609 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14610 || d->icode == CODE_FOR_maskncmpv4sf3
14611 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14612 || d->icode == CODE_FOR_maskcmpv2df3
14613 || d->icode == CODE_FOR_vmmaskcmpv2df3
14614 || d->icode == CODE_FOR_maskncmpv2df3
14615 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14616 return ix86_expand_sse_compare (d, arglist, target);
14618 return ix86_expand_binop_builtin (d->icode, arglist, target);
14621 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14622 if (d->code == fcode)
14623 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14625 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14626 if (d->code == fcode)
14627 return ix86_expand_sse_comi (d, arglist, target);
14629 /* @@@ Should really do something sensible here. */
14630 return 0;
14633 /* Store OPERAND to memory after reload is completed. This means
14634 that we can't easily use assign_stack_local. */
14635 rtx
14636 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14638 rtx result;
14639 if (!reload_completed)
14640 abort ();
14641 if (TARGET_RED_ZONE)
14643 result = gen_rtx_MEM (mode,
14644 gen_rtx_PLUS (Pmode,
14645 stack_pointer_rtx,
14646 GEN_INT (-RED_ZONE_SIZE)));
14647 emit_move_insn (result, operand);
14649 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14651 switch (mode)
14653 case HImode:
14654 case SImode:
14655 operand = gen_lowpart (DImode, operand);
14656 /* Fall through. */
14657 case DImode:
14658 emit_insn (
14659 gen_rtx_SET (VOIDmode,
14660 gen_rtx_MEM (DImode,
14661 gen_rtx_PRE_DEC (DImode,
14662 stack_pointer_rtx)),
14663 operand));
14664 break;
14665 default:
14666 abort ();
14668 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14670 else
14672 switch (mode)
14674 case DImode:
14676 rtx operands[2];
14677 split_di (&operand, 1, operands, operands + 1);
14678 emit_insn (
14679 gen_rtx_SET (VOIDmode,
14680 gen_rtx_MEM (SImode,
14681 gen_rtx_PRE_DEC (Pmode,
14682 stack_pointer_rtx)),
14683 operands[1]));
14684 emit_insn (
14685 gen_rtx_SET (VOIDmode,
14686 gen_rtx_MEM (SImode,
14687 gen_rtx_PRE_DEC (Pmode,
14688 stack_pointer_rtx)),
14689 operands[0]));
14691 break;
14692 case HImode:
14693 /* It is better to store HImodes as SImodes. */
14694 if (!TARGET_PARTIAL_REG_STALL)
14695 operand = gen_lowpart (SImode, operand);
14696 /* Fall through. */
14697 case SImode:
14698 emit_insn (
14699 gen_rtx_SET (VOIDmode,
14700 gen_rtx_MEM (GET_MODE (operand),
14701 gen_rtx_PRE_DEC (SImode,
14702 stack_pointer_rtx)),
14703 operand));
14704 break;
14705 default:
14706 abort ();
14708 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14710 return result;
14713 /* Free the operand from memory. */
14714 void
14715 ix86_free_from_memory (enum machine_mode mode)
14717 if (!TARGET_RED_ZONE)
14719 int size;
14721 if (mode == DImode || TARGET_64BIT)
14722 size = 8;
14723 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14724 size = 2;
14725 else
14726 size = 4;
14727 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14728 to a pop or add instruction if registers are available. */
14729 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14730 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14731 GEN_INT (size))));
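/* A minimal usage sketch (hypothetical caller, not taken from the sources):
   ix86_force_to_memory and ix86_free_from_memory are intended to bracket a
   region that needs OPERAND in memory after reload:

       rtx mem = ix86_force_to_memory (DImode, operand);
       ... emit insns that reference the value through MEM ...
       ix86_free_from_memory (DImode);

   With a red zone the value is simply stored below the stack pointer and
   nothing has to be released; otherwise the value is pushed and the matching
   ix86_free_from_memory call frees the slot with the LEA described above.  */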
14735 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14736 QImode must go into class Q_REGS.
14737 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14738 movdf to do mem-to-mem moves through integer regs. */
14739 enum reg_class
14740 ix86_preferred_reload_class (rtx x, enum reg_class class)
14742 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14743 return NO_REGS;
14744 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14746 /* SSE can't load any constant directly yet. */
14747 if (SSE_CLASS_P (class))
14748 return NO_REGS;
14749 /* Floats can load 0 and 1. */
14750 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14752 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14753 if (MAYBE_SSE_CLASS_P (class))
14754 return (reg_class_subset_p (class, GENERAL_REGS)
14755 ? GENERAL_REGS : FLOAT_REGS);
14756 else
14757 return class;
14759 /* General regs can load everything. */
14760 if (reg_class_subset_p (class, GENERAL_REGS))
14761 return GENERAL_REGS;
14762 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14763 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14764 return NO_REGS;
14766 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14767 return NO_REGS;
14768 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14769 return Q_REGS;
14770 return class;
14773 /* If we are copying between general and FP registers, we need a memory
14774 location. The same is true for SSE and MMX registers.
14776 The macro can't work reliably when one of the CLASSES is a class containing
14777 registers from multiple units (SSE, MMX, integer). We avoid this by never
14778 combining those units in a single alternative in the machine description.
14779 Ensure that this constraint holds to avoid unexpected surprises.
14781 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14782 enforce these sanity checks. */
14783 int
14784 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14785 enum machine_mode mode, int strict)
14787 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14788 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14789 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14790 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14791 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14792 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14794 if (strict)
14795 abort ();
14796 else
14797 return 1;
14799 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14800 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14801 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14802 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14803 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
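/* An illustrative consequence of the test above (example values, not from the
   sources): copying a DImode value between an SSE register and a general
   register on a 32-bit target, or on 64-bit when inter-unit moves are
   disabled and we are not optimizing for size, reports that secondary memory
   is needed:

       if (ix86_secondary_memory_needed (SSE_REGS, GENERAL_REGS, DImode, 0))
         ... reload spills the value through a stack slot instead of trying
             a direct cross-unit register move ...                          */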
14805 /* Return the cost of moving data from a register in class CLASS1 to
14806 one in class CLASS2.
14808 It is not required that the cost always equal 2 when FROM is the same as TO;
14809 on some machines it is expensive to move between registers if they are not
14810 general registers. */
14811 int
14812 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14813 enum reg_class class2)
14815 /* In case we require secondary memory, compute cost of the store followed
14816 by load. In order to avoid bad register allocation choices, we need
14817 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14819 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14821 int cost = 1;
14823 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14824 MEMORY_MOVE_COST (mode, class1, 1));
14825 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14826 MEMORY_MOVE_COST (mode, class2, 1));
14828 /* In the case of copying from a general purpose register we may emit multiple
14829 stores followed by a single load, causing a memory size mismatch stall.
14830 Count this as an arbitrarily high cost of 20. */
14831 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14832 cost += 20;
14834 /* In the case of FP/MMX moves, the registers actually overlap, and we
14835 have to switch modes in order to treat them differently. */
14836 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14837 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14838 cost += 20;
14840 return cost;
14843 /* Moves between SSE/MMX and integer unit are expensive. */
14844 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14845 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14846 return ix86_cost->mmxsse_to_integer;
14847 if (MAYBE_FLOAT_CLASS_P (class1))
14848 return ix86_cost->fp_move;
14849 if (MAYBE_SSE_CLASS_P (class1))
14850 return ix86_cost->sse_move;
14851 if (MAYBE_MMX_CLASS_P (class1))
14852 return ix86_cost->mmx_move;
14853 return 2;
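/* A rough worked example of the secondary-memory branch above (the concrete
   numbers depend on the active cost table): the returned cost is

       1 + MAX (store, load) for CLASS1 + MAX (store, load) for CLASS2
       + 20 if CLASS1 needs more hard registers than CLASS2 (several stores
             feeding one wide load)
       + 20 for FP/MMX pairs, which overlap and require a mode switch,

   which keeps the register-to-register estimate at least as expensive as the
   corresponding MEMORY_MOVE_COST, as required by the comment above.  */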
14856 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14857 int
14858 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14860 /* Flags, and only flags, can hold CCmode values. */
14861 if (CC_REGNO_P (regno))
14862 return GET_MODE_CLASS (mode) == MODE_CC;
14863 if (GET_MODE_CLASS (mode) == MODE_CC
14864 || GET_MODE_CLASS (mode) == MODE_RANDOM
14865 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14866 return 0;
14867 if (FP_REGNO_P (regno))
14868 return VALID_FP_MODE_P (mode);
14869 if (SSE_REGNO_P (regno))
14870 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14871 if (MMX_REGNO_P (regno))
14872 return (TARGET_MMX
14873 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14874 /* We handle both integers and floats in the general purpose registers.
14875 In the future we should be able to handle vector modes as well. */
14876 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14877 return 0;
14878 /* Take care with QImode values - they can be in non-QI regs, but then
14879 they do cause partial register stalls. */
14880 if (regno < 4 || mode != QImode || TARGET_64BIT)
14881 return 1;
14882 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14885 /* Return the cost of moving data of mode M between a
14886 register and memory. A value of 2 is the default; this cost is
14887 relative to those in `REGISTER_MOVE_COST'.
14889 If moving between registers and memory is more expensive than
14890 between two registers, you should define this macro to express the
14891 relative cost.
14893 Also model the increased cost of moving QImode registers in
14894 non Q_REGS classes. */
14896 int
14897 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14899 if (FLOAT_CLASS_P (class))
14901 int index;
14902 switch (mode)
14904 case SFmode:
14905 index = 0;
14906 break;
14907 case DFmode:
14908 index = 1;
14909 break;
14910 case XFmode:
14911 index = 2;
14912 break;
14913 default:
14914 return 100;
14916 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14918 if (SSE_CLASS_P (class))
14920 int index;
14921 switch (GET_MODE_SIZE (mode))
14923 case 4:
14924 index = 0;
14925 break;
14926 case 8:
14927 index = 1;
14928 break;
14929 case 16:
14930 index = 2;
14931 break;
14932 default:
14933 return 100;
14935 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14937 if (MMX_CLASS_P (class))
14939 int index;
14940 switch (GET_MODE_SIZE (mode))
14942 case 4:
14943 index = 0;
14944 break;
14945 case 8:
14946 index = 1;
14947 break;
14948 default:
14949 return 100;
14951 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14953 switch (GET_MODE_SIZE (mode))
14955 case 1:
14956 if (in)
14957 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14958 : ix86_cost->movzbl_load);
14959 else
14960 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14961 : ix86_cost->int_store[0] + 4);
14962 break;
14963 case 2:
14964 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14965 default:
14966 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14967 if (mode == TFmode)
14968 mode = XFmode;
14969 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14970 * (((int) GET_MODE_SIZE (mode)
14971 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
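/* An illustrative evaluation of the integer fall-through case above: on a
   32-bit target (UNITS_PER_WORD == 4) a DImode value is charged one
   int_load/int_store per 32-bit word, i.e.

       ix86_cost->int_load[2] * ((8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
         == ix86_cost->int_load[2] * 2

   and a TFmode value is first treated as XFmode before the word count is
   taken.  */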
14975 /* Compute a (partial) cost for rtx X. Return true if the complete
14976 cost has been computed, and false if subexpressions should be
14977 scanned. In either case, *TOTAL contains the cost result. */
14979 static bool
14980 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14982 enum machine_mode mode = GET_MODE (x);
14984 switch (code)
14986 case CONST_INT:
14987 case CONST:
14988 case LABEL_REF:
14989 case SYMBOL_REF:
14990 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14991 *total = 3;
14992 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14993 *total = 2;
14994 else if (flag_pic && SYMBOLIC_CONST (x)
14995 && (!TARGET_64BIT
14996 || (GET_CODE (x) != LABEL_REF
14997 && (GET_CODE (x) != SYMBOL_REF
14998 || !SYMBOL_REF_LOCAL_P (x)))))
14999 *total = 1;
15000 else
15001 *total = 0;
15002 return true;
15004 case CONST_DOUBLE:
15005 if (mode == VOIDmode)
15006 *total = 0;
15007 else
15008 switch (standard_80387_constant_p (x))
15010 case 1: /* 0.0 */
15011 *total = 1;
15012 break;
15013 default: /* Other constants */
15014 *total = 2;
15015 break;
15016 case 0:
15017 case -1:
15018 /* Start with (MEM (SYMBOL_REF)), since that's where
15019 it'll probably end up. Add a penalty for size. */
15020 *total = (COSTS_N_INSNS (1)
15021 + (flag_pic != 0 && !TARGET_64BIT)
15022 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15023 break;
15025 return true;
15027 case ZERO_EXTEND:
15028 /* The zero extension is often completely free on x86_64, so make
15029 it as cheap as possible. */
15030 if (TARGET_64BIT && mode == DImode
15031 && GET_MODE (XEXP (x, 0)) == SImode)
15032 *total = 1;
15033 else if (TARGET_ZERO_EXTEND_WITH_AND)
15034 *total = COSTS_N_INSNS (ix86_cost->add);
15035 else
15036 *total = COSTS_N_INSNS (ix86_cost->movzx);
15037 return false;
15039 case SIGN_EXTEND:
15040 *total = COSTS_N_INSNS (ix86_cost->movsx);
15041 return false;
15043 case ASHIFT:
15044 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15045 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15047 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15048 if (value == 1)
15050 *total = COSTS_N_INSNS (ix86_cost->add);
15051 return false;
15053 if ((value == 2 || value == 3)
15054 && !TARGET_DECOMPOSE_LEA
15055 && ix86_cost->lea <= ix86_cost->shift_const)
15057 *total = COSTS_N_INSNS (ix86_cost->lea);
15058 return false;
15061 /* Fall through. */
15063 case ROTATE:
15064 case ASHIFTRT:
15065 case LSHIFTRT:
15066 case ROTATERT:
15067 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15069 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15071 if (INTVAL (XEXP (x, 1)) > 32)
15072 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15073 else
15074 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15076 else
15078 if (GET_CODE (XEXP (x, 1)) == AND)
15079 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15080 else
15081 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15084 else
15086 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15087 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15088 else
15089 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15091 return false;
15093 case MULT:
15094 if (FLOAT_MODE_P (mode))
15095 *total = COSTS_N_INSNS (ix86_cost->fmul);
15096 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15098 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15099 int nbits;
15101 for (nbits = 0; value != 0; value >>= 1)
15102 nbits++;
15104 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15105 + nbits * ix86_cost->mult_bit);
15107 else
15109 /* This is arbitrary */
15110 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15111 + 7 * ix86_cost->mult_bit);
15113 return false;
15115 case DIV:
15116 case UDIV:
15117 case MOD:
15118 case UMOD:
15119 if (FLOAT_MODE_P (mode))
15120 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15121 else
15122 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15123 return false;
15125 case PLUS:
15126 if (FLOAT_MODE_P (mode))
15127 *total = COSTS_N_INSNS (ix86_cost->fadd);
15128 else if (!TARGET_DECOMPOSE_LEA
15129 && GET_MODE_CLASS (mode) == MODE_INT
15130 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15132 if (GET_CODE (XEXP (x, 0)) == PLUS
15133 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15134 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15135 && CONSTANT_P (XEXP (x, 1)))
15137 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15138 if (val == 2 || val == 4 || val == 8)
15140 *total = COSTS_N_INSNS (ix86_cost->lea);
15141 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15142 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15143 outer_code);
15144 *total += rtx_cost (XEXP (x, 1), outer_code);
15145 return true;
15148 else if (GET_CODE (XEXP (x, 0)) == MULT
15149 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15151 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15152 if (val == 2 || val == 4 || val == 8)
15154 *total = COSTS_N_INSNS (ix86_cost->lea);
15155 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15156 *total += rtx_cost (XEXP (x, 1), outer_code);
15157 return true;
15160 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15162 *total = COSTS_N_INSNS (ix86_cost->lea);
15163 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15164 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15165 *total += rtx_cost (XEXP (x, 1), outer_code);
15166 return true;
15169 /* Fall through. */
15171 case MINUS:
15172 if (FLOAT_MODE_P (mode))
15174 *total = COSTS_N_INSNS (ix86_cost->fadd);
15175 return false;
15177 /* Fall through. */
15179 case AND:
15180 case IOR:
15181 case XOR:
15182 if (!TARGET_64BIT && mode == DImode)
15184 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15185 + (rtx_cost (XEXP (x, 0), outer_code)
15186 << (GET_MODE (XEXP (x, 0)) != DImode))
15187 + (rtx_cost (XEXP (x, 1), outer_code)
15188 << (GET_MODE (XEXP (x, 1)) != DImode)));
15189 return true;
15191 /* Fall through. */
15193 case NEG:
15194 if (FLOAT_MODE_P (mode))
15196 *total = COSTS_N_INSNS (ix86_cost->fchs);
15197 return false;
15199 /* Fall through. */
15201 case NOT:
15202 if (!TARGET_64BIT && mode == DImode)
15203 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15204 else
15205 *total = COSTS_N_INSNS (ix86_cost->add);
15206 return false;
15208 case FLOAT_EXTEND:
15209 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15210 *total = 0;
15211 return false;
15213 case ABS:
15214 if (FLOAT_MODE_P (mode))
15215 *total = COSTS_N_INSNS (ix86_cost->fabs);
15216 return false;
15218 case SQRT:
15219 if (FLOAT_MODE_P (mode))
15220 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15221 return false;
15223 case UNSPEC:
15224 if (XINT (x, 1) == UNSPEC_TP)
15225 *total = 0;
15226 return false;
15228 default:
15229 return false;
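/* An illustrative reading of the MULT case above: NBITS is the bit length of
   the constant multiplier (the loop shifts until the value becomes zero), so
   a hypothetical SImode multiplication by 10 (binary 1010, four bits) is
   costed as

       COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (SImode)]
                      + 4 * ix86_cost->mult_bit)

   while a multiplication by a non-constant falls back to the fixed estimate
   of 7 bits.  */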
15233 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15234 static void
15235 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15237 init_section ();
15238 fputs ("\tpushl $", asm_out_file);
15239 assemble_name (asm_out_file, XSTR (symbol, 0));
15240 fputc ('\n', asm_out_file);
15242 #endif
15244 #if TARGET_MACHO
15246 static int current_machopic_label_num;
15248 /* Given a symbol name and its associated stub, write out the
15249 definition of the stub. */
15251 void
15252 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15254 unsigned int length;
15255 char *binder_name, *symbol_name, lazy_ptr_name[32];
15256 int label = ++current_machopic_label_num;
15258 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15259 symb = (*targetm.strip_name_encoding) (symb);
15261 length = strlen (stub);
15262 binder_name = alloca (length + 32);
15263 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15265 length = strlen (symb);
15266 symbol_name = alloca (length + 32);
15267 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15269 sprintf (lazy_ptr_name, "L%d$lz", label);
15271 if (MACHOPIC_PURE)
15272 machopic_picsymbol_stub_section ();
15273 else
15274 machopic_symbol_stub_section ();
15276 fprintf (file, "%s:\n", stub);
15277 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15279 if (MACHOPIC_PURE)
15281 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15282 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15283 fprintf (file, "\tjmp %%edx\n");
15285 else
15286 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15288 fprintf (file, "%s:\n", binder_name);
15290 if (MACHOPIC_PURE)
15292 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15293 fprintf (file, "\tpushl %%eax\n");
15295 else
15296 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15298 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15300 machopic_lazy_symbol_ptr_section ();
15301 fprintf (file, "%s:\n", lazy_ptr_name);
15302 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15303 fprintf (file, "\t.long %s\n", binder_name);
15305 #endif /* TARGET_MACHO */
15307 /* Order the registers for register allocator. */
15309 void
15310 x86_order_regs_for_local_alloc (void)
15312 int pos = 0;
15313 int i;
15315 /* First allocate the local general purpose registers. */
15316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15317 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15318 reg_alloc_order [pos++] = i;
15320 /* Global general purpose registers. */
15321 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15322 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15323 reg_alloc_order [pos++] = i;
15325 /* x87 registers come first in case we are doing FP math
15326 using them. */
15327 if (!TARGET_SSE_MATH)
15328 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15329 reg_alloc_order [pos++] = i;
15331 /* SSE registers. */
15332 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15333 reg_alloc_order [pos++] = i;
15334 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15335 reg_alloc_order [pos++] = i;
15337 /* x87 registers. */
15338 if (TARGET_SSE_MATH)
15339 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15340 reg_alloc_order [pos++] = i;
15342 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15343 reg_alloc_order [pos++] = i;
15345 /* Initialize the rest of the array, as we do not allocate some registers
15346 at all. */
15347 while (pos < FIRST_PSEUDO_REGISTER)
15348 reg_alloc_order [pos++] = 0;
15351 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15352 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15353 #endif
15355 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15356 struct attribute_spec.handler. */
15357 static tree
15358 ix86_handle_struct_attribute (tree *node, tree name,
15359 tree args ATTRIBUTE_UNUSED,
15360 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15362 tree *type = NULL;
15363 if (DECL_P (*node))
15365 if (TREE_CODE (*node) == TYPE_DECL)
15366 type = &TREE_TYPE (*node);
15368 else
15369 type = node;
15371 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15372 || TREE_CODE (*type) == UNION_TYPE)))
15374 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15375 *no_add_attrs = true;
15378 else if ((is_attribute_p ("ms_struct", name)
15379 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15380 || ((is_attribute_p ("gcc_struct", name)
15381 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15383 warning ("`%s' incompatible attribute ignored",
15384 IDENTIFIER_POINTER (name));
15385 *no_add_attrs = true;
15388 return NULL_TREE;
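/* An illustrative use of the attributes handled above (hypothetical source
   code): they may only appear on struct or union types, and the two
   attributes exclude each other on the same type:

       struct __attribute__ ((ms_struct)) s1 { int a : 3; char b; };
       struct __attribute__ ((gcc_struct)) s2 { int a : 3; char b; };   */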
15391 static bool
15392 ix86_ms_bitfield_layout_p (tree record_type)
15394 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15395 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15396 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15399 /* Returns an expression indicating where the this parameter is
15400 located on entry to the FUNCTION. */
15402 static rtx
15403 x86_this_parameter (tree function)
15405 tree type = TREE_TYPE (function);
15407 if (TARGET_64BIT)
15409 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15410 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15413 if (ix86_function_regparm (type, function) > 0)
15415 tree parm;
15417 parm = TYPE_ARG_TYPES (type);
15418 /* Figure out whether or not the function has a variable number of
15419 arguments. */
15420 for (; parm; parm = TREE_CHAIN (parm))
15421 if (TREE_VALUE (parm) == void_type_node)
15422 break;
15423 /* If not, the this parameter is in the first argument. */
15424 if (parm)
15426 int regno = 0;
15427 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15428 regno = 2;
15429 return gen_rtx_REG (SImode, regno);
15433 if (aggregate_value_p (TREE_TYPE (type), type))
15434 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15435 else
15436 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
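/* A summary sketch of where the code above finds `this' on entry (register
   names given for illustration):

       64-bit:                 %rdi, or %rsi when a hidden aggregate-return
                               pointer occupies the first register
       32-bit, regparm > 0:    %eax, or %ecx for fastcall, provided the
                               function is not variadic
       32-bit, stack-passed:   4(%esp), or 8(%esp) when the aggregate-return
                               pointer sits at 4(%esp)                       */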
15439 /* Determine whether x86_output_mi_thunk can succeed. */
15441 static bool
15442 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15443 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15444 HOST_WIDE_INT vcall_offset, tree function)
15446 /* 64-bit can handle anything. */
15447 if (TARGET_64BIT)
15448 return true;
15450 /* For 32-bit, everything's fine if we have one free register. */
15451 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15452 return true;
15454 /* Need a free register for vcall_offset. */
15455 if (vcall_offset)
15456 return false;
15458 /* Need a free register for GOT references. */
15459 if (flag_pic && !(*targetm.binds_local_p) (function))
15460 return false;
15462 /* Otherwise ok. */
15463 return true;
15466 /* Output the assembler code for a thunk function. THUNK_DECL is the
15467 declaration for the thunk function itself, FUNCTION is the decl for
15468 the target function. DELTA is an immediate constant offset to be
15469 added to THIS. If VCALL_OFFSET is nonzero, the word at
15470 *(*this + vcall_offset) should be added to THIS. */
15472 static void
15473 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15474 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15475 HOST_WIDE_INT vcall_offset, tree function)
15477 rtx xops[3];
15478 rtx this = x86_this_parameter (function);
15479 rtx this_reg, tmp;
15481 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15482 pull it in now and let DELTA benefit. */
15483 if (REG_P (this))
15484 this_reg = this;
15485 else if (vcall_offset)
15487 /* Put the this parameter into %eax. */
15488 xops[0] = this;
15489 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15490 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15492 else
15493 this_reg = NULL_RTX;
15495 /* Adjust the this parameter by a fixed constant. */
15496 if (delta)
15498 xops[0] = GEN_INT (delta);
15499 xops[1] = this_reg ? this_reg : this;
15500 if (TARGET_64BIT)
15502 if (!x86_64_general_operand (xops[0], DImode))
15504 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15505 xops[1] = tmp;
15506 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15507 xops[0] = tmp;
15508 xops[1] = this;
15510 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15512 else
15513 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15516 /* Adjust the this parameter by a value stored in the vtable. */
15517 if (vcall_offset)
15519 if (TARGET_64BIT)
15520 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15521 else
15523 int tmp_regno = 2 /* ECX */;
15524 if (lookup_attribute ("fastcall",
15525 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15526 tmp_regno = 0 /* EAX */;
15527 tmp = gen_rtx_REG (SImode, tmp_regno);
15530 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15531 xops[1] = tmp;
15532 if (TARGET_64BIT)
15533 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15534 else
15535 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15537 /* Adjust the this parameter. */
15538 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15539 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15541 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15542 xops[0] = GEN_INT (vcall_offset);
15543 xops[1] = tmp2;
15544 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15545 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15547 xops[1] = this_reg;
15548 if (TARGET_64BIT)
15549 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15550 else
15551 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15554 /* If necessary, drop THIS back to its stack slot. */
15555 if (this_reg && this_reg != this)
15557 xops[0] = this_reg;
15558 xops[1] = this;
15559 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15562 xops[0] = XEXP (DECL_RTL (function), 0);
15563 if (TARGET_64BIT)
15565 if (!flag_pic || (*targetm.binds_local_p) (function))
15566 output_asm_insn ("jmp\t%P0", xops);
15567 else
15569 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15570 tmp = gen_rtx_CONST (Pmode, tmp);
15571 tmp = gen_rtx_MEM (QImode, tmp);
15572 xops[0] = tmp;
15573 output_asm_insn ("jmp\t%A0", xops);
15576 else
15578 if (!flag_pic || (*targetm.binds_local_p) (function))
15579 output_asm_insn ("jmp\t%P0", xops);
15580 else
15581 #if TARGET_MACHO
15582 if (TARGET_MACHO)
15584 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15585 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15586 tmp = gen_rtx_MEM (QImode, tmp);
15587 xops[0] = tmp;
15588 output_asm_insn ("jmp\t%0", xops);
15590 else
15591 #endif /* TARGET_MACHO */
15593 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15594 output_set_got (tmp);
15596 xops[1] = tmp;
15597 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15598 output_asm_insn ("jmp\t{*}%1", xops);
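/* A C-level sketch of what the emitted thunk computes (DELTA, VCALL_OFFSET
   and FUNCTION stand for the arguments of this function; not literal source):

       adjusted = (char *) this + DELTA;
       if (VCALL_OFFSET)
         adjusted += *(ptrdiff_t *) (*(char **) adjusted + VCALL_OFFSET);
       return FUNCTION (adjusted, remaining arguments unchanged);

   i.e. a constant displacement, an optional displacement fetched through the
   vtable pointer, and finally a tail jump to the real function.  */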
15603 static void
15604 x86_file_start (void)
15606 default_file_start ();
15607 if (X86_FILE_START_VERSION_DIRECTIVE)
15608 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15609 if (X86_FILE_START_FLTUSED)
15610 fputs ("\t.global\t__fltused\n", asm_out_file);
15611 if (ix86_asm_dialect == ASM_INTEL)
15612 fputs ("\t.intel_syntax\n", asm_out_file);
15615 int
15616 x86_field_alignment (tree field, int computed)
15618 enum machine_mode mode;
15619 tree type = TREE_TYPE (field);
15621 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15622 return computed;
15623 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15624 ? get_inner_array_type (type) : type);
15625 if (mode == DFmode || mode == DCmode
15626 || GET_MODE_CLASS (mode) == MODE_INT
15627 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15628 return MIN (32, computed);
15629 return computed;
15632 /* Output assembler code to FILE to increment profiler label # LABELNO
15633 for profiling a function entry. */
15634 void
15635 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15637 if (TARGET_64BIT)
15638 if (flag_pic)
15640 #ifndef NO_PROFILE_COUNTERS
15641 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15642 #endif
15643 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15645 else
15647 #ifndef NO_PROFILE_COUNTERS
15648 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15649 #endif
15650 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15652 else if (flag_pic)
15654 #ifndef NO_PROFILE_COUNTERS
15655 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15656 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15657 #endif
15658 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15660 else
15662 #ifndef NO_PROFILE_COUNTERS
15663 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15664 PROFILE_COUNT_REGISTER);
15665 #endif
15666 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15670 /* We don't have exact information about the insn sizes, but we may assume
15671 quite safely that we are informed about all 1 byte insns and memory
15672 address sizes. This is enough to eliminate unnecessary padding in
15673 99% of cases. */
15675 static int
15676 min_insn_size (rtx insn)
15678 int l = 0;
15680 if (!INSN_P (insn) || !active_insn_p (insn))
15681 return 0;
15683 /* Discard alignments we've emitted and jump instructions. */
15684 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15685 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15686 return 0;
15687 if (GET_CODE (insn) == JUMP_INSN
15688 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15689 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15690 return 0;
15692 /* Important case - calls are always 5 bytes.
15693 It is common to have many calls in a row.
15694 if (GET_CODE (insn) == CALL_INSN
15695 && symbolic_reference_mentioned_p (PATTERN (insn))
15696 && !SIBLING_CALL_P (insn))
15697 return 5;
15698 if (get_attr_length (insn) <= 1)
15699 return 1;
15701 /* For normal instructions we may rely on the sizes of addresses
15702 and the presence of a symbol to require 4 bytes of encoding.
15703 This is not the case for jumps where references are PC relative. */
15704 if (GET_CODE (insn) != JUMP_INSN)
15706 l = get_attr_length_address (insn);
15707 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15708 l = 4;
15710 if (l)
15711 return 1+l;
15712 else
15713 return 2;
15716 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
15717 window. */
15719 static void
15720 k8_avoid_jump_misspredicts (void)
15722 rtx insn, start = get_insns ();
15723 int nbytes = 0, njumps = 0;
15724 int isjump = 0;
15726 /* Look for all minimal intervals of instructions containing 4 jumps.
15727 The intervals are bounded by START and INSN. NBYTES is the total
15728 size of instructions in the interval including INSN and not including
15729 START. When NBYTES is smaller than 16 bytes, it is possible
15730 that the ends of START and INSN land in the same 16 byte window.
15732 The smallest offset in the window at which INSN can start occurs when START
15733 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15734 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
15736 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15739 nbytes += min_insn_size (insn);
15740 if (rtl_dump_file)
15741 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15742 INSN_UID (insn), min_insn_size (insn));
15743 if ((GET_CODE (insn) == JUMP_INSN
15744 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15745 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15746 || GET_CODE (insn) == CALL_INSN)
15747 njumps++;
15748 else
15749 continue;
15751 while (njumps > 3)
15753 start = NEXT_INSN (start);
15754 if ((GET_CODE (start) == JUMP_INSN
15755 && GET_CODE (PATTERN (start)) != ADDR_VEC
15756 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15757 || GET_CODE (start) == CALL_INSN)
15758 njumps--, isjump = 1;
15759 else
15760 isjump = 0;
15761 nbytes -= min_insn_size (start);
15763 if (njumps < 0)
15764 abort ();
15765 if (rtl_dump_file)
15766 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15767 INSN_UID (start), INSN_UID (insn), nbytes);
15769 if (njumps == 3 && isjump && nbytes < 16)
15771 int padsize = 15 - nbytes + min_insn_size (insn);
15773 if (rtl_dump_file)
15774 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15775 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15780 /* Implement machine specific optimizations.
15781 At the moment we implement a single transformation: AMD Athlon works faster
15782 when RET is not the destination of a conditional jump or directly preceded
15783 by another jump instruction. We avoid the penalty by inserting a NOP just
15784 before the RET instructions in such cases. */
15785 static void
15786 ix86_reorg (void)
15788 edge e;
15790 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15791 return;
15792 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15794 basic_block bb = e->src;
15795 rtx ret = BB_END (bb);
15796 rtx prev;
15797 bool replace = false;
15799 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15800 || !maybe_hot_bb_p (bb))
15801 continue;
15802 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15803 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15804 break;
15805 if (prev && GET_CODE (prev) == CODE_LABEL)
15807 edge e;
15808 for (e = bb->pred; e; e = e->pred_next)
15809 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15810 && !(e->flags & EDGE_FALLTHRU))
15811 replace = true;
15813 if (!replace)
15815 prev = prev_active_insn (ret);
15816 if (prev
15817 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15818 || GET_CODE (prev) == CALL_INSN))
15819 replace = true;
15820 /* Empty functions get a branch mispredict even when the jump destination
15821 is not visible to us. */
15822 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15823 replace = true;
15825 if (replace)
15827 emit_insn_before (gen_return_internal_long (), ret);
15828 delete_insn (ret);
15831 k8_avoid_jump_misspredicts ();
15834 /* Return nonzero when a QImode register that must be represented via a REX prefix
15835 is used. */
15836 bool
15837 x86_extended_QIreg_mentioned_p (rtx insn)
15839 int i;
15840 extract_insn_cached (insn);
15841 for (i = 0; i < recog_data.n_operands; i++)
15842 if (REG_P (recog_data.operand[i])
15843 && REGNO (recog_data.operand[i]) >= 4)
15844 return true;
15845 return false;
15848 /* Return nonzero when P points to a register encoded via a REX prefix.
15849 Called via for_each_rtx. */
15850 static int
15851 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15853 unsigned int regno;
15854 if (!REG_P (*p))
15855 return 0;
15856 regno = REGNO (*p);
15857 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15860 /* Return true when INSN mentions a register that must be encoded using a REX
15861 prefix. */
15862 bool
15863 x86_extended_reg_mentioned_p (rtx insn)
15865 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15868 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15869 optabs would emit if we didn't have TFmode patterns. */
15871 void
15872 x86_emit_floatuns (rtx operands[2])
15874 rtx neglab, donelab, i0, i1, f0, in, out;
15875 enum machine_mode mode, inmode;
15877 inmode = GET_MODE (operands[1]);
15878 if (inmode != SImode
15879 && inmode != DImode)
15880 abort ();
15882 out = operands[0];
15883 in = force_reg (inmode, operands[1]);
15884 mode = GET_MODE (out);
15885 neglab = gen_label_rtx ();
15886 donelab = gen_label_rtx ();
15887 i1 = gen_reg_rtx (Pmode);
15888 f0 = gen_reg_rtx (mode);
15890 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15892 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15893 emit_jump_insn (gen_jump (donelab));
15894 emit_barrier ();
15896 emit_label (neglab);
15898 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15899 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15900 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15901 expand_float (f0, i0, 0);
15902 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15904 emit_label (donelab);
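/* A scalar C model of the sequence emitted above, shown for a DImode input
   converted to double (illustrative only).  A negative-as-signed input is
   halved, with the discarded low bit folded back in so rounding is
   unaffected, converted as a signed value and then doubled:

       double
       floatuns_model (unsigned long long x)
       {
         if ((long long) x >= 0)
           return (double) (long long) x;
         unsigned long long half = (x >> 1) | (x & 1);
         double d = (double) (long long) half;
         return d + d;
       }
*/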
15907 /* Return true if we do not know how to pass TYPE solely in registers. */
15908 bool
15909 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15911 if (default_must_pass_in_stack (mode, type))
15912 return true;
15913 return (!TARGET_64BIT && type && mode == TImode);
15916 /* Initialize vector TARGET via VALS. */
15917 void
15918 ix86_expand_vector_init (rtx target, rtx vals)
15920 enum machine_mode mode = GET_MODE (target);
15921 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15922 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15923 int i;
15925 for (i = n_elts - 1; i >= 0; i--)
15926 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15927 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15928 break;
15930 /* A few special cases first...
15931 ... constants are best loaded from the constant pool. */
15932 if (i < 0)
15934 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15935 return;
15938 /* ... values where only the first field is non-constant are best loaded
15939 from the pool and overwritten via a move later. */
15940 if (!i)
15942 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15943 GET_MODE_INNER (mode), 0);
15945 op = force_reg (mode, op);
15946 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15947 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15948 switch (GET_MODE (target))
15950 case V2DFmode:
15951 emit_insn (gen_sse2_movsd (target, target, op));
15952 break;
15953 case V4SFmode:
15954 emit_insn (gen_sse_movss (target, target, op));
15955 break;
15956 default:
15957 break;
15959 return;
15962 /* And the busy sequence doing rotations. */
15963 switch (GET_MODE (target))
15965 case V2DFmode:
15967 rtx vecop0 =
15968 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15969 rtx vecop1 =
15970 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15972 vecop0 = force_reg (V2DFmode, vecop0);
15973 vecop1 = force_reg (V2DFmode, vecop1);
15974 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15976 break;
15977 case V4SFmode:
15979 rtx vecop0 =
15980 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15981 rtx vecop1 =
15982 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15983 rtx vecop2 =
15984 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15985 rtx vecop3 =
15986 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15987 rtx tmp1 = gen_reg_rtx (V4SFmode);
15988 rtx tmp2 = gen_reg_rtx (V4SFmode);
15990 vecop0 = force_reg (V4SFmode, vecop0);
15991 vecop1 = force_reg (V4SFmode, vecop1);
15992 vecop2 = force_reg (V4SFmode, vecop2);
15993 vecop3 = force_reg (V4SFmode, vecop3);
15994 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15995 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15996 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15998 break;
15999 default:
16000 abort ();
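/* An illustrative trace of the V4SFmode sequence above, assuming unpcklps
   interleaves the low two elements of its operands
   ({p0,p1,_,_}, {q0,q1,_,_} -> {p0,q0,p1,q1}).  For vals = {a, b, c, d},
   with x_vec denoting scalar x placed in element 0 of a V4SF register:

       tmp1   = unpcklps (b_vec, d_vec)   ->  { b, d, _, _ }
       tmp2   = unpcklps (a_vec, c_vec)   ->  { a, c, _, _ }
       target = unpcklps (tmp2,  tmp1)    ->  { a, b, c, d }              */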
16004 #include "gt-i386.h"