1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
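/* Added note (a sketch, not part of the original file): the five-entry
   multiply and divide cost arrays below are indexed via MODE_INDEX, so a
   multiply cost lookup would read roughly

     cost = ix86_cost->mult_init[MODE_INDEX (mode)]
            + nbits * ix86_cost->mult_bit;

   assuming the mult_init/mult_bit field names from the processor_costs
   definition in i386.h.  */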
62 /* Processor costs (relative to an add) */
63 static const
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
98 1, /* Branch cost */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
120 3, /* MOVE_RATIO */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
143 1, /* Branch cost */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
152 static const
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
164 3, /* MOVE_RATIO */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
187 1, /* Branch cost */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
196 static const
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
208 6, /* MOVE_RATIO */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
231 2, /* Branch cost */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
240 static const
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
275 2, /* Branch cost */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
284 static const
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
296 4, /* MOVE_RATIO */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
319 1, /* Branch cost */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
328 static const
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
340 9, /* MOVE_RATIO */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
363 2, /* Branch cost */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
372 static const
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
416 static const
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where types and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just the lower part of
516 scalar values in the proper format, leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
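/* Added sketch: each of the bitmasks above is meant to be tested against
   the CPU we tune for, typically through wrappers in i386.h along the
   lines of

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   The wrapper names are assumptions here; the same pattern appears later
   in this file, e.g. x86_accumulate_outgoing_args & TUNEMASK and
   x86_arch_always_fancy_math_387 & (1 << ix86_arch).  */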
528 /* If the average insn count for a single function invocation is
529 lower than this constant, emit a fast (but longer) prologue and
530 epilogue. */
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
543 /* ax, dx, cx, bx */
544 AREG, DREG, CREG, BREG,
545 /* si, di, bp, sp */
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
547 /* FP registers */
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
550 /* arg pointer */
551 NON_Q_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
555 SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
557 MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
561 SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
644 numbers.
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
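/* Added worked example: with REGPARM_MAX == 6 integer registers of
   UNITS_PER_WORD == 8 bytes and SSE_REGPARM_MAX == 8 SSE registers of 16
   bytes each, this evaluates to 6*8 + 8*16 = 176 bytes, the size of the
   x86-64 varargs register save area.  */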
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
679 unsigned short mode;
680 unsigned short n;
681 rtx rtl;
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
688 [arguments]
689 <- ARG_POINTER
690 saved pc
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
694 [saved regs]
696 [padding1] \
698 [va_arg registers] (
699 > to_allocate <- FRAME_POINTER
700 [frame] (
702 [padding2] /
704 struct ix86_frame
706 int nregs;
707 int padding1;
708 int va_arg_size;
709 HOST_WIDE_INT frame;
710 int padding2;
711 int outgoing_arguments_size;
712 int red_zone_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
729 /* Parsed value. */
730 enum cmodel ix86_cmodel;
731 /* Asm dialect. */
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
734 /* TLS dialect. */
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* True if the SSE prefetch instruction is not a NOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
758 int ix86_regparm;
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
789 int, int, FILE *);
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
795 rtx *);
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
835 struct ix86_address
837 rtx base, index, disp;
838 HOST_WIDE_INT scale;
839 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
842 static int ix86_decompose_address (rtx, struct ix86_address *);
843 static int ix86_address_cost (rtx);
844 static bool ix86_cannot_force_const_mem (rtx);
845 static rtx ix86_delegitimize_address (rtx);
847 struct builtin_description;
848 static rtx ix86_expand_sse_comi (const struct builtin_description *,
849 tree, rtx);
850 static rtx ix86_expand_sse_compare (const struct builtin_description *,
851 tree, rtx);
852 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
853 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
854 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
855 static rtx ix86_expand_store_builtin (enum insn_code, tree);
856 static rtx safe_vector_operand (rtx, enum machine_mode);
857 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
858 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
859 enum rtx_code *, enum rtx_code *);
860 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
861 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
862 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
863 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
864 static int ix86_fp_comparison_cost (enum rtx_code code);
865 static unsigned int ix86_select_alt_pic_regnum (void);
866 static int ix86_save_reg (unsigned int, int);
867 static void ix86_compute_frame_layout (struct ix86_frame *);
868 static int ix86_comp_type_attributes (tree, tree);
869 static int ix86_fntype_regparm (tree);
870 const struct attribute_spec ix86_attribute_table[];
871 static bool ix86_function_ok_for_sibcall (tree, tree);
872 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
873 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
874 static int ix86_value_regno (enum machine_mode);
875 static bool contains_128bit_aligned_vector_p (tree);
876 static bool ix86_ms_bitfield_layout_p (tree);
877 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
878 static int extended_reg_mentioned_1 (rtx *, void *);
879 static bool ix86_rtx_costs (rtx, int, int, int *);
880 static int min_insn_size (rtx);
881 static void k8_avoid_jump_misspredicts (void);
883 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
884 static void ix86_svr3_asm_out_constructor (rtx, int);
885 #endif
887 /* Register class used for passing a given 64-bit part of the argument.
888 These represent classes as documented by the x86-64 psABI, with the exception
889 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
890 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
892 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
893 whenever possible (the upper half does contain padding).
895 enum x86_64_reg_class
897 X86_64_NO_CLASS,
898 X86_64_INTEGER_CLASS,
899 X86_64_INTEGERSI_CLASS,
900 X86_64_SSE_CLASS,
901 X86_64_SSESF_CLASS,
902 X86_64_SSEDF_CLASS,
903 X86_64_SSEUP_CLASS,
904 X86_64_X87_CLASS,
905 X86_64_X87UP_CLASS,
906 X86_64_MEMORY_CLASS
908 static const char * const x86_64_reg_class_name[] =
909 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
911 #define MAX_CLASSES 4
912 static int classify_argument (enum machine_mode, tree,
913 enum x86_64_reg_class [MAX_CLASSES], int);
914 static int examine_argument (enum machine_mode, tree, int, int *, int *);
915 static rtx construct_container (enum machine_mode, tree, int, int, int,
916 const int *, int);
917 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
918 enum x86_64_reg_class);
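/* Added classification sketch (not from the original file): under the
   scheme above, a 16-byte aggregate such as

     struct s { double d; int i; };

   has its first eightbyte classified as X86_64_SSEDF_CLASS (the double)
   and its second as X86_64_INTEGERSI_CLASS (the int plus padding), so it
   is passed in one SSE register and one general-purpose register.  */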
920 /* Table of constants used by fldpi, fldln2, etc... */
921 static REAL_VALUE_TYPE ext_80387_constants_table [5];
922 static bool ext_80387_constants_init = 0;
923 static void init_ext_80387_constants (void);
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
931 #endif
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
954 #ifdef ASM_QUAD
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
957 #endif
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
986 #ifdef HAVE_AS_TLS
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
989 #endif
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_ASM_FILE_START
1005 #define TARGET_ASM_FILE_START x86_file_start
1007 #undef TARGET_RTX_COSTS
1008 #define TARGET_RTX_COSTS ix86_rtx_costs
1009 #undef TARGET_ADDRESS_COST
1010 #define TARGET_ADDRESS_COST ix86_address_cost
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 struct gcc_target targetm = TARGET_INITIALIZER;
1017 /* The svr4 ABI for the i386 says that records and unions are returned
1018 in memory. */
1019 #ifndef DEFAULT_PCC_STRUCT_RETURN
1020 #define DEFAULT_PCC_STRUCT_RETURN 1
1021 #endif
1023 /* Sometimes certain combinations of command options do not make
1024 sense on a particular target machine. You can define a macro
1025 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1026 defined, is executed once just after all the command options have
1027 been parsed.
1029 Don't use this macro to turn on various extra optimizations for
1030 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1032 void
1033 override_options (void)
1035 int i;
1036 /* Comes from final.c -- no real reason to change it. */
1037 #define MAX_CODE_ALIGN 16
1039 static struct ptt
1041 const struct processor_costs *cost; /* Processor costs */
1042 const int target_enable; /* Target flags to enable. */
1043 const int target_disable; /* Target flags to disable. */
1044 const int align_loop; /* Default alignments. */
1045 const int align_loop_max_skip;
1046 const int align_jump;
1047 const int align_jump_max_skip;
1048 const int align_func;
1050 const processor_target_table[PROCESSOR_max] =
1052 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1053 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1054 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1055 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1056 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1057 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1058 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1059 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1062 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1063 static struct pta
1065 const char *const name; /* processor name or nickname. */
1066 const enum processor_type processor;
1067 const enum pta_flags
1069 PTA_SSE = 1,
1070 PTA_SSE2 = 2,
1071 PTA_MMX = 4,
1072 PTA_PREFETCH_SSE = 8,
1073 PTA_3DNOW = 16,
1074 PTA_3DNOW_A = 64,
1075 PTA_64BIT = 128
1076 } flags;
1078 const processor_alias_table[] =
1080 {"i386", PROCESSOR_I386, 0},
1081 {"i486", PROCESSOR_I486, 0},
1082 {"i586", PROCESSOR_PENTIUM, 0},
1083 {"pentium", PROCESSOR_PENTIUM, 0},
1084 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1085 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1086 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1087 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1089 {"i686", PROCESSOR_PENTIUMPRO, 0},
1090 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1092 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1093 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1094 PTA_MMX | PTA_PREFETCH_SSE},
1095 {"k6", PROCESSOR_K6, PTA_MMX},
1096 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1097 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1099 | PTA_3DNOW_A},
1100 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1101 | PTA_3DNOW | PTA_3DNOW_A},
1102 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1103 | PTA_3DNOW_A | PTA_SSE},
1104 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1109 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1112 int const pta_size = ARRAY_SIZE (processor_alias_table);
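/* Added example: given the table above,

     gcc -march=pentium3 ...

   selects PROCESSOR_PENTIUMPRO and, in the loop below, enables MASK_MMX
   and MASK_SSE unless those flags were given explicitly on the command
   line, and sets x86_prefetch_sse.  */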
1114 /* By default our XFmode is the 80-bit extended format. If we use
1115 TFmode instead, it's also the 80-bit format, but with padding. */
1116 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1117 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1119 /* Set the default values for switches whose default depends on TARGET_64BIT
1120 in case they weren't overwritten by command line options. */
1121 if (TARGET_64BIT)
1123 if (flag_omit_frame_pointer == 2)
1124 flag_omit_frame_pointer = 1;
1125 if (flag_asynchronous_unwind_tables == 2)
1126 flag_asynchronous_unwind_tables = 1;
1127 if (flag_pcc_struct_return == 2)
1128 flag_pcc_struct_return = 0;
1130 else
1132 if (flag_omit_frame_pointer == 2)
1133 flag_omit_frame_pointer = 0;
1134 if (flag_asynchronous_unwind_tables == 2)
1135 flag_asynchronous_unwind_tables = 0;
1136 if (flag_pcc_struct_return == 2)
1137 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1140 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1141 SUBTARGET_OVERRIDE_OPTIONS;
1142 #endif
1144 if (!ix86_tune_string && ix86_arch_string)
1145 ix86_tune_string = ix86_arch_string;
1146 if (!ix86_tune_string)
1147 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1148 if (!ix86_arch_string)
1149 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1151 if (ix86_cmodel_string != 0)
1153 if (!strcmp (ix86_cmodel_string, "small"))
1154 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1155 else if (flag_pic)
1156 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1157 else if (!strcmp (ix86_cmodel_string, "32"))
1158 ix86_cmodel = CM_32;
1159 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1160 ix86_cmodel = CM_KERNEL;
1161 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1162 ix86_cmodel = CM_MEDIUM;
1163 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1164 ix86_cmodel = CM_LARGE;
1165 else
1166 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1168 else
1170 ix86_cmodel = CM_32;
1171 if (TARGET_64BIT)
1172 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1174 if (ix86_asm_string != 0)
1176 if (!strcmp (ix86_asm_string, "intel"))
1177 ix86_asm_dialect = ASM_INTEL;
1178 else if (!strcmp (ix86_asm_string, "att"))
1179 ix86_asm_dialect = ASM_ATT;
1180 else
1181 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1183 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1184 error ("code model `%s' not supported in the %s bit mode",
1185 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1186 if (ix86_cmodel == CM_LARGE)
1187 sorry ("code model `large' not supported yet");
1188 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1189 sorry ("%i-bit mode not compiled in",
1190 (target_flags & MASK_64BIT) ? 64 : 32);
1192 for (i = 0; i < pta_size; i++)
1193 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1195 ix86_arch = processor_alias_table[i].processor;
1196 /* Default cpu tuning to the architecture. */
1197 ix86_tune = ix86_arch;
1198 if (processor_alias_table[i].flags & PTA_MMX
1199 && !(target_flags_explicit & MASK_MMX))
1200 target_flags |= MASK_MMX;
1201 if (processor_alias_table[i].flags & PTA_3DNOW
1202 && !(target_flags_explicit & MASK_3DNOW))
1203 target_flags |= MASK_3DNOW;
1204 if (processor_alias_table[i].flags & PTA_3DNOW_A
1205 && !(target_flags_explicit & MASK_3DNOW_A))
1206 target_flags |= MASK_3DNOW_A;
1207 if (processor_alias_table[i].flags & PTA_SSE
1208 && !(target_flags_explicit & MASK_SSE))
1209 target_flags |= MASK_SSE;
1210 if (processor_alias_table[i].flags & PTA_SSE2
1211 && !(target_flags_explicit & MASK_SSE2))
1212 target_flags |= MASK_SSE2;
1213 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1214 x86_prefetch_sse = true;
1215 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1216 error ("CPU you selected does not support x86-64 instruction set");
1217 break;
1220 if (i == pta_size)
1221 error ("bad value (%s) for -march= switch", ix86_arch_string);
1223 for (i = 0; i < pta_size; i++)
1224 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1226 ix86_tune = processor_alias_table[i].processor;
1227 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1228 error ("CPU you selected does not support x86-64 instruction set");
1229 break;
1231 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1232 x86_prefetch_sse = true;
1233 if (i == pta_size)
1234 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1236 if (optimize_size)
1237 ix86_cost = &size_cost;
1238 else
1239 ix86_cost = processor_target_table[ix86_tune].cost;
1240 target_flags |= processor_target_table[ix86_tune].target_enable;
1241 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1243 /* Arrange to set up i386_stack_locals for all functions. */
1244 init_machine_status = ix86_init_machine_status;
1246 /* Validate -mregparm= value. */
1247 if (ix86_regparm_string)
1249 i = atoi (ix86_regparm_string);
1250 if (i < 0 || i > REGPARM_MAX)
1251 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1252 else
1253 ix86_regparm = i;
1255 else
1256 if (TARGET_64BIT)
1257 ix86_regparm = REGPARM_MAX;
1259 /* If the user has provided any of the -malign-* options,
1260 warn and use that value only if -falign-* is not set.
1261 Remove this code in GCC 3.2 or later. */
1262 if (ix86_align_loops_string)
1264 warning ("-malign-loops is obsolete, use -falign-loops");
1265 if (align_loops == 0)
1267 i = atoi (ix86_align_loops_string);
1268 if (i < 0 || i > MAX_CODE_ALIGN)
1269 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1270 else
1271 align_loops = 1 << i;
1275 if (ix86_align_jumps_string)
1277 warning ("-malign-jumps is obsolete, use -falign-jumps");
1278 if (align_jumps == 0)
1280 i = atoi (ix86_align_jumps_string);
1281 if (i < 0 || i > MAX_CODE_ALIGN)
1282 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1283 else
1284 align_jumps = 1 << i;
1288 if (ix86_align_funcs_string)
1290 warning ("-malign-functions is obsolete, use -falign-functions");
1291 if (align_functions == 0)
1293 i = atoi (ix86_align_funcs_string);
1294 if (i < 0 || i > MAX_CODE_ALIGN)
1295 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1296 else
1297 align_functions = 1 << i;
1301 /* Default align_* from the processor table. */
1302 if (align_loops == 0)
1304 align_loops = processor_target_table[ix86_tune].align_loop;
1305 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1307 if (align_jumps == 0)
1309 align_jumps = processor_target_table[ix86_tune].align_jump;
1310 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1312 if (align_functions == 0)
1314 align_functions = processor_target_table[ix86_tune].align_func;
1317 /* Validate -mpreferred-stack-boundary= value, or provide default.
1318 The default of 128 bits is for Pentium III's SSE __m128, but we
1319 don't want additional code to keep the stack aligned when
1320 optimizing for code size. */
1321 ix86_preferred_stack_boundary = (optimize_size
1322 ? TARGET_64BIT ? 128 : 32
1323 : 128);
1324 if (ix86_preferred_stack_boundary_string)
1326 i = atoi (ix86_preferred_stack_boundary_string);
1327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1329 TARGET_64BIT ? 4 : 2);
1330 else
1331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
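/* Added arithmetic example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * 8 = 128 bits, i.e. the default 16-byte stack alignment. */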
1334 /* Validate -mbranch-cost= value, or provide default. */
1335 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1336 if (ix86_branch_cost_string)
1338 i = atoi (ix86_branch_cost_string);
1339 if (i < 0 || i > 5)
1340 error ("-mbranch-cost=%d is not between 0 and 5", i);
1341 else
1342 ix86_branch_cost = i;
1345 if (ix86_tls_dialect_string)
1347 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1348 ix86_tls_dialect = TLS_DIALECT_GNU;
1349 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_SUN;
1351 else
1352 error ("bad value (%s) for -mtls-dialect= switch",
1353 ix86_tls_dialect_string);
1356 /* Keep nonleaf frame pointers. */
1357 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1358 flag_omit_frame_pointer = 1;
1360 /* If we're doing fast math, we don't care about comparison order
1361 wrt NaNs. This lets us use a shorter comparison sequence. */
1362 if (flag_unsafe_math_optimizations)
1363 target_flags &= ~MASK_IEEE_FP;
1365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1366 since the insns won't need emulation. */
1367 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1368 target_flags &= ~MASK_NO_FANCY_MATH_387;
1370 /* Turn on SSE2 builtins for -mpni. */
1371 if (TARGET_PNI)
1372 target_flags |= MASK_SSE2;
1374 /* Turn on SSE builtins for -msse2. */
1375 if (TARGET_SSE2)
1376 target_flags |= MASK_SSE;
1378 if (TARGET_64BIT)
1380 if (TARGET_ALIGN_DOUBLE)
1381 error ("-malign-double makes no sense in the 64bit mode");
1382 if (TARGET_RTD)
1383 error ("-mrtd calling convention not supported in the 64bit mode");
1384 /* Enable by default the SSE and MMX builtins. */
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
1388 else
1390 ix86_fpmath = FPMATH_387;
1391 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1392 when the programmer takes care to keep the stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1397 if (ix86_fpmath_string != 0)
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1403 if (!TARGET_SSE)
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1408 else
1409 ix86_fpmath = FPMATH_SSE;
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1414 if (!TARGET_SSE)
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1419 else if (!TARGET_80387)
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1424 else
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1427 else
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1432 on by -msse. */
1433 if (TARGET_SSE)
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1439 /* A CPU with 3DNow! also has MMX, so MMX is also turned on by -m3dnow. */
1440 if (TARGET_3DNOW)
1442 target_flags |= MASK_MMX;
1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1450 && !optimize_size)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1455 char *p;
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
1459 *p = '\0';
1463 void
1464 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1469 if (level > 1)
1470 flag_schedule_insns = 0;
1471 #endif
1473 /* The default values of these switches depend on TARGET_64BIT,
1474 which is not known at this moment. Mark these values with 2 and
1475 let the user override them. If no command line option
1476 specifies them, we will set the defaults in override_options. */
1477 if (optimize >= 1)
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1502 #endif
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
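/* Added usage sketch (not part of the original file): these attributes are
   applied to function types in user code, e.g.

     int __attribute__ ((regparm (3))) f (int a, int b, int c);
     void __attribute__ ((stdcall)) g (int x);

   regparm (3) passes the first three integer arguments in registers;
   stdcall makes the callee pop its (fixed) arguments.  */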
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1512 static bool
1513 ix86_function_ok_for_sibcall (tree decl, tree exp)
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1519 return false;
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1528 return false;
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
1531 register for the address of the target function. Make sure that all
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1535 int regparm = ix86_regparm;
1536 tree attr, type;
1538 /* We're looking at the CALL_EXPR, we need the type of the function. */
1539 type = TREE_OPERAND (exp, 0); /* pointer expression */
1540 type = TREE_TYPE (type); /* pointer type */
1541 type = TREE_TYPE (type); /* function type */
1543 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1544 if (attr)
1545 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1547 if (regparm >= 3)
1549 /* ??? Need to count the actual number of registers to be used,
1550 not the possible number of registers. Fix later. */
1551 return false;
1555 /* Otherwise okay. That also includes certain types of indirect calls. */
1556 return true;
1559 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1560 arguments as in struct attribute_spec.handler. */
1561 static tree
1562 ix86_handle_cdecl_attribute (tree *node, tree name,
1563 tree args ATTRIBUTE_UNUSED,
1564 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
1575 else
1577 if (is_attribute_p ("fastcall", name))
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1581 error ("fastcall and stdcall attributes are not compatible");
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1585 error ("fastcall and regparm attributes are not compatible");
1588 else if (is_attribute_p ("stdcall", name))
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1592 error ("fastcall and stdcall attributes are not compatible");
1597 if (TARGET_64BIT)
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1603 return NULL_TREE;
1606 /* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
1608 static tree
1609 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1612 if (TREE_CODE (*node) != FUNCTION_TYPE
1613 && TREE_CODE (*node) != METHOD_TYPE
1614 && TREE_CODE (*node) != FIELD_DECL
1615 && TREE_CODE (*node) != TYPE_DECL)
1617 warning ("`%s' attribute only applies to functions",
1618 IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1621 else
1623 tree cst;
1625 cst = TREE_VALUE (args);
1626 if (TREE_CODE (cst) != INTEGER_CST)
1628 warning ("`%s' attribute requires an integer constant argument",
1629 IDENTIFIER_POINTER (name));
1630 *no_add_attrs = true;
1632 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1634 warning ("argument to `%s' attribute larger than %d",
1635 IDENTIFIER_POINTER (name), REGPARM_MAX);
1636 *no_add_attrs = true;
1639 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1641 error ("fastcall and regparm attributes are not compatible");
1645 return NULL_TREE;
1648 /* Return 0 if the attributes for two types are incompatible, 1 if they
1649 are compatible, and 2 if they are nearly compatible (which causes a
1650 warning to be generated). */
1652 static int
1653 ix86_comp_type_attributes (tree type1, tree type2)
1655 /* Check for mismatch of non-default calling convention. */
1656 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1658 if (TREE_CODE (type1) != FUNCTION_TYPE)
1659 return 1;
1661 /* Check for mismatched fastcall types */
1662 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1664 return 0;
1666 /* Check for mismatched return types (cdecl vs stdcall). */
1667 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1668 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1669 return 0;
1670 return 1;
1673 /* Return the regparm value for a function with the indicated TYPE.  */
1675 static int
1676 ix86_fntype_regparm (tree type)
1678 tree attr;
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1681 if (attr)
1682 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1683 else
1684 return ix86_regparm;
1687 /* Value is the number of bytes of arguments automatically
1688 popped when returning from a subroutine call.
1689 FUNDECL is the declaration node of the function (as a tree),
1690 FUNTYPE is the data type of the function (as a tree),
1691 or for a library call it is an identifier node for the subroutine name.
1692 SIZE is the number of bytes of arguments passed on the stack.
1694 On the 80386, the RTD insn may be used to pop them if the number
1695 of args is fixed, but if the number is variable then the caller
1696 must pop them all. RTD can't be used for library calls now
1697 because the library is compiled with the Unix compiler.
1698 Use of RTD is a selectable option, since it is incompatible with
1699 standard Unix calling sequences. If the option is not selected,
1700 the caller must always pop the args.
1702 The attribute stdcall is equivalent to RTD on a per module basis. */
1705 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1707 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1709 /* Cdecl functions override -mrtd, and never pop the stack. */
1710 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1712 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1713 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1714 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1715 rtd = 1;
1717 if (rtd
1718 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1719 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1720 == void_type_node)))
1721 return size;
1724 /* Lose any fake structure return argument if it is passed on the stack. */
1725 if (aggregate_value_p (TREE_TYPE (funtype))
1726 && !TARGET_64BIT)
1728 int nregs = ix86_fntype_regparm (funtype);
1730 if (!nregs)
1731 return GET_MODE_SIZE (Pmode);
1734 return 0;
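/* Illustrative note (not part of the original source): a sketch of what
   ix86_return_pops_args computes under the rules above, assuming plain
   32-bit int arguments.

     void __attribute__ ((stdcall)) f (int a, int b);   pops 8 bytes
     void __attribute__ ((stdcall)) g (int a, ...);     pops 0 (variable args)
     void h (int a, int b);                             pops 0 unless -mrtd  */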
1737 /* Argument support functions. */
1739 /* Return true when register may be used to pass function parameters. */
1740 bool
1741 ix86_function_arg_regno_p (int regno)
1743 int i;
1744 if (!TARGET_64BIT)
1745 return (regno < REGPARM_MAX
1746 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1747 if (SSE_REGNO_P (regno) && TARGET_SSE)
1748 return true;
1749 /* RAX is used as hidden argument to va_arg functions. */
1750 if (!regno)
1751 return true;
1752 for (i = 0; i < REGPARM_MAX; i++)
1753 if (regno == x86_64_int_parameter_registers[i])
1754 return true;
1755 return false;
1758 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1759 for a call to a function whose data type is FNTYPE.
1760 For a library call, FNTYPE is 0. */
1762 void
1763 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1764 tree fntype, /* tree ptr for function decl */
1765 rtx libname, /* SYMBOL_REF of library name or 0 */
1766 tree fndecl)
1768 static CUMULATIVE_ARGS zero_cum;
1769 tree param, next_param;
1770 bool user_convention = false;
1772 if (TARGET_DEBUG_ARG)
1774 fprintf (stderr, "\ninit_cumulative_args (");
1775 if (fntype)
1776 fprintf (stderr, "fntype code = %s, ret code = %s",
1777 tree_code_name[(int) TREE_CODE (fntype)],
1778 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1779 else
1780 fprintf (stderr, "no fntype");
1782 if (libname)
1783 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1786 *cum = zero_cum;
1788 /* Set up the number of registers to use for passing arguments. */
1789 cum->nregs = ix86_regparm;
1790 cum->sse_nregs = SSE_REGPARM_MAX;
1791 if (fntype && !TARGET_64BIT)
1793 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1795 if (attr)
1797 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1798 user_convention = true;
1801 cum->maybe_vaarg = false;
1803 /* Use ecx and edx registers if function has fastcall attribute */
1804 if (fntype && !TARGET_64BIT)
1806 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1808 cum->nregs = 2;
1809 cum->fastcall = 1;
1810 user_convention = true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && fndecl
1816 && flag_unit_at_a_time)
1818 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1819 if (i && i->local)
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1824 cum->nregs = 2;
1825 else
1826 cum->nregs = 3;
1831 /* Determine if this function has variable arguments. This is
1832      indicated by the last argument being 'void_type_node' if there
1833 are no variable arguments. If there are variable arguments, then
1834 we won't pass anything in registers */
1836 if (cum->nregs)
1838 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1839 param != 0; param = next_param)
1841 next_param = TREE_CHAIN (param);
1842 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1844 if (!TARGET_64BIT)
1846 cum->nregs = 0;
1847 cum->fastcall = 0;
1849 cum->maybe_vaarg = true;
1853 if ((!fntype && !libname)
1854 || (fntype && !TYPE_ARG_TYPES (fntype)))
1855 cum->maybe_vaarg = 1;
1857 if (TARGET_DEBUG_ARG)
1858 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1860 return;
1863 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
1864    of this code is to classify each 8 bytes of the incoming argument by register
1865    class and assign registers accordingly.  */
1867 /* Return the union class of CLASS1 and CLASS2.
1868 See the x86-64 PS ABI for details. */
1870 static enum x86_64_reg_class
1871 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1873 /* Rule #1: If both classes are equal, this is the resulting class. */
1874 if (class1 == class2)
1875 return class1;
1877 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1878 the other class. */
1879 if (class1 == X86_64_NO_CLASS)
1880 return class2;
1881 if (class2 == X86_64_NO_CLASS)
1882 return class1;
1884 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1885 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1886 return X86_64_MEMORY_CLASS;
1888 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1889 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1890 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1891 return X86_64_INTEGERSI_CLASS;
1892 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1893 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1894 return X86_64_INTEGER_CLASS;
1896 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1897 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1898 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1899 return X86_64_MEMORY_CLASS;
1901 /* Rule #6: Otherwise class SSE is used. */
1902 return X86_64_SSE_CLASS;
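/* Illustrative note (not part of the original source): a few sample merges
   under the rules above.

     merge_classes (X86_64_NO_CLASS,         X86_64_SSE_CLASS)    -> SSE
     merge_classes (X86_64_INTEGERSI_CLASS,  X86_64_SSESF_CLASS)  -> INTEGERSI
     merge_classes (X86_64_X87_CLASS,        X86_64_SSE_CLASS)    -> MEMORY  */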
1905 /* Classify the argument of type TYPE and mode MODE.
1906 CLASSES will be filled by the register class used to pass each word
1907 of the operand. The number of words is returned. In case the parameter
1908 should be passed in memory, 0 is returned. As a special case for zero
1909 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1911    BIT_OFFSET is used internally for handling records and specifies the
1912    offset in bits modulo 256 to avoid overflow cases.
1914 See the x86-64 PS ABI for details.
1917 static int
1918 classify_argument (enum machine_mode mode, tree type,
1919 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1921 int bytes =
1922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1923 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1925 /* Variable sized entities are always passed/returned in memory. */
1926 if (bytes < 0)
1927 return 0;
1929 if (mode != VOIDmode
1930 && MUST_PASS_IN_STACK (mode, type))
1931 return 0;
1933 if (type && AGGREGATE_TYPE_P (type))
1935 int i;
1936 tree field;
1937 enum x86_64_reg_class subclasses[MAX_CLASSES];
1939 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1940 if (bytes > 16)
1941 return 0;
1943 for (i = 0; i < words; i++)
1944 classes[i] = X86_64_NO_CLASS;
1946       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
1947          signal the memory class, so handle it as a special case.  */
1948 if (!words)
1950 classes[0] = X86_64_NO_CLASS;
1951 return 1;
1954 /* Classify each field of record and merge classes. */
1955 if (TREE_CODE (type) == RECORD_TYPE)
1957 /* For classes first merge in the field of the subclasses. */
1958 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1960 tree bases = TYPE_BINFO_BASETYPES (type);
1961 int n_bases = TREE_VEC_LENGTH (bases);
1962 int i;
1964 for (i = 0; i < n_bases; ++i)
1966 tree binfo = TREE_VEC_ELT (bases, i);
1967 int num;
1968 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1969 tree type = BINFO_TYPE (binfo);
1971 num = classify_argument (TYPE_MODE (type),
1972 type, subclasses,
1973 (offset + bit_offset) % 256);
1974 if (!num)
1975 return 0;
1976 for (i = 0; i < num; i++)
1978 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1979 classes[i + pos] =
1980 merge_classes (subclasses[i], classes[i + pos]);
1984 /* And now merge the fields of structure. */
1985 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1987 if (TREE_CODE (field) == FIELD_DECL)
1989 int num;
1991 /* Bitfields are always classified as integer. Handle them
1992 early, since later code would consider them to be
1993 misaligned integers. */
1994 if (DECL_BIT_FIELD (field))
1996 for (i = int_bit_position (field) / 8 / 8;
1997 i < (int_bit_position (field)
1998 + tree_low_cst (DECL_SIZE (field), 0)
1999 + 63) / 8 / 8; i++)
2000 classes[i] =
2001 merge_classes (X86_64_INTEGER_CLASS,
2002 classes[i]);
2004 else
2006 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2007 TREE_TYPE (field), subclasses,
2008 (int_bit_position (field)
2009 + bit_offset) % 256);
2010 if (!num)
2011 return 0;
2012 for (i = 0; i < num; i++)
2014 int pos =
2015 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2016 classes[i + pos] =
2017 merge_classes (subclasses[i], classes[i + pos]);
2023 /* Arrays are handled as small records. */
2024 else if (TREE_CODE (type) == ARRAY_TYPE)
2026 int num;
2027 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2028 TREE_TYPE (type), subclasses, bit_offset);
2029 if (!num)
2030 return 0;
2032 /* The partial classes are now full classes. */
2033 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2034 subclasses[0] = X86_64_SSE_CLASS;
2035 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2036 subclasses[0] = X86_64_INTEGER_CLASS;
2038 for (i = 0; i < words; i++)
2039 classes[i] = subclasses[i % num];
2041 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2042 else if (TREE_CODE (type) == UNION_TYPE
2043 || TREE_CODE (type) == QUAL_UNION_TYPE)
2045 /* For classes first merge in the field of the subclasses. */
2046 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2048 tree bases = TYPE_BINFO_BASETYPES (type);
2049 int n_bases = TREE_VEC_LENGTH (bases);
2050 int i;
2052 for (i = 0; i < n_bases; ++i)
2054 tree binfo = TREE_VEC_ELT (bases, i);
2055 int num;
2056 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2057 tree type = BINFO_TYPE (binfo);
2059 num = classify_argument (TYPE_MODE (type),
2060 type, subclasses,
2061 (offset + (bit_offset % 64)) % 256);
2062 if (!num)
2063 return 0;
2064 for (i = 0; i < num; i++)
2066 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2067 classes[i + pos] =
2068 merge_classes (subclasses[i], classes[i + pos]);
2072 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2074 if (TREE_CODE (field) == FIELD_DECL)
2076 int num;
2077 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078 TREE_TYPE (field), subclasses,
2079 bit_offset);
2080 if (!num)
2081 return 0;
2082 for (i = 0; i < num; i++)
2083 classes[i] = merge_classes (subclasses[i], classes[i]);
2087 else
2088 abort ();
2090 /* Final merger cleanup. */
2091 for (i = 0; i < words; i++)
2093 /* If one class is MEMORY, everything should be passed in
2094 memory. */
2095 if (classes[i] == X86_64_MEMORY_CLASS)
2096 return 0;
2098       /* The X86_64_SSEUP_CLASS should always be preceded by
2099 X86_64_SSE_CLASS. */
2100 if (classes[i] == X86_64_SSEUP_CLASS
2101 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2102 classes[i] = X86_64_SSE_CLASS;
2104 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2105 if (classes[i] == X86_64_X87UP_CLASS
2106 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2107 classes[i] = X86_64_SSE_CLASS;
2109 return words;
2112   /* Compute the alignment needed.  We align all types to natural boundaries with
2113      the exception of XFmode, which is aligned to 64 bits.  */
2114 if (mode != VOIDmode && mode != BLKmode)
2116 int mode_alignment = GET_MODE_BITSIZE (mode);
2118 if (mode == XFmode)
2119 mode_alignment = 128;
2120 else if (mode == XCmode)
2121 mode_alignment = 256;
2122 /* Misaligned fields are always returned in memory. */
2123 if (bit_offset % mode_alignment)
2124 return 0;
2127 /* Classification of atomic types. */
2128 switch (mode)
2130 case DImode:
2131 case SImode:
2132 case HImode:
2133 case QImode:
2134 case CSImode:
2135 case CHImode:
2136 case CQImode:
2137 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2138 classes[0] = X86_64_INTEGERSI_CLASS;
2139 else
2140 classes[0] = X86_64_INTEGER_CLASS;
2141 return 1;
2142 case CDImode:
2143 case TImode:
2144 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2145 return 2;
2146 case CTImode:
2147 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2148 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2149 return 4;
2150 case SFmode:
2151 if (!(bit_offset % 64))
2152 classes[0] = X86_64_SSESF_CLASS;
2153 else
2154 classes[0] = X86_64_SSE_CLASS;
2155 return 1;
2156 case DFmode:
2157 classes[0] = X86_64_SSEDF_CLASS;
2158 return 1;
2159 case TFmode:
2160 classes[0] = X86_64_X87_CLASS;
2161 classes[1] = X86_64_X87UP_CLASS;
2162 return 2;
2163 case TCmode:
2164 classes[0] = X86_64_X87_CLASS;
2165 classes[1] = X86_64_X87UP_CLASS;
2166 classes[2] = X86_64_X87_CLASS;
2167 classes[3] = X86_64_X87UP_CLASS;
2168 return 4;
2169 case DCmode:
2170 classes[0] = X86_64_SSEDF_CLASS;
2171 classes[1] = X86_64_SSEDF_CLASS;
2172 return 2;
2173 case SCmode:
2174 classes[0] = X86_64_SSE_CLASS;
2175 return 1;
2176 case V4SFmode:
2177 case V4SImode:
2178 case V16QImode:
2179 case V8HImode:
2180 case V2DFmode:
2181 case V2DImode:
2182 classes[0] = X86_64_SSE_CLASS;
2183 classes[1] = X86_64_SSEUP_CLASS;
2184 return 2;
2185 case V2SFmode:
2186 case V2SImode:
2187 case V4HImode:
2188 case V8QImode:
2189 return 0;
2190 case BLKmode:
2191 case VOIDmode:
2192 return 0;
2193 default:
2194 abort ();
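/* Illustrative note (not part of the original source): a worked example of
   the classification above, assuming the usual x86-64 psABI layout.

     struct s { double d; long l; };

   occupies 16 bytes, so classify_argument looks at two 8-byte words:
   word 0 (the double) gets X86_64_SSEDF_CLASS and word 1 (the long) gets
   X86_64_INTEGER_CLASS, so the structure is passed in one SSE register and
   one general-purpose register rather than in memory.  */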
2198 /* Examine the argument and set the number of registers required in each
2199    class.  Return 0 iff the parameter should be passed in memory.  */
2200 static int
2201 examine_argument (enum machine_mode mode, tree type, int in_return,
2202 int *int_nregs, int *sse_nregs)
2204 enum x86_64_reg_class class[MAX_CLASSES];
2205 int n = classify_argument (mode, type, class, 0);
2207 *int_nregs = 0;
2208 *sse_nregs = 0;
2209 if (!n)
2210 return 0;
2211 for (n--; n >= 0; n--)
2212 switch (class[n])
2214 case X86_64_INTEGER_CLASS:
2215 case X86_64_INTEGERSI_CLASS:
2216 (*int_nregs)++;
2217 break;
2218 case X86_64_SSE_CLASS:
2219 case X86_64_SSESF_CLASS:
2220 case X86_64_SSEDF_CLASS:
2221 (*sse_nregs)++;
2222 break;
2223 case X86_64_NO_CLASS:
2224 case X86_64_SSEUP_CLASS:
2225 break;
2226 case X86_64_X87_CLASS:
2227 case X86_64_X87UP_CLASS:
2228 if (!in_return)
2229 return 0;
2230 break;
2231 case X86_64_MEMORY_CLASS:
2232 abort ();
2234 return 1;
2236 /* Construct container for the argument used by GCC interface. See
2237 FUNCTION_ARG for the detailed description. */
2238 static rtx
2239 construct_container (enum machine_mode mode, tree type, int in_return,
2240 int nintregs, int nsseregs, const int * intreg,
2241 int sse_regno)
2243 enum machine_mode tmpmode;
2244 int bytes =
2245 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2246 enum x86_64_reg_class class[MAX_CLASSES];
2247 int n;
2248 int i;
2249 int nexps = 0;
2250 int needed_sseregs, needed_intregs;
2251 rtx exp[MAX_CLASSES];
2252 rtx ret;
2254 n = classify_argument (mode, type, class, 0);
2255 if (TARGET_DEBUG_ARG)
2257 if (!n)
2258 fprintf (stderr, "Memory class\n");
2259 else
2261 fprintf (stderr, "Classes:");
2262 for (i = 0; i < n; i++)
2264 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2266 fprintf (stderr, "\n");
2269 if (!n)
2270 return NULL;
2271 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2272 return NULL;
2273 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2274 return NULL;
2276 /* First construct simple cases. Avoid SCmode, since we want to use
2277 single register to pass this type. */
2278 if (n == 1 && mode != SCmode)
2279 switch (class[0])
2281 case X86_64_INTEGER_CLASS:
2282 case X86_64_INTEGERSI_CLASS:
2283 return gen_rtx_REG (mode, intreg[0]);
2284 case X86_64_SSE_CLASS:
2285 case X86_64_SSESF_CLASS:
2286 case X86_64_SSEDF_CLASS:
2287 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2288 case X86_64_X87_CLASS:
2289 return gen_rtx_REG (mode, FIRST_STACK_REG);
2290 case X86_64_NO_CLASS:
2291 /* Zero sized array, struct or class. */
2292 return NULL;
2293 default:
2294 abort ();
2296 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2297 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2298 if (n == 2
2299 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2300 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2301 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2302 && class[1] == X86_64_INTEGER_CLASS
2303 && (mode == CDImode || mode == TImode)
2304 && intreg[0] + 1 == intreg[1])
2305 return gen_rtx_REG (mode, intreg[0]);
2306 if (n == 4
2307 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2308 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2309 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2311 /* Otherwise figure out the entries of the PARALLEL. */
2312 for (i = 0; i < n; i++)
2314 switch (class[i])
2316 case X86_64_NO_CLASS:
2317 break;
2318 case X86_64_INTEGER_CLASS:
2319 case X86_64_INTEGERSI_CLASS:
2320 /* Merge TImodes on aligned occasions here too. */
2321 if (i * 8 + 8 > bytes)
2322 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2323 else if (class[i] == X86_64_INTEGERSI_CLASS)
2324 tmpmode = SImode;
2325 else
2326 tmpmode = DImode;
2327 	  /* We've requested a size we don't have a mode for.  Use DImode.  */
2328 if (tmpmode == BLKmode)
2329 tmpmode = DImode;
2330 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2331 gen_rtx_REG (tmpmode, *intreg),
2332 GEN_INT (i*8));
2333 intreg++;
2334 break;
2335 case X86_64_SSESF_CLASS:
2336 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2337 gen_rtx_REG (SFmode,
2338 SSE_REGNO (sse_regno)),
2339 GEN_INT (i*8));
2340 sse_regno++;
2341 break;
2342 case X86_64_SSEDF_CLASS:
2343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2344 gen_rtx_REG (DFmode,
2345 SSE_REGNO (sse_regno)),
2346 GEN_INT (i*8));
2347 sse_regno++;
2348 break;
2349 case X86_64_SSE_CLASS:
2350 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2351 tmpmode = TImode;
2352 else
2353 tmpmode = DImode;
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (tmpmode,
2356 SSE_REGNO (sse_regno)),
2357 GEN_INT (i*8));
2358 if (tmpmode == TImode)
2359 i++;
2360 sse_regno++;
2361 break;
2362 default:
2363 abort ();
2366 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2367 for (i = 0; i < nexps; i++)
2368 XVECEXP (ret, 0, i) = exp [i];
2369 return ret;
2372 /* Update the data in CUM to advance over an argument
2373 of mode MODE and data type TYPE.
2374 (TYPE is null for libcalls where that information may not be available.) */
2376 void
2377 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2378 enum machine_mode mode, /* current arg mode */
2379 tree type, /* type of the argument or 0 if lib support */
2380 int named) /* whether or not the argument was named */
2382 int bytes =
2383 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2384 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2386 if (TARGET_DEBUG_ARG)
2387 fprintf (stderr,
2388 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2389 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2390 if (TARGET_64BIT)
2392 int int_nregs, sse_nregs;
2393 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2394 cum->words += words;
2395 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2397 cum->nregs -= int_nregs;
2398 cum->sse_nregs -= sse_nregs;
2399 cum->regno += int_nregs;
2400 cum->sse_regno += sse_nregs;
2402 else
2403 cum->words += words;
2405 else
2407 if (TARGET_SSE && mode == TImode)
2409 cum->sse_words += words;
2410 cum->sse_nregs -= 1;
2411 cum->sse_regno += 1;
2412 if (cum->sse_nregs <= 0)
2414 cum->sse_nregs = 0;
2415 cum->sse_regno = 0;
2418 else
2420 cum->words += words;
2421 cum->nregs -= words;
2422 cum->regno += words;
2424 if (cum->nregs <= 0)
2426 cum->nregs = 0;
2427 cum->regno = 0;
2431 return;
2434 /* Define where to put the arguments to a function.
2435 Value is zero to push the argument on the stack,
2436 or a hard register in which to store the argument.
2438 MODE is the argument's machine mode.
2439 TYPE is the data type of the argument (as a tree).
2440 This is null for libcalls where that information may
2441 not be available.
2442 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2443 the preceding args and about the function being called.
2444 NAMED is nonzero if this argument is a named parameter
2445 (otherwise it is an extra parameter matching an ellipsis). */
2448 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2449 enum machine_mode mode, /* current arg mode */
2450 tree type, /* type of the argument or 0 if lib support */
2451 int named) /* != 0 for normal args, == 0 for ... args */
2453 rtx ret = NULL_RTX;
2454 int bytes =
2455 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2456 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2458 /* Handle a hidden AL argument containing number of registers for varargs
2459 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2460 any AL settings. */
2461 if (mode == VOIDmode)
2463 if (TARGET_64BIT)
2464 return GEN_INT (cum->maybe_vaarg
2465 ? (cum->sse_nregs < 0
2466 ? SSE_REGPARM_MAX
2467 : cum->sse_regno)
2468 : -1);
2469 else
2470 return constm1_rtx;
2472 if (TARGET_64BIT)
2473 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2474 &x86_64_int_parameter_registers [cum->regno],
2475 cum->sse_regno);
2476 else
2477 switch (mode)
2479 /* For now, pass fp/complex values on the stack. */
2480 default:
2481 break;
2483 case BLKmode:
2484 if (bytes < 0)
2485 break;
2486 /* FALLTHRU */
2487 case DImode:
2488 case SImode:
2489 case HImode:
2490 case QImode:
2491 if (words <= cum->nregs)
2493 int regno = cum->regno;
2495 /* Fastcall allocates the first two DWORD (SImode) or
2496 smaller arguments to ECX and EDX. */
2497 if (cum->fastcall)
2499 if (mode == BLKmode || mode == DImode)
2500 break;
2502 /* ECX not EAX is the first allocated register. */
2503 if (regno == 0)
2504 regno = 2;
2506 ret = gen_rtx_REG (mode, regno);
2508 break;
2509 case TImode:
2510 if (cum->sse_nregs)
2511 ret = gen_rtx_REG (mode, cum->sse_regno);
2512 break;
2515 if (TARGET_DEBUG_ARG)
2517 fprintf (stderr,
2518 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2519 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2521 if (ret)
2522 print_simple_rtl (stderr, ret);
2523 else
2524 fprintf (stderr, ", stack");
2526 fprintf (stderr, " )\n");
2529 return ret;
2532 /* A C expression that indicates when an argument must be passed by
2533 reference. If nonzero for an argument, a copy of that argument is
2534 made in memory and a pointer to the argument is passed instead of
2535 the argument itself. The pointer is passed in whatever way is
2536 appropriate for passing a pointer to that type. */
2539 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2540 enum machine_mode mode ATTRIBUTE_UNUSED,
2541 tree type, int named ATTRIBUTE_UNUSED)
2543 if (!TARGET_64BIT)
2544 return 0;
2546 if (type && int_size_in_bytes (type) == -1)
2548 if (TARGET_DEBUG_ARG)
2549 fprintf (stderr, "function_arg_pass_by_reference\n");
2550 return 1;
2553 return 0;
2556 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2557    passing ABI.  */
2558 static bool
2559 contains_128bit_aligned_vector_p (tree type)
2561 enum machine_mode mode = TYPE_MODE (type);
2562 if (SSE_REG_MODE_P (mode)
2563 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2564 return true;
2565 if (TYPE_ALIGN (type) < 128)
2566 return false;
2568 if (AGGREGATE_TYPE_P (type))
2570 /* Walk the aggregates recursively. */
2571 if (TREE_CODE (type) == RECORD_TYPE
2572 || TREE_CODE (type) == UNION_TYPE
2573 || TREE_CODE (type) == QUAL_UNION_TYPE)
2575 tree field;
2577 if (TYPE_BINFO (type) != NULL
2578 && TYPE_BINFO_BASETYPES (type) != NULL)
2580 tree bases = TYPE_BINFO_BASETYPES (type);
2581 int n_bases = TREE_VEC_LENGTH (bases);
2582 int i;
2584 for (i = 0; i < n_bases; ++i)
2586 tree binfo = TREE_VEC_ELT (bases, i);
2587 tree type = BINFO_TYPE (binfo);
2589 if (contains_128bit_aligned_vector_p (type))
2590 return true;
2593 /* And now merge the fields of structure. */
2594 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2596 if (TREE_CODE (field) == FIELD_DECL
2597 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2598 return true;
2601       /* Just for use if some languages pass arrays by value.  */
2602 else if (TREE_CODE (type) == ARRAY_TYPE)
2604 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2605 return true;
2607 else
2608 abort ();
2610 return false;
2613 /* Gives the alignment boundary, in bits, of an argument with the
2614 specified mode and type. */
2617 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2619 int align;
2620 if (type)
2621 align = TYPE_ALIGN (type);
2622 else
2623 align = GET_MODE_ALIGNMENT (mode);
2624 if (align < PARM_BOUNDARY)
2625 align = PARM_BOUNDARY;
2626 if (!TARGET_64BIT)
2628 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2629 make an exception for SSE modes since these require 128bit
2630 alignment.
2632 The handling here differs from field_alignment. ICC aligns MMX
2633 arguments to 4 byte boundaries, while structure fields are aligned
2634 to 8 byte boundaries. */
2635 if (!type)
2637 if (!SSE_REG_MODE_P (mode))
2638 align = PARM_BOUNDARY;
2640 else
2642 if (!contains_128bit_aligned_vector_p (type))
2643 align = PARM_BOUNDARY;
2646 if (align > 128)
2647 align = 128;
2648 return align;
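/* Illustrative note (not part of the original source): for the 32-bit ABI
   handled above, a plain scalar such as int stays at PARM_BOUNDARY, while an
   SSE vector argument such as

     void f (__m128 v);     aligned to 128 bits on the stack

   keeps its 128-bit alignment, and the result is capped at 128 bits.  */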
2651 /* Return true if N is a possible register number of function value. */
2652 bool
2653 ix86_function_value_regno_p (int regno)
2655 if (!TARGET_64BIT)
2657 return ((regno) == 0
2658 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2659 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2661 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2662 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2663 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2666 /* Define how to find the value returned by a function.
2667 VALTYPE is the data type of the value (as a tree).
2668 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2669 otherwise, FUNC is 0. */
2671 ix86_function_value (tree valtype)
2673 if (TARGET_64BIT)
2675 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2676 REGPARM_MAX, SSE_REGPARM_MAX,
2677 x86_64_int_return_registers, 0);
2678       /* For zero sized structures, construct_container returns NULL, but we need
2679          to keep the rest of the compiler happy by returning a meaningful value.  */
2680 if (!ret)
2681 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2682 return ret;
2684 else
2685 return gen_rtx_REG (TYPE_MODE (valtype),
2686 ix86_value_regno (TYPE_MODE (valtype)));
2689 /* Return nonzero iff TYPE is returned in memory.  */
2691 ix86_return_in_memory (tree type)
2693 int needed_intregs, needed_sseregs;
2694 if (TARGET_64BIT)
2696 return !examine_argument (TYPE_MODE (type), type, 1,
2697 &needed_intregs, &needed_sseregs);
2699 else
2701 if (TYPE_MODE (type) == BLKmode)
2702 return 1;
2703 else if (MS_AGGREGATE_RETURN
2704 && AGGREGATE_TYPE_P (type)
2705 && int_size_in_bytes(type) <= 8)
2706 return 0;
2707 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2708 && int_size_in_bytes (type) == 8)
2709 || (int_size_in_bytes (type) > 12
2710 && TYPE_MODE (type) != TImode
2711 && TYPE_MODE (type) != TFmode
2712 && !VECTOR_MODE_P (TYPE_MODE (type))))
2713 return 1;
2714 return 0;
2718 /* Define how to find the value returned by a library function
2719 assuming the value has mode MODE. */
2721 ix86_libcall_value (enum machine_mode mode)
2723 if (TARGET_64BIT)
2725 switch (mode)
2727 case SFmode:
2728 case SCmode:
2729 case DFmode:
2730 case DCmode:
2731 return gen_rtx_REG (mode, FIRST_SSE_REG);
2732 case TFmode:
2733 case TCmode:
2734 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2735 default:
2736 return gen_rtx_REG (mode, 0);
2739 else
2740 return gen_rtx_REG (mode, ix86_value_regno (mode));
2743 /* Given a mode, return the register to use for a return value. */
2745 static int
2746 ix86_value_regno (enum machine_mode mode)
2748 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2749 return FIRST_FLOAT_REG;
2750 if (mode == TImode || VECTOR_MODE_P (mode))
2751 return FIRST_SSE_REG;
2752 return 0;
2755 /* Create the va_list data type. */
2757 tree
2758 ix86_build_va_list (void)
2760 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2762 /* For i386 we use plain pointer to argument area. */
2763 if (!TARGET_64BIT)
2764 return build_pointer_type (char_type_node);
2766 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2767 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2769 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2770 unsigned_type_node);
2771 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2772 unsigned_type_node);
2773 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2774 ptr_type_node);
2775 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2776 ptr_type_node);
2778 DECL_FIELD_CONTEXT (f_gpr) = record;
2779 DECL_FIELD_CONTEXT (f_fpr) = record;
2780 DECL_FIELD_CONTEXT (f_ovf) = record;
2781 DECL_FIELD_CONTEXT (f_sav) = record;
2783 TREE_CHAIN (record) = type_decl;
2784 TYPE_NAME (record) = type_decl;
2785 TYPE_FIELDS (record) = f_gpr;
2786 TREE_CHAIN (f_gpr) = f_fpr;
2787 TREE_CHAIN (f_fpr) = f_ovf;
2788 TREE_CHAIN (f_ovf) = f_sav;
2790 layout_type (record);
2792 /* The correct type is an array type of one element. */
2793 return build_array_type (record, build_index_type (size_zero_node));
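/* Illustrative note (not part of the original source): the record built
   above corresponds to the familiar x86-64 va_list layout, roughly

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];

   gp_offset and fp_offset index into reg_save_area, while
   overflow_arg_area points at arguments passed on the stack.  */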
2796 /* Perform any actions needed for a function that is receiving a
2797    variable number of arguments.
2799 CUM is as above.
2801 MODE and TYPE are the mode and type of the current parameter.
2803 PRETEND_SIZE is a variable that should be set to the amount of stack
2804    that must be pushed by the prolog to pretend that our caller pushed it.
2807 Normally, this macro will push all remaining incoming registers on the
2808 stack and set PRETEND_SIZE to the length of the registers pushed. */
2810 void
2811 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2812 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2813 int no_rtl)
2815 CUMULATIVE_ARGS next_cum;
2816 rtx save_area = NULL_RTX, mem;
2817 rtx label;
2818 rtx label_ref;
2819 rtx tmp_reg;
2820 rtx nsse_reg;
2821 int set;
2822 tree fntype;
2823 int stdarg_p;
2824 int i;
2826 if (!TARGET_64BIT)
2827 return;
2829 /* Indicate to allocate space on the stack for varargs save area. */
2830 ix86_save_varrargs_registers = 1;
2832 cfun->stack_alignment_needed = 128;
2834 fntype = TREE_TYPE (current_function_decl);
2835 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2836 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2837 != void_type_node));
2839 /* For varargs, we do not want to skip the dummy va_dcl argument.
2840 For stdargs, we do want to skip the last named argument. */
2841 next_cum = *cum;
2842 if (stdarg_p)
2843 function_arg_advance (&next_cum, mode, type, 1);
2845 if (!no_rtl)
2846 save_area = frame_pointer_rtx;
2848 set = get_varargs_alias_set ();
2850 for (i = next_cum.regno; i < ix86_regparm; i++)
2852 mem = gen_rtx_MEM (Pmode,
2853 plus_constant (save_area, i * UNITS_PER_WORD));
2854 set_mem_alias_set (mem, set);
2855 emit_move_insn (mem, gen_rtx_REG (Pmode,
2856 x86_64_int_parameter_registers[i]));
2859 if (next_cum.sse_nregs)
2861       /* Now emit code to save SSE registers.  The AX parameter contains the number
2862 	 of SSE parameter registers used to call this function.  We use the
2863 	 sse_prologue_save insn template that produces a computed jump across
2864 	 SSE saves.  We need some preparation work to get this working.  */
2866 label = gen_label_rtx ();
2867 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2869 /* Compute address to jump to :
2870 label - 5*eax + nnamed_sse_arguments*5 */
2871 tmp_reg = gen_reg_rtx (Pmode);
2872 nsse_reg = gen_reg_rtx (Pmode);
2873 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2874 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2875 gen_rtx_MULT (Pmode, nsse_reg,
2876 GEN_INT (4))));
2877 if (next_cum.sse_regno)
2878 emit_move_insn
2879 (nsse_reg,
2880 gen_rtx_CONST (DImode,
2881 gen_rtx_PLUS (DImode,
2882 label_ref,
2883 GEN_INT (next_cum.sse_regno * 4))));
2884 else
2885 emit_move_insn (nsse_reg, label_ref);
2886 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2888       /* Compute the address of the memory block we save into.  We always use a
2889 	 pointer pointing 127 bytes after the first byte to store - this is needed
2890 	 to keep the instruction size limited to 4 bytes.  */
2891 tmp_reg = gen_reg_rtx (Pmode);
2892 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2893 plus_constant (save_area,
2894 8 * REGPARM_MAX + 127)));
2895 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2896 set_mem_alias_set (mem, set);
2897 set_mem_align (mem, BITS_PER_WORD);
2899 /* And finally do the dirty job! */
2900 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2901 GEN_INT (next_cum.sse_regno), label));
2906 /* Implement va_start. */
2908 void
2909 ix86_va_start (tree valist, rtx nextarg)
2911 HOST_WIDE_INT words, n_gpr, n_fpr;
2912 tree f_gpr, f_fpr, f_ovf, f_sav;
2913 tree gpr, fpr, ovf, sav, t;
2915 /* Only 64bit target needs something special. */
2916 if (!TARGET_64BIT)
2918 std_expand_builtin_va_start (valist, nextarg);
2919 return;
2922 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2923 f_fpr = TREE_CHAIN (f_gpr);
2924 f_ovf = TREE_CHAIN (f_fpr);
2925 f_sav = TREE_CHAIN (f_ovf);
2927 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2928 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2929 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2930 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2931 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2933 /* Count number of gp and fp argument registers used. */
2934 words = current_function_args_info.words;
2935 n_gpr = current_function_args_info.regno;
2936 n_fpr = current_function_args_info.sse_regno;
2938 if (TARGET_DEBUG_ARG)
2939 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2940 (int) words, (int) n_gpr, (int) n_fpr);
2942 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2943 build_int_2 (n_gpr * 8, 0));
2944 TREE_SIDE_EFFECTS (t) = 1;
2945 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2947 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2948 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2949 TREE_SIDE_EFFECTS (t) = 1;
2950 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2952 /* Find the overflow area. */
2953 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2954 if (words != 0)
2955 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2956 build_int_2 (words * UNITS_PER_WORD, 0));
2957 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2958 TREE_SIDE_EFFECTS (t) = 1;
2959 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2961 /* Find the register save area.
2962      The prologue of the function saves it right above the stack frame.  */
2963 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2964 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2965 TREE_SIDE_EFFECTS (t) = 1;
2966 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
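/* Illustrative note (not part of the original source): after the expansion
   above, a call such as va_start (ap, fmt) in a function that has already
   consumed n_gpr integer registers and n_fpr SSE registers effectively sets

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = REGPARM_MAX * 8 + n_fpr * 16;
     ap->overflow_arg_area = the incoming stack arguments;
     ap->reg_save_area = the register save area from the prologue;

   which matches the fields created in ix86_build_va_list.  */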
2969 /* Implement va_arg. */
2971 ix86_va_arg (tree valist, tree type)
2973 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2974 tree f_gpr, f_fpr, f_ovf, f_sav;
2975 tree gpr, fpr, ovf, sav, t;
2976 int size, rsize;
2977 rtx lab_false, lab_over = NULL_RTX;
2978 rtx addr_rtx, r;
2979 rtx container;
2980 int indirect_p = 0;
2982 /* Only 64bit target needs something special. */
2983 if (!TARGET_64BIT)
2985 return std_expand_builtin_va_arg (valist, type);
2988 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2989 f_fpr = TREE_CHAIN (f_gpr);
2990 f_ovf = TREE_CHAIN (f_fpr);
2991 f_sav = TREE_CHAIN (f_ovf);
2993 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2994 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2995 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2996 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2997 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2999 size = int_size_in_bytes (type);
3000 if (size == -1)
3002 /* Passed by reference. */
3003 indirect_p = 1;
3004 type = build_pointer_type (type);
3005 size = int_size_in_bytes (type);
3007 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3009 container = construct_container (TYPE_MODE (type), type, 0,
3010 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3012 * Pull the value out of the saved registers ...
3015 addr_rtx = gen_reg_rtx (Pmode);
3017 if (container)
3019 rtx int_addr_rtx, sse_addr_rtx;
3020 int needed_intregs, needed_sseregs;
3021 int need_temp;
3023 lab_over = gen_label_rtx ();
3024 lab_false = gen_label_rtx ();
3026 examine_argument (TYPE_MODE (type), type, 0,
3027 &needed_intregs, &needed_sseregs);
3030 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3031 || TYPE_ALIGN (type) > 128);
3033       /* In case we are passing a structure, verify that it is a consecutive block
3034          in the register save area.  If not, we need to do moves.  */
3035 if (!need_temp && !REG_P (container))
3037 /* Verify that all registers are strictly consecutive */
3038 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3040 int i;
3042 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3044 rtx slot = XVECEXP (container, 0, i);
3045 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3046 || INTVAL (XEXP (slot, 1)) != i * 16)
3047 need_temp = 1;
3050 else
3052 int i;
3054 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3056 rtx slot = XVECEXP (container, 0, i);
3057 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3058 || INTVAL (XEXP (slot, 1)) != i * 8)
3059 need_temp = 1;
3063 if (!need_temp)
3065 int_addr_rtx = addr_rtx;
3066 sse_addr_rtx = addr_rtx;
3068 else
3070 int_addr_rtx = gen_reg_rtx (Pmode);
3071 sse_addr_rtx = gen_reg_rtx (Pmode);
3073 /* First ensure that we fit completely in registers. */
3074 if (needed_intregs)
3076 emit_cmp_and_jump_insns (expand_expr
3077 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3078 GEN_INT ((REGPARM_MAX - needed_intregs +
3079 1) * 8), GE, const1_rtx, SImode,
3080 1, lab_false);
3082 if (needed_sseregs)
3084 emit_cmp_and_jump_insns (expand_expr
3085 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3086 GEN_INT ((SSE_REGPARM_MAX -
3087 needed_sseregs + 1) * 16 +
3088 REGPARM_MAX * 8), GE, const1_rtx,
3089 SImode, 1, lab_false);
3092 /* Compute index to start of area used for integer regs. */
3093 if (needed_intregs)
3095 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3096 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3097 if (r != int_addr_rtx)
3098 emit_move_insn (int_addr_rtx, r);
3100 if (needed_sseregs)
3102 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3103 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3104 if (r != sse_addr_rtx)
3105 emit_move_insn (sse_addr_rtx, r);
3107 if (need_temp)
3109 int i;
3110 rtx mem;
3111 rtx x;
3113 /* Never use the memory itself, as it has the alias set. */
3114 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3115 mem = gen_rtx_MEM (BLKmode, x);
3116 force_operand (x, addr_rtx);
3117 set_mem_alias_set (mem, get_varargs_alias_set ());
3118 set_mem_align (mem, BITS_PER_UNIT);
3120 for (i = 0; i < XVECLEN (container, 0); i++)
3122 rtx slot = XVECEXP (container, 0, i);
3123 rtx reg = XEXP (slot, 0);
3124 enum machine_mode mode = GET_MODE (reg);
3125 rtx src_addr;
3126 rtx src_mem;
3127 int src_offset;
3128 rtx dest_mem;
3130 if (SSE_REGNO_P (REGNO (reg)))
3132 src_addr = sse_addr_rtx;
3133 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3135 else
3137 src_addr = int_addr_rtx;
3138 src_offset = REGNO (reg) * 8;
3140 src_mem = gen_rtx_MEM (mode, src_addr);
3141 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3142 src_mem = adjust_address (src_mem, mode, src_offset);
3143 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3144 emit_move_insn (dest_mem, src_mem);
3148 if (needed_intregs)
3151 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3152 build_int_2 (needed_intregs * 8, 0));
3153 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3154 TREE_SIDE_EFFECTS (t) = 1;
3155 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3157 if (needed_sseregs)
3160 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3161 build_int_2 (needed_sseregs * 16, 0));
3162 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3163 TREE_SIDE_EFFECTS (t) = 1;
3164 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3167 emit_jump_insn (gen_jump (lab_over));
3168 emit_barrier ();
3169 emit_label (lab_false);
3172 /* ... otherwise out of the overflow area. */
3174 /* Care for on-stack alignment if needed. */
3175 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3176 t = ovf;
3177 else
3179 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3180 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3181 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3183 t = save_expr (t);
3185 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3186 if (r != addr_rtx)
3187 emit_move_insn (addr_rtx, r);
3190 build (PLUS_EXPR, TREE_TYPE (t), t,
3191 build_int_2 (rsize * UNITS_PER_WORD, 0));
3192 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3193 TREE_SIDE_EFFECTS (t) = 1;
3194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3196 if (container)
3197 emit_label (lab_over);
3199 if (indirect_p)
3201 r = gen_rtx_MEM (Pmode, addr_rtx);
3202 set_mem_alias_set (r, get_varargs_alias_set ());
3203 emit_move_insn (addr_rtx, r);
3206 return addr_rtx;
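/* Illustrative note (not part of the original source): for something like
   va_arg (ap, double), the expansion above amounts to, roughly,

     if (ap->fp_offset < REGPARM_MAX * 8 + SSE_REGPARM_MAX * 16)
       addr = ap->reg_save_area + ap->fp_offset, ap->fp_offset += 16;
     else
       addr = ap->overflow_arg_area, ap->overflow_arg_area += 8;

   with the register/stack split driven by examine_argument.  */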
3209 /* Return nonzero if OP is either an i387 or SSE fp register.  */
3211 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3213 return ANY_FP_REG_P (op);
3216 /* Return nonzero if OP is an i387 fp register. */
3218 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3220 return FP_REG_P (op);
3223 /* Return nonzero if OP is a non-fp register_operand. */
3225 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3227 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3230 /* Return nonzero if OP is a register operand other than an
3231 i387 fp register. */
3233 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3235 return register_operand (op, mode) && !FP_REG_P (op);
3238 /* Return nonzero if OP is general operand representable on x86_64. */
3241 x86_64_general_operand (rtx op, enum machine_mode mode)
3243 if (!TARGET_64BIT)
3244 return general_operand (op, mode);
3245 if (nonimmediate_operand (op, mode))
3246 return 1;
3247 return x86_64_sign_extended_value (op);
3250 /* Return nonzero if OP is general operand representable on x86_64
3251 as either sign extended or zero extended constant. */
3254 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3256 if (!TARGET_64BIT)
3257 return general_operand (op, mode);
3258 if (nonimmediate_operand (op, mode))
3259 return 1;
3260 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3263 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3266 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3268 if (!TARGET_64BIT)
3269 return nonmemory_operand (op, mode);
3270 if (register_operand (op, mode))
3271 return 1;
3272 return x86_64_sign_extended_value (op);
3275 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3278 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3280 if (!TARGET_64BIT || !flag_pic)
3281 return nonmemory_operand (op, mode);
3282 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3283 return 1;
3284 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3285 return 1;
3286 return 0;
3289 /* Return nonzero if OPNUM's MEM should be matched
3290 in movabs* patterns. */
3293 ix86_check_movabs (rtx insn, int opnum)
3295 rtx set, mem;
3297 set = PATTERN (insn);
3298 if (GET_CODE (set) == PARALLEL)
3299 set = XVECEXP (set, 0, 0);
3300 if (GET_CODE (set) != SET)
3301 abort ();
3302 mem = XEXP (set, opnum);
3303 while (GET_CODE (mem) == SUBREG)
3304 mem = SUBREG_REG (mem);
3305 if (GET_CODE (mem) != MEM)
3306 abort ();
3307 return (volatile_ok || !MEM_VOLATILE_P (mem));
3310 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3313 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3315 if (!TARGET_64BIT)
3316 return nonmemory_operand (op, mode);
3317 if (register_operand (op, mode))
3318 return 1;
3319 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3322 /* Return nonzero if OP is immediate operand representable on x86_64. */
3325 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3327 if (!TARGET_64BIT)
3328 return immediate_operand (op, mode);
3329 return x86_64_sign_extended_value (op);
3332 /* Return nonzero if OP is immediate operand representable on x86_64. */
3335 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3337 return x86_64_zero_extended_value (op);
3340 /* Return nonzero if OP is (const_int 1), else return zero. */
3343 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3345 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3348 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3349 for shift & compare patterns, as shifting by 0 does not change flags),
3350 else return zero. */
3353 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3355 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3358 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3359 reference and a constant. */
3362 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3364 switch (GET_CODE (op))
3366 case SYMBOL_REF:
3367 case LABEL_REF:
3368 return 1;
3370 case CONST:
3371 op = XEXP (op, 0);
3372 if (GET_CODE (op) == SYMBOL_REF
3373 || GET_CODE (op) == LABEL_REF
3374 || (GET_CODE (op) == UNSPEC
3375 && (XINT (op, 1) == UNSPEC_GOT
3376 || XINT (op, 1) == UNSPEC_GOTOFF
3377 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3378 return 1;
3379 if (GET_CODE (op) != PLUS
3380 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3381 return 0;
3383 op = XEXP (op, 0);
3384 if (GET_CODE (op) == SYMBOL_REF
3385 || GET_CODE (op) == LABEL_REF)
3386 return 1;
3387 /* Only @GOTOFF gets offsets. */
3388 if (GET_CODE (op) != UNSPEC
3389 || XINT (op, 1) != UNSPEC_GOTOFF)
3390 return 0;
3392 op = XVECEXP (op, 0, 0);
3393 if (GET_CODE (op) == SYMBOL_REF
3394 || GET_CODE (op) == LABEL_REF)
3395 return 1;
3396 return 0;
3398 default:
3399 return 0;
3403 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3406 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3408 if (GET_CODE (op) != CONST)
3409 return 0;
3410 op = XEXP (op, 0);
3411 if (TARGET_64BIT)
3413 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3414 return 1;
3416 else
3418 if (GET_CODE (op) == UNSPEC)
3419 return 1;
3420 if (GET_CODE (op) != PLUS
3421 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3422 return 0;
3423 op = XEXP (op, 0);
3424 if (GET_CODE (op) == UNSPEC)
3425 return 1;
3427 return 0;
3430 /* Return true if OP is a symbolic operand that resolves locally. */
3432 static int
3433 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3435 if (GET_CODE (op) == CONST
3436 && GET_CODE (XEXP (op, 0)) == PLUS
3437 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3438 op = XEXP (XEXP (op, 0), 0);
3440 if (GET_CODE (op) == LABEL_REF)
3441 return 1;
3443 if (GET_CODE (op) != SYMBOL_REF)
3444 return 0;
3446 if (SYMBOL_REF_LOCAL_P (op))
3447 return 1;
3449 /* There is, however, a not insubstantial body of code in the rest of
3450 the compiler that assumes it can just stick the results of
3451 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3452 /* ??? This is a hack. Should update the body of the compiler to
3453      always create a DECL and invoke targetm.encode_section_info.  */
3454 if (strncmp (XSTR (op, 0), internal_label_prefix,
3455 internal_label_prefix_len) == 0)
3456 return 1;
3458 return 0;
3461 /* Test for various thread-local symbols. */
3464 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3466 if (GET_CODE (op) != SYMBOL_REF)
3467 return 0;
3468 return SYMBOL_REF_TLS_MODEL (op);
3471 static inline int
3472 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3474 if (GET_CODE (op) != SYMBOL_REF)
3475 return 0;
3476 return SYMBOL_REF_TLS_MODEL (op) == kind;
3480 global_dynamic_symbolic_operand (register rtx op,
3481 enum machine_mode mode ATTRIBUTE_UNUSED)
3483 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3487 local_dynamic_symbolic_operand (register rtx op,
3488 enum machine_mode mode ATTRIBUTE_UNUSED)
3490 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3494 initial_exec_symbolic_operand (register rtx op,
3495 enum machine_mode mode ATTRIBUTE_UNUSED)
3497 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3501 local_exec_symbolic_operand (register rtx op,
3502 enum machine_mode mode ATTRIBUTE_UNUSED)
3504 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3507 /* Test for a valid operand for a call instruction. Don't allow the
3508 arg pointer register or virtual regs since they may decay into
3509 reg + const, which the patterns can't handle. */
3512 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3514 /* Disallow indirect through a virtual register. This leads to
3515 compiler aborts when trying to eliminate them. */
3516 if (GET_CODE (op) == REG
3517 && (op == arg_pointer_rtx
3518 || op == frame_pointer_rtx
3519 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3520 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3521 return 0;
3523 /* Disallow `call 1234'. Due to varying assembler lameness this
3524 gets either rejected or translated to `call .+1234'. */
3525 if (GET_CODE (op) == CONST_INT)
3526 return 0;
3528 /* Explicitly allow SYMBOL_REF even if pic. */
3529 if (GET_CODE (op) == SYMBOL_REF)
3530 return 1;
3532 /* Otherwise we can allow any general_operand in the address. */
3533 return general_operand (op, Pmode);
3536 /* Test for a valid operand for a call instruction. Don't allow the
3537 arg pointer register or virtual regs since they may decay into
3538 reg + const, which the patterns can't handle. */
3541 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3543 /* Disallow indirect through a virtual register. This leads to
3544 compiler aborts when trying to eliminate them. */
3545 if (GET_CODE (op) == REG
3546 && (op == arg_pointer_rtx
3547 || op == frame_pointer_rtx
3548 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3549 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3550 return 0;
3552 /* Explicitly allow SYMBOL_REF even if pic. */
3553 if (GET_CODE (op) == SYMBOL_REF)
3554 return 1;
3556 /* Otherwise we can only allow register operands. */
3557 return register_operand (op, Pmode);
3561 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3563 if (GET_CODE (op) == CONST
3564 && GET_CODE (XEXP (op, 0)) == PLUS
3565 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3566 op = XEXP (XEXP (op, 0), 0);
3567 return GET_CODE (op) == SYMBOL_REF;
3570 /* Match exactly zero and one. */
3573 const0_operand (register rtx op, enum machine_mode mode)
3575 return op == CONST0_RTX (mode);
3579 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3581 return op == const1_rtx;
3584 /* Match 2, 4, or 8. Used for leal multiplicands. */
3587 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3589 return (GET_CODE (op) == CONST_INT
3590 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3593 /* True if this is a constant appropriate for an increment or decrement. */
3596 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3598 /* On the Pentium 4, the inc and dec operations cause an extra dependency on the
3599 flags register, since the carry flag is not set. */
3600 if (TARGET_PENTIUM4 && !optimize_size)
3601 return 0;
3602 return op == const1_rtx || op == constm1_rtx;
3605 /* Return nonzero if OP is acceptable as operand of DImode shift
3606 expander. */
3609 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3611 if (TARGET_64BIT)
3612 return nonimmediate_operand (op, mode);
3613 else
3614 return register_operand (op, mode);
3617 /* Return false if this is the stack pointer, or any other fake
3618 register eliminable to the stack pointer. Otherwise, this is
3619 a register operand.
3621 This is used to prevent esp from being used as an index reg,
3622 which would only happen in pathological cases. */
3625 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3627 rtx t = op;
3628 if (GET_CODE (t) == SUBREG)
3629 t = SUBREG_REG (t);
3630 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3631 return 0;
3633 return register_operand (op, mode);
3637 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3639 return MMX_REG_P (op);
3642 /* Return false if this is any eliminable register. Otherwise
3643 general_operand. */
3646 general_no_elim_operand (register rtx op, enum machine_mode mode)
3648 rtx t = op;
3649 if (GET_CODE (t) == SUBREG)
3650 t = SUBREG_REG (t);
3651 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3652 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3653 || t == virtual_stack_dynamic_rtx)
3654 return 0;
3655 if (REG_P (t)
3656 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3657 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3658 return 0;
3660 return general_operand (op, mode);
3663 /* Return false if this is any eliminable register. Otherwise
3664 register_operand or const_int. */
3667 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3669 rtx t = op;
3670 if (GET_CODE (t) == SUBREG)
3671 t = SUBREG_REG (t);
3672 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3673 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3674 || t == virtual_stack_dynamic_rtx)
3675 return 0;
3677 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3680 /* Return false if this is any eliminable register or stack register,
3681 otherwise work like register_operand. */
3684 index_register_operand (register rtx op, enum machine_mode mode)
3686 rtx t = op;
3687 if (GET_CODE (t) == SUBREG)
3688 t = SUBREG_REG (t);
3689 if (!REG_P (t))
3690 return 0;
3691 if (t == arg_pointer_rtx
3692 || t == frame_pointer_rtx
3693 || t == virtual_incoming_args_rtx
3694 || t == virtual_stack_vars_rtx
3695 || t == virtual_stack_dynamic_rtx
3696 || REGNO (t) == STACK_POINTER_REGNUM)
3697 return 0;
3699 return general_operand (op, mode);
3702 /* Return true if op is a Q_REGS class register. */
3705 q_regs_operand (register rtx op, enum machine_mode mode)
3707 if (mode != VOIDmode && GET_MODE (op) != mode)
3708 return 0;
3709 if (GET_CODE (op) == SUBREG)
3710 op = SUBREG_REG (op);
3711 return ANY_QI_REG_P (op);
3714 /* Return true if op is the flags register. */
3717 flags_reg_operand (register rtx op, enum machine_mode mode)
3719 if (mode != VOIDmode && GET_MODE (op) != mode)
3720 return 0;
3721 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3724 /* Return true if op is a NON_Q_REGS class register. */
3727 non_q_regs_operand (register rtx op, enum machine_mode mode)
3729 if (mode != VOIDmode && GET_MODE (op) != mode)
3730 return 0;
3731 if (GET_CODE (op) == SUBREG)
3732 op = SUBREG_REG (op);
3733 return NON_QI_REG_P (op);
3737 zero_extended_scalar_load_operand (rtx op,
3738 enum machine_mode mode ATTRIBUTE_UNUSED)
3740 unsigned n_elts;
3741 if (GET_CODE (op) != MEM)
3742 return 0;
3743 op = maybe_get_pool_constant (op);
3744 if (!op)
3745 return 0;
3746 if (GET_CODE (op) != CONST_VECTOR)
3747 return 0;
3748 n_elts =
3749 (GET_MODE_SIZE (GET_MODE (op)) /
3750 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3751 for (n_elts--; n_elts > 0; n_elts--)
3753 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3754 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3755 return 0;
3757 return 1;
3760 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3762 vector_move_operand (rtx op, enum machine_mode mode)
3764 if (nonimmediate_operand (op, mode))
3765 return 1;
3766 if (GET_MODE (op) != mode && mode != VOIDmode)
3767 return 0;
3768 return (op == CONST0_RTX (GET_MODE (op)));
3771 /* Return true if OP is a valid address that does not contain
3772 a segment override. */
3775 no_seg_address_operand (register rtx op, enum machine_mode mode)
3777 struct ix86_address parts;
3779 if (! address_operand (op, mode))
3780 return 0;
3782 if (! ix86_decompose_address (op, &parts))
3783 abort ();
3785 return parts.seg == SEG_DEFAULT;
3788 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3789 insns. */
3791 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3793 enum rtx_code code = GET_CODE (op);
3794 switch (code)
3796 /* Operations supported directly. */
3797 case EQ:
3798 case LT:
3799 case LE:
3800 case UNORDERED:
3801 case NE:
3802 case UNGE:
3803 case UNGT:
3804 case ORDERED:
3805 return 1;
3806 /* These are equivalent to ones above in non-IEEE comparisons. */
3807 case UNEQ:
3808 case UNLT:
3809 case UNLE:
3810 case LTGT:
3811 case GE:
3812 case GT:
3813 return !TARGET_IEEE_FP;
3814 default:
3815 return 0;
3818 /* Return 1 if OP is a valid comparison operator in valid mode. */
3820 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3822 enum machine_mode inmode;
3823 enum rtx_code code = GET_CODE (op);
3824 if (mode != VOIDmode && GET_MODE (op) != mode)
3825 return 0;
3826 if (GET_RTX_CLASS (code) != '<')
3827 return 0;
3828 inmode = GET_MODE (XEXP (op, 0));
3830 if (inmode == CCFPmode || inmode == CCFPUmode)
3832 enum rtx_code second_code, bypass_code;
3833 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3834 return (bypass_code == NIL && second_code == NIL);
3836 switch (code)
3838 case EQ: case NE:
3839 return 1;
3840 case LT: case GE:
3841 if (inmode == CCmode || inmode == CCGCmode
3842 || inmode == CCGOCmode || inmode == CCNOmode)
3843 return 1;
3844 return 0;
3845 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3846 if (inmode == CCmode)
3847 return 1;
3848 return 0;
3849 case GT: case LE:
3850 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3851 return 1;
3852 return 0;
3853 default:
3854 return 0;
3858 /* Return 1 if OP is a valid comparison operator testing whether the
3859 carry flag is set. */
3861 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3863 enum machine_mode inmode;
3864 enum rtx_code code = GET_CODE (op);
3866 if (mode != VOIDmode && GET_MODE (op) != mode)
3867 return 0;
3868 if (GET_RTX_CLASS (code) != '<')
3869 return 0;
3870 inmode = GET_MODE (XEXP (op, 0));
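/* Note for readers: the operand must be a comparison of the flags register
   (hard register 17 in this port, i.e. FLAGS_REG) against zero; anything
   else cannot describe the carry flag.  */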
3871 if (GET_CODE (XEXP (op, 0)) != REG
3872 || REGNO (XEXP (op, 0)) != 17
3873 || XEXP (op, 1) != const0_rtx)
3874 return 0;
3876 if (inmode == CCFPmode || inmode == CCFPUmode)
3878 enum rtx_code second_code, bypass_code;
3880 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3881 if (bypass_code != NIL || second_code != NIL)
3882 return 0;
3883 code = ix86_fp_compare_code_to_integer (code);
3885 else if (inmode != CCmode)
3886 return 0;
3887 return code == LTU;
3890 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3893 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3895 enum machine_mode inmode;
3896 enum rtx_code code = GET_CODE (op);
3898 if (mode != VOIDmode && GET_MODE (op) != mode)
3899 return 0;
3900 if (GET_RTX_CLASS (code) != '<')
3901 return 0;
3902 inmode = GET_MODE (XEXP (op, 0));
3903 if (inmode == CCFPmode || inmode == CCFPUmode)
3905 enum rtx_code second_code, bypass_code;
3907 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3908 if (bypass_code != NIL || second_code != NIL)
3909 return 0;
3910 code = ix86_fp_compare_code_to_integer (code);
3912 /* The i387 supports only a limited set of condition codes. */
3913 switch (code)
3915 case LTU: case GTU: case LEU: case GEU:
3916 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3917 return 1;
3918 return 0;
3919 case ORDERED: case UNORDERED:
3920 case EQ: case NE:
3921 return 1;
3922 default:
3923 return 0;
3927 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3930 promotable_binary_operator (register rtx op,
3931 enum machine_mode mode ATTRIBUTE_UNUSED)
3933 switch (GET_CODE (op))
3935 case MULT:
3936 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3937 but the 386 and 486 do HImode multiplies faster. */
3938 return ix86_tune > PROCESSOR_I486;
3939 case PLUS:
3940 case AND:
3941 case IOR:
3942 case XOR:
3943 case ASHIFT:
3944 return 1;
3945 default:
3946 return 0;
3950 /* Nearly general operand, but accept any const_double, since we wish
3951 to be able to drop them into memory rather than have them get pulled
3952 into registers. */
3955 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
3957 if (mode != VOIDmode && mode != GET_MODE (op))
3958 return 0;
3959 if (GET_CODE (op) == CONST_DOUBLE)
3960 return 1;
3961 return general_operand (op, mode);
3964 /* Match an SI or HImode register for a zero_extract. */
3967 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3969 int regno;
3970 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3971 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3972 return 0;
3974 if (!register_operand (op, VOIDmode))
3975 return 0;
3977 /* Be careful to accept only registers having upper parts. */
3978 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3979 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
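/* Illustrative note: hard registers 0-3 (%eax, %edx, %ecx, %ebx) are the
   only ones with addressable high byte parts (%ah, %dh, %ch, %bh), hence
   the regno < 4 test; pseudo registers (regno > LAST_VIRTUAL_REGISTER) are
   also accepted since the allocator can still place them in one of those
   registers.  */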
3982 /* Return 1 if this is a valid binary floating-point operation.
3983 OP is the expression matched, and MODE is its mode. */
3986 binary_fp_operator (register rtx op, enum machine_mode mode)
3988 if (mode != VOIDmode && mode != GET_MODE (op))
3989 return 0;
3991 switch (GET_CODE (op))
3993 case PLUS:
3994 case MINUS:
3995 case MULT:
3996 case DIV:
3997 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3999 default:
4000 return 0;
4005 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4007 return GET_CODE (op) == MULT;
4011 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4013 return GET_CODE (op) == DIV;
4017 arith_or_logical_operator (rtx op, enum machine_mode mode)
4019 return ((mode == VOIDmode || GET_MODE (op) == mode)
4020 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4021 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4024 /* Returns 1 if OP is a memory operand with a displacement. */
4027 memory_displacement_operand (register rtx op, enum machine_mode mode)
4029 struct ix86_address parts;
4031 if (! memory_operand (op, mode))
4032 return 0;
4034 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4035 abort ();
4037 return parts.disp != NULL_RTX;
4040 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4041 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4043 ??? It seems likely that this will only work because cmpsi is an
4044 expander, and no actual insns use this. */
4047 cmpsi_operand (rtx op, enum machine_mode mode)
4049 if (nonimmediate_operand (op, mode))
4050 return 1;
4052 if (GET_CODE (op) == AND
4053 && GET_MODE (op) == SImode
4054 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4055 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4056 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4057 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4058 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4059 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4060 return 1;
4062 return 0;
4065 /* Returns 1 if OP is a memory operand whose address cannot be represented
4066 by the ModRM byte alone. */
4069 long_memory_operand (register rtx op, enum machine_mode mode)
4071 if (! memory_operand (op, mode))
4072 return 0;
4074 return memory_address_length (op) != 0;
4077 /* Return nonzero if the rtx is known aligned. */
4080 aligned_operand (rtx op, enum machine_mode mode)
4082 struct ix86_address parts;
4084 if (!general_operand (op, mode))
4085 return 0;
4087 /* Registers and immediate operands are always "aligned". */
4088 if (GET_CODE (op) != MEM)
4089 return 1;
4091 /* Don't even try to do any aligned optimizations with volatiles. */
4092 if (MEM_VOLATILE_P (op))
4093 return 0;
4095 op = XEXP (op, 0);
4097 /* Pushes and pops are only valid on the stack pointer. */
4098 if (GET_CODE (op) == PRE_DEC
4099 || GET_CODE (op) == POST_INC)
4100 return 1;
4102 /* Decode the address. */
4103 if (! ix86_decompose_address (op, &parts))
4104 abort ();
4106 if (parts.base && GET_CODE (parts.base) == SUBREG)
4107 parts.base = SUBREG_REG (parts.base);
4108 if (parts.index && GET_CODE (parts.index) == SUBREG)
4109 parts.index = SUBREG_REG (parts.index);
4111 /* Look for some component that isn't known to be aligned. */
4112 if (parts.index)
4114 if (parts.scale < 4
4115 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4116 return 0;
4118 if (parts.base)
4120 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4121 return 0;
4123 if (parts.disp)
4125 if (GET_CODE (parts.disp) != CONST_INT
4126 || (INTVAL (parts.disp) & 3) != 0)
4127 return 0;
4130 /* Didn't find one -- this must be an aligned address. */
4131 return 1;
4134 /* Initialize the table of extra 80387 mathematical constants. */
4136 static void
4137 init_ext_80387_constants (void)
4139 static const char * cst[5] =
4141 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4142 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4143 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4144 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4145 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4147 int i;
4149 for (i = 0; i < 5; i++)
4151 real_from_string (&ext_80387_constants_table[i], cst[i]);
4152 /* Ensure each constant is rounded to XFmode precision. */
4153 real_convert (&ext_80387_constants_table[i],
4154 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4155 &ext_80387_constants_table[i]);
4158 ext_80387_constants_init = 1;
4161 /* Return true if the constant is something that can be loaded with
4162 a special instruction. */
4165 standard_80387_constant_p (rtx x)
4167 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4168 return -1;
4170 if (x == CONST0_RTX (GET_MODE (x)))
4171 return 1;
4172 if (x == CONST1_RTX (GET_MODE (x)))
4173 return 2;
4175 /* For XFmode constants, try to find a special 80387 instruction on
4176 those CPUs that benefit from them. */
4177 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4178 && x86_ext_80387_constants & TUNEMASK)
4180 REAL_VALUE_TYPE r;
4181 int i;
4183 if (! ext_80387_constants_init)
4184 init_ext_80387_constants ();
4186 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4187 for (i = 0; i < 5; i++)
4188 if (real_identical (&r, &ext_80387_constants_table[i]))
4189 return i + 3;
4192 return 0;
4195 /* Return the opcode of the special instruction to be used to load
4196 the constant X. */
4198 const char *
4199 standard_80387_constant_opcode (rtx x)
4201 switch (standard_80387_constant_p (x))
4203 case 1:
4204 return "fldz";
4205 case 2:
4206 return "fld1";
4207 case 3:
4208 return "fldlg2";
4209 case 4:
4210 return "fldln2";
4211 case 5:
4212 return "fldl2e";
4213 case 6:
4214 return "fldl2t";
4215 case 7:
4216 return "fldpi";
4218 abort ();
4221 /* Return the CONST_DOUBLE representing the 80387 constant that is
4222 loaded by the specified special instruction. The argument IDX
4223 matches the return value from standard_80387_constant_p. */
4226 standard_80387_constant_rtx (int idx)
4228 int i;
4230 if (! ext_80387_constants_init)
4231 init_ext_80387_constants ();
4233 switch (idx)
4235 case 3:
4236 case 4:
4237 case 5:
4238 case 6:
4239 case 7:
4240 i = idx - 3;
4241 break;
4243 default:
4244 abort ();
4247 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4248 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4251 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4254 standard_sse_constant_p (rtx x)
4256 if (x == const0_rtx)
4257 return 1;
4258 return (x == CONST0_RTX (GET_MODE (x)));
4261 /* Returns 1 if OP contains a symbol reference */
4264 symbolic_reference_mentioned_p (rtx op)
4266 register const char *fmt;
4267 register int i;
4269 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4270 return 1;
4272 fmt = GET_RTX_FORMAT (GET_CODE (op));
4273 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4275 if (fmt[i] == 'E')
4277 register int j;
4279 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4280 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4281 return 1;
4284 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4285 return 1;
4288 return 0;
4291 /* Return 1 if it is appropriate to emit `ret' instructions in the
4292 body of a function. Do this only if the epilogue is simple, needing a
4293 couple of insns. Prior to reloading, we can't tell how many registers
4294 must be saved, so return 0 then. Return 0 if there is no frame
4295 marker to de-allocate.
4297 If NON_SAVING_SETJMP is defined and true, then it is not possible
4298 for the epilogue to be simple, so return 0. This is a special case
4299 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4300 until final, but jump_optimize may need to know sooner if a
4301 `return' is OK. */
4304 ix86_can_use_return_insn_p (void)
4306 struct ix86_frame frame;
4308 #ifdef NON_SAVING_SETJMP
4309 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4310 return 0;
4311 #endif
4313 if (! reload_completed || frame_pointer_needed)
4314 return 0;
4316 /* Don't allow popping more than 32K bytes of args, since that's all we
4317 can do with one instruction. */
4318 if (current_function_pops_args
4319 && current_function_args_size >= 32768)
4320 return 0;
4322 ix86_compute_frame_layout (&frame);
4323 return frame.to_allocate == 0 && frame.nregs == 0;
4326 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4328 x86_64_sign_extended_value (rtx value)
4330 switch (GET_CODE (value))
4332 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4333 to be at least 32 and thus all acceptable constants are
4334 represented as CONST_INT. */
4335 case CONST_INT:
4336 if (HOST_BITS_PER_WIDE_INT == 32)
4337 return 1;
4338 else
4340 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4341 return trunc_int_for_mode (val, SImode) == val;
4343 break;
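/* For illustration: on a 64-bit host this accepts any value in the range
   -0x80000000 .. 0x7fffffff; a value such as 0x80000000 is rejected because
   truncating it to SImode and sign-extending it back does not reproduce the
   original value.  */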
4345 /* For certain code models, the symbolic references are known to fit:
4346 in the CM_SMALL_PIC model we know it fits if it is local to the shared
4347 library. Don't count TLS SYMBOL_REFs here, since they should fit
4348 only inside an UNSPEC, which is handled below. */
4349 case SYMBOL_REF:
4350 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4352 /* For certain code models, the code is near as well. */
4353 case LABEL_REF:
4354 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4355 || ix86_cmodel == CM_KERNEL);
4357 /* We also may accept the offsetted memory references in certain special
4358 cases. */
4359 case CONST:
4360 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4361 switch (XINT (XEXP (value, 0), 1))
4363 case UNSPEC_GOTPCREL:
4364 case UNSPEC_DTPOFF:
4365 case UNSPEC_GOTNTPOFF:
4366 case UNSPEC_NTPOFF:
4367 return 1;
4368 default:
4369 break;
4371 if (GET_CODE (XEXP (value, 0)) == PLUS)
4373 rtx op1 = XEXP (XEXP (value, 0), 0);
4374 rtx op2 = XEXP (XEXP (value, 0), 1);
4375 HOST_WIDE_INT offset;
4377 if (ix86_cmodel == CM_LARGE)
4378 return 0;
4379 if (GET_CODE (op2) != CONST_INT)
4380 return 0;
4381 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4382 switch (GET_CODE (op1))
4384 case SYMBOL_REF:
4385 /* For CM_SMALL assume that the last object is at most 16MB below
4386 the 31-bit boundary. We may also accept pretty
4387 large negative constants, knowing that all objects are
4388 in the positive half of the address space. */
4389 if (ix86_cmodel == CM_SMALL
4390 && offset < 16*1024*1024
4391 && trunc_int_for_mode (offset, SImode) == offset)
4392 return 1;
4393 /* For CM_KERNEL we know that all objects reside in the
4394 negative half of the 32-bit address space. We may not
4395 accept negative offsets, since they may push the address just
4396 outside it, but we may accept pretty large positive ones. */
4397 if (ix86_cmodel == CM_KERNEL
4398 && offset > 0
4399 && trunc_int_for_mode (offset, SImode) == offset)
4400 return 1;
4401 break;
4402 case LABEL_REF:
4403 /* These conditions are similar to SYMBOL_REF ones, just the
4404 constraints for code models differ. */
4405 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4406 && offset < 16*1024*1024
4407 && trunc_int_for_mode (offset, SImode) == offset)
4408 return 1;
4409 if (ix86_cmodel == CM_KERNEL
4410 && offset > 0
4411 && trunc_int_for_mode (offset, SImode) == offset)
4412 return 1;
4413 break;
4414 case UNSPEC:
4415 switch (XINT (op1, 1))
4417 case UNSPEC_DTPOFF:
4418 case UNSPEC_NTPOFF:
4419 if (offset > 0
4420 && trunc_int_for_mode (offset, SImode) == offset)
4421 return 1;
4423 break;
4424 default:
4425 return 0;
4428 return 0;
4429 default:
4430 return 0;
4434 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4436 x86_64_zero_extended_value (rtx value)
4438 switch (GET_CODE (value))
4440 case CONST_DOUBLE:
4441 if (HOST_BITS_PER_WIDE_INT == 32)
4442 return (GET_MODE (value) == VOIDmode
4443 && !CONST_DOUBLE_HIGH (value));
4444 else
4445 return 0;
4446 case CONST_INT:
4447 if (HOST_BITS_PER_WIDE_INT == 32)
4448 return INTVAL (value) >= 0;
4449 else
4450 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4451 break;
4453 /* For certain code models, the symbolic references are known to fit. */
4454 case SYMBOL_REF:
4455 return ix86_cmodel == CM_SMALL;
4457 /* For certain code models, the code is near as well. */
4458 case LABEL_REF:
4459 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4461 /* We also may accept the offsetted memory references in certain special
4462 cases. */
4463 case CONST:
4464 if (GET_CODE (XEXP (value, 0)) == PLUS)
4466 rtx op1 = XEXP (XEXP (value, 0), 0);
4467 rtx op2 = XEXP (XEXP (value, 0), 1);
4469 if (ix86_cmodel == CM_LARGE)
4470 return 0;
4471 switch (GET_CODE (op1))
4473 case SYMBOL_REF:
4474 return 0;
4475 /* For small code model we may accept pretty large positive
4476 offsets, since one bit is available for free. Negative
4477 offsets are limited by the size of NULL pointer area
4478 specified by the ABI. */
4479 if (ix86_cmodel == CM_SMALL
4480 && GET_CODE (op2) == CONST_INT
4481 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4482 && (trunc_int_for_mode (INTVAL (op2), SImode)
4483 == INTVAL (op2)))
4484 return 1;
4485 /* ??? For the kernel, we may accept adjustment of
4486 -0x10000000, since we know that it will just convert
4487 negative address space to positive, but perhaps this
4488 is not worthwhile. */
4489 break;
4490 case LABEL_REF:
4491 /* These conditions are similar to SYMBOL_REF ones, just the
4492 constraints for code models differ. */
4493 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4494 && GET_CODE (op2) == CONST_INT
4495 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4496 && (trunc_int_for_mode (INTVAL (op2), SImode)
4497 == INTVAL (op2)))
4498 return 1;
4499 break;
4500 default:
4501 return 0;
4504 return 0;
4505 default:
4506 return 0;
4510 /* Value should be nonzero if functions must have frame pointers.
4511 Zero means the frame pointer need not be set up (and parms may
4512 be accessed via the stack pointer) in functions that seem suitable. */
4515 ix86_frame_pointer_required (void)
4517 /* If we accessed previous frames, then the generated code expects
4518 to be able to access the saved ebp value in our frame. */
4519 if (cfun->machine->accesses_prev_frame)
4520 return 1;
4522 /* Several x86 OSes need a frame pointer for other reasons,
4523 usually pertaining to setjmp. */
4524 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4525 return 1;
4527 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4528 the frame pointer by default. Turn it back on now if we've not
4529 got a leaf function. */
4530 if (TARGET_OMIT_LEAF_FRAME_POINTER
4531 && (!current_function_is_leaf))
4532 return 1;
4534 if (current_function_profile)
4535 return 1;
4537 return 0;
4540 /* Record that the current function accesses previous call frames. */
4542 void
4543 ix86_setup_frame_addresses (void)
4545 cfun->machine->accesses_prev_frame = 1;
4548 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4549 # define USE_HIDDEN_LINKONCE 1
4550 #else
4551 # define USE_HIDDEN_LINKONCE 0
4552 #endif
4554 static int pic_labels_used;
4556 /* Fills in the label name that should be used for a pc thunk for
4557 the given register. */
4559 static void
4560 get_pc_thunk_name (char name[32], unsigned int regno)
4562 if (USE_HIDDEN_LINKONCE)
4563 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4564 else
4565 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
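/* For example, with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label such as ".LPR3" is
   used (the exact spelling depends on ASM_GENERATE_INTERNAL_LABEL).  */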
4569 /* Emit the pc thunks used by -fpic code: each thunk loads its register with
4570 the return address of the caller and then returns. */
4572 void
4573 ix86_file_end (void)
4575 rtx xops[2];
4576 int regno;
4578 for (regno = 0; regno < 8; ++regno)
4580 char name[32];
4582 if (! ((pic_labels_used >> regno) & 1))
4583 continue;
4585 get_pc_thunk_name (name, regno);
4587 if (USE_HIDDEN_LINKONCE)
4589 tree decl;
4591 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4592 error_mark_node);
4593 TREE_PUBLIC (decl) = 1;
4594 TREE_STATIC (decl) = 1;
4595 DECL_ONE_ONLY (decl) = 1;
4597 (*targetm.asm_out.unique_section) (decl, 0);
4598 named_section (decl, NULL, 0);
4600 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4601 fputs ("\t.hidden\t", asm_out_file);
4602 assemble_name (asm_out_file, name);
4603 fputc ('\n', asm_out_file);
4604 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4606 else
4608 text_section ();
4609 ASM_OUTPUT_LABEL (asm_out_file, name);
4612 xops[0] = gen_rtx_REG (SImode, regno);
4613 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4614 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4615 output_asm_insn ("ret", xops);
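/* The thunk emitted by the two insns above therefore looks like, e.g. for
   %ebx:

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies the caller's return address (the address of the
   instruction following the call) into the chosen register.  */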
4618 if (NEED_INDICATE_EXEC_STACK)
4619 file_end_indicate_exec_stack ();
4622 /* Emit code for the SET_GOT patterns. */
4624 const char *
4625 output_set_got (rtx dest)
4627 rtx xops[3];
4629 xops[0] = dest;
4630 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4632 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4634 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4636 if (!flag_pic)
4637 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4638 else
4639 output_asm_insn ("call\t%a2", xops);
4641 #if TARGET_MACHO
4642 /* Output the "canonical" label name ("Lxx$pb") here too. This
4643 is what will be referred to by the Mach-O PIC subsystem. */
4644 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4645 #endif
4646 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4647 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4649 if (flag_pic)
4650 output_asm_insn ("pop{l}\t%0", xops);
4652 else
4654 char name[32];
4655 get_pc_thunk_name (name, REGNO (dest));
4656 pic_labels_used |= 1 << REGNO (dest);
4658 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4659 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4660 output_asm_insn ("call\t%X2", xops);
4663 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4664 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4665 else if (!TARGET_MACHO)
4666 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4668 return "";
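/* A rough sketch of the sequences produced above.  Without deep branch
   prediction, the PIC register is set up with:

	call	1f
   1:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION, the call/pop pair is replaced by a
   call to the matching __i686.get_pc_thunk thunk followed by an
   addl $_GLOBAL_OFFSET_TABLE_, %reg.  */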
4673 /* Generate a "push" pattern for input ARG. */
4673 static rtx
4674 gen_push (rtx arg)
4676 return gen_rtx_SET (VOIDmode,
4677 gen_rtx_MEM (Pmode,
4678 gen_rtx_PRE_DEC (Pmode,
4679 stack_pointer_rtx)),
4680 arg);
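/* In 32-bit mode Pmode is SImode, so the RTL built here is
   (set (mem:SI (pre_dec:SI (reg:SI sp))) arg), which the push patterns
   emit as a single pushl.  */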
4683 /* Return >= 0 if there is an unused call-clobbered register available
4684 for the entire function. */
4686 static unsigned int
4687 ix86_select_alt_pic_regnum (void)
4689 if (current_function_is_leaf && !current_function_profile)
4691 int i;
4692 for (i = 2; i >= 0; --i)
4693 if (!regs_ever_live[i])
4694 return i;
4697 return INVALID_REGNUM;
4700 /* Return 1 if we need to save REGNO. */
4701 static int
4702 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4704 if (pic_offset_table_rtx
4705 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4706 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4707 || current_function_profile
4708 || current_function_calls_eh_return
4709 || current_function_uses_const_pool))
4711 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4712 return 0;
4713 return 1;
4716 if (current_function_calls_eh_return && maybe_eh_return)
4718 unsigned i;
4719 for (i = 0; ; i++)
4721 unsigned test = EH_RETURN_DATA_REGNO (i);
4722 if (test == INVALID_REGNUM)
4723 break;
4724 if (test == regno)
4725 return 1;
4729 return (regs_ever_live[regno]
4730 && !call_used_regs[regno]
4731 && !fixed_regs[regno]
4732 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4735 /* Return number of registers to be saved on the stack. */
4737 static int
4738 ix86_nsaved_regs (void)
4740 int nregs = 0;
4741 int regno;
4743 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4744 if (ix86_save_reg (regno, true))
4745 nregs++;
4746 return nregs;
4749 /* Return the offset between two registers, one to be eliminated, and the other
4750 its replacement, at the start of a routine. */
4752 HOST_WIDE_INT
4753 ix86_initial_elimination_offset (int from, int to)
4755 struct ix86_frame frame;
4756 ix86_compute_frame_layout (&frame);
4758 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4759 return frame.hard_frame_pointer_offset;
4760 else if (from == FRAME_POINTER_REGNUM
4761 && to == HARD_FRAME_POINTER_REGNUM)
4762 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4763 else
4765 if (to != STACK_POINTER_REGNUM)
4766 abort ();
4767 else if (from == ARG_POINTER_REGNUM)
4768 return frame.stack_pointer_offset;
4769 else if (from != FRAME_POINTER_REGNUM)
4770 abort ();
4771 else
4772 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4776 /* Fill in the structure ix86_frame describing the frame of the function being compiled. */
4778 static void
4779 ix86_compute_frame_layout (struct ix86_frame *frame)
4781 HOST_WIDE_INT total_size;
4782 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4783 int offset;
4784 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4785 HOST_WIDE_INT size = get_frame_size ();
4787 frame->nregs = ix86_nsaved_regs ();
4788 total_size = size;
4790 /* During reload iterations the number of registers saved can change.
4791 Recompute the value as needed. Do not recompute when the number of registers
4792 didn't change, as reload makes multiple calls to this function and does not
4793 expect the decision to change within a single iteration. */
4794 if (!optimize_size
4795 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4797 int count = frame->nregs;
4799 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4800 /* The fast prologue uses moves instead of pushes to save registers. This
4801 is significantly longer, but it also executes faster, as modern hardware
4802 can execute the moves in parallel but can't do that for push/pop.
4804 Be careful about choosing which prologue to emit: when a function takes
4805 many instructions to execute, we may use the slow version, as well as when
4806 the function is known to be outside a hot spot (this is known only with
4807 feedback). Weight the size of the function by the number of registers
4808 to save, as it is cheap to use one or two push instructions but very
4809 slow to use many of them. */
4810 if (count)
4811 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4812 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4813 || (flag_branch_probabilities
4814 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4815 cfun->machine->use_fast_prologue_epilogue = false;
4816 else
4817 cfun->machine->use_fast_prologue_epilogue
4818 = !expensive_function_p (count);
4820 if (TARGET_PROLOGUE_USING_MOVE
4821 && cfun->machine->use_fast_prologue_epilogue)
4822 frame->save_regs_using_mov = true;
4823 else
4824 frame->save_regs_using_mov = false;
4827 /* Skip return address and saved base pointer. */
4828 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4830 frame->hard_frame_pointer_offset = offset;
4832 /* Do some sanity checking of stack_alignment_needed and
4833 preferred_alignment, since the i386 port is the only one using these
4834 features, which may break easily. */
4836 if (size && !stack_alignment_needed)
4837 abort ();
4838 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4839 abort ();
4840 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4841 abort ();
4842 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4843 abort ();
4845 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4846 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4848 /* Register save area */
4849 offset += frame->nregs * UNITS_PER_WORD;
4851 /* Va-arg area */
4852 if (ix86_save_varrargs_registers)
4854 offset += X86_64_VARARGS_SIZE;
4855 frame->va_arg_size = X86_64_VARARGS_SIZE;
4857 else
4858 frame->va_arg_size = 0;
4860 /* Align start of frame for local function. */
4861 frame->padding1 = ((offset + stack_alignment_needed - 1)
4862 & -stack_alignment_needed) - offset;
4864 offset += frame->padding1;
4866 /* Frame pointer points here. */
4867 frame->frame_pointer_offset = offset;
4869 offset += size;
4871 /* Add outgoing arguments area. Can be skipped if we eliminated
4872 all the function calls as dead code. */
4873 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4875 offset += current_function_outgoing_args_size;
4876 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4878 else
4879 frame->outgoing_arguments_size = 0;
4881 /* Align stack boundary. Only needed if we're calling another function
4882 or using alloca. */
4883 if (!current_function_is_leaf || current_function_calls_alloca)
4884 frame->padding2 = ((offset + preferred_alignment - 1)
4885 & -preferred_alignment) - offset;
4886 else
4887 frame->padding2 = 0;
4889 offset += frame->padding2;
4891 /* We've reached end of stack frame. */
4892 frame->stack_pointer_offset = offset;
4894 /* Size prologue needs to allocate. */
4895 frame->to_allocate =
4896 (size + frame->padding1 + frame->padding2
4897 + frame->outgoing_arguments_size + frame->va_arg_size);
4899 if (!frame->to_allocate && frame->nregs <= 1)
4900 frame->save_regs_using_mov = false;
4902 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4903 && current_function_is_leaf)
4905 frame->red_zone_size = frame->to_allocate;
4906 if (frame->save_regs_using_mov)
4907 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4908 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4909 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4911 else
4912 frame->red_zone_size = 0;
4913 frame->to_allocate -= frame->red_zone_size;
4914 frame->stack_pointer_offset -= frame->red_zone_size;
4915 #if 0
4916 fprintf (stderr, "nregs: %i\n", frame->nregs);
4917 fprintf (stderr, "size: %i\n", size);
4918 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4919 fprintf (stderr, "padding1: %i\n", frame->padding1);
4920 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4921 fprintf (stderr, "padding2: %i\n", frame->padding2);
4922 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4923 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4924 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4925 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4926 frame->hard_frame_pointer_offset);
4927 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4928 #endif
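/* A rough picture of the layout computed above, from higher to lower
   addresses:

	return address
	saved frame pointer		(if frame_pointer_needed)
	saved registers			(frame->nregs words)
	va-arg save area		(frame->va_arg_size)
	padding1			(to stack_alignment_needed)
	local variables			(get_frame_size ())
	outgoing argument area		(if ACCUMULATE_OUTGOING_ARGS)
	padding2			(to preferred_alignment)

   hard_frame_pointer_offset, frame_pointer_offset and stack_pointer_offset
   record how far below the incoming stack pointer each boundary lies.  */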
4931 /* Emit code to save registers in the prologue. */
4933 static void
4934 ix86_emit_save_regs (void)
4936 register int regno;
4937 rtx insn;
4939 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4940 if (ix86_save_reg (regno, true))
4942 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4943 RTX_FRAME_RELATED_P (insn) = 1;
4947 /* Emit code to save registers using MOV insns. First register
4948 is stored at POINTER + OFFSET. */
4949 static void
4950 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4952 int regno;
4953 rtx insn;
4955 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4956 if (ix86_save_reg (regno, true))
4958 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4959 Pmode, offset),
4960 gen_rtx_REG (Pmode, regno));
4961 RTX_FRAME_RELATED_P (insn) = 1;
4962 offset += UNITS_PER_WORD;
4966 /* Expand the prologue into a bunch of separate insns. */
4968 void
4969 ix86_expand_prologue (void)
4971 rtx insn;
4972 bool pic_reg_used;
4973 struct ix86_frame frame;
4974 HOST_WIDE_INT allocate;
4976 ix86_compute_frame_layout (&frame);
4978 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4979 slower on all targets. Also sdb doesn't like it. */
4981 if (frame_pointer_needed)
4983 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4984 RTX_FRAME_RELATED_P (insn) = 1;
4986 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4987 RTX_FRAME_RELATED_P (insn) = 1;
4990 allocate = frame.to_allocate;
4992 if (!frame.save_regs_using_mov)
4993 ix86_emit_save_regs ();
4994 else
4995 allocate += frame.nregs * UNITS_PER_WORD;
4997 /* When using the red zone, we may start saving registers before allocating
4998 the stack frame, saving one cycle of the prologue. */
4999 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5000 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5001 : stack_pointer_rtx,
5002 -frame.nregs * UNITS_PER_WORD);
5004 if (allocate == 0)
5006 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5008 insn = emit_insn (gen_pro_epilogue_adjust_stack
5009 (stack_pointer_rtx, stack_pointer_rtx,
5010 GEN_INT (-allocate)));
5011 RTX_FRAME_RELATED_P (insn) = 1;
5013 else
5015 /* ??? Is this only valid for Win32? */
5017 rtx arg0, sym;
5019 if (TARGET_64BIT)
5020 abort ();
5022 arg0 = gen_rtx_REG (SImode, 0);
5023 emit_move_insn (arg0, GEN_INT (allocate));
5025 sym = gen_rtx_MEM (FUNCTION_MODE,
5026 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5027 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5029 CALL_INSN_FUNCTION_USAGE (insn)
5030 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5031 CALL_INSN_FUNCTION_USAGE (insn));
5033 /* Don't allow the scheduling pass to move insns across the __alloca
5034 call. */
5035 emit_insn (gen_blockage (const0_rtx));
5037 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5039 if (!frame_pointer_needed || !frame.to_allocate)
5040 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5041 else
5042 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5043 -frame.nregs * UNITS_PER_WORD);
5046 pic_reg_used = false;
5047 if (pic_offset_table_rtx
5048 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5049 || current_function_profile))
5051 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5053 if (alt_pic_reg_used != INVALID_REGNUM)
5054 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5056 pic_reg_used = true;
5059 if (pic_reg_used)
5061 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5063 /* Even with accurate pre-reload life analysis, we can wind up
5064 deleting all references to the pic register after reload.
5065 Consider if cross-jumping unifies two sides of a branch
5066 controlled by a comparison vs the only read from a global.
5067 In which case, allow the set_got to be deleted, though we're
5068 too late to do anything about the ebx save in the prologue. */
5069 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5072 /* Prevent function calls from being scheduled before the call to mcount.
5073 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
5074 if (current_function_profile)
5075 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
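/* Putting the pieces together, a typical 32-bit push-based prologue
   expanded above looks roughly like:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			(one push per register to save)
	subl	$frame_size, %esp	(when the frame needs extra space)

   With save_regs_using_mov the registers are instead stored with movl,
   after (or, with the red zone, before) the stack adjustment.  */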
5078 /* Emit code to restore saved registers using MOV insns. First register
5079 is restored from POINTER + OFFSET. */
5080 static void
5081 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5083 int regno;
5085 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5086 if (ix86_save_reg (regno, maybe_eh_return))
5088 emit_move_insn (gen_rtx_REG (Pmode, regno),
5089 adjust_address (gen_rtx_MEM (Pmode, pointer),
5090 Pmode, offset));
5091 offset += UNITS_PER_WORD;
5095 /* Restore function stack, frame, and registers. */
5097 void
5098 ix86_expand_epilogue (int style)
5100 int regno;
5101 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5102 struct ix86_frame frame;
5103 HOST_WIDE_INT offset;
5105 ix86_compute_frame_layout (&frame);
5107 /* Calculate start of saved registers relative to ebp. Special care
5108 must be taken for the normal return case of a function using
5109 eh_return: the eax and edx registers are marked as saved, but not
5110 restored along this path. */
5111 offset = frame.nregs;
5112 if (current_function_calls_eh_return && style != 2)
5113 offset -= 2;
5114 offset *= -UNITS_PER_WORD;
5116 /* If we're only restoring one register and sp is not valid, then
5117 use a move instruction to restore the register, since it's
5118 less work than reloading sp and popping the register.
5120 The default code results in a stack adjustment using an add/lea instruction,
5121 while this code results in a LEAVE instruction (or discrete equivalent),
5122 so it is profitable in some other cases as well, especially when there
5123 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5124 and there is exactly one register to pop. This heuristic may need some
5125 tuning in the future. */
5126 if ((!sp_valid && frame.nregs <= 1)
5127 || (TARGET_EPILOGUE_USING_MOVE
5128 && cfun->machine->use_fast_prologue_epilogue
5129 && (frame.nregs > 1 || frame.to_allocate))
5130 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5131 || (frame_pointer_needed && TARGET_USE_LEAVE
5132 && cfun->machine->use_fast_prologue_epilogue
5133 && frame.nregs == 1)
5134 || current_function_calls_eh_return)
5136 /* Restore registers. We can use ebp or esp to address the memory
5137 locations. If both are available, default to ebp, since offsets
5138 are known to be small. The only exception is esp pointing directly to the
5139 end of the block of saved registers, where we may simplify the addressing
5140 mode. */
5142 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5143 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5144 frame.to_allocate, style == 2);
5145 else
5146 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5147 offset, style == 2);
5149 /* eh_return epilogues need %ecx added to the stack pointer. */
5150 if (style == 2)
5152 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5154 if (frame_pointer_needed)
5156 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5157 tmp = plus_constant (tmp, UNITS_PER_WORD);
5158 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5160 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5161 emit_move_insn (hard_frame_pointer_rtx, tmp);
5163 emit_insn (gen_pro_epilogue_adjust_stack
5164 (stack_pointer_rtx, sa, const0_rtx));
5166 else
5168 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5169 tmp = plus_constant (tmp, (frame.to_allocate
5170 + frame.nregs * UNITS_PER_WORD));
5171 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5174 else if (!frame_pointer_needed)
5175 emit_insn (gen_pro_epilogue_adjust_stack
5176 (stack_pointer_rtx, stack_pointer_rtx,
5177 GEN_INT (frame.to_allocate
5178 + frame.nregs * UNITS_PER_WORD)));
5179 /* If not an i386, mov & pop is faster than "leave". */
5180 else if (TARGET_USE_LEAVE || optimize_size
5181 || !cfun->machine->use_fast_prologue_epilogue)
5182 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5183 else
5185 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5186 hard_frame_pointer_rtx,
5187 const0_rtx));
5188 if (TARGET_64BIT)
5189 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5190 else
5191 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5194 else
5196 /* First step is to deallocate the stack frame so that we can
5197 pop the registers. */
5198 if (!sp_valid)
5200 if (!frame_pointer_needed)
5201 abort ();
5202 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5203 hard_frame_pointer_rtx,
5204 GEN_INT (offset)));
5206 else if (frame.to_allocate)
5207 emit_insn (gen_pro_epilogue_adjust_stack
5208 (stack_pointer_rtx, stack_pointer_rtx,
5209 GEN_INT (frame.to_allocate)));
5211 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5212 if (ix86_save_reg (regno, false))
5214 if (TARGET_64BIT)
5215 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5216 else
5217 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5219 if (frame_pointer_needed)
5221 /* Leave results in shorter dependency chains on CPUs that are
5222 able to grok it fast. */
5223 if (TARGET_USE_LEAVE)
5224 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5225 else if (TARGET_64BIT)
5226 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5227 else
5228 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5232 /* Sibcall epilogues don't want a return instruction. */
5233 if (style == 0)
5234 return;
5236 if (current_function_pops_args && current_function_args_size)
5238 rtx popc = GEN_INT (current_function_pops_args);
5240 /* i386 can only pop 64K bytes. If asked to pop more, pop
5241 the return address, do an explicit add, and jump indirectly to the
5242 caller. */
5244 if (current_function_pops_args >= 65536)
5246 rtx ecx = gen_rtx_REG (SImode, 2);
5248 /* There is no "pascal" calling convention in the 64-bit ABI. */
5249 if (TARGET_64BIT)
5250 abort ();
5252 emit_insn (gen_popsi1 (ecx));
5253 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5254 emit_jump_insn (gen_return_indirect_internal (ecx));
5256 else
5257 emit_jump_insn (gen_return_pop_internal (popc));
5259 else
5260 emit_jump_insn (gen_return_internal ());
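/* Rough shapes of the epilogues produced above:

	movl	off(%ebp), %ebx		(move-based restores, then)
	leave
	ret
   or
	addl	$frame_size, %esp	(pop-based path)
	popl	%ebx			(one pop per saved register)
	popl	%ebp
	ret				(possibly "ret $N" when popping args)  */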
5263 /* Reset from the function's potential modifications. */
5265 static void
5266 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5267 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5269 if (pic_offset_table_rtx)
5270 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5273 /* Extract the parts of an RTL expression that is a valid memory address
5274 for an instruction. Return 0 if the structure of the address is
5275 grossly off. Return -1 if the address contains ASHIFT, so it is not
5276 strictly valid, but is still used for computing the length of a lea instruction. */
5278 static int
5279 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5281 rtx base = NULL_RTX;
5282 rtx index = NULL_RTX;
5283 rtx disp = NULL_RTX;
5284 HOST_WIDE_INT scale = 1;
5285 rtx scale_rtx = NULL_RTX;
5286 int retval = 1;
5287 enum ix86_address_seg seg = SEG_DEFAULT;
5289 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5290 base = addr;
5291 else if (GET_CODE (addr) == PLUS)
5293 rtx addends[4], op;
5294 int n = 0, i;
5296 op = addr;
5299 if (n >= 4)
5300 return 0;
5301 addends[n++] = XEXP (op, 1);
5302 op = XEXP (op, 0);
5304 while (GET_CODE (op) == PLUS);
5305 if (n >= 4)
5306 return 0;
5307 addends[n] = op;
5309 for (i = n; i >= 0; --i)
5311 op = addends[i];
5312 switch (GET_CODE (op))
5314 case MULT:
5315 if (index)
5316 return 0;
5317 index = XEXP (op, 0);
5318 scale_rtx = XEXP (op, 1);
5319 break;
5321 case UNSPEC:
5322 if (XINT (op, 1) == UNSPEC_TP
5323 && TARGET_TLS_DIRECT_SEG_REFS
5324 && seg == SEG_DEFAULT)
5325 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5326 else
5327 return 0;
5328 break;
5330 case REG:
5331 case SUBREG:
5332 if (!base)
5333 base = op;
5334 else if (!index)
5335 index = op;
5336 else
5337 return 0;
5338 break;
5340 case CONST:
5341 case CONST_INT:
5342 case SYMBOL_REF:
5343 case LABEL_REF:
5344 if (disp)
5345 return 0;
5346 disp = op;
5347 break;
5349 default:
5350 return 0;
5354 else if (GET_CODE (addr) == MULT)
5356 index = XEXP (addr, 0); /* index*scale */
5357 scale_rtx = XEXP (addr, 1);
5359 else if (GET_CODE (addr) == ASHIFT)
5361 rtx tmp;
5363 /* We're called for lea too, which implements ashift on occasion. */
5364 index = XEXP (addr, 0);
5365 tmp = XEXP (addr, 1);
5366 if (GET_CODE (tmp) != CONST_INT)
5367 return 0;
5368 scale = INTVAL (tmp);
5369 if ((unsigned HOST_WIDE_INT) scale > 3)
5370 return 0;
5371 scale = 1 << scale;
5372 retval = -1;
5374 else
5375 disp = addr; /* displacement */
5377 /* Extract the integral value of scale. */
5378 if (scale_rtx)
5380 if (GET_CODE (scale_rtx) != CONST_INT)
5381 return 0;
5382 scale = INTVAL (scale_rtx);
5385 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
5386 if (base && index && scale == 1
5387 && (index == arg_pointer_rtx
5388 || index == frame_pointer_rtx
5389 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5391 rtx tmp = base;
5392 base = index;
5393 index = tmp;
5396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5397 if ((base == hard_frame_pointer_rtx
5398 || base == frame_pointer_rtx
5399 || base == arg_pointer_rtx) && !disp)
5400 disp = const0_rtx;
5402 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5403 Avoid this by transforming to [%esi+0]. */
5404 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5405 && base && !index && !disp
5406 && REG_P (base)
5407 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5408 disp = const0_rtx;
5410 /* Special case: encode reg+reg instead of reg*2. */
5411 if (!base && index && scale && scale == 2)
5412 base = index, scale = 1;
5414 /* Special case: scaling cannot be encoded without base or displacement. */
5415 if (!base && !disp && index && scale != 1)
5416 disp = const0_rtx;
5418 out->base = base;
5419 out->index = index;
5420 out->disp = disp;
5421 out->scale = scale;
5422 out->seg = seg;
5424 return retval;
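/* Example: an address such as 12(%eax,%ebx,4), i.e. RTL along the lines of
	(plus (mult (reg bx) (const_int 4)) (plus (reg ax) (const_int 12)))
   decomposes into base = %eax, index = %ebx, scale = 4, disp = 12 and
   seg = SEG_DEFAULT.  */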
5427 /* Return cost of the memory address x.
5428 For i386, it is better to use a complex address than let gcc copy
5429 the address into a reg and make a new pseudo. But not if the address
5430 requires two regs - that would mean more pseudos with longer
5431 lifetimes. */
5432 static int
5433 ix86_address_cost (rtx x)
5435 struct ix86_address parts;
5436 int cost = 1;
5438 if (!ix86_decompose_address (x, &parts))
5439 abort ();
5441 if (parts.base && GET_CODE (parts.base) == SUBREG)
5442 parts.base = SUBREG_REG (parts.base);
5443 if (parts.index && GET_CODE (parts.index) == SUBREG)
5444 parts.index = SUBREG_REG (parts.index);
5446 /* More complex memory references are better. */
5447 if (parts.disp && parts.disp != const0_rtx)
5448 cost--;
5449 if (parts.seg != SEG_DEFAULT)
5450 cost--;
5452 /* Attempt to minimize number of registers in the address. */
5453 if ((parts.base
5454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5455 || (parts.index
5456 && (!REG_P (parts.index)
5457 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5458 cost++;
5460 if (parts.base
5461 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5462 && parts.index
5463 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5464 && parts.base != parts.index)
5465 cost++;
5467 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5468 since its predecode logic can't detect the length of such instructions
5469 and it falls back to vector decoding. Increase the cost of such
5470 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5471 to split such addresses or even refuse them altogether.
5473 The following addressing modes are affected:
5474 [base+scale*index]
5475 [scale*index+disp]
5476 [base+index]
5478 The first and last cases may be avoidable by explicitly coding the zero in
5479 the memory address, but I don't have an AMD K6 machine handy to check this
5480 theory. */
5482 if (TARGET_K6
5483 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5484 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5485 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5486 cost += 10;
5488 return cost;
5491 /* If X is a machine specific address (i.e. a symbol or label being
5492 referenced as a displacement from the GOT implemented using an
5493 UNSPEC), then return the base term. Otherwise return X. */
5496 ix86_find_base_term (rtx x)
5498 rtx term;
5500 if (TARGET_64BIT)
5502 if (GET_CODE (x) != CONST)
5503 return x;
5504 term = XEXP (x, 0);
5505 if (GET_CODE (term) == PLUS
5506 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5507 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5508 term = XEXP (term, 0);
5509 if (GET_CODE (term) != UNSPEC
5510 || XINT (term, 1) != UNSPEC_GOTPCREL)
5511 return x;
5513 term = XVECEXP (term, 0, 0);
5515 if (GET_CODE (term) != SYMBOL_REF
5516 && GET_CODE (term) != LABEL_REF)
5517 return x;
5519 return term;
5522 term = ix86_delegitimize_address (x);
5524 if (GET_CODE (term) != SYMBOL_REF
5525 && GET_CODE (term) != LABEL_REF)
5526 return x;
5528 return term;
5531 /* Determine if a given RTX is a valid constant. We already know this
5532 satisfies CONSTANT_P. */
5534 bool
5535 legitimate_constant_p (rtx x)
5537 rtx inner;
5539 switch (GET_CODE (x))
5541 case SYMBOL_REF:
5542 /* TLS symbols are not constant. */
5543 if (tls_symbolic_operand (x, Pmode))
5544 return false;
5545 break;
5547 case CONST:
5548 inner = XEXP (x, 0);
5550 /* Offsets of TLS symbols are never valid.
5551 Discourage CSE from creating them. */
5552 if (GET_CODE (inner) == PLUS
5553 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5554 return false;
5556 if (GET_CODE (inner) == PLUS)
5558 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5559 return false;
5560 inner = XEXP (inner, 0);
5563 /* Only some unspecs are valid as "constants". */
5564 if (GET_CODE (inner) == UNSPEC)
5565 switch (XINT (inner, 1))
5567 case UNSPEC_TPOFF:
5568 case UNSPEC_NTPOFF:
5569 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5570 case UNSPEC_DTPOFF:
5571 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5572 default:
5573 return false;
5575 break;
5577 default:
5578 break;
5581 /* Otherwise we handle everything else in the move patterns. */
5582 return true;
5585 /* Determine if it's legal to put X into the constant pool. This
5586 is not possible for the address of thread-local symbols, which
5587 is checked above. */
5589 static bool
5590 ix86_cannot_force_const_mem (rtx x)
5592 return !legitimate_constant_p (x);
5595 /* Determine if a given RTX is a valid constant address. */
5597 bool
5598 constant_address_p (rtx x)
5600 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5603 /* Nonzero if the constant value X is a legitimate general operand
5604 when generating PIC code. It is given that flag_pic is on and
5605 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5607 bool
5608 legitimate_pic_operand_p (rtx x)
5610 rtx inner;
5612 switch (GET_CODE (x))
5614 case CONST:
5615 inner = XEXP (x, 0);
5617 /* Only some unspecs are valid as "constants". */
5618 if (GET_CODE (inner) == UNSPEC)
5619 switch (XINT (inner, 1))
5621 case UNSPEC_TPOFF:
5622 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5623 default:
5624 return false;
5626 /* FALLTHRU */
5628 case SYMBOL_REF:
5629 case LABEL_REF:
5630 return legitimate_pic_address_disp_p (x);
5632 default:
5633 return true;
5637 /* Determine if a given CONST RTX is a valid memory displacement
5638 in PIC mode. */
5641 legitimate_pic_address_disp_p (register rtx disp)
5643 bool saw_plus;
5645 /* In 64bit mode we can allow direct addresses of symbols and labels
5646 when they are not dynamic symbols. */
5647 if (TARGET_64BIT)
5649 /* TLS references should always be enclosed in UNSPEC. */
5650 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5651 return 0;
5652 if (GET_CODE (disp) == SYMBOL_REF
5653 && ix86_cmodel == CM_SMALL_PIC
5654 && SYMBOL_REF_LOCAL_P (disp))
5655 return 1;
5656 if (GET_CODE (disp) == LABEL_REF)
5657 return 1;
5658 if (GET_CODE (disp) == CONST
5659 && GET_CODE (XEXP (disp, 0)) == PLUS
5660 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5661 && ix86_cmodel == CM_SMALL_PIC
5662 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5663 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5664 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5665 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5666 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5667 return 1;
5669 if (GET_CODE (disp) != CONST)
5670 return 0;
5671 disp = XEXP (disp, 0);
5673 if (TARGET_64BIT)
5675       /* It is not safe to allow PLUS expressions; this limits the allowed
5676 	 distance of GOT references.  We should not need these anyway.  */
5677 if (GET_CODE (disp) != UNSPEC
5678 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5679 return 0;
5681 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5682 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5683 return 0;
5684 return 1;
5687 saw_plus = false;
5688 if (GET_CODE (disp) == PLUS)
5690 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5691 return 0;
5692 disp = XEXP (disp, 0);
5693 saw_plus = true;
5696 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5697 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5699 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5700 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5701 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5703 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5704 if (strstr (sym_name, "$pb") != 0)
5705 return 1;
5709 if (GET_CODE (disp) != UNSPEC)
5710 return 0;
5712 switch (XINT (disp, 1))
5714 case UNSPEC_GOT:
5715 if (saw_plus)
5716 return false;
5717 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5718 case UNSPEC_GOTOFF:
5719 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5720 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5721 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5722 return false;
5723 case UNSPEC_GOTTPOFF:
5724 case UNSPEC_GOTNTPOFF:
5725 case UNSPEC_INDNTPOFF:
5726 if (saw_plus)
5727 return false;
5728 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5729 case UNSPEC_NTPOFF:
5730 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5731 case UNSPEC_DTPOFF:
5732 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5735 return 0;
5738 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5739 memory address for an instruction. The MODE argument is the machine mode
5740 for the MEM expression that wants to use this address.
5742    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5743 convert common non-canonical forms to canonical form so that they will
5744 be recognized. */
5747 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5749 struct ix86_address parts;
5750 rtx base, index, disp;
5751 HOST_WIDE_INT scale;
5752 const char *reason = NULL;
5753 rtx reason_rtx = NULL_RTX;
5755 if (TARGET_DEBUG_ADDR)
5757 fprintf (stderr,
5758 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5759 GET_MODE_NAME (mode), strict);
5760 debug_rtx (addr);
5763 if (ix86_decompose_address (addr, &parts) <= 0)
5765 reason = "decomposition failed";
5766 goto report_error;
5769 base = parts.base;
5770 index = parts.index;
5771 disp = parts.disp;
5772 scale = parts.scale;
5774 /* Validate base register.
5776      Don't allow SUBREGs here; they can lead to spill failures when the base
5777      is one word out of a two-word structure, which is represented internally
5778      as a DImode int.  */
5780 if (base)
5782 rtx reg;
5783 reason_rtx = base;
5785 if (GET_CODE (base) == SUBREG)
5786 reg = SUBREG_REG (base);
5787 else
5788 reg = base;
5790 if (GET_CODE (reg) != REG)
5792 reason = "base is not a register";
5793 goto report_error;
5796 if (GET_MODE (base) != Pmode)
5798 reason = "base is not in Pmode";
5799 goto report_error;
5802 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5803 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5805 reason = "base is not valid";
5806 goto report_error;
5810 /* Validate index register.
5812      Don't allow SUBREGs here; they can lead to spill failures when the index
5813      is one word out of a two-word structure, which is represented internally
5814      as a DImode int.  */
5816 if (index)
5818 rtx reg;
5819 reason_rtx = index;
5821 if (GET_CODE (index) == SUBREG)
5822 reg = SUBREG_REG (index);
5823 else
5824 reg = index;
5826 if (GET_CODE (reg) != REG)
5828 reason = "index is not a register";
5829 goto report_error;
5832 if (GET_MODE (index) != Pmode)
5834 reason = "index is not in Pmode";
5835 goto report_error;
5838 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5839 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5841 reason = "index is not valid";
5842 goto report_error;
5846 /* Validate scale factor. */
5847 if (scale != 1)
5849 reason_rtx = GEN_INT (scale);
5850 if (!index)
5852 reason = "scale without index";
5853 goto report_error;
5856 if (scale != 2 && scale != 4 && scale != 8)
5858 reason = "scale is not a valid multiplier";
5859 goto report_error;
5863 /* Validate displacement. */
5864 if (disp)
5866 reason_rtx = disp;
5868 if (GET_CODE (disp) == CONST
5869 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5870 switch (XINT (XEXP (disp, 0), 1))
5872 case UNSPEC_GOT:
5873 case UNSPEC_GOTOFF:
5874 case UNSPEC_GOTPCREL:
5875 if (!flag_pic)
5876 abort ();
5877 goto is_legitimate_pic;
5879 case UNSPEC_GOTTPOFF:
5880 case UNSPEC_GOTNTPOFF:
5881 case UNSPEC_INDNTPOFF:
5882 case UNSPEC_NTPOFF:
5883 case UNSPEC_DTPOFF:
5884 break;
5886 default:
5887 reason = "invalid address unspec";
5888 goto report_error;
5891 else if (flag_pic && (SYMBOLIC_CONST (disp)
5892 #if TARGET_MACHO
5893 && !machopic_operand_p (disp)
5894 #endif
5897 is_legitimate_pic:
5898 if (TARGET_64BIT && (index || base))
5900 /* foo@dtpoff(%rX) is ok. */
5901 if (GET_CODE (disp) != CONST
5902 || GET_CODE (XEXP (disp, 0)) != PLUS
5903 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5904 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5905 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5906 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5908 reason = "non-constant pic memory reference";
5909 goto report_error;
5912 else if (! legitimate_pic_address_disp_p (disp))
5914 reason = "displacement is an invalid pic construct";
5915 goto report_error;
5918 /* This code used to verify that a symbolic pic displacement
5919 includes the pic_offset_table_rtx register.
5921 	     While this is a good idea, unfortunately these constructs may
5922 	     be created by the "adds using lea" optimization for incorrect
5923 	     code like:
5925 	     int a;
5926 	     int foo(int i)
5928 	       return *(&a+i);
5931 	     This code is nonsensical, but it results in addressing the
5932 	     GOT table with a pic_offset_table_rtx base.  We can't
5933 	     just refuse it easily, since it gets matched by the
5934 	     "addsi3" pattern, which later gets split into an lea when
5935 	     the output register differs from the input.  While this
5936 	     could be handled by a separate addsi pattern for this case
5937 	     that never results in an lea, disabling this test seems to
5938 	     be the easier and correct fix for the crash.  */
5940 else if (GET_CODE (disp) != LABEL_REF
5941 && GET_CODE (disp) != CONST_INT
5942 && (GET_CODE (disp) != CONST
5943 || !legitimate_constant_p (disp))
5944 && (GET_CODE (disp) != SYMBOL_REF
5945 || !legitimate_constant_p (disp)))
5947 reason = "displacement is not constant";
5948 goto report_error;
5950 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5952 reason = "displacement is out of range";
5953 goto report_error;
5957 /* Everything looks valid. */
5958 if (TARGET_DEBUG_ADDR)
5959 fprintf (stderr, "Success.\n");
5960 return TRUE;
5962 report_error:
5963 if (TARGET_DEBUG_ADDR)
5965 fprintf (stderr, "Error: %s\n", reason);
5966 debug_rtx (reason_rtx);
5968 return FALSE;
5971 /* Return a unique alias set for the GOT.  */
5973 static HOST_WIDE_INT
5974 ix86_GOT_alias_set (void)
5976 static HOST_WIDE_INT set = -1;
5977 if (set == -1)
5978 set = new_alias_set ();
5979 return set;
5982 /* Return a legitimate reference for ORIG (an address) using the
5983 register REG. If REG is 0, a new pseudo is generated.
5985 There are two types of references that must be handled:
5987 1. Global data references must load the address from the GOT, via
5988 the PIC reg. An insn is emitted to do this load, and the reg is
5989 returned.
5991 2. Static data references, constant pool addresses, and code labels
5992 compute the address as an offset from the GOT, whose base is in
5993 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
5994 differentiate them from global data objects. The returned
5995 address is the PIC reg + an unspec constant.
5997 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5998 reg also appears in the address. */
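/* Illustrative sketch (an addition, not part of the original comment): on a
   32-bit target with -fpic, assuming %ebx holds the PIC register, the two
   cases typically materialize as

     movl  foo@GOT(%ebx), %eax      # case 1: address loaded from the GOT
     leal  bar@GOTOFF(%ebx), %eax   # case 2: PIC reg + unspec constant

   where foo is global data and bar is static/local data.  */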
6001 legitimize_pic_address (rtx orig, rtx reg)
6003 rtx addr = orig;
6004 rtx new = orig;
6005 rtx base;
6007 #if TARGET_MACHO
6008 if (reg == 0)
6009 reg = gen_reg_rtx (Pmode);
6010 /* Use the generic Mach-O PIC machinery. */
6011 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6012 #endif
6014 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6015 new = addr;
6016 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6018 /* This symbol may be referenced via a displacement from the PIC
6019 base address (@GOTOFF). */
6021 if (reload_in_progress)
6022 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6023 if (GET_CODE (addr) == CONST)
6024 addr = XEXP (addr, 0);
6025 if (GET_CODE (addr) == PLUS)
6027 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6028 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6030 else
6031 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6032 new = gen_rtx_CONST (Pmode, new);
6033 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6035 if (reg != 0)
6037 emit_move_insn (reg, new);
6038 new = reg;
6041 else if (GET_CODE (addr) == SYMBOL_REF)
6043 if (TARGET_64BIT)
6045 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6046 new = gen_rtx_CONST (Pmode, new);
6047 new = gen_rtx_MEM (Pmode, new);
6048 RTX_UNCHANGING_P (new) = 1;
6049 set_mem_alias_set (new, ix86_GOT_alias_set ());
6051 if (reg == 0)
6052 reg = gen_reg_rtx (Pmode);
6053 	  /* Use gen_movsi directly; otherwise the address is loaded
6054 	     into a register for CSE.  We don't want to CSE these addresses;
6055 	     instead we CSE the addresses loaded from the GOT table, so skip this.  */
6056 emit_insn (gen_movsi (reg, new));
6057 new = reg;
6059 else
6061 /* This symbol must be referenced via a load from the
6062 Global Offset Table (@GOT). */
6064 if (reload_in_progress)
6065 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6066 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6067 new = gen_rtx_CONST (Pmode, new);
6068 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6069 new = gen_rtx_MEM (Pmode, new);
6070 RTX_UNCHANGING_P (new) = 1;
6071 set_mem_alias_set (new, ix86_GOT_alias_set ());
6073 if (reg == 0)
6074 reg = gen_reg_rtx (Pmode);
6075 emit_move_insn (reg, new);
6076 new = reg;
6079 else
6081 if (GET_CODE (addr) == CONST)
6083 addr = XEXP (addr, 0);
6085 	  /* We must match the stuff we generated before.  Assume the only
6086 	     unspecs that can get here are ours.  Not that we could do
6087 	     anything with them anyway...  */
6088 if (GET_CODE (addr) == UNSPEC
6089 || (GET_CODE (addr) == PLUS
6090 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6091 return orig;
6092 if (GET_CODE (addr) != PLUS)
6093 abort ();
6095 if (GET_CODE (addr) == PLUS)
6097 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6099 /* Check first to see if this is a constant offset from a @GOTOFF
6100 symbol reference. */
6101 if (local_symbolic_operand (op0, Pmode)
6102 && GET_CODE (op1) == CONST_INT)
6104 if (!TARGET_64BIT)
6106 if (reload_in_progress)
6107 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6108 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6109 UNSPEC_GOTOFF);
6110 new = gen_rtx_PLUS (Pmode, new, op1);
6111 new = gen_rtx_CONST (Pmode, new);
6112 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6114 if (reg != 0)
6116 emit_move_insn (reg, new);
6117 new = reg;
6120 else
6122 if (INTVAL (op1) < -16*1024*1024
6123 || INTVAL (op1) >= 16*1024*1024)
6124 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6127 else
6129 base = legitimize_pic_address (XEXP (addr, 0), reg);
6130 new = legitimize_pic_address (XEXP (addr, 1),
6131 base == reg ? NULL_RTX : reg);
6133 if (GET_CODE (new) == CONST_INT)
6134 new = plus_constant (base, INTVAL (new));
6135 else
6137 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6139 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6140 new = XEXP (new, 1);
6142 new = gen_rtx_PLUS (Pmode, base, new);
6147 return new;
6150 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6152 static rtx
6153 get_thread_pointer (int to_reg)
6155 rtx tp, reg, insn;
6157 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6158 if (!to_reg)
6159 return tp;
6161 reg = gen_reg_rtx (Pmode);
6162 insn = gen_rtx_SET (VOIDmode, reg, tp);
6163 insn = emit_insn (insn);
6165 return reg;
6168 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6169 false if we expect this to be used for a memory address and true if
6170 we expect to load the address into a register. */
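/* Illustrative sketch (an assumption about typical output, not taken from the
   original comment): on a 32-bit target with GNU TLS, the local-exec path
   below builds an address of roughly this shape

     (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
              (const:SI (unspec:SI [(symbol_ref:SI ("x"))] UNSPEC_NTPOFF)))

   i.e. the thread pointer plus x@NTPOFF; when the address is instead loaded
   with a move, the thread pointer is first forced into a register.  */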
6172 static rtx
6173 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6175 rtx dest, base, off, pic;
6176 int type;
6178 switch (model)
6180 case TLS_MODEL_GLOBAL_DYNAMIC:
6181 dest = gen_reg_rtx (Pmode);
6182 if (TARGET_64BIT)
6184 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6186 start_sequence ();
6187 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6188 insns = get_insns ();
6189 end_sequence ();
6191 emit_libcall_block (insns, dest, rax, x);
6193 else
6194 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6195 break;
6197 case TLS_MODEL_LOCAL_DYNAMIC:
6198 base = gen_reg_rtx (Pmode);
6199 if (TARGET_64BIT)
6201 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6203 start_sequence ();
6204 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6205 insns = get_insns ();
6206 end_sequence ();
6208 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6209 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6210 emit_libcall_block (insns, base, rax, note);
6212 else
6213 emit_insn (gen_tls_local_dynamic_base_32 (base));
6215 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6216 off = gen_rtx_CONST (Pmode, off);
6218 return gen_rtx_PLUS (Pmode, base, off);
6220 case TLS_MODEL_INITIAL_EXEC:
6221 if (TARGET_64BIT)
6223 pic = NULL;
6224 type = UNSPEC_GOTNTPOFF;
6226 else if (flag_pic)
6228 if (reload_in_progress)
6229 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6230 pic = pic_offset_table_rtx;
6231 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6233 else if (!TARGET_GNU_TLS)
6235 pic = gen_reg_rtx (Pmode);
6236 emit_insn (gen_set_got (pic));
6237 type = UNSPEC_GOTTPOFF;
6239 else
6241 pic = NULL;
6242 type = UNSPEC_INDNTPOFF;
6245 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6246 off = gen_rtx_CONST (Pmode, off);
6247 if (pic)
6248 off = gen_rtx_PLUS (Pmode, pic, off);
6249 off = gen_rtx_MEM (Pmode, off);
6250 RTX_UNCHANGING_P (off) = 1;
6251 set_mem_alias_set (off, ix86_GOT_alias_set ());
6253 if (TARGET_64BIT || TARGET_GNU_TLS)
6255 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6256 off = force_reg (Pmode, off);
6257 return gen_rtx_PLUS (Pmode, base, off);
6259 else
6261 base = get_thread_pointer (true);
6262 dest = gen_reg_rtx (Pmode);
6263 emit_insn (gen_subsi3 (dest, base, off));
6265 break;
6267 case TLS_MODEL_LOCAL_EXEC:
6268 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6269 (TARGET_64BIT || TARGET_GNU_TLS)
6270 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6271 off = gen_rtx_CONST (Pmode, off);
6273 if (TARGET_64BIT || TARGET_GNU_TLS)
6275 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6276 return gen_rtx_PLUS (Pmode, base, off);
6278 else
6280 base = get_thread_pointer (true);
6281 dest = gen_reg_rtx (Pmode);
6282 emit_insn (gen_subsi3 (dest, base, off));
6284 break;
6286 default:
6287 abort ();
6290 return dest;
6293 /* Try machine-dependent ways of modifying an illegitimate address
6294 to be legitimate. If we find one, return the new, valid address.
6295 This macro is used in only one place: `memory_address' in explow.c.
6297 OLDX is the address as it was before break_out_memory_refs was called.
6298 In some cases it is useful to look at this to decide what needs to be done.
6300 MODE and WIN are passed so that this macro can use
6301 GO_IF_LEGITIMATE_ADDRESS.
6303 It is always safe for this macro to do nothing. It exists to recognize
6304 opportunities to optimize the output.
6306 For the 80386, we handle X+REG by loading X into a register R and
6307 using R+REG. R will go in a general reg and indexing will be used.
6308 However, if REG is a broken-out memory address or multiplication,
6309 nothing needs to be done because REG can certainly go in a general reg.
6311 When -fpic is used, special handling is needed for symbolic references.
6312 See comments by legitimize_pic_address in i386.c for details. */
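/* Illustrative example (an addition, not from the original comment): one of
   the rewrites performed below turns shift-based scaling into the MULT form
   the addressing code expects, roughly

     (ashift (reg) (const_int 2))  ->  (mult (reg) (const_int 4))

   so that it can be recognized as the scaled-index part of an address.  */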
6315 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6316 enum machine_mode mode)
6318 int changed = 0;
6319 unsigned log;
6321 if (TARGET_DEBUG_ADDR)
6323 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6324 GET_MODE_NAME (mode));
6325 debug_rtx (x);
6328 log = tls_symbolic_operand (x, mode);
6329 if (log)
6330 return legitimize_tls_address (x, log, false);
6332 if (flag_pic && SYMBOLIC_CONST (x))
6333 return legitimize_pic_address (x, 0);
6335 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6336 if (GET_CODE (x) == ASHIFT
6337 && GET_CODE (XEXP (x, 1)) == CONST_INT
6338 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6340 changed = 1;
6341 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6342 GEN_INT (1 << log));
6345 if (GET_CODE (x) == PLUS)
6347 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6349 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6350 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6351 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6353 changed = 1;
6354 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6355 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6356 GEN_INT (1 << log));
6359 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6360 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6361 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6363 changed = 1;
6364 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6365 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6366 GEN_INT (1 << log));
6369 /* Put multiply first if it isn't already. */
6370 if (GET_CODE (XEXP (x, 1)) == MULT)
6372 rtx tmp = XEXP (x, 0);
6373 XEXP (x, 0) = XEXP (x, 1);
6374 XEXP (x, 1) = tmp;
6375 changed = 1;
6378 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6379 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6380 created by virtual register instantiation, register elimination, and
6381 similar optimizations. */
6382 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6384 changed = 1;
6385 x = gen_rtx_PLUS (Pmode,
6386 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6387 XEXP (XEXP (x, 1), 0)),
6388 XEXP (XEXP (x, 1), 1));
6391 /* Canonicalize
6392 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6393 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6394 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6396 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6397 && CONSTANT_P (XEXP (x, 1)))
6399 rtx constant;
6400 rtx other = NULL_RTX;
6402 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6404 constant = XEXP (x, 1);
6405 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6407 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6409 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6410 other = XEXP (x, 1);
6412 else
6413 constant = 0;
6415 if (constant)
6417 changed = 1;
6418 x = gen_rtx_PLUS (Pmode,
6419 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6420 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6421 plus_constant (other, INTVAL (constant)));
6425 if (changed && legitimate_address_p (mode, x, FALSE))
6426 return x;
6428 if (GET_CODE (XEXP (x, 0)) == MULT)
6430 changed = 1;
6431 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6434 if (GET_CODE (XEXP (x, 1)) == MULT)
6436 changed = 1;
6437 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6440 if (changed
6441 && GET_CODE (XEXP (x, 1)) == REG
6442 && GET_CODE (XEXP (x, 0)) == REG)
6443 return x;
6445 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6447 changed = 1;
6448 x = legitimize_pic_address (x, 0);
6451 if (changed && legitimate_address_p (mode, x, FALSE))
6452 return x;
6454 if (GET_CODE (XEXP (x, 0)) == REG)
6456 register rtx temp = gen_reg_rtx (Pmode);
6457 register rtx val = force_operand (XEXP (x, 1), temp);
6458 if (val != temp)
6459 emit_move_insn (temp, val);
6461 XEXP (x, 1) = temp;
6462 return x;
6465 else if (GET_CODE (XEXP (x, 1)) == REG)
6467 register rtx temp = gen_reg_rtx (Pmode);
6468 register rtx val = force_operand (XEXP (x, 0), temp);
6469 if (val != temp)
6470 emit_move_insn (temp, val);
6472 XEXP (x, 0) = temp;
6473 return x;
6477 return x;
6480 /* Print an integer constant expression in assembler syntax. Addition
6481 and subtraction are the only arithmetic that may appear in these
6482 expressions. FILE is the stdio stream to write to, X is the rtx, and
6483 CODE is the operand print code from the output string. */
6485 static void
6486 output_pic_addr_const (FILE *file, rtx x, int code)
6488 char buf[256];
6490 switch (GET_CODE (x))
6492 case PC:
6493 if (flag_pic)
6494 putc ('.', file);
6495 else
6496 abort ();
6497 break;
6499 case SYMBOL_REF:
6500 assemble_name (file, XSTR (x, 0));
6501 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6502 fputs ("@PLT", file);
6503 break;
6505 case LABEL_REF:
6506 x = XEXP (x, 0);
6507 /* FALLTHRU */
6508 case CODE_LABEL:
6509 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6510 assemble_name (asm_out_file, buf);
6511 break;
6513 case CONST_INT:
6514 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6515 break;
6517 case CONST:
6518 /* This used to output parentheses around the expression,
6519 but that does not work on the 386 (either ATT or BSD assembler). */
6520 output_pic_addr_const (file, XEXP (x, 0), code);
6521 break;
6523 case CONST_DOUBLE:
6524 if (GET_MODE (x) == VOIDmode)
6526 /* We can use %d if the number is <32 bits and positive. */
6527 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6528 fprintf (file, "0x%lx%08lx",
6529 (unsigned long) CONST_DOUBLE_HIGH (x),
6530 (unsigned long) CONST_DOUBLE_LOW (x));
6531 else
6532 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6534 else
6535 /* We can't handle floating point constants;
6536 PRINT_OPERAND must handle them. */
6537 output_operand_lossage ("floating constant misused");
6538 break;
6540 case PLUS:
6541 /* Some assemblers need integer constants to appear first. */
6542 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6544 output_pic_addr_const (file, XEXP (x, 0), code);
6545 putc ('+', file);
6546 output_pic_addr_const (file, XEXP (x, 1), code);
6548 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6550 output_pic_addr_const (file, XEXP (x, 1), code);
6551 putc ('+', file);
6552 output_pic_addr_const (file, XEXP (x, 0), code);
6554 else
6555 abort ();
6556 break;
6558 case MINUS:
6559 if (!TARGET_MACHO)
6560 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6561 output_pic_addr_const (file, XEXP (x, 0), code);
6562 putc ('-', file);
6563 output_pic_addr_const (file, XEXP (x, 1), code);
6564 if (!TARGET_MACHO)
6565 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6566 break;
6568 case UNSPEC:
6569 if (XVECLEN (x, 0) != 1)
6570 abort ();
6571 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6572 switch (XINT (x, 1))
6574 case UNSPEC_GOT:
6575 fputs ("@GOT", file);
6576 break;
6577 case UNSPEC_GOTOFF:
6578 fputs ("@GOTOFF", file);
6579 break;
6580 case UNSPEC_GOTPCREL:
6581 fputs ("@GOTPCREL(%rip)", file);
6582 break;
6583 case UNSPEC_GOTTPOFF:
6584 /* FIXME: This might be @TPOFF in Sun ld too. */
6585 fputs ("@GOTTPOFF", file);
6586 break;
6587 case UNSPEC_TPOFF:
6588 fputs ("@TPOFF", file);
6589 break;
6590 case UNSPEC_NTPOFF:
6591 if (TARGET_64BIT)
6592 fputs ("@TPOFF", file);
6593 else
6594 fputs ("@NTPOFF", file);
6595 break;
6596 case UNSPEC_DTPOFF:
6597 fputs ("@DTPOFF", file);
6598 break;
6599 case UNSPEC_GOTNTPOFF:
6600 if (TARGET_64BIT)
6601 fputs ("@GOTTPOFF(%rip)", file);
6602 else
6603 fputs ("@GOTNTPOFF", file);
6604 break;
6605 case UNSPEC_INDNTPOFF:
6606 fputs ("@INDNTPOFF", file);
6607 break;
6608 default:
6609 output_operand_lossage ("invalid UNSPEC as operand");
6610 break;
6612 break;
6614 default:
6615 output_operand_lossage ("invalid expression as operand");
6619 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6620 We need to handle our special PIC relocations. */
6622 void
6623 i386_dwarf_output_addr_const (FILE *file, rtx x)
6625 #ifdef ASM_QUAD
6626 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6627 #else
6628 if (TARGET_64BIT)
6629 abort ();
6630 fprintf (file, "%s", ASM_LONG);
6631 #endif
6632 if (flag_pic)
6633 output_pic_addr_const (file, x, '\0');
6634 else
6635 output_addr_const (file, x);
6636 fputc ('\n', file);
6639 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6640 We need to emit DTP-relative relocations. */
6642 void
6643 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6645 fputs (ASM_LONG, file);
6646 output_addr_const (file, x);
6647 fputs ("@DTPOFF", file);
6648 switch (size)
6650 case 4:
6651 break;
6652 case 8:
6653 fputs (", 0", file);
6654 break;
6655 default:
6656 abort ();
6660 /* In the name of slightly smaller debug output, and to cater to
6661    general assembler lossage, recognize PIC+GOTOFF and turn it back
6662 into a direct symbol reference. */
6664 static rtx
6665 ix86_delegitimize_address (rtx orig_x)
6667 rtx x = orig_x, y;
6669 if (GET_CODE (x) == MEM)
6670 x = XEXP (x, 0);
6672 if (TARGET_64BIT)
6674 if (GET_CODE (x) != CONST
6675 || GET_CODE (XEXP (x, 0)) != UNSPEC
6676 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6677 || GET_CODE (orig_x) != MEM)
6678 return orig_x;
6679 return XVECEXP (XEXP (x, 0), 0, 0);
6682 if (GET_CODE (x) != PLUS
6683 || GET_CODE (XEXP (x, 1)) != CONST)
6684 return orig_x;
6686 if (GET_CODE (XEXP (x, 0)) == REG
6687 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6688 /* %ebx + GOT/GOTOFF */
6689 y = NULL;
6690 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6692 /* %ebx + %reg * scale + GOT/GOTOFF */
6693 y = XEXP (x, 0);
6694 if (GET_CODE (XEXP (y, 0)) == REG
6695 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6696 y = XEXP (y, 1);
6697 else if (GET_CODE (XEXP (y, 1)) == REG
6698 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6699 y = XEXP (y, 0);
6700 else
6701 return orig_x;
6702 if (GET_CODE (y) != REG
6703 && GET_CODE (y) != MULT
6704 && GET_CODE (y) != ASHIFT)
6705 return orig_x;
6707 else
6708 return orig_x;
6710 x = XEXP (XEXP (x, 1), 0);
6711 if (GET_CODE (x) == UNSPEC
6712 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6713 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6715 if (y)
6716 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6717 return XVECEXP (x, 0, 0);
6720 if (GET_CODE (x) == PLUS
6721 && GET_CODE (XEXP (x, 0)) == UNSPEC
6722 && GET_CODE (XEXP (x, 1)) == CONST_INT
6723 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6724 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6725 && GET_CODE (orig_x) != MEM)))
6727 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6728 if (y)
6729 return gen_rtx_PLUS (Pmode, y, x);
6730 return x;
6733 return orig_x;
6736 static void
6737 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6738 int fp, FILE *file)
6740 const char *suffix;
6742 if (mode == CCFPmode || mode == CCFPUmode)
6744 enum rtx_code second_code, bypass_code;
6745 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6746 if (bypass_code != NIL || second_code != NIL)
6747 abort ();
6748 code = ix86_fp_compare_code_to_integer (code);
6749 mode = CCmode;
6751 if (reverse)
6752 code = reverse_condition (code);
6754 switch (code)
6756 case EQ:
6757 suffix = "e";
6758 break;
6759 case NE:
6760 suffix = "ne";
6761 break;
6762 case GT:
6763 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6764 abort ();
6765 suffix = "g";
6766 break;
6767 case GTU:
6768       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6769 	 Those same assemblers have the same but opposite lossage on cmov.  */
6770 if (mode != CCmode)
6771 abort ();
6772 suffix = fp ? "nbe" : "a";
6773 break;
6774 case LT:
6775 if (mode == CCNOmode || mode == CCGOCmode)
6776 suffix = "s";
6777 else if (mode == CCmode || mode == CCGCmode)
6778 suffix = "l";
6779 else
6780 abort ();
6781 break;
6782 case LTU:
6783 if (mode != CCmode)
6784 abort ();
6785 suffix = "b";
6786 break;
6787 case GE:
6788 if (mode == CCNOmode || mode == CCGOCmode)
6789 suffix = "ns";
6790 else if (mode == CCmode || mode == CCGCmode)
6791 suffix = "ge";
6792 else
6793 abort ();
6794 break;
6795 case GEU:
6796 /* ??? As above. */
6797 if (mode != CCmode)
6798 abort ();
6799 suffix = fp ? "nb" : "ae";
6800 break;
6801 case LE:
6802 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6803 abort ();
6804 suffix = "le";
6805 break;
6806 case LEU:
6807 if (mode != CCmode)
6808 abort ();
6809 suffix = "be";
6810 break;
6811 case UNORDERED:
6812 suffix = fp ? "u" : "p";
6813 break;
6814 case ORDERED:
6815 suffix = fp ? "nu" : "np";
6816 break;
6817 default:
6818 abort ();
6820 fputs (suffix, file);
6823 void
6824 print_reg (rtx x, int code, FILE *file)
6826 if (REGNO (x) == ARG_POINTER_REGNUM
6827 || REGNO (x) == FRAME_POINTER_REGNUM
6828 || REGNO (x) == FLAGS_REG
6829 || REGNO (x) == FPSR_REG)
6830 abort ();
6832 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6833 putc ('%', file);
6835 if (code == 'w' || MMX_REG_P (x))
6836 code = 2;
6837 else if (code == 'b')
6838 code = 1;
6839 else if (code == 'k')
6840 code = 4;
6841 else if (code == 'q')
6842 code = 8;
6843 else if (code == 'y')
6844 code = 3;
6845 else if (code == 'h')
6846 code = 0;
6847 else
6848 code = GET_MODE_SIZE (GET_MODE (x));
6850   /* Irritatingly, the AMD extended registers use a different naming convention
6851      from the normal registers.  */
6852 if (REX_INT_REG_P (x))
6854 if (!TARGET_64BIT)
6855 abort ();
6856 switch (code)
6858 case 0:
6859 error ("extended registers have no high halves");
6860 break;
6861 case 1:
6862 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6863 break;
6864 case 2:
6865 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6866 break;
6867 case 4:
6868 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6869 break;
6870 case 8:
6871 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6872 break;
6873 default:
6874 error ("unsupported operand size for extended register");
6875 break;
6877 return;
6879 switch (code)
6881 case 3:
6882 if (STACK_TOP_P (x))
6884 fputs ("st(0)", file);
6885 break;
6887 /* FALLTHRU */
6888 case 8:
6889 case 4:
6890 case 12:
6891 if (! ANY_FP_REG_P (x))
6892 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6893 /* FALLTHRU */
6894 case 16:
6895 case 2:
6896 fputs (hi_reg_name[REGNO (x)], file);
6897 break;
6898 case 1:
6899 fputs (qi_reg_name[REGNO (x)], file);
6900 break;
6901 case 0:
6902 fputs (qi_high_reg_name[REGNO (x)], file);
6903 break;
6904 default:
6905 abort ();
6909 /* Locate some local-dynamic symbol still in use by this function
6910 so that we can print its name in some tls_local_dynamic_base
6911 pattern. */
6913 static const char *
6914 get_some_local_dynamic_name (void)
6916 rtx insn;
6918 if (cfun->machine->some_ld_name)
6919 return cfun->machine->some_ld_name;
6921 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6922 if (INSN_P (insn)
6923 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6924 return cfun->machine->some_ld_name;
6926 abort ();
6929 static int
6930 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
6932 rtx x = *px;
6934 if (GET_CODE (x) == SYMBOL_REF
6935 && local_dynamic_symbolic_operand (x, Pmode))
6937 cfun->machine->some_ld_name = XSTR (x, 0);
6938 return 1;
6941 return 0;
6944 /* Meaning of CODE:
6945 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6946 C -- print opcode suffix for set/cmov insn.
6947 c -- like C, but print reversed condition
6948 F,f -- likewise, but for floating-point.
6949 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6950 nothing
6951 R -- print the prefix for register names.
6952 z -- print the opcode suffix for the size of the current operand.
6953 * -- print a star (in certain assembler syntax)
6954 A -- print an absolute memory reference.
6955 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6956    s -- print a shift double count, followed by the assembler's argument
6957 delimiter.
6958 b -- print the QImode name of the register for the indicated operand.
6959 %b0 would print %al if operands[0] is reg 0.
6960 w -- likewise, print the HImode name of the register.
6961 k -- likewise, print the SImode name of the register.
6962 q -- likewise, print the DImode name of the register.
6963 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6964 y -- print "st(0)" instead of "st" as a register.
6965 D -- print condition for SSE cmp instruction.
6966 P -- if PIC, print an @PLT suffix.
6967 X -- don't print any sort of PIC '@' suffix for a symbol.
6968 & -- print some in-use local-dynamic symbol name.
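/* Illustrative examples (an addition, not part of the original table):
   assuming operands[0] is register a,

     %b0  prints  %al
     %w0  prints  %ax
     %k0  prints  %eax
     %h0  prints  %ah

   while %z1 appends the size suffix implied by the mode of operands[1].  */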
6971 void
6972 print_operand (FILE *file, rtx x, int code)
6974 if (code)
6976 switch (code)
6978 case '*':
6979 if (ASSEMBLER_DIALECT == ASM_ATT)
6980 putc ('*', file);
6981 return;
6983 case '&':
6984 assemble_name (file, get_some_local_dynamic_name ());
6985 return;
6987 case 'A':
6988 if (ASSEMBLER_DIALECT == ASM_ATT)
6989 putc ('*', file);
6990 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6992 	  /* Intel syntax.  For absolute addresses, registers should not
6993 	     be surrounded by brackets.  */
6994 if (GET_CODE (x) != REG)
6996 putc ('[', file);
6997 PRINT_OPERAND (file, x, 0);
6998 putc (']', file);
6999 return;
7002 else
7003 abort ();
7005 PRINT_OPERAND (file, x, 0);
7006 return;
7009 case 'L':
7010 if (ASSEMBLER_DIALECT == ASM_ATT)
7011 putc ('l', file);
7012 return;
7014 case 'W':
7015 if (ASSEMBLER_DIALECT == ASM_ATT)
7016 putc ('w', file);
7017 return;
7019 case 'B':
7020 if (ASSEMBLER_DIALECT == ASM_ATT)
7021 putc ('b', file);
7022 return;
7024 case 'Q':
7025 if (ASSEMBLER_DIALECT == ASM_ATT)
7026 putc ('l', file);
7027 return;
7029 case 'S':
7030 if (ASSEMBLER_DIALECT == ASM_ATT)
7031 putc ('s', file);
7032 return;
7034 case 'T':
7035 if (ASSEMBLER_DIALECT == ASM_ATT)
7036 putc ('t', file);
7037 return;
7039 case 'z':
7040 /* 387 opcodes don't get size suffixes if the operands are
7041 registers. */
7042 if (STACK_REG_P (x))
7043 return;
7045 /* Likewise if using Intel opcodes. */
7046 if (ASSEMBLER_DIALECT == ASM_INTEL)
7047 return;
7049       /* Derive the opcode size suffix from the size of the operand.  */
7050 switch (GET_MODE_SIZE (GET_MODE (x)))
7052 case 2:
7053 #ifdef HAVE_GAS_FILDS_FISTS
7054 putc ('s', file);
7055 #endif
7056 return;
7058 case 4:
7059 if (GET_MODE (x) == SFmode)
7061 putc ('s', file);
7062 return;
7064 else
7065 putc ('l', file);
7066 return;
7068 case 12:
7069 case 16:
7070 putc ('t', file);
7071 return;
7073 case 8:
7074 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7076 #ifdef GAS_MNEMONICS
7077 putc ('q', file);
7078 #else
7079 putc ('l', file);
7080 putc ('l', file);
7081 #endif
7083 else
7084 putc ('l', file);
7085 return;
7087 default:
7088 abort ();
7091 case 'b':
7092 case 'w':
7093 case 'k':
7094 case 'q':
7095 case 'h':
7096 case 'y':
7097 case 'X':
7098 case 'P':
7099 break;
7101 case 's':
7102 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7104 PRINT_OPERAND (file, x, 0);
7105 putc (',', file);
7107 return;
7109 case 'D':
7110       /* A little bit of brain damage here.  The SSE compare instructions
7111 	 use completely different names for the comparisons than the
7112 	 fp conditional moves do.  */
7113 switch (GET_CODE (x))
7115 case EQ:
7116 case UNEQ:
7117 fputs ("eq", file);
7118 break;
7119 case LT:
7120 case UNLT:
7121 fputs ("lt", file);
7122 break;
7123 case LE:
7124 case UNLE:
7125 fputs ("le", file);
7126 break;
7127 case UNORDERED:
7128 fputs ("unord", file);
7129 break;
7130 case NE:
7131 case LTGT:
7132 fputs ("neq", file);
7133 break;
7134 case UNGE:
7135 case GE:
7136 fputs ("nlt", file);
7137 break;
7138 case UNGT:
7139 case GT:
7140 fputs ("nle", file);
7141 break;
7142 case ORDERED:
7143 fputs ("ord", file);
7144 break;
7145 default:
7146 abort ();
7147 break;
7149 return;
7150 case 'O':
7151 #ifdef CMOV_SUN_AS_SYNTAX
7152 if (ASSEMBLER_DIALECT == ASM_ATT)
7154 switch (GET_MODE (x))
7156 case HImode: putc ('w', file); break;
7157 case SImode:
7158 case SFmode: putc ('l', file); break;
7159 case DImode:
7160 case DFmode: putc ('q', file); break;
7161 default: abort ();
7163 putc ('.', file);
7165 #endif
7166 return;
7167 case 'C':
7168 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7169 return;
7170 case 'F':
7171 #ifdef CMOV_SUN_AS_SYNTAX
7172 if (ASSEMBLER_DIALECT == ASM_ATT)
7173 putc ('.', file);
7174 #endif
7175 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7176 return;
7178 /* Like above, but reverse condition */
7179 case 'c':
7180 /* Check to see if argument to %c is really a constant
7181 and not a condition code which needs to be reversed. */
7182 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7184 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7185 return;
7187 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7188 return;
7189 case 'f':
7190 #ifdef CMOV_SUN_AS_SYNTAX
7191 if (ASSEMBLER_DIALECT == ASM_ATT)
7192 putc ('.', file);
7193 #endif
7194 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7195 return;
7196 case '+':
7198 rtx x;
7200 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7201 return;
7203 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7204 if (x)
7206 int pred_val = INTVAL (XEXP (x, 0));
7208 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7209 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7211 int taken = pred_val > REG_BR_PROB_BASE / 2;
7212 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7214 	      /* Emit hints only in the case where the default branch prediction
7215 		 heuristics would fail.  */
7216 if (taken != cputaken)
7218 /* We use 3e (DS) prefix for taken branches and
7219 2e (CS) prefix for not taken branches. */
7220 if (taken)
7221 fputs ("ds ; ", file);
7222 else
7223 fputs ("cs ; ", file);
7227 return;
7229 default:
7230 output_operand_lossage ("invalid operand code `%c'", code);
7234 if (GET_CODE (x) == REG)
7236 PRINT_REG (x, code, file);
7239 else if (GET_CODE (x) == MEM)
7241 /* No `byte ptr' prefix for call instructions. */
7242 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7244 const char * size;
7245 switch (GET_MODE_SIZE (GET_MODE (x)))
7247 case 1: size = "BYTE"; break;
7248 case 2: size = "WORD"; break;
7249 case 4: size = "DWORD"; break;
7250 case 8: size = "QWORD"; break;
7251 case 12: size = "XWORD"; break;
7252 case 16: size = "XMMWORD"; break;
7253 default:
7254 abort ();
7257 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7258 if (code == 'b')
7259 size = "BYTE";
7260 else if (code == 'w')
7261 size = "WORD";
7262 else if (code == 'k')
7263 size = "DWORD";
7265 fputs (size, file);
7266 fputs (" PTR ", file);
7269 x = XEXP (x, 0);
7270 /* Avoid (%rip) for call operands. */
7271 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7272 && GET_CODE (x) != CONST_INT)
7273 output_addr_const (file, x);
7274 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7275 output_operand_lossage ("invalid constraints for operand");
7276 else
7277 output_address (x);
7280 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7282 REAL_VALUE_TYPE r;
7283 long l;
7285 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7286 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7288 if (ASSEMBLER_DIALECT == ASM_ATT)
7289 putc ('$', file);
7290 fprintf (file, "0x%lx", l);
7293 /* These float cases don't actually occur as immediate operands. */
7294 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7296 char dstr[30];
7298 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7299 fprintf (file, "%s", dstr);
7302 else if (GET_CODE (x) == CONST_DOUBLE
7303 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7305 char dstr[30];
7307 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7308 fprintf (file, "%s", dstr);
7311 else
7313 if (code != 'P')
7315 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7317 if (ASSEMBLER_DIALECT == ASM_ATT)
7318 putc ('$', file);
7320 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7321 || GET_CODE (x) == LABEL_REF)
7323 if (ASSEMBLER_DIALECT == ASM_ATT)
7324 putc ('$', file);
7325 else
7326 fputs ("OFFSET FLAT:", file);
7329 if (GET_CODE (x) == CONST_INT)
7330 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7331 else if (flag_pic)
7332 output_pic_addr_const (file, x, code);
7333 else
7334 output_addr_const (file, x);
7338 /* Print a memory operand whose address is ADDR. */
7340 void
7341 print_operand_address (FILE *file, register rtx addr)
7343 struct ix86_address parts;
7344 rtx base, index, disp;
7345 int scale;
7347 if (! ix86_decompose_address (addr, &parts))
7348 abort ();
7350 base = parts.base;
7351 index = parts.index;
7352 disp = parts.disp;
7353 scale = parts.scale;
7355 switch (parts.seg)
7357 case SEG_DEFAULT:
7358 break;
7359 case SEG_FS:
7360 case SEG_GS:
7361 if (USER_LABEL_PREFIX[0] == 0)
7362 putc ('%', file);
7363 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7364 break;
7365 default:
7366 abort ();
7369 if (!base && !index)
7371       /* A displacement-only address requires special attention.  */
7373 if (GET_CODE (disp) == CONST_INT)
7375 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7377 if (USER_LABEL_PREFIX[0] == 0)
7378 putc ('%', file);
7379 fputs ("ds:", file);
7381 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7383 else if (flag_pic)
7384 output_pic_addr_const (file, disp, 0);
7385 else
7386 output_addr_const (file, disp);
7388       /* Use the one-byte-shorter RIP-relative addressing for 64bit mode.  */
7389 if (TARGET_64BIT
7390 && ((GET_CODE (disp) == SYMBOL_REF
7391 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7392 || GET_CODE (disp) == LABEL_REF
7393 || (GET_CODE (disp) == CONST
7394 && GET_CODE (XEXP (disp, 0)) == PLUS
7395 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7396 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7397 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7398 fputs ("(%rip)", file);
7400 else
7402 if (ASSEMBLER_DIALECT == ASM_ATT)
7404 if (disp)
7406 if (flag_pic)
7407 output_pic_addr_const (file, disp, 0);
7408 else if (GET_CODE (disp) == LABEL_REF)
7409 output_asm_label (disp);
7410 else
7411 output_addr_const (file, disp);
7414 putc ('(', file);
7415 if (base)
7416 PRINT_REG (base, 0, file);
7417 if (index)
7419 putc (',', file);
7420 PRINT_REG (index, 0, file);
7421 if (scale != 1)
7422 fprintf (file, ",%d", scale);
7424 putc (')', file);
7426 else
7428 rtx offset = NULL_RTX;
7430 if (disp)
7432 /* Pull out the offset of a symbol; print any symbol itself. */
7433 if (GET_CODE (disp) == CONST
7434 && GET_CODE (XEXP (disp, 0)) == PLUS
7435 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7437 offset = XEXP (XEXP (disp, 0), 1);
7438 disp = gen_rtx_CONST (VOIDmode,
7439 XEXP (XEXP (disp, 0), 0));
7442 if (flag_pic)
7443 output_pic_addr_const (file, disp, 0);
7444 else if (GET_CODE (disp) == LABEL_REF)
7445 output_asm_label (disp);
7446 else if (GET_CODE (disp) == CONST_INT)
7447 offset = disp;
7448 else
7449 output_addr_const (file, disp);
7452 putc ('[', file);
7453 if (base)
7455 PRINT_REG (base, 0, file);
7456 if (offset)
7458 if (INTVAL (offset) >= 0)
7459 putc ('+', file);
7460 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7463 else if (offset)
7464 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7465 else
7466 putc ('0', file);
7468 if (index)
7470 putc ('+', file);
7471 PRINT_REG (index, 0, file);
7472 if (scale != 1)
7473 fprintf (file, "*%d", scale);
7475 putc (']', file);
7480 bool
7481 output_addr_const_extra (FILE *file, rtx x)
7483 rtx op;
7485 if (GET_CODE (x) != UNSPEC)
7486 return false;
7488 op = XVECEXP (x, 0, 0);
7489 switch (XINT (x, 1))
7491 case UNSPEC_GOTTPOFF:
7492 output_addr_const (file, op);
7493 /* FIXME: This might be @TPOFF in Sun ld. */
7494 fputs ("@GOTTPOFF", file);
7495 break;
7496 case UNSPEC_TPOFF:
7497 output_addr_const (file, op);
7498 fputs ("@TPOFF", file);
7499 break;
7500 case UNSPEC_NTPOFF:
7501 output_addr_const (file, op);
7502 if (TARGET_64BIT)
7503 fputs ("@TPOFF", file);
7504 else
7505 fputs ("@NTPOFF", file);
7506 break;
7507 case UNSPEC_DTPOFF:
7508 output_addr_const (file, op);
7509 fputs ("@DTPOFF", file);
7510 break;
7511 case UNSPEC_GOTNTPOFF:
7512 output_addr_const (file, op);
7513 if (TARGET_64BIT)
7514 fputs ("@GOTTPOFF(%rip)", file);
7515 else
7516 fputs ("@GOTNTPOFF", file);
7517 break;
7518 case UNSPEC_INDNTPOFF:
7519 output_addr_const (file, op);
7520 fputs ("@INDNTPOFF", file);
7521 break;
7523 default:
7524 return false;
7527 return true;
7530 /* Split one or more DImode RTL references into pairs of SImode
7531 references. The RTL can be REG, offsettable MEM, integer constant, or
7532 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7533 split and "num" is its length. lo_half and hi_half are output arrays
7534 that parallel "operands". */
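/* Illustrative usage sketch (an assumption about a typical caller, not part
   of the original comment): a DImode move splitter can use this as

     rtx lo[2], hi[2];
     split_di (operands, 2, lo, hi);
     emit_move_insn (lo[0], lo[1]);
     emit_move_insn (hi[0], hi[1]);

   emitting two SImode moves in place of one DImode move.  */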
7536 void
7537 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7539 while (num--)
7541 rtx op = operands[num];
7543       /* simplify_subreg refuses to split volatile memory references,
7544 	 but we still have to handle them.  */
7545 if (GET_CODE (op) == MEM)
7547 lo_half[num] = adjust_address (op, SImode, 0);
7548 hi_half[num] = adjust_address (op, SImode, 4);
7550 else
7552 lo_half[num] = simplify_gen_subreg (SImode, op,
7553 GET_MODE (op) == VOIDmode
7554 ? DImode : GET_MODE (op), 0);
7555 hi_half[num] = simplify_gen_subreg (SImode, op,
7556 GET_MODE (op) == VOIDmode
7557 ? DImode : GET_MODE (op), 4);
7561 /* Split one or more TImode RTL references into pairs of DImode
7562    references.  The RTL can be REG, offsettable MEM, integer constant, or
7563    CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7564    split and "num" is its length.  lo_half and hi_half are output arrays
7565 that parallel "operands". */
7567 void
7568 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7570 while (num--)
7572 rtx op = operands[num];
7574       /* simplify_subreg refuses to split volatile memory references, but we
7575 	 still have to handle them.  */
7576 if (GET_CODE (op) == MEM)
7578 lo_half[num] = adjust_address (op, DImode, 0);
7579 hi_half[num] = adjust_address (op, DImode, 8);
7581 else
7583 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7584 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7589 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7590 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7591 is the expression of the binary operation. The output may either be
7592 emitted here, or returned to the caller, like all output_* functions.
7594 There is no guarantee that the operands are the same mode, as they
7595 might be within FLOAT or FLOAT_EXTEND expressions. */
7597 #ifndef SYSV386_COMPAT
7598 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7599 wants to fix the assemblers because that causes incompatibility
7600 with gcc. No-one wants to fix gcc because that causes
7601 incompatibility with assemblers... You can use the option of
7602 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7603 #define SYSV386_COMPAT 1
7604 #endif
7606 const char *
7607 output_387_binary_op (rtx insn, rtx *operands)
7609 static char buf[30];
7610 const char *p;
7611 const char *ssep;
7612 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7614 #ifdef ENABLE_CHECKING
7615   /* Even if we do not want to check the inputs, this documents the input
7616      constraints, which helps in understanding the following code.  */
7617 if (STACK_REG_P (operands[0])
7618 && ((REG_P (operands[1])
7619 && REGNO (operands[0]) == REGNO (operands[1])
7620 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7621 || (REG_P (operands[2])
7622 && REGNO (operands[0]) == REGNO (operands[2])
7623 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7624 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7625 ; /* ok */
7626 else if (!is_sse)
7627 abort ();
7628 #endif
7630 switch (GET_CODE (operands[3]))
7632 case PLUS:
7633 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7634 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7635 p = "fiadd";
7636 else
7637 p = "fadd";
7638 ssep = "add";
7639 break;
7641 case MINUS:
7642 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7643 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7644 p = "fisub";
7645 else
7646 p = "fsub";
7647 ssep = "sub";
7648 break;
7650 case MULT:
7651 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7652 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7653 p = "fimul";
7654 else
7655 p = "fmul";
7656 ssep = "mul";
7657 break;
7659 case DIV:
7660 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7661 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7662 p = "fidiv";
7663 else
7664 p = "fdiv";
7665 ssep = "div";
7666 break;
7668 default:
7669 abort ();
7672 if (is_sse)
7674 strcpy (buf, ssep);
7675 if (GET_MODE (operands[0]) == SFmode)
7676 strcat (buf, "ss\t{%2, %0|%0, %2}");
7677 else
7678 strcat (buf, "sd\t{%2, %0|%0, %2}");
7679 return buf;
7681 strcpy (buf, p);
7683 switch (GET_CODE (operands[3]))
7685 case MULT:
7686 case PLUS:
7687 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7689 rtx temp = operands[2];
7690 operands[2] = operands[1];
7691 operands[1] = temp;
7694       /* We now know operands[0] == operands[1].  */
7696 if (GET_CODE (operands[2]) == MEM)
7698 p = "%z2\t%2";
7699 break;
7702 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7704 if (STACK_TOP_P (operands[0]))
7705 /* How is it that we are storing to a dead operand[2]?
7706 Well, presumably operands[1] is dead too. We can't
7707 store the result to st(0) as st(0) gets popped on this
7708 instruction. Instead store to operands[2] (which I
7709 think has to be st(1)). st(1) will be popped later.
7710 gcc <= 2.8.1 didn't have this check and generated
7711 assembly code that the Unixware assembler rejected. */
7712 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7713 else
7714 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7715 break;
7718 if (STACK_TOP_P (operands[0]))
7719 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7720 else
7721 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7722 break;
7724 case MINUS:
7725 case DIV:
7726 if (GET_CODE (operands[1]) == MEM)
7728 p = "r%z1\t%1";
7729 break;
7732 if (GET_CODE (operands[2]) == MEM)
7734 p = "%z2\t%2";
7735 break;
7738 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7740 #if SYSV386_COMPAT
7741 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7742 derived assemblers, confusingly reverse the direction of
7743 the operation for fsub{r} and fdiv{r} when the
7744 destination register is not st(0). The Intel assembler
7745 doesn't have this brain damage. Read !SYSV386_COMPAT to
7746 figure out what the hardware really does. */
7747 if (STACK_TOP_P (operands[0]))
7748 p = "{p\t%0, %2|rp\t%2, %0}";
7749 else
7750 p = "{rp\t%2, %0|p\t%0, %2}";
7751 #else
7752 if (STACK_TOP_P (operands[0]))
7753 /* As above for fmul/fadd, we can't store to st(0). */
7754 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7755 else
7756 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7757 #endif
7758 break;
7761 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7763 #if SYSV386_COMPAT
7764 if (STACK_TOP_P (operands[0]))
7765 p = "{rp\t%0, %1|p\t%1, %0}";
7766 else
7767 p = "{p\t%1, %0|rp\t%0, %1}";
7768 #else
7769 if (STACK_TOP_P (operands[0]))
7770 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7771 else
7772 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7773 #endif
7774 break;
7777 if (STACK_TOP_P (operands[0]))
7779 if (STACK_TOP_P (operands[1]))
7780 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7781 else
7782 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7783 break;
7785 else if (STACK_TOP_P (operands[1]))
7787 #if SYSV386_COMPAT
7788 p = "{\t%1, %0|r\t%0, %1}";
7789 #else
7790 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7791 #endif
7793 else
7795 #if SYSV386_COMPAT
7796 p = "{r\t%2, %0|\t%0, %2}";
7797 #else
7798 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7799 #endif
7801 break;
7803 default:
7804 abort ();
7807 strcat (buf, p);
7808 return buf;
7811 /* Output code to initialize the control word copies used by the
7812    trunc?f?i patterns.  NORMAL is set to the current control word, while
7813    ROUND_DOWN is set to a control word that rounds downwards.  */
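/* Worked example (an addition, not from the original comment): OR-ing in
   0xc00 sets both rounding-control bits (bits 10 and 11) of the i387 control
   word; e.g. a stored control word of 0x037f becomes 0x037f | 0x0c00 = 0x0f7f,
   which is the value then copied into ROUND_DOWN.  */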
7814 void
7815 emit_i387_cw_initialization (rtx normal, rtx round_down)
7817 rtx reg = gen_reg_rtx (HImode);
7819 emit_insn (gen_x86_fnstcw_1 (normal));
7820 emit_move_insn (reg, normal);
7821 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7822 && !TARGET_64BIT)
7823 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7824 else
7825 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7826 emit_move_insn (round_down, reg);
7829 /* Output code for INSN to convert a float to a signed int. OPERANDS
7830 are the insn operands. The output may be [HSD]Imode and the input
7831 operand may be [SDX]Fmode. */
7833 const char *
7834 output_fix_trunc (rtx insn, rtx *operands)
7836 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7837 int dimode_p = GET_MODE (operands[0]) == DImode;
7839 /* Jump through a hoop or two for DImode, since the hardware has no
7840 non-popping instruction. We used to do this a different way, but
7841 that was somewhat fragile and broke with post-reload splitters. */
7842 if (dimode_p && !stack_top_dies)
7843 output_asm_insn ("fld\t%y1", operands);
7845 if (!STACK_TOP_P (operands[1]))
7846 abort ();
7848 if (GET_CODE (operands[0]) != MEM)
7849 abort ();
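/* Switch to the truncating control word (%3), emit the store, then
   restore the original control word (%2); see
   emit_i387_cw_initialization above.  */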
7851 output_asm_insn ("fldcw\t%3", operands);
7852 if (stack_top_dies || dimode_p)
7853 output_asm_insn ("fistp%z0\t%0", operands);
7854 else
7855 output_asm_insn ("fist%z0\t%0", operands);
7856 output_asm_insn ("fldcw\t%2", operands);
7858 return "";
7861 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7862 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7863 when fucom should be used. */
7865 const char *
7866 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7868 int stack_top_dies;
7869 rtx cmp_op0 = operands[0];
7870 rtx cmp_op1 = operands[1];
7871 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7873 if (eflags_p == 2)
7875 cmp_op0 = cmp_op1;
7876 cmp_op1 = operands[2];
7878 if (is_sse)
7880 if (GET_MODE (operands[0]) == SFmode)
7881 if (unordered_p)
7882 return "ucomiss\t{%1, %0|%0, %1}";
7883 else
7884 return "comiss\t{%1, %0|%0, %1}";
7885 else
7886 if (unordered_p)
7887 return "ucomisd\t{%1, %0|%0, %1}";
7888 else
7889 return "comisd\t{%1, %0|%0, %1}";
7892 if (! STACK_TOP_P (cmp_op0))
7893 abort ();
7895 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7897 if (STACK_REG_P (cmp_op1)
7898 && stack_top_dies
7899 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7900 && REGNO (cmp_op1) != FIRST_STACK_REG)
7902 /* If the top of the 387 stack dies, and the other operand
7903 is also a stack register that dies, then this must be an
7904 `fcompp' float compare. */
7906 if (eflags_p == 1)
7908 /* There is no double popping fcomi variant. Fortunately,
7909 eflags is immune from the fstp's cc clobbering. */
7910 if (unordered_p)
7911 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7912 else
7913 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7914 return "fstp\t%y0";
7916 else
7918 if (eflags_p == 2)
7920 if (unordered_p)
7921 return "fucompp\n\tfnstsw\t%0";
7922 else
7923 return "fcompp\n\tfnstsw\t%0";
7925 else
7927 if (unordered_p)
7928 return "fucompp";
7929 else
7930 return "fcompp";
7934 else
7936 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7938 static const char * const alt[24] =
7940 "fcom%z1\t%y1",
7941 "fcomp%z1\t%y1",
7942 "fucom%z1\t%y1",
7943 "fucomp%z1\t%y1",
7945 "ficom%z1\t%y1",
7946 "ficomp%z1\t%y1",
7947 NULL,
7948 NULL,
7950 "fcomi\t{%y1, %0|%0, %y1}",
7951 "fcomip\t{%y1, %0|%0, %y1}",
7952 "fucomi\t{%y1, %0|%0, %y1}",
7953 "fucomip\t{%y1, %0|%0, %y1}",
7955 NULL,
7956 NULL,
7957 NULL,
7958 NULL,
7960 "fcom%z2\t%y2\n\tfnstsw\t%0",
7961 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7962 "fucom%z2\t%y2\n\tfnstsw\t%0",
7963 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7965 "ficom%z2\t%y2\n\tfnstsw\t%0",
7966 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7967 NULL,
7968 NULL
7971 int mask;
7972 const char *ret;
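/* Build the table index: eflags_p selects a group of eight entries
   (0 = plain fcom forms, 1 = fcomi forms, 2 = fnstsw forms), bit 2 is
   set for integer (ficom) operands, bit 1 for unordered compares and
   bit 0 when the top of the stack dies (popping forms).  */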
7974 mask = eflags_p << 3;
7975 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7976 mask |= unordered_p << 1;
7977 mask |= stack_top_dies;
7979 if (mask >= 24)
7980 abort ();
7981 ret = alt[mask];
7982 if (ret == NULL)
7983 abort ();
7985 return ret;
7989 void
7990 ix86_output_addr_vec_elt (FILE *file, int value)
7992 const char *directive = ASM_LONG;
7994 if (TARGET_64BIT)
7996 #ifdef ASM_QUAD
7997 directive = ASM_QUAD;
7998 #else
7999 abort ();
8000 #endif
8003 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8006 void
8007 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8009 if (TARGET_64BIT)
8010 fprintf (file, "%s%s%d-%s%d\n",
8011 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8012 else if (HAVE_AS_GOTOFF_IN_DATA)
8013 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8014 #if TARGET_MACHO
8015 else if (TARGET_MACHO)
8016 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8017 machopic_function_base_name () + 1);
8018 #endif
8019 else
8020 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8021 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8024 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8025 for the target. */
8027 void
8028 ix86_expand_clear (rtx dest)
8030 rtx tmp;
8032 /* We play register width games, which are only valid after reload. */
8033 if (!reload_completed)
8034 abort ();
8036 /* Avoid HImode and its attendant prefix byte. */
8037 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8038 dest = gen_rtx_REG (SImode, REGNO (dest));
8040 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8042 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8043 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8045 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8046 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8049 emit_insn (tmp);
8052 /* X is an unchanging MEM. If it is a constant pool reference, return
8053 the constant pool rtx, else NULL. */
8055 static rtx
8056 maybe_get_pool_constant (rtx x)
8058 x = ix86_delegitimize_address (XEXP (x, 0));
8060 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8061 return get_pool_constant (x);
8063 return NULL_RTX;
8066 void
8067 ix86_expand_move (enum machine_mode mode, rtx operands[])
8069 int strict = (reload_in_progress || reload_completed);
8070 rtx op0, op1;
8071 enum tls_model model;
8073 op0 = operands[0];
8074 op1 = operands[1];
8076 model = tls_symbolic_operand (op1, Pmode);
8077 if (model)
8079 op1 = legitimize_tls_address (op1, model, true);
8080 op1 = force_operand (op1, op0);
8081 if (op1 == op0)
8082 return;
8085 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8087 #if TARGET_MACHO
8088 if (MACHOPIC_PURE)
8090 rtx temp = ((reload_in_progress
8091 || ((op0 && GET_CODE (op0) == REG)
8092 && mode == Pmode))
8093 ? op0 : gen_reg_rtx (Pmode));
8094 op1 = machopic_indirect_data_reference (op1, temp);
8095 op1 = machopic_legitimize_pic_address (op1, mode,
8096 temp == op1 ? 0 : temp);
8098 else if (MACHOPIC_INDIRECT)
8099 op1 = machopic_indirect_data_reference (op1, 0);
8100 if (op0 == op1)
8101 return;
8102 #else
8103 if (GET_CODE (op0) == MEM)
8104 op1 = force_reg (Pmode, op1);
8105 else
8107 rtx temp = op0;
8108 if (GET_CODE (temp) != REG)
8109 temp = gen_reg_rtx (Pmode);
8110 temp = legitimize_pic_address (op1, temp);
8111 if (temp == op0)
8112 return;
8113 op1 = temp;
8115 #endif /* TARGET_MACHO */
8117 else
8119 if (GET_CODE (op0) == MEM
8120 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8121 || !push_operand (op0, mode))
8122 && GET_CODE (op1) == MEM)
8123 op1 = force_reg (mode, op1);
8125 if (push_operand (op0, mode)
8126 && ! general_no_elim_operand (op1, mode))
8127 op1 = copy_to_mode_reg (mode, op1);
8129 /* Force large constants in 64-bit compilation into a register
8130 to get them CSEed. */
8131 if (TARGET_64BIT && mode == DImode
8132 && immediate_operand (op1, mode)
8133 && !x86_64_zero_extended_value (op1)
8134 && !register_operand (op0, mode)
8135 && optimize && !reload_completed && !reload_in_progress)
8136 op1 = copy_to_mode_reg (mode, op1);
8138 if (FLOAT_MODE_P (mode))
8140 /* If we are loading a floating point constant to a register,
8141 force the value to memory now, since we'll get better code
8142 out of the back end. */
8144 if (strict)
8146 else if (GET_CODE (op1) == CONST_DOUBLE)
8148 op1 = validize_mem (force_const_mem (mode, op1));
8149 if (!register_operand (op0, mode))
8151 rtx temp = gen_reg_rtx (mode);
8152 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8153 emit_move_insn (op0, temp);
8154 return;
8160 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8163 void
8164 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8166 /* Force constants other than zero into memory. We do not know how
8167 the instructions used to build constants modify the upper 64 bits
8168 of the register; once we have that information we may be able
8169 to handle some of them more efficiently. */
8170 if ((reload_in_progress | reload_completed) == 0
8171 && register_operand (operands[0], mode)
8172 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8173 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8175 /* Make operand1 a register if it isn't already. */
8176 if (!no_new_pseudos
8177 && !register_operand (operands[0], mode)
8178 && !register_operand (operands[1], mode))
8180 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8181 emit_move_insn (operands[0], temp);
8182 return;
8185 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8188 /* Attempt to expand a binary operator. Make the expansion closer to the
8189 actual machine, than just general_operand, which will allow 3 separate
8190 memory references (one output, two input) in a single insn. */
8192 void
8193 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8194 rtx operands[])
8196 int matching_memory;
8197 rtx src1, src2, dst, op, clob;
8199 dst = operands[0];
8200 src1 = operands[1];
8201 src2 = operands[2];
8203 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8204 if (GET_RTX_CLASS (code) == 'c'
8205 && (rtx_equal_p (dst, src2)
8206 || immediate_operand (src1, mode)))
8208 rtx temp = src1;
8209 src1 = src2;
8210 src2 = temp;
8213 /* If the destination is memory, and we do not have matching source
8214 operands, do things in registers. */
8215 matching_memory = 0;
8216 if (GET_CODE (dst) == MEM)
8218 if (rtx_equal_p (dst, src1))
8219 matching_memory = 1;
8220 else if (GET_RTX_CLASS (code) == 'c'
8221 && rtx_equal_p (dst, src2))
8222 matching_memory = 2;
8223 else
8224 dst = gen_reg_rtx (mode);
8227 /* Both source operands cannot be in memory. */
8228 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8230 if (matching_memory != 2)
8231 src2 = force_reg (mode, src2);
8232 else
8233 src1 = force_reg (mode, src1);
8236 /* If the operation is not commutable, source 1 cannot be a constant
8237 or non-matching memory. */
8238 if ((CONSTANT_P (src1)
8239 || (!matching_memory && GET_CODE (src1) == MEM))
8240 && GET_RTX_CLASS (code) != 'c')
8241 src1 = force_reg (mode, src1);
8243 /* If optimizing, copy to regs to improve CSE */
8244 if (optimize && ! no_new_pseudos)
8246 if (GET_CODE (dst) == MEM)
8247 dst = gen_reg_rtx (mode);
8248 if (GET_CODE (src1) == MEM)
8249 src1 = force_reg (mode, src1);
8250 if (GET_CODE (src2) == MEM)
8251 src2 = force_reg (mode, src2);
8254 /* Emit the instruction. */
8256 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8257 if (reload_in_progress)
8259 /* Reload doesn't know about the flags register, and doesn't know that
8260 it doesn't want to clobber it. We can only do this with PLUS. */
8261 if (code != PLUS)
8262 abort ();
8263 emit_insn (op);
8265 else
8267 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8271 /* Fix up the destination if needed. */
8272 if (dst != operands[0])
8273 emit_move_insn (operands[0], dst);
8276 /* Return TRUE or FALSE depending on whether the binary operator meets the
8277 appropriate constraints. */
8280 ix86_binary_operator_ok (enum rtx_code code,
8281 enum machine_mode mode ATTRIBUTE_UNUSED,
8282 rtx operands[3])
8284 /* Both source operands cannot be in memory. */
8285 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8286 return 0;
8287 /* If the operation is not commutable, source 1 cannot be a constant. */
8288 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8289 return 0;
8290 /* If the destination is memory, we must have a matching source operand. */
8291 if (GET_CODE (operands[0]) == MEM
8292 && ! (rtx_equal_p (operands[0], operands[1])
8293 || (GET_RTX_CLASS (code) == 'c'
8294 && rtx_equal_p (operands[0], operands[2]))))
8295 return 0;
8296 /* If the operation is not commutable and the source 1 is memory, we must
8297 have a matching destination. */
8298 if (GET_CODE (operands[1]) == MEM
8299 && GET_RTX_CLASS (code) != 'c'
8300 && ! rtx_equal_p (operands[0], operands[1]))
8301 return 0;
8302 return 1;
8305 /* Attempt to expand a unary operator. Make the expansion closer to the
8306 actual machine, than just general_operand, which will allow 2 separate
8307 memory references (one output, one input) in a single insn. */
8309 void
8310 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8311 rtx operands[])
8313 int matching_memory;
8314 rtx src, dst, op, clob;
8316 dst = operands[0];
8317 src = operands[1];
8319 /* If the destination is memory, and we do not have matching source
8320 operands, do things in registers. */
8321 matching_memory = 0;
8322 if (GET_CODE (dst) == MEM)
8324 if (rtx_equal_p (dst, src))
8325 matching_memory = 1;
8326 else
8327 dst = gen_reg_rtx (mode);
8330 /* When source operand is memory, destination must match. */
8331 if (!matching_memory && GET_CODE (src) == MEM)
8332 src = force_reg (mode, src);
8334 /* If optimizing, copy to regs to improve CSE */
8335 if (optimize && ! no_new_pseudos)
8337 if (GET_CODE (dst) == MEM)
8338 dst = gen_reg_rtx (mode);
8339 if (GET_CODE (src) == MEM)
8340 src = force_reg (mode, src);
8343 /* Emit the instruction. */
8345 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8346 if (reload_in_progress || code == NOT)
8348 /* Reload doesn't know about the flags register, and doesn't know that
8349 it doesn't want to clobber it. */
8350 if (code != NOT)
8351 abort ();
8352 emit_insn (op);
8354 else
8356 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8357 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8360 /* Fix up the destination if needed. */
8361 if (dst != operands[0])
8362 emit_move_insn (operands[0], dst);
8365 /* Return TRUE or FALSE depending on whether the unary operator meets the
8366 appropriate constraints. */
8369 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8370 enum machine_mode mode ATTRIBUTE_UNUSED,
8371 rtx operands[2] ATTRIBUTE_UNUSED)
8373 /* If one of operands is memory, source and destination must match. */
8374 if ((GET_CODE (operands[0]) == MEM
8375 || GET_CODE (operands[1]) == MEM)
8376 && ! rtx_equal_p (operands[0], operands[1]))
8377 return FALSE;
8378 return TRUE;
8381 /* Return TRUE or FALSE depending on whether the first SET in INSN
8382 has source and destination with matching CC modes, and that the
8383 CC mode is at least as constrained as REQ_MODE. */
8386 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8388 rtx set;
8389 enum machine_mode set_mode;
8391 set = PATTERN (insn);
8392 if (GET_CODE (set) == PARALLEL)
8393 set = XVECEXP (set, 0, 0);
8394 if (GET_CODE (set) != SET)
8395 abort ();
8396 if (GET_CODE (SET_SRC (set)) != COMPARE)
8397 abort ();
8399 set_mode = GET_MODE (SET_DEST (set));
8400 switch (set_mode)
8402 case CCNOmode:
8403 if (req_mode != CCNOmode
8404 && (req_mode != CCmode
8405 || XEXP (SET_SRC (set), 1) != const0_rtx))
8406 return 0;
8407 break;
8408 case CCmode:
8409 if (req_mode == CCGCmode)
8410 return 0;
8411 /* FALLTHRU */
8412 case CCGCmode:
8413 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8414 return 0;
8415 /* FALLTHRU */
8416 case CCGOCmode:
8417 if (req_mode == CCZmode)
8418 return 0;
8419 /* FALLTHRU */
8420 case CCZmode:
8421 break;
8423 default:
8424 abort ();
8427 return (GET_MODE (SET_SRC (set)) == set_mode);
8430 /* Generate insn patterns to do an integer compare of OPERANDS. */
8432 static rtx
8433 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8435 enum machine_mode cmpmode;
8436 rtx tmp, flags;
8438 cmpmode = SELECT_CC_MODE (code, op0, op1);
8439 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8441 /* This is very simple, but making the interface the same as in the
8442 FP case makes the rest of the code easier. */
8443 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8444 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8446 /* Return the test that should be put into the flags user, i.e.
8447 the bcc, scc, or cmov instruction. */
8448 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8451 /* Figure out whether to use ordered or unordered fp comparisons.
8452 Return the appropriate mode to use. */
8454 enum machine_mode
8455 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8457 /* ??? In order to make all comparisons reversible, we do all comparisons
8458 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8459 between trapping and nontrapping forms of comparisons, we can make inequality
8460 comparisons trapping again, since it results in better code when using
8461 FCOM based compares. */
8462 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8465 enum machine_mode
8466 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8468 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8469 return ix86_fp_compare_mode (code);
8470 switch (code)
8472 /* Only zero flag is needed. */
8473 case EQ: /* ZF=0 */
8474 case NE: /* ZF!=0 */
8475 return CCZmode;
8476 /* Codes needing carry flag. */
8477 case GEU: /* CF=0 */
8478 case GTU: /* CF=0 & ZF=0 */
8479 case LTU: /* CF=1 */
8480 case LEU: /* CF=1 | ZF=1 */
8481 return CCmode;
8482 /* Codes possibly doable only with sign flag when
8483 comparing against zero. */
8484 case GE: /* SF=OF or SF=0 */
8485 case LT: /* SF<>OF or SF=1 */
8486 if (op1 == const0_rtx)
8487 return CCGOCmode;
8488 else
8489 /* For other cases Carry flag is not required. */
8490 return CCGCmode;
8491 /* Codes doable only with sign flag when comparing
8492 against zero, but we lack a jump instruction for it,
8493 so we need to use relational tests against overflow,
8494 which thus needs to be zero. */
8495 case GT: /* ZF=0 & SF=OF */
8496 case LE: /* ZF=1 | SF<>OF */
8497 if (op1 == const0_rtx)
8498 return CCNOmode;
8499 else
8500 return CCGCmode;
8501 /* The strcmp pattern does (use flags) and combine may ask us for the proper
8502 mode. */
8503 case USE:
8504 return CCmode;
8505 default:
8506 abort ();
8510 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8513 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8515 enum rtx_code swapped_code = swap_condition (code);
8516 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8517 || (ix86_fp_comparison_cost (swapped_code)
8518 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8521 /* Swap, force into registers, or otherwise massage the two operands
8522 to a fp comparison. The operands are updated in place; the new
8523 comparison code is returned. */
8525 static enum rtx_code
8526 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8528 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8529 rtx op0 = *pop0, op1 = *pop1;
8530 enum machine_mode op_mode = GET_MODE (op0);
8531 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8533 /* All of the unordered compare instructions only work on registers.
8534 The same is true of the XFmode compare instructions. The same is
8535 true of the fcomi compare instructions. */
8537 if (!is_sse
8538 && (fpcmp_mode == CCFPUmode
8539 || op_mode == XFmode
8540 || op_mode == TFmode
8541 || ix86_use_fcomi_compare (code)))
8543 op0 = force_reg (op_mode, op0);
8544 op1 = force_reg (op_mode, op1);
8546 else
8548 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8549 things around if they appear profitable, otherwise force op0
8550 into a register. */
8552 if (standard_80387_constant_p (op0) == 0
8553 || (GET_CODE (op0) == MEM
8554 && ! (standard_80387_constant_p (op1) == 0
8555 || GET_CODE (op1) == MEM)))
8557 rtx tmp;
8558 tmp = op0, op0 = op1, op1 = tmp;
8559 code = swap_condition (code);
8562 if (GET_CODE (op0) != REG)
8563 op0 = force_reg (op_mode, op0);
8565 if (CONSTANT_P (op1))
8567 if (standard_80387_constant_p (op1))
8568 op1 = force_reg (op_mode, op1);
8569 else
8570 op1 = validize_mem (force_const_mem (op_mode, op1));
8574 /* Try to rearrange the comparison to make it cheaper. */
8575 if (ix86_fp_comparison_cost (code)
8576 > ix86_fp_comparison_cost (swap_condition (code))
8577 && (GET_CODE (op1) == REG || !no_new_pseudos))
8579 rtx tmp;
8580 tmp = op0, op0 = op1, op1 = tmp;
8581 code = swap_condition (code);
8582 if (GET_CODE (op0) != REG)
8583 op0 = force_reg (op_mode, op0);
8586 *pop0 = op0;
8587 *pop1 = op1;
8588 return code;
8591 /* Convert comparison codes we use to represent FP comparison to integer
8592 code that will result in proper branch. Return UNKNOWN if no such code
8593 is available. */
8594 static enum rtx_code
8595 ix86_fp_compare_code_to_integer (enum rtx_code code)
8597 switch (code)
8599 case GT:
8600 return GTU;
8601 case GE:
8602 return GEU;
8603 case ORDERED:
8604 case UNORDERED:
8605 return code;
8606 break;
8607 case UNEQ:
8608 return EQ;
8609 break;
8610 case UNLT:
8611 return LTU;
8612 break;
8613 case UNLE:
8614 return LEU;
8615 break;
8616 case LTGT:
8617 return NE;
8618 break;
8619 default:
8620 return UNKNOWN;
8624 /* Split comparison code CODE into comparisons we can do using branch
8625 instructions. BYPASS_CODE is comparison code for branch that will
8626 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8627 is not required, its value is set to NIL.
8628 We never require more than two branches. */
8629 static void
8630 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8631 enum rtx_code *first_code,
8632 enum rtx_code *second_code)
8634 *first_code = code;
8635 *bypass_code = NIL;
8636 *second_code = NIL;
8638 /* The fcomi comparison sets flags as follows:
8640      cmp   ZF  PF  CF
8641       >     0   0   0
8642       <     0   0   1
8643       =     1   0   0
8644      un     1   1   1  */
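/* These are the same flag settings an unsigned integer compare produces,
   which is why GT and GE map directly onto GTU and GEU below, while the
   codes that are also satisfied by the all-ones unordered result need a
   bypass or second branch.  */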
8646 switch (code)
8648 case GT: /* GTU - CF=0 & ZF=0 */
8649 case GE: /* GEU - CF=0 */
8650 case ORDERED: /* PF=0 */
8651 case UNORDERED: /* PF=1 */
8652 case UNEQ: /* EQ - ZF=1 */
8653 case UNLT: /* LTU - CF=1 */
8654 case UNLE: /* LEU - CF=1 | ZF=1 */
8655 case LTGT: /* EQ - ZF=0 */
8656 break;
8657 case LT: /* LTU - CF=1 - fails on unordered */
8658 *first_code = UNLT;
8659 *bypass_code = UNORDERED;
8660 break;
8661 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8662 *first_code = UNLE;
8663 *bypass_code = UNORDERED;
8664 break;
8665 case EQ: /* EQ - ZF=1 - fails on unordered */
8666 *first_code = UNEQ;
8667 *bypass_code = UNORDERED;
8668 break;
8669 case NE: /* NE - ZF=0 - fails on unordered */
8670 *first_code = LTGT;
8671 *second_code = UNORDERED;
8672 break;
8673 case UNGE: /* GEU - CF=0 - fails on unordered */
8674 *first_code = GE;
8675 *second_code = UNORDERED;
8676 break;
8677 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8678 *first_code = GT;
8679 *second_code = UNORDERED;
8680 break;
8681 default:
8682 abort ();
8684 if (!TARGET_IEEE_FP)
8686 *second_code = NIL;
8687 *bypass_code = NIL;
8691 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8692 All of the following functions use the number of instructions as a cost metric.
8693 In the future this should be tweaked to compute bytes for optimize_size and
8694 take into account the performance of various instructions on various CPUs. */
8695 static int
8696 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8698 if (!TARGET_IEEE_FP)
8699 return 4;
8700 /* The cost of code output by ix86_expand_fp_compare. */
8701 switch (code)
8703 case UNLE:
8704 case UNLT:
8705 case LTGT:
8706 case GT:
8707 case GE:
8708 case UNORDERED:
8709 case ORDERED:
8710 case UNEQ:
8711 return 4;
8712 break;
8713 case LT:
8714 case NE:
8715 case EQ:
8716 case UNGE:
8717 return 5;
8718 break;
8719 case LE:
8720 case UNGT:
8721 return 6;
8722 break;
8723 default:
8724 abort ();
8728 /* Return cost of comparison done using fcomi operation.
8729 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8730 static int
8731 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8733 enum rtx_code bypass_code, first_code, second_code;
8734 /* Return arbitrarily high cost when instruction is not supported - this
8735 prevents gcc from using it. */
8736 if (!TARGET_CMOVE)
8737 return 1024;
8738 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8739 return (bypass_code != NIL || second_code != NIL) + 2;
8742 /* Return cost of comparison done using sahf operation.
8743 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8744 static int
8745 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8747 enum rtx_code bypass_code, first_code, second_code;
8748 /* Return arbitrarily high cost when instruction is not preferred - this
8749 keeps gcc from using it. */
8750 if (!TARGET_USE_SAHF && !optimize_size)
8751 return 1024;
8752 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8753 return (bypass_code != NIL || second_code != NIL) + 3;
8756 /* Compute cost of the comparison done using any method.
8757 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8758 static int
8759 ix86_fp_comparison_cost (enum rtx_code code)
8761 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8762 int min;
8764 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8765 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8767 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8768 if (min > sahf_cost)
8769 min = sahf_cost;
8770 if (min > fcomi_cost)
8771 min = fcomi_cost;
8772 return min;
8775 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8777 static rtx
8778 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8779 rtx *second_test, rtx *bypass_test)
8781 enum machine_mode fpcmp_mode, intcmp_mode;
8782 rtx tmp, tmp2;
8783 int cost = ix86_fp_comparison_cost (code);
8784 enum rtx_code bypass_code, first_code, second_code;
8786 fpcmp_mode = ix86_fp_compare_mode (code);
8787 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8789 if (second_test)
8790 *second_test = NULL_RTX;
8791 if (bypass_test)
8792 *bypass_test = NULL_RTX;
8794 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8796 /* Do fcomi/sahf based test when profitable. */
8797 if ((bypass_code == NIL || bypass_test)
8798 && (second_code == NIL || second_test)
8799 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8801 if (TARGET_CMOVE)
8803 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8804 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8805 tmp);
8806 emit_insn (tmp);
8808 else
8810 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8811 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8812 if (!scratch)
8813 scratch = gen_reg_rtx (HImode);
8814 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8815 emit_insn (gen_x86_sahf_1 (scratch));
8818 /* The FP codes work out to act like unsigned. */
8819 intcmp_mode = fpcmp_mode;
8820 code = first_code;
8821 if (bypass_code != NIL)
8822 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8823 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8824 const0_rtx);
8825 if (second_code != NIL)
8826 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8827 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8828 const0_rtx);
8830 else
8832 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8833 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8834 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8835 if (!scratch)
8836 scratch = gen_reg_rtx (HImode);
8837 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8839 /* In the unordered case, we have to check C2 for NaN's, which
8840 doesn't happen to work out to anything nice combination-wise.
8841 So do some bit twiddling on the value we've got in AH to come
8842 up with an appropriate set of condition codes. */
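/* After fnstsw the FPU condition bits land in AH as C0 = 0x01, C2 = 0x04
   and C3 = 0x40; the masks used below (0x01, 0x04, 0x05, 0x40, 0x44,
   0x45) are combinations of those bits.  */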
8844 intcmp_mode = CCNOmode;
8845 switch (code)
8847 case GT:
8848 case UNGT:
8849 if (code == GT || !TARGET_IEEE_FP)
8851 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8852 code = EQ;
8854 else
8856 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8857 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8858 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8859 intcmp_mode = CCmode;
8860 code = GEU;
8862 break;
8863 case LT:
8864 case UNLT:
8865 if (code == LT && TARGET_IEEE_FP)
8867 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8868 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8869 intcmp_mode = CCmode;
8870 code = EQ;
8872 else
8874 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8875 code = NE;
8877 break;
8878 case GE:
8879 case UNGE:
8880 if (code == GE || !TARGET_IEEE_FP)
8882 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8883 code = EQ;
8885 else
8887 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8888 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8889 GEN_INT (0x01)));
8890 code = NE;
8892 break;
8893 case LE:
8894 case UNLE:
8895 if (code == LE && TARGET_IEEE_FP)
8897 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8898 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8899 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8900 intcmp_mode = CCmode;
8901 code = LTU;
8903 else
8905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8906 code = NE;
8908 break;
8909 case EQ:
8910 case UNEQ:
8911 if (code == EQ && TARGET_IEEE_FP)
8913 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8914 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8915 intcmp_mode = CCmode;
8916 code = EQ;
8918 else
8920 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8921 code = NE;
8922 break;
8924 break;
8925 case NE:
8926 case LTGT:
8927 if (code == NE && TARGET_IEEE_FP)
8929 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8930 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8931 GEN_INT (0x40)));
8932 code = NE;
8934 else
8936 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8937 code = EQ;
8939 break;
8941 case UNORDERED:
8942 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8943 code = NE;
8944 break;
8945 case ORDERED:
8946 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8947 code = EQ;
8948 break;
8950 default:
8951 abort ();
8955 /* Return the test that should be put into the flags user, i.e.
8956 the bcc, scc, or cmov instruction. */
8957 return gen_rtx_fmt_ee (code, VOIDmode,
8958 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8959 const0_rtx);
8963 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
8965 rtx op0, op1, ret;
8966 op0 = ix86_compare_op0;
8967 op1 = ix86_compare_op1;
8969 if (second_test)
8970 *second_test = NULL_RTX;
8971 if (bypass_test)
8972 *bypass_test = NULL_RTX;
8974 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8975 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8976 second_test, bypass_test);
8977 else
8978 ret = ix86_expand_int_compare (code, op0, op1);
8980 return ret;
8983 /* Return true if the CODE will result in nontrivial jump sequence. */
8984 bool
8985 ix86_fp_jump_nontrivial_p (enum rtx_code code)
8987 enum rtx_code bypass_code, first_code, second_code;
8988 if (!TARGET_CMOVE)
8989 return true;
8990 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8991 return bypass_code != NIL || second_code != NIL;
8994 void
8995 ix86_expand_branch (enum rtx_code code, rtx label)
8997 rtx tmp;
8999 switch (GET_MODE (ix86_compare_op0))
9001 case QImode:
9002 case HImode:
9003 case SImode:
9004 simple:
9005 tmp = ix86_expand_compare (code, NULL, NULL);
9006 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9007 gen_rtx_LABEL_REF (VOIDmode, label),
9008 pc_rtx);
9009 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9010 return;
9012 case SFmode:
9013 case DFmode:
9014 case XFmode:
9015 case TFmode:
9017 rtvec vec;
9018 int use_fcomi;
9019 enum rtx_code bypass_code, first_code, second_code;
9021 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9022 &ix86_compare_op1);
9024 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9026 /* Check whether we will use the natural sequence with one jump. If
9027 so, we can expand the jump early. Otherwise delay expansion by
9028 creating a compound insn so as not to confuse the optimizers. */
9029 if (bypass_code == NIL && second_code == NIL
9030 && TARGET_CMOVE)
9032 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9033 gen_rtx_LABEL_REF (VOIDmode, label),
9034 pc_rtx, NULL_RTX);
9036 else
9038 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9039 ix86_compare_op0, ix86_compare_op1);
9040 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9041 gen_rtx_LABEL_REF (VOIDmode, label),
9042 pc_rtx);
9043 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9045 use_fcomi = ix86_use_fcomi_compare (code);
9046 vec = rtvec_alloc (3 + !use_fcomi);
9047 RTVEC_ELT (vec, 0) = tmp;
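/* Register 18 is the FP status word register and 17 the flags register
   in this port's numbering; without fcomi we also clobber a scratch
   HImode register for the fnstsw result.  */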
9048 RTVEC_ELT (vec, 1)
9049 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9050 RTVEC_ELT (vec, 2)
9051 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9052 if (! use_fcomi)
9053 RTVEC_ELT (vec, 3)
9054 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9056 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9058 return;
9061 case DImode:
9062 if (TARGET_64BIT)
9063 goto simple;
9064 /* Expand DImode branch into multiple compare+branch. */
9066 rtx lo[2], hi[2], label2;
9067 enum rtx_code code1, code2, code3;
9069 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9071 tmp = ix86_compare_op0;
9072 ix86_compare_op0 = ix86_compare_op1;
9073 ix86_compare_op1 = tmp;
9074 code = swap_condition (code);
9076 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9077 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9079 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9080 avoid two branches. This costs one extra insn, so disable when
9081 optimizing for size. */
9083 if ((code == EQ || code == NE)
9084 && (!optimize_size
9085 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9087 rtx xor0, xor1;
9089 xor1 = hi[0];
9090 if (hi[1] != const0_rtx)
9091 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9092 NULL_RTX, 0, OPTAB_WIDEN);
9094 xor0 = lo[0];
9095 if (lo[1] != const0_rtx)
9096 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9097 NULL_RTX, 0, OPTAB_WIDEN);
9099 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9100 NULL_RTX, 0, OPTAB_WIDEN);
9102 ix86_compare_op0 = tmp;
9103 ix86_compare_op1 = const0_rtx;
9104 ix86_expand_branch (code, label);
9105 return;
9108 /* Otherwise, if we are doing a less-than or greater-than-or-equal comparison,
9109 op1 is a constant and the low word is zero, then we can just
9110 examine the high word. */
9112 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9113 switch (code)
9115 case LT: case LTU: case GE: case GEU:
9116 ix86_compare_op0 = hi[0];
9117 ix86_compare_op1 = hi[1];
9118 ix86_expand_branch (code, label);
9119 return;
9120 default:
9121 break;
9124 /* Otherwise, we need two or three jumps. */
9126 label2 = gen_label_rtx ();
9128 code1 = code;
9129 code2 = swap_condition (code);
9130 code3 = unsigned_condition (code);
9132 switch (code)
9134 case LT: case GT: case LTU: case GTU:
9135 break;
9137 case LE: code1 = LT; code2 = GT; break;
9138 case GE: code1 = GT; code2 = LT; break;
9139 case LEU: code1 = LTU; code2 = GTU; break;
9140 case GEU: code1 = GTU; code2 = LTU; break;
9142 case EQ: code1 = NIL; code2 = NE; break;
9143 case NE: code2 = NIL; break;
9145 default:
9146 abort ();
9150 * a < b =>
9151 * if (hi(a) < hi(b)) goto true;
9152 * if (hi(a) > hi(b)) goto false;
9153 * if (lo(a) < lo(b)) goto true;
9154 * false:
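/* The high words are compared first with code1/code2; the low words are
   only reached when the high words are equal, so they are compared with
   code3, the unsigned variant of the original code.  */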
9157 ix86_compare_op0 = hi[0];
9158 ix86_compare_op1 = hi[1];
9160 if (code1 != NIL)
9161 ix86_expand_branch (code1, label);
9162 if (code2 != NIL)
9163 ix86_expand_branch (code2, label2);
9165 ix86_compare_op0 = lo[0];
9166 ix86_compare_op1 = lo[1];
9167 ix86_expand_branch (code3, label);
9169 if (code2 != NIL)
9170 emit_label (label2);
9171 return;
9174 default:
9175 abort ();
9179 /* Split branch based on floating point condition. */
9180 void
9181 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9182 rtx target1, rtx target2, rtx tmp)
9184 rtx second, bypass;
9185 rtx label = NULL_RTX;
9186 rtx condition;
9187 int bypass_probability = -1, second_probability = -1, probability = -1;
9188 rtx i;
9190 if (target2 != pc_rtx)
9192 rtx tmp = target2;
9193 code = reverse_condition_maybe_unordered (code);
9194 target2 = target1;
9195 target1 = tmp;
9198 condition = ix86_expand_fp_compare (code, op1, op2,
9199 tmp, &second, &bypass);
9201 if (split_branch_probability >= 0)
9203 /* Distribute the probabilities across the jumps.
9204 Assume that BYPASS and SECOND always test
9205 for UNORDERED. */
9206 probability = split_branch_probability;
9208 /* A value of 1 is low enough that there is no need for the probability
9209 to be updated. Later we may run some experiments and see
9210 if unordered values are more frequent in practice. */
9211 if (bypass)
9212 bypass_probability = 1;
9213 if (second)
9214 second_probability = 1;
9216 if (bypass != NULL_RTX)
9218 label = gen_label_rtx ();
9219 i = emit_jump_insn (gen_rtx_SET
9220 (VOIDmode, pc_rtx,
9221 gen_rtx_IF_THEN_ELSE (VOIDmode,
9222 bypass,
9223 gen_rtx_LABEL_REF (VOIDmode,
9224 label),
9225 pc_rtx)));
9226 if (bypass_probability >= 0)
9227 REG_NOTES (i)
9228 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9229 GEN_INT (bypass_probability),
9230 REG_NOTES (i));
9232 i = emit_jump_insn (gen_rtx_SET
9233 (VOIDmode, pc_rtx,
9234 gen_rtx_IF_THEN_ELSE (VOIDmode,
9235 condition, target1, target2)));
9236 if (probability >= 0)
9237 REG_NOTES (i)
9238 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9239 GEN_INT (probability),
9240 REG_NOTES (i));
9241 if (second != NULL_RTX)
9243 i = emit_jump_insn (gen_rtx_SET
9244 (VOIDmode, pc_rtx,
9245 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9246 target2)));
9247 if (second_probability >= 0)
9248 REG_NOTES (i)
9249 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9250 GEN_INT (second_probability),
9251 REG_NOTES (i));
9253 if (label != NULL_RTX)
9254 emit_label (label);
9258 ix86_expand_setcc (enum rtx_code code, rtx dest)
9260 rtx ret, tmp, tmpreg;
9261 rtx second_test, bypass_test;
9263 if (GET_MODE (ix86_compare_op0) == DImode
9264 && !TARGET_64BIT)
9265 return 0; /* FAIL */
9267 if (GET_MODE (dest) != QImode)
9268 abort ();
9270 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9271 PUT_MODE (ret, QImode);
9273 tmp = dest;
9274 tmpreg = dest;
9276 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9277 if (bypass_test || second_test)
9279 rtx test = second_test;
9280 int bypass = 0;
9281 rtx tmp2 = gen_reg_rtx (QImode);
9282 if (bypass_test)
9284 if (second_test)
9285 abort ();
9286 test = bypass_test;
9287 bypass = 1;
9288 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9290 PUT_MODE (test, QImode);
9291 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9293 if (bypass)
9294 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9295 else
9296 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9299 return 1; /* DONE */
9302 /* Expand a comparison setting or clearing the carry flag. Return true when
9303 successful and set *POP to the comparison operation. */
9304 bool
9305 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9307 enum machine_mode mode =
9308 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9310 /* Do not handle DImode compares that go through a special path. Also we can't
9311 deal with FP compares yet. This is possible to add. */
9312 if ((mode == DImode && !TARGET_64BIT))
9313 return false;
9314 if (FLOAT_MODE_P (mode))
9316 rtx second_test = NULL, bypass_test = NULL;
9317 rtx compare_op, compare_seq;
9319 /* Shortcut: the following common codes never translate into carry flag compares. */
9320 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9321 || code == ORDERED || code == UNORDERED)
9322 return false;
9324 /* These comparisons require the zero flag; swap the operands so they don't. */
9325 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9326 && !TARGET_IEEE_FP)
9328 rtx tmp = op0;
9329 op0 = op1;
9330 op1 = tmp;
9331 code = swap_condition (code);
9334 /* Try to expand the comparison and verify that we end up with a carry flag
9335 based comparison. This fails to be true only when we decide to expand the
9336 comparison using arithmetic, which is not a common scenario. */
9337 start_sequence ();
9338 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9339 &second_test, &bypass_test);
9340 compare_seq = get_insns ();
9341 end_sequence ();
9343 if (second_test || bypass_test)
9344 return false;
9345 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9346 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9347 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9348 else
9349 code = GET_CODE (compare_op);
9350 if (code != LTU && code != GEU)
9351 return false;
9352 emit_insn (compare_seq);
9353 *pop = compare_op;
9354 return true;
9356 if (!INTEGRAL_MODE_P (mode))
9357 return false;
9358 switch (code)
9360 case LTU:
9361 case GEU:
9362 break;
9364 /* Convert a==0 into (unsigned)a<1. */
9365 case EQ:
9366 case NE:
9367 if (op1 != const0_rtx)
9368 return false;
9369 op1 = const1_rtx;
9370 code = (code == EQ ? LTU : GEU);
9371 break;
9373 /* Convert a>b into b<a or a>=b-1. */
9374 case GTU:
9375 case LEU:
9376 if (GET_CODE (op1) == CONST_INT)
9378 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9379 /* Bail out on overflow. We still can swap operands but that
9380 would force loading of the constant into a register. */
9381 if (op1 == const0_rtx
9382 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9383 return false;
9384 code = (code == GTU ? GEU : LTU);
9386 else if (!nonimmediate_operand (op1, mode)
9387 || !general_operand (op0, mode))
9388 /* Swapping operands in this case would generate an
9389 unrecognizable insn. */
9390 return false;
9391 else
9393 rtx tmp = op1;
9394 op1 = op0;
9395 op0 = tmp;
9396 code = (code == GTU ? LTU : GEU);
9398 break;
9400 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9401 case LT:
9402 case GE:
9403 if (mode == DImode || op1 != const0_rtx)
9404 return false;
9405 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9406 code = (code == LT ? GEU : LTU);
9407 break;
9408 case LE:
9409 case GT:
9410 if (mode == DImode || op1 != constm1_rtx)
9411 return false;
9412 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9413 code = (code == LE ? GEU : LTU);
9414 break;
9416 default:
9417 return false;
9419 ix86_compare_op0 = op0;
9420 ix86_compare_op1 = op1;
9421 *pop = ix86_expand_compare (code, NULL, NULL);
9422 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9423 abort ();
9424 return true;
9428 ix86_expand_int_movcc (rtx operands[])
9430 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9431 rtx compare_seq, compare_op;
9432 rtx second_test, bypass_test;
9433 enum machine_mode mode = GET_MODE (operands[0]);
9434 bool sign_bit_compare_p = false;
9436 start_sequence ();
9437 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9438 compare_seq = get_insns ();
9439 end_sequence ();
9441 compare_code = GET_CODE (compare_op);
9443 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9444 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9445 sign_bit_compare_p = true;
9447 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9448 HImode insns, we'd be swallowed in word prefix ops. */
9450 if ((mode != HImode || TARGET_FAST_PREFIX)
9451 && (mode != DImode || TARGET_64BIT)
9452 && GET_CODE (operands[2]) == CONST_INT
9453 && GET_CODE (operands[3]) == CONST_INT)
9455 rtx out = operands[0];
9456 HOST_WIDE_INT ct = INTVAL (operands[2]);
9457 HOST_WIDE_INT cf = INTVAL (operands[3]);
9458 HOST_WIDE_INT diff;
9460 diff = ct - cf;
9461 /* Sign bit compares are better done using shifts than by using
9462 sbb. */
9463 if (sign_bit_compare_p
9464 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9465 ix86_compare_op1, &compare_op))
9467 /* Detect overlap between destination and compare sources. */
9468 rtx tmp = out;
9470 if (!sign_bit_compare_p)
9472 bool fpcmp = false;
9474 compare_code = GET_CODE (compare_op);
9476 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9477 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9479 fpcmp = true;
9480 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9483 /* To simplify rest of code, restrict to the GEU case. */
9484 if (compare_code == LTU)
9486 HOST_WIDE_INT tmp = ct;
9487 ct = cf;
9488 cf = tmp;
9489 compare_code = reverse_condition (compare_code);
9490 code = reverse_condition (code);
9492 else
9494 if (fpcmp)
9495 PUT_CODE (compare_op,
9496 reverse_condition_maybe_unordered
9497 (GET_CODE (compare_op)));
9498 else
9499 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9501 diff = ct - cf;
9503 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9504 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9505 tmp = gen_reg_rtx (mode);
9507 if (mode == DImode)
9508 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9509 else
9510 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9512 else
9514 if (code == GT || code == GE)
9515 code = reverse_condition (code);
9516 else
9518 HOST_WIDE_INT tmp = ct;
9519 ct = cf;
9520 cf = tmp;
9521 diff = ct - cf;
9523 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9524 ix86_compare_op1, VOIDmode, 0, -1);
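/* Either way TMP now holds an all-ones (-1) or all-zeros mask, depending
   on the outcome of the comparison; the branch-free sequences below turn
   that mask into ct or cf.  */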
9527 if (diff == 1)
9530 * cmpl op0,op1
9531 * sbbl dest,dest
9532 * [addl dest, ct]
9534 * Size 5 - 8.
9536 if (ct)
9537 tmp = expand_simple_binop (mode, PLUS,
9538 tmp, GEN_INT (ct),
9539 copy_rtx (tmp), 1, OPTAB_DIRECT);
9541 else if (cf == -1)
9544 * cmpl op0,op1
9545 * sbbl dest,dest
9546 * orl $ct, dest
9548 * Size 8.
9550 tmp = expand_simple_binop (mode, IOR,
9551 tmp, GEN_INT (ct),
9552 copy_rtx (tmp), 1, OPTAB_DIRECT);
9554 else if (diff == -1 && ct)
9557 * cmpl op0,op1
9558 * sbbl dest,dest
9559 * notl dest
9560 * [addl dest, cf]
9562 * Size 8 - 11.
9564 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9565 if (cf)
9566 tmp = expand_simple_binop (mode, PLUS,
9567 copy_rtx (tmp), GEN_INT (cf),
9568 copy_rtx (tmp), 1, OPTAB_DIRECT);
9570 else
9573 * cmpl op0,op1
9574 * sbbl dest,dest
9575 * [notl dest]
9576 * andl cf - ct, dest
9577 * [addl dest, ct]
9579 * Size 8 - 11.
9582 if (cf == 0)
9584 cf = ct;
9585 ct = 0;
9586 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9589 tmp = expand_simple_binop (mode, AND,
9590 copy_rtx (tmp),
9591 gen_int_mode (cf - ct, mode),
9592 copy_rtx (tmp), 1, OPTAB_DIRECT);
9593 if (ct)
9594 tmp = expand_simple_binop (mode, PLUS,
9595 copy_rtx (tmp), GEN_INT (ct),
9596 copy_rtx (tmp), 1, OPTAB_DIRECT);
9599 if (!rtx_equal_p (tmp, out))
9600 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9602 return 1; /* DONE */
9605 if (diff < 0)
9607 HOST_WIDE_INT tmp;
9608 tmp = ct, ct = cf, cf = tmp;
9609 diff = -diff;
9610 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9612 /* We may be reversing an unordered compare to a normal compare, which
9613 is not valid in general (we may convert a non-trapping condition
9614 to a trapping one); however, on i386 we currently emit all
9615 comparisons unordered. */
9616 compare_code = reverse_condition_maybe_unordered (compare_code);
9617 code = reverse_condition_maybe_unordered (code);
9619 else
9621 compare_code = reverse_condition (compare_code);
9622 code = reverse_condition (code);
9626 compare_code = NIL;
9627 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9628 && GET_CODE (ix86_compare_op1) == CONST_INT)
9630 if (ix86_compare_op1 == const0_rtx
9631 && (code == LT || code == GE))
9632 compare_code = code;
9633 else if (ix86_compare_op1 == constm1_rtx)
9635 if (code == LE)
9636 compare_code = LT;
9637 else if (code == GT)
9638 compare_code = GE;
9642 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9643 if (compare_code != NIL
9644 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9645 && (cf == -1 || ct == -1))
9647 /* If lea code below could be used, only optimize
9648 if it results in a 2 insn sequence. */
9650 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9651 || diff == 3 || diff == 5 || diff == 9)
9652 || (compare_code == LT && ct == -1)
9653 || (compare_code == GE && cf == -1))
9656 * notl op1 (if necessary)
9657 * sarl $31, op1
9658 * orl cf, op1
9660 if (ct != -1)
9662 cf = ct;
9663 ct = -1;
9664 code = reverse_condition (code);
9667 out = emit_store_flag (out, code, ix86_compare_op0,
9668 ix86_compare_op1, VOIDmode, 0, -1);
9670 out = expand_simple_binop (mode, IOR,
9671 out, GEN_INT (cf),
9672 out, 1, OPTAB_DIRECT);
9673 if (out != operands[0])
9674 emit_move_insn (operands[0], out);
9676 return 1; /* DONE */
9681 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9682 || diff == 3 || diff == 5 || diff == 9)
9683 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9684 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9687 * xorl dest,dest
9688 * cmpl op1,op2
9689 * setcc dest
9690 * lea cf(dest*(ct-cf)),dest
9692 * Size 14.
9694 * This also catches the degenerate setcc-only case.
9697 rtx tmp;
9698 int nops;
9700 out = emit_store_flag (out, code, ix86_compare_op0,
9701 ix86_compare_op1, VOIDmode, 0, 1);
9703 nops = 0;
9704 /* On x86_64 the lea instruction operates on Pmode, so we need
9705 to get the arithmetic done in the proper mode to match. */
9706 if (diff == 1)
9707 tmp = copy_rtx (out);
9708 else
9710 rtx out1;
9711 out1 = copy_rtx (out);
9712 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9713 nops++;
9714 if (diff & 1)
9716 tmp = gen_rtx_PLUS (mode, tmp, out1);
9717 nops++;
9720 if (cf != 0)
9722 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9723 nops++;
9725 if (!rtx_equal_p (tmp, out))
9727 if (nops == 1)
9728 out = force_operand (tmp, copy_rtx (out));
9729 else
9730 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9732 if (!rtx_equal_p (out, operands[0]))
9733 emit_move_insn (operands[0], copy_rtx (out));
9735 return 1; /* DONE */
9739 * General case:               Jumpful:
9740 *   xorl dest,dest              cmpl op1, op2
9741 *   cmpl op1, op2               movl ct, dest
9742 *   setcc dest                  jcc 1f
9743 *   decl dest                   movl cf, dest
9744 *   andl (cf-ct),dest           1:
9745 *   addl ct,dest
9747 * Size 20.                      Size 14.
9749 * This is reasonably steep, but branch mispredict costs are
9750 * high on modern cpus, so consider failing only if optimizing
9751 * for space.
9754 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9755 && BRANCH_COST >= 2)
9757 if (cf == 0)
9759 cf = ct;
9760 ct = 0;
9761 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9762 /* We may be reversing an unordered compare to a normal compare,
9763 which is not valid in general (we may convert a non-trapping
9764 condition to a trapping one); however, on i386 we currently
9765 emit all comparisons unordered. */
9766 code = reverse_condition_maybe_unordered (code);
9767 else
9769 code = reverse_condition (code);
9770 if (compare_code != NIL)
9771 compare_code = reverse_condition (compare_code);
9775 if (compare_code != NIL)
9777 /* notl op1 (if needed)
9778 sarl $31, op1
9779 andl (cf-ct), op1
9780 addl ct, op1
9782 For x < 0 (resp. x <= -1) there will be no notl,
9783 so if possible swap the constants to get rid of the
9784 complement.
9785 True/false will be -1/0 while code below (store flag
9786 followed by decrement) is 0/-1, so the constants need
9787 to be exchanged once more. */
9789 if (compare_code == GE || !cf)
9791 code = reverse_condition (code);
9792 compare_code = LT;
9794 else
9796 HOST_WIDE_INT tmp = cf;
9797 cf = ct;
9798 ct = tmp;
9801 out = emit_store_flag (out, code, ix86_compare_op0,
9802 ix86_compare_op1, VOIDmode, 0, -1);
9804 else
9806 out = emit_store_flag (out, code, ix86_compare_op0,
9807 ix86_compare_op1, VOIDmode, 0, 1);
9809 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9810 copy_rtx (out), 1, OPTAB_DIRECT);
9813 out = expand_simple_binop (mode, AND, copy_rtx (out),
9814 gen_int_mode (cf - ct, mode),
9815 copy_rtx (out), 1, OPTAB_DIRECT);
9816 if (ct)
9817 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9818 copy_rtx (out), 1, OPTAB_DIRECT);
9819 if (!rtx_equal_p (out, operands[0]))
9820 emit_move_insn (operands[0], copy_rtx (out));
9822 return 1; /* DONE */
9826 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9828 /* Try a few more things with specific constants and a variable. */
9830 optab op;
9831 rtx var, orig_out, out, tmp;
9833 if (BRANCH_COST <= 2)
9834 return 0; /* FAIL */
9836 /* If one of the two operands is an interesting constant, load a
9837 constant with the above and mask it in with a logical operation. */
9839 if (GET_CODE (operands[2]) == CONST_INT)
9841 var = operands[3];
9842 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9843 operands[3] = constm1_rtx, op = and_optab;
9844 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9845 operands[3] = const0_rtx, op = ior_optab;
9846 else
9847 return 0; /* FAIL */
9849 else if (GET_CODE (operands[3]) == CONST_INT)
9851 var = operands[2];
9852 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9853 operands[2] = constm1_rtx, op = and_optab;
9854 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9855 operands[2] = const0_rtx, op = ior_optab;
9856 else
9857 return 0; /* FAIL */
9859 else
9860 return 0; /* FAIL */
9862 orig_out = operands[0];
9863 tmp = gen_reg_rtx (mode);
9864 operands[0] = tmp;
9866 /* Recurse to get the constant loaded. */
9867 if (ix86_expand_int_movcc (operands) == 0)
9868 return 0; /* FAIL */
9870 /* Mask in the interesting variable. */
9871 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9872 OPTAB_WIDEN);
9873 if (!rtx_equal_p (out, orig_out))
9874 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9876 return 1; /* DONE */
9880 * For comparison with above,
9882 * movl cf,dest
9883 * movl ct,tmp
9884 * cmpl op1,op2
9885 * cmovcc tmp,dest
9887 * Size 15.
9890 if (! nonimmediate_operand (operands[2], mode))
9891 operands[2] = force_reg (mode, operands[2]);
9892 if (! nonimmediate_operand (operands[3], mode))
9893 operands[3] = force_reg (mode, operands[3]);
9895 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9897 rtx tmp = gen_reg_rtx (mode);
9898 emit_move_insn (tmp, operands[3]);
9899 operands[3] = tmp;
9901 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9903 rtx tmp = gen_reg_rtx (mode);
9904 emit_move_insn (tmp, operands[2]);
9905 operands[2] = tmp;
9908 if (! register_operand (operands[2], VOIDmode)
9909 && (mode == QImode
9910 || ! register_operand (operands[3], VOIDmode)))
9911 operands[2] = force_reg (mode, operands[2]);
9913 if (mode == QImode
9914 && ! register_operand (operands[3], VOIDmode))
9915 operands[3] = force_reg (mode, operands[3]);
9917 emit_insn (compare_seq);
9918 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9919 gen_rtx_IF_THEN_ELSE (mode,
9920 compare_op, operands[2],
9921 operands[3])));
9922 if (bypass_test)
9923 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9924 gen_rtx_IF_THEN_ELSE (mode,
9925 bypass_test,
9926 copy_rtx (operands[3]),
9927 copy_rtx (operands[0]))));
9928 if (second_test)
9929 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9930 gen_rtx_IF_THEN_ELSE (mode,
9931 second_test,
9932 copy_rtx (operands[2]),
9933 copy_rtx (operands[0]))));
9935 return 1; /* DONE */
9939 ix86_expand_fp_movcc (rtx operands[])
9941 enum rtx_code code;
9942 rtx tmp;
9943 rtx compare_op, second_test, bypass_test;
9945 /* For SF/DFmode conditional moves based on comparisons
9946 in same mode, we may want to use SSE min/max instructions. */
9947 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9948 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9949 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9950 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9951 && (!TARGET_IEEE_FP
9952 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9953 /* We may be called from the post-reload splitter. */
9954 && (!REG_P (operands[0])
9955 || SSE_REG_P (operands[0])
9956 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9958 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9959 code = GET_CODE (operands[1]);
9961 /* See if we have (cross) match between comparison operands and
9962 conditional move operands. */
9963 if (rtx_equal_p (operands[2], op1))
9965 rtx tmp = op0;
9966 op0 = op1;
9967 op1 = tmp;
9968 code = reverse_condition_maybe_unordered (code);
9970 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9972 /* Check for min operation. */
9973 if (code == LT || code == UNLE)
9975 if (code == UNLE)
9977 rtx tmp = op0;
9978 op0 = op1;
9979 op1 = tmp;
9981 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9982 if (memory_operand (op0, VOIDmode))
9983 op0 = force_reg (GET_MODE (operands[0]), op0);
9984 if (GET_MODE (operands[0]) == SFmode)
9985 emit_insn (gen_minsf3 (operands[0], op0, op1));
9986 else
9987 emit_insn (gen_mindf3 (operands[0], op0, op1));
9988 return 1;
9990 /* Check for max operation. */
9991 if (code == GT || code == UNGE)
9993 if (code == UNGE)
9995 rtx tmp = op0;
9996 op0 = op1;
9997 op1 = tmp;
9999 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10000 if (memory_operand (op0, VOIDmode))
10001 op0 = force_reg (GET_MODE (operands[0]), op0);
10002 if (GET_MODE (operands[0]) == SFmode)
10003 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10004 else
10005 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10006 return 1;
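      /* Illustrative note (not in the original sources): with SSE math
         enabled, an SFmode conditional move such as

             d = (a < b) ? a : b

         whose comparison operands match the move operands is emitted
         directly as minsf3 (minss), and the GT/UNGE form as maxsf3
         (maxss); the UNLE/UNGE cases first swap op0 and op1 as above.  */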
10009 /* Make the condition an sse_comparison_operator. When we are not
10010 in IEEE mode, try to canonicalize the destination operand
10011 to be first in the comparison - this helps reload to avoid extra
10012 moves. */
10013 if (!sse_comparison_operator (operands[1], VOIDmode)
10014 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10016 rtx tmp = ix86_compare_op0;
10017 ix86_compare_op0 = ix86_compare_op1;
10018 ix86_compare_op1 = tmp;
10019 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10020 VOIDmode, ix86_compare_op0,
10021 ix86_compare_op1);
10023 /* Similarly, try to make the result the first operand of the conditional
10024 move. We also don't support the NE comparison on SSE, so try to
10025 avoid it. */
10026 if ((rtx_equal_p (operands[0], operands[3])
10027 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10028 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10030 rtx tmp = operands[2];
10031 operands[2] = operands[3];
10032 operands[3] = tmp;
10033 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10034 (GET_CODE (operands[1])),
10035 VOIDmode, ix86_compare_op0,
10036 ix86_compare_op1);
10038 if (GET_MODE (operands[0]) == SFmode)
10039 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10040 operands[2], operands[3],
10041 ix86_compare_op0, ix86_compare_op1));
10042 else
10043 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10044 operands[2], operands[3],
10045 ix86_compare_op0, ix86_compare_op1));
10046 return 1;
10049 /* The floating point conditional move instructions don't directly
10050 support conditions resulting from a signed integer comparison. */
10052 code = GET_CODE (operands[1]);
10053 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10055 /* The floating point conditional move instructions don't directly
10056 support signed integer comparisons. */
10058 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10060 if (second_test != NULL || bypass_test != NULL)
10061 abort ();
10062 tmp = gen_reg_rtx (QImode);
10063 ix86_expand_setcc (code, tmp);
10064 code = NE;
10065 ix86_compare_op0 = tmp;
10066 ix86_compare_op1 = const0_rtx;
10067 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10069 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10071 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10072 emit_move_insn (tmp, operands[3]);
10073 operands[3] = tmp;
10075 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10077 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10078 emit_move_insn (tmp, operands[2]);
10079 operands[2] = tmp;
10082 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10083 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10084 compare_op,
10085 operands[2],
10086 operands[3])));
10087 if (bypass_test)
10088 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10089 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10090 bypass_test,
10091 operands[3],
10092 operands[0])));
10093 if (second_test)
10094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10095 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10096 second_test,
10097 operands[2],
10098 operands[0])));
10100 return 1;
10103 /* Expand conditional increment or decrement using adc/sbb instructions.
10104 The default case, using setcc followed by a conditional move, can be
10105 done by generic code. */
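/* Illustrative note (not in the original sources): roughly,

       x = y + (a < b)           (a, b unsigned)

   is expanded here as a compare that leaves the condition in the carry
   flag followed by an add-with-carry of 0 ("cmp ...; adc $0, ..."),
   and the decrement form uses sbb in the same way.  */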
10107 ix86_expand_int_addcc (rtx operands[])
10109 enum rtx_code code = GET_CODE (operands[1]);
10110 rtx compare_op;
10111 rtx val = const0_rtx;
10112 bool fpcmp = false;
10113 enum machine_mode mode = GET_MODE (operands[0]);
10115 if (operands[3] != const1_rtx
10116 && operands[3] != constm1_rtx)
10117 return 0;
10118 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10119 ix86_compare_op1, &compare_op))
10120 return 0;
10121 code = GET_CODE (compare_op);
10123 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10124 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10126 fpcmp = true;
10127 code = ix86_fp_compare_code_to_integer (code);
10130 if (code != LTU)
10132 val = constm1_rtx;
10133 if (fpcmp)
10134 PUT_CODE (compare_op,
10135 reverse_condition_maybe_unordered
10136 (GET_CODE (compare_op)));
10137 else
10138 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10140 PUT_MODE (compare_op, mode);
10142 /* Construct either adc or sbb insn. */
10143 if ((code == LTU) == (operands[3] == constm1_rtx))
10145 switch (GET_MODE (operands[0]))
10147 case QImode:
10148 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10149 break;
10150 case HImode:
10151 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10152 break;
10153 case SImode:
10154 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10155 break;
10156 case DImode:
10157 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10158 break;
10159 default:
10160 abort ();
10163 else
10165 switch (GET_MODE (operands[0]))
10167 case QImode:
10168 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10169 break;
10170 case HImode:
10171 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10172 break;
10173 case SImode:
10174 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10175 break;
10176 case DImode:
10177 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10178 break;
10179 default:
10180 abort ();
10183 return 1; /* DONE */
10187 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10188 works for floating point operands and non-offsettable memories.
10189 For pushes, it returns just stack offsets; the values will be saved
10190 in the right order. At most three parts are generated. */
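/* Illustrative note (not in the original sources): on a 32-bit target a
   DImode or DFmode operand splits into two SImode parts (low word first)
   and an XFmode/TFmode operand into three; on a 64-bit target TImode and
   XFmode/TFmode split into two parts.  A DFmode constant, for example,
   is decomposed via REAL_VALUE_TO_TARGET_DOUBLE into two SImode
   immediates.  */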
10192 static int
10193 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10195 int size;
10197 if (!TARGET_64BIT)
10198 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10199 else
10200 size = (GET_MODE_SIZE (mode) + 4) / 8;
10202 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10203 abort ();
10204 if (size < 2 || size > 3)
10205 abort ();
10207 /* Optimize constant pool references into immediates. This is used by fp
10208 moves, which force all constants to memory to allow combining. */
10209 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10211 rtx tmp = maybe_get_pool_constant (operand);
10212 if (tmp)
10213 operand = tmp;
10216 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10218 /* The only non-offsettable memories we handle are pushes. */
10219 if (! push_operand (operand, VOIDmode))
10220 abort ();
10222 operand = copy_rtx (operand);
10223 PUT_MODE (operand, Pmode);
10224 parts[0] = parts[1] = parts[2] = operand;
10226 else if (!TARGET_64BIT)
10228 if (mode == DImode)
10229 split_di (&operand, 1, &parts[0], &parts[1]);
10230 else
10232 if (REG_P (operand))
10234 if (!reload_completed)
10235 abort ();
10236 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10237 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10238 if (size == 3)
10239 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10241 else if (offsettable_memref_p (operand))
10243 operand = adjust_address (operand, SImode, 0);
10244 parts[0] = operand;
10245 parts[1] = adjust_address (operand, SImode, 4);
10246 if (size == 3)
10247 parts[2] = adjust_address (operand, SImode, 8);
10249 else if (GET_CODE (operand) == CONST_DOUBLE)
10251 REAL_VALUE_TYPE r;
10252 long l[4];
10254 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10255 switch (mode)
10257 case XFmode:
10258 case TFmode:
10259 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10260 parts[2] = gen_int_mode (l[2], SImode);
10261 break;
10262 case DFmode:
10263 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10264 break;
10265 default:
10266 abort ();
10268 parts[1] = gen_int_mode (l[1], SImode);
10269 parts[0] = gen_int_mode (l[0], SImode);
10271 else
10272 abort ();
10275 else
10277 if (mode == TImode)
10278 split_ti (&operand, 1, &parts[0], &parts[1]);
10279 if (mode == XFmode || mode == TFmode)
10281 if (REG_P (operand))
10283 if (!reload_completed)
10284 abort ();
10285 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10286 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10288 else if (offsettable_memref_p (operand))
10290 operand = adjust_address (operand, DImode, 0);
10291 parts[0] = operand;
10292 parts[1] = adjust_address (operand, SImode, 8);
10294 else if (GET_CODE (operand) == CONST_DOUBLE)
10296 REAL_VALUE_TYPE r;
10297 long l[3];
10299 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10300 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10301 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10302 if (HOST_BITS_PER_WIDE_INT >= 64)
10303 parts[0]
10304 = gen_int_mode
10305 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10306 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10307 DImode);
10308 else
10309 parts[0] = immed_double_const (l[0], l[1], DImode);
10310 parts[1] = gen_int_mode (l[2], SImode);
10312 else
10313 abort ();
10317 return size;
10320 /* Emit insns to perform a move or push of DI, DF, and XF values.
10321 All the required insns are emitted here; nothing is left to the caller.
10322 Operands 2-4 receive the parts of the destination and operands 5-7
10323 the parts of the source, in the correct copy order. */
10325 void
10326 ix86_split_long_move (rtx operands[])
10328 rtx part[2][3];
10329 int nparts;
10330 int push = 0;
10331 int collisions = 0;
10332 enum machine_mode mode = GET_MODE (operands[0]);
10334 /* The DFmode expanders may ask us to move a double.
10335 For a 64-bit target this is a single move. By hiding that fact
10336 here we simplify the i386.md splitters. */
10337 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10339 /* Optimize constant pool references into immediates. This is used by
10340 fp moves, which force all constants to memory to allow combining. */
10342 if (GET_CODE (operands[1]) == MEM
10343 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10344 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10345 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10346 if (push_operand (operands[0], VOIDmode))
10348 operands[0] = copy_rtx (operands[0]);
10349 PUT_MODE (operands[0], Pmode);
10351 else
10352 operands[0] = gen_lowpart (DImode, operands[0]);
10353 operands[1] = gen_lowpart (DImode, operands[1]);
10354 emit_move_insn (operands[0], operands[1]);
10355 return;
10358 /* The only non-offsettable memory we handle is push. */
10359 if (push_operand (operands[0], VOIDmode))
10360 push = 1;
10361 else if (GET_CODE (operands[0]) == MEM
10362 && ! offsettable_memref_p (operands[0]))
10363 abort ();
10365 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10366 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10368 /* When emitting a push, take care of source operands on the stack. */
10369 if (push && GET_CODE (operands[1]) == MEM
10370 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10372 if (nparts == 3)
10373 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10374 XEXP (part[1][2], 0));
10375 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10376 XEXP (part[1][1], 0));
10379 /* We need to do the copy in the right order in case an address register
10380 of the source overlaps the destination. */
10381 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10383 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10384 collisions++;
10385 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10386 collisions++;
10387 if (nparts == 3
10388 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10389 collisions++;
10391 /* Collision in the middle part can be handled by reordering. */
10392 if (collisions == 1 && nparts == 3
10393 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10395 rtx tmp;
10396 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10397 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10400 /* If there are more collisions, we can't handle it by reordering.
10401 Do an lea to the last part and use only one colliding move. */
10402 else if (collisions > 1)
10404 rtx base;
10406 collisions = 1;
10408 base = part[0][nparts - 1];
10410 /* Handle the case when the last part isn't valid for lea.
10411 Happens in 64-bit mode storing the 12-byte XFmode. */
10412 if (GET_MODE (base) != Pmode)
10413 base = gen_rtx_REG (Pmode, REGNO (base));
10415 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10416 part[1][0] = replace_equiv_address (part[1][0], base);
10417 part[1][1] = replace_equiv_address (part[1][1],
10418 plus_constant (base, UNITS_PER_WORD));
10419 if (nparts == 3)
10420 part[1][2] = replace_equiv_address (part[1][2],
10421 plus_constant (base, 8));
10425 if (push)
10427 if (!TARGET_64BIT)
10429 if (nparts == 3)
10431 /* We use only the first 12 bytes of the TFmode value, but for pushing we
10432 are required to adjust the stack as if we were pushing a real 16-byte
10433 value. */
10434 if (mode == TFmode && !TARGET_64BIT)
10435 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10436 GEN_INT (-4)));
10437 emit_move_insn (part[0][2], part[1][2]);
10440 else
10442 /* In 64-bit mode we don't have a 32-bit push available. In case this is
10443 a register, it is OK - we will just use the larger counterpart. We also
10444 retype memory - this comes from an attempt to avoid a REX prefix on
10445 moving the second half of a TFmode value. */
10446 if (GET_MODE (part[1][1]) == SImode)
10448 if (GET_CODE (part[1][1]) == MEM)
10449 part[1][1] = adjust_address (part[1][1], DImode, 0);
10450 else if (REG_P (part[1][1]))
10451 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10452 else
10453 abort ();
10454 if (GET_MODE (part[1][0]) == SImode)
10455 part[1][0] = part[1][1];
10458 emit_move_insn (part[0][1], part[1][1]);
10459 emit_move_insn (part[0][0], part[1][0]);
10460 return;
10463 /* Choose the correct order so we do not overwrite the source before it is copied. */
10464 if ((REG_P (part[0][0])
10465 && REG_P (part[1][1])
10466 && (REGNO (part[0][0]) == REGNO (part[1][1])
10467 || (nparts == 3
10468 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10469 || (collisions > 0
10470 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10472 if (nparts == 3)
10474 operands[2] = part[0][2];
10475 operands[3] = part[0][1];
10476 operands[4] = part[0][0];
10477 operands[5] = part[1][2];
10478 operands[6] = part[1][1];
10479 operands[7] = part[1][0];
10481 else
10483 operands[2] = part[0][1];
10484 operands[3] = part[0][0];
10485 operands[5] = part[1][1];
10486 operands[6] = part[1][0];
10489 else
10491 if (nparts == 3)
10493 operands[2] = part[0][0];
10494 operands[3] = part[0][1];
10495 operands[4] = part[0][2];
10496 operands[5] = part[1][0];
10497 operands[6] = part[1][1];
10498 operands[7] = part[1][2];
10500 else
10502 operands[2] = part[0][0];
10503 operands[3] = part[0][1];
10504 operands[5] = part[1][0];
10505 operands[6] = part[1][1];
10508 emit_move_insn (operands[2], operands[5]);
10509 emit_move_insn (operands[3], operands[6]);
10510 if (nparts == 3)
10511 emit_move_insn (operands[4], operands[7]);
10513 return;
10516 void
10517 ix86_split_ashldi (rtx *operands, rtx scratch)
10519 rtx low[2], high[2];
10520 int count;
10522 if (GET_CODE (operands[2]) == CONST_INT)
10524 split_di (operands, 2, low, high);
10525 count = INTVAL (operands[2]) & 63;
10527 if (count >= 32)
10529 emit_move_insn (high[0], low[1]);
10530 emit_move_insn (low[0], const0_rtx);
10532 if (count > 32)
10533 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10535 else
10537 if (!rtx_equal_p (operands[0], operands[1]))
10538 emit_move_insn (operands[0], operands[1]);
10539 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10540 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10543 else
10545 if (!rtx_equal_p (operands[0], operands[1]))
10546 emit_move_insn (operands[0], operands[1]);
10548 split_di (operands, 1, low, high);
10550 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10551 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10553 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10555 if (! no_new_pseudos)
10556 scratch = force_reg (SImode, const0_rtx);
10557 else
10558 emit_move_insn (scratch, const0_rtx);
10560 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10561 scratch));
10563 else
10564 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
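/* Illustrative note (not in the original sources): for a constant count
   the DImode left shift above is decomposed on the (low, high) SImode
   pair, e.g. for "x << 40":

       high = src_low << 8;  low = 0;

   while counts below 32 use shld to move bits from the low word into
   the high word, followed by a plain shl of the low word.  */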
10568 void
10569 ix86_split_ashrdi (rtx *operands, rtx scratch)
10571 rtx low[2], high[2];
10572 int count;
10574 if (GET_CODE (operands[2]) == CONST_INT)
10576 split_di (operands, 2, low, high);
10577 count = INTVAL (operands[2]) & 63;
10579 if (count >= 32)
10581 emit_move_insn (low[0], high[1]);
10583 if (! reload_completed)
10584 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10585 else
10587 emit_move_insn (high[0], low[0]);
10588 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10591 if (count > 32)
10592 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10594 else
10596 if (!rtx_equal_p (operands[0], operands[1]))
10597 emit_move_insn (operands[0], operands[1]);
10598 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10599 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10602 else
10604 if (!rtx_equal_p (operands[0], operands[1]))
10605 emit_move_insn (operands[0], operands[1]);
10607 split_di (operands, 1, low, high);
10609 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10610 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10612 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10614 if (! no_new_pseudos)
10615 scratch = gen_reg_rtx (SImode);
10616 emit_move_insn (scratch, high[0]);
10617 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10618 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10619 scratch));
10621 else
10622 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10626 void
10627 ix86_split_lshrdi (rtx *operands, rtx scratch)
10629 rtx low[2], high[2];
10630 int count;
10632 if (GET_CODE (operands[2]) == CONST_INT)
10634 split_di (operands, 2, low, high);
10635 count = INTVAL (operands[2]) & 63;
10637 if (count >= 32)
10639 emit_move_insn (low[0], high[1]);
10640 emit_move_insn (high[0], const0_rtx);
10642 if (count > 32)
10643 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10645 else
10647 if (!rtx_equal_p (operands[0], operands[1]))
10648 emit_move_insn (operands[0], operands[1]);
10649 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10650 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10653 else
10655 if (!rtx_equal_p (operands[0], operands[1]))
10656 emit_move_insn (operands[0], operands[1]);
10658 split_di (operands, 1, low, high);
10660 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10661 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10663 /* Heh. By reversing the arguments, we can reuse this pattern. */
10664 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10666 if (! no_new_pseudos)
10667 scratch = force_reg (SImode, const0_rtx);
10668 else
10669 emit_move_insn (scratch, const0_rtx);
10671 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10672 scratch));
10674 else
10675 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10679 /* Helper function for the string operations below. Test whether VARIABLE
10680 is aligned to VALUE bytes. If true, jump to the label. */
10681 static rtx
10682 ix86_expand_aligntest (rtx variable, int value)
10684 rtx label = gen_label_rtx ();
10685 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10686 if (GET_MODE (variable) == DImode)
10687 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10688 else
10689 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10690 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10691 1, label);
10692 return label;
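/* Illustrative note (not in the original sources): typical use is

       label = ix86_expand_aligntest (destreg, 1);
       ... emit a single-byte operation ...
       emit_label (label);

   so the byte operation is skipped (the branch is taken) when the
   tested bit of DESTREG is clear.  */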
10695 /* Subtract VALUE from COUNTREG. */
10696 static void
10697 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10699 if (GET_MODE (countreg) == DImode)
10700 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10701 else
10702 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10705 /* Zero extend possibly SImode EXP to Pmode register. */
10707 ix86_zero_extend_to_Pmode (rtx exp)
10709 rtx r;
10710 if (GET_MODE (exp) == VOIDmode)
10711 return force_reg (Pmode, exp);
10712 if (GET_MODE (exp) == Pmode)
10713 return copy_to_mode_reg (Pmode, exp);
10714 r = gen_reg_rtx (Pmode);
10715 emit_insn (gen_zero_extendsidi2 (r, exp));
10716 return r;
10719 /* Expand string move (memcpy) operation. Use i386 string operations when
10720 profitable. expand_clrstr contains similar code. */
10722 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10724 rtx srcreg, destreg, countreg;
10725 enum machine_mode counter_mode;
10726 HOST_WIDE_INT align = 0;
10727 unsigned HOST_WIDE_INT count = 0;
10728 rtx insns;
10730 if (GET_CODE (align_exp) == CONST_INT)
10731 align = INTVAL (align_exp);
10733 /* Can't use any of this if the user has appropriated esi or edi. */
10734 if (global_regs[4] || global_regs[5])
10735 return 0;
10737 /* This simple hack avoids all inlining code and simplifies code below. */
10738 if (!TARGET_ALIGN_STRINGOPS)
10739 align = 64;
10741 if (GET_CODE (count_exp) == CONST_INT)
10743 count = INTVAL (count_exp);
10744 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10745 return 0;
10748 /* Figure out the proper mode for the counter. For 32-bit targets it is
10749 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10750 COUNT is set to the number of bytes copied when known at compile time. */
10751 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10752 || x86_64_zero_extended_value (count_exp))
10753 counter_mode = SImode;
10754 else
10755 counter_mode = DImode;
10757 start_sequence ();
10759 if (counter_mode != SImode && counter_mode != DImode)
10760 abort ();
10762 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10763 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10765 emit_insn (gen_cld ());
10767 /* When optimizing for size, emit a simple rep ; movsb instruction for
10768 counts not divisible by 4. */
10770 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10772 countreg = ix86_zero_extend_to_Pmode (count_exp);
10773 if (TARGET_64BIT)
10774 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10775 destreg, srcreg, countreg));
10776 else
10777 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10778 destreg, srcreg, countreg));
10781 /* For constant aligned (or small unaligned) copies use rep movsl
10782 followed by code copying the rest. For PentiumPro ensure 8 byte
10783 alignment to allow rep movsl acceleration. */
10785 else if (count != 0
10786 && (align >= 8
10787 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10788 || optimize_size || count < (unsigned int) 64))
10790 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10791 if (count & ~(size - 1))
10793 countreg = copy_to_mode_reg (counter_mode,
10794 GEN_INT ((count >> (size == 4 ? 2 : 3))
10795 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10796 countreg = ix86_zero_extend_to_Pmode (countreg);
10797 if (size == 4)
10799 if (TARGET_64BIT)
10800 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10801 destreg, srcreg, countreg));
10802 else
10803 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10804 destreg, srcreg, countreg));
10806 else
10807 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10808 destreg, srcreg, countreg));
10810 if (size == 8 && (count & 0x04))
10811 emit_insn (gen_strmovsi (destreg, srcreg));
10812 if (count & 0x02)
10813 emit_insn (gen_strmovhi (destreg, srcreg));
10814 if (count & 0x01)
10815 emit_insn (gen_strmovqi (destreg, srcreg));
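      /* Illustrative note (not in the original sources): e.g. for a known
         count of 11 bytes with 4-byte alignment on a 32-bit target, this
         branch emits rep movsl with the count register set to 2 (copying
         8 bytes), then a movsw and a movsb for the remaining 3 bytes.  */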
10817 /* The generic code based on the glibc implementation:
10818 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10819 allowing accelerated copying there)
10820 - copy the data using rep movsl
10821 - copy the rest. */
10822 else
10824 rtx countreg2;
10825 rtx label = NULL;
10826 int desired_alignment = (TARGET_PENTIUMPRO
10827 && (count == 0 || count >= (unsigned int) 260)
10828 ? 8 : UNITS_PER_WORD);
10830 /* In case we don't know anything about the alignment, default to the
10831 library version, since it is usually equally fast and results in
10832 shorter code.
10834 Also use the library call when we know that the count is large and the
10835 call overhead will not be important. */
10836 if (!TARGET_INLINE_ALL_STRINGOPS
10837 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10839 end_sequence ();
10840 return 0;
10843 if (TARGET_SINGLE_STRINGOP)
10844 emit_insn (gen_cld ());
10846 countreg2 = gen_reg_rtx (Pmode);
10847 countreg = copy_to_mode_reg (counter_mode, count_exp);
10849 /* We don't use loops to align destination and to copy parts smaller
10850 than 4 bytes, because gcc is able to optimize such code better (in
10851 the case the destination or the count really is aligned, gcc is often
10852 able to predict the branches) and also it is friendlier to the
10853 hardware branch prediction.
10855 Using loops is beneficial for generic case, because we can
10856 handle small counts using the loops. Many CPUs (such as Athlon)
10857 have large REP prefix setup costs.
10859 This is quite costly. Maybe we can revisit this decision later or
10860 add some customizability to this code. */
10862 if (count == 0 && align < desired_alignment)
10864 label = gen_label_rtx ();
10865 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10866 LEU, 0, counter_mode, 1, label);
10868 if (align <= 1)
10870 rtx label = ix86_expand_aligntest (destreg, 1);
10871 emit_insn (gen_strmovqi (destreg, srcreg));
10872 ix86_adjust_counter (countreg, 1);
10873 emit_label (label);
10874 LABEL_NUSES (label) = 1;
10876 if (align <= 2)
10878 rtx label = ix86_expand_aligntest (destreg, 2);
10879 emit_insn (gen_strmovhi (destreg, srcreg));
10880 ix86_adjust_counter (countreg, 2);
10881 emit_label (label);
10882 LABEL_NUSES (label) = 1;
10884 if (align <= 4 && desired_alignment > 4)
10886 rtx label = ix86_expand_aligntest (destreg, 4);
10887 emit_insn (gen_strmovsi (destreg, srcreg));
10888 ix86_adjust_counter (countreg, 4);
10889 emit_label (label);
10890 LABEL_NUSES (label) = 1;
10893 if (label && desired_alignment > 4 && !TARGET_64BIT)
10895 emit_label (label);
10896 LABEL_NUSES (label) = 1;
10897 label = NULL_RTX;
10899 if (!TARGET_SINGLE_STRINGOP)
10900 emit_insn (gen_cld ());
10901 if (TARGET_64BIT)
10903 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10904 GEN_INT (3)));
10905 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10906 destreg, srcreg, countreg2));
10908 else
10910 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10911 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10912 destreg, srcreg, countreg2));
10915 if (label)
10917 emit_label (label);
10918 LABEL_NUSES (label) = 1;
10920 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10921 emit_insn (gen_strmovsi (destreg, srcreg));
10922 if ((align <= 4 || count == 0) && TARGET_64BIT)
10924 rtx label = ix86_expand_aligntest (countreg, 4);
10925 emit_insn (gen_strmovsi (destreg, srcreg));
10926 emit_label (label);
10927 LABEL_NUSES (label) = 1;
10929 if (align > 2 && count != 0 && (count & 2))
10930 emit_insn (gen_strmovhi (destreg, srcreg));
10931 if (align <= 2 || count == 0)
10933 rtx label = ix86_expand_aligntest (countreg, 2);
10934 emit_insn (gen_strmovhi (destreg, srcreg));
10935 emit_label (label);
10936 LABEL_NUSES (label) = 1;
10938 if (align > 1 && count != 0 && (count & 1))
10939 emit_insn (gen_strmovqi (destreg, srcreg));
10940 if (align <= 1 || count == 0)
10942 rtx label = ix86_expand_aligntest (countreg, 1);
10943 emit_insn (gen_strmovqi (destreg, srcreg));
10944 emit_label (label);
10945 LABEL_NUSES (label) = 1;
10949 insns = get_insns ();
10950 end_sequence ();
10952 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10953 emit_insn (insns);
10954 return 1;
10957 /* Expand string clear operation (bzero). Use i386 string operations when
10958 profitable. expand_movstr contains similar code. */
10960 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
10962 rtx destreg, zeroreg, countreg;
10963 enum machine_mode counter_mode;
10964 HOST_WIDE_INT align = 0;
10965 unsigned HOST_WIDE_INT count = 0;
10967 if (GET_CODE (align_exp) == CONST_INT)
10968 align = INTVAL (align_exp);
10970 /* Can't use any of this if the user has appropriated esi. */
10971 if (global_regs[4])
10972 return 0;
10974 /* This simple hack avoids all inlining code and simplifies code below. */
10975 if (!TARGET_ALIGN_STRINGOPS)
10976 align = 32;
10978 if (GET_CODE (count_exp) == CONST_INT)
10980 count = INTVAL (count_exp);
10981 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10982 return 0;
10984 /* Figure out the proper mode for the counter. For 32-bit targets it is
10985 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10986 COUNT is set to the number of bytes cleared when known at compile time. */
10987 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10988 || x86_64_zero_extended_value (count_exp))
10989 counter_mode = SImode;
10990 else
10991 counter_mode = DImode;
10993 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10995 emit_insn (gen_cld ());
10997 /* When optimizing for size, emit a simple rep ; stosb instruction for
10998 counts not divisible by 4. */
11000 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11002 countreg = ix86_zero_extend_to_Pmode (count_exp);
11003 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11004 if (TARGET_64BIT)
11005 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11006 destreg, countreg));
11007 else
11008 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11009 destreg, countreg));
11011 else if (count != 0
11012 && (align >= 8
11013 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11014 || optimize_size || count < (unsigned int) 64))
11016 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11017 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11018 if (count & ~(size - 1))
11020 countreg = copy_to_mode_reg (counter_mode,
11021 GEN_INT ((count >> (size == 4 ? 2 : 3))
11022 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11023 countreg = ix86_zero_extend_to_Pmode (countreg);
11024 if (size == 4)
11026 if (TARGET_64BIT)
11027 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11028 destreg, countreg));
11029 else
11030 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11031 destreg, countreg));
11033 else
11034 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11035 destreg, countreg));
11037 if (size == 8 && (count & 0x04))
11038 emit_insn (gen_strsetsi (destreg,
11039 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11040 if (count & 0x02)
11041 emit_insn (gen_strsethi (destreg,
11042 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11043 if (count & 0x01)
11044 emit_insn (gen_strsetqi (destreg,
11045 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11047 else
11049 rtx countreg2;
11050 rtx label = NULL;
11051 /* Compute desired alignment of the string operation. */
11052 int desired_alignment = (TARGET_PENTIUMPRO
11053 && (count == 0 || count >= (unsigned int) 260)
11054 ? 8 : UNITS_PER_WORD);
11056 /* In case we don't know anything about the alignment, default to the
11057 library version, since it is usually equally fast and results in
11058 shorter code.
11060 Also use the library call when we know that the count is large and the
11061 call overhead will not be important. */
11062 if (!TARGET_INLINE_ALL_STRINGOPS
11063 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11064 return 0;
11066 if (TARGET_SINGLE_STRINGOP)
11067 emit_insn (gen_cld ());
11069 countreg2 = gen_reg_rtx (Pmode);
11070 countreg = copy_to_mode_reg (counter_mode, count_exp);
11071 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11073 if (count == 0 && align < desired_alignment)
11075 label = gen_label_rtx ();
11076 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11077 LEU, 0, counter_mode, 1, label);
11079 if (align <= 1)
11081 rtx label = ix86_expand_aligntest (destreg, 1);
11082 emit_insn (gen_strsetqi (destreg,
11083 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11084 ix86_adjust_counter (countreg, 1);
11085 emit_label (label);
11086 LABEL_NUSES (label) = 1;
11088 if (align <= 2)
11090 rtx label = ix86_expand_aligntest (destreg, 2);
11091 emit_insn (gen_strsethi (destreg,
11092 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11093 ix86_adjust_counter (countreg, 2);
11094 emit_label (label);
11095 LABEL_NUSES (label) = 1;
11097 if (align <= 4 && desired_alignment > 4)
11099 rtx label = ix86_expand_aligntest (destreg, 4);
11100 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11101 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11102 : zeroreg)));
11103 ix86_adjust_counter (countreg, 4);
11104 emit_label (label);
11105 LABEL_NUSES (label) = 1;
11108 if (label && desired_alignment > 4 && !TARGET_64BIT)
11110 emit_label (label);
11111 LABEL_NUSES (label) = 1;
11112 label = NULL_RTX;
11115 if (!TARGET_SINGLE_STRINGOP)
11116 emit_insn (gen_cld ());
11117 if (TARGET_64BIT)
11119 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11120 GEN_INT (3)));
11121 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11122 destreg, countreg2));
11124 else
11126 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11127 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11128 destreg, countreg2));
11130 if (label)
11132 emit_label (label);
11133 LABEL_NUSES (label) = 1;
11136 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11137 emit_insn (gen_strsetsi (destreg,
11138 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11139 if (TARGET_64BIT && (align <= 4 || count == 0))
11141 rtx label = ix86_expand_aligntest (countreg, 4);
11142 emit_insn (gen_strsetsi (destreg,
11143 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11144 emit_label (label);
11145 LABEL_NUSES (label) = 1;
11147 if (align > 2 && count != 0 && (count & 2))
11148 emit_insn (gen_strsethi (destreg,
11149 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11150 if (align <= 2 || count == 0)
11152 rtx label = ix86_expand_aligntest (countreg, 2);
11153 emit_insn (gen_strsethi (destreg,
11154 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11155 emit_label (label);
11156 LABEL_NUSES (label) = 1;
11158 if (align > 1 && count != 0 && (count & 1))
11159 emit_insn (gen_strsetqi (destreg,
11160 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11161 if (align <= 1 || count == 0)
11163 rtx label = ix86_expand_aligntest (countreg, 1);
11164 emit_insn (gen_strsetqi (destreg,
11165 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11166 emit_label (label);
11167 LABEL_NUSES (label) = 1;
11170 return 1;
11172 /* Expand strlen. */
11174 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11176 rtx addr, scratch1, scratch2, scratch3, scratch4;
11178 /* The generic case of the strlen expander is long. Avoid
11179 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
11181 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11182 && !TARGET_INLINE_ALL_STRINGOPS
11183 && !optimize_size
11184 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11185 return 0;
11187 addr = force_reg (Pmode, XEXP (src, 0));
11188 scratch1 = gen_reg_rtx (Pmode);
11190 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11191 && !optimize_size)
11193 /* Well, it seems that some optimizer does not combine a call like
11194 foo(strlen(bar), strlen(bar));
11195 when the move and the subtraction are done here. It does calculate
11196 the length just once when these instructions are done inside of
11197 output_strlen_unroll(). But I think, since &bar[strlen(bar)] is
11198 often used and I use one fewer register for the lifetime of
11199 output_strlen_unroll(), this is better. */
11201 emit_move_insn (out, addr);
11203 ix86_expand_strlensi_unroll_1 (out, align);
11205 /* strlensi_unroll_1 returns the address of the zero at the end of
11206 the string, like memchr(), so compute the length by subtracting
11207 the start address. */
11208 if (TARGET_64BIT)
11209 emit_insn (gen_subdi3 (out, out, addr));
11210 else
11211 emit_insn (gen_subsi3 (out, out, addr));
11213 else
11215 scratch2 = gen_reg_rtx (Pmode);
11216 scratch3 = gen_reg_rtx (Pmode);
11217 scratch4 = force_reg (Pmode, constm1_rtx);
11219 emit_move_insn (scratch3, addr);
11220 eoschar = force_reg (QImode, eoschar);
11222 emit_insn (gen_cld ());
11223 if (TARGET_64BIT)
11225 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11226 align, scratch4, scratch3));
11227 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11228 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11230 else
11232 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11233 align, scratch4, scratch3));
11234 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11235 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11238 return 1;
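/* Illustrative note (not in the original sources): in the generic branch
   above the length is recovered from the repnz scasb count register;
   starting from -1 the count ends up as -(len + 2), so the one_cmpl
   ("not") followed by adding -1 gives ~(-(len + 2)) - 1 = len.  */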
11241 /* Expand the appropriate insns for doing strlen if not just doing
11242 repnz; scasb
11244 out = result, initialized with the start address
11245 align_rtx = alignment of the address.
11246 scratch = scratch register, initialized with the start address when
11247 not aligned, otherwise undefined
11249 This is just the body. It needs the initializations mentioned above and
11250 some address computing at the end. These things are done in i386.md. */
11252 static void
11253 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11255 int align;
11256 rtx tmp;
11257 rtx align_2_label = NULL_RTX;
11258 rtx align_3_label = NULL_RTX;
11259 rtx align_4_label = gen_label_rtx ();
11260 rtx end_0_label = gen_label_rtx ();
11261 rtx mem;
11262 rtx tmpreg = gen_reg_rtx (SImode);
11263 rtx scratch = gen_reg_rtx (SImode);
11264 rtx cmp;
11266 align = 0;
11267 if (GET_CODE (align_rtx) == CONST_INT)
11268 align = INTVAL (align_rtx);
11270 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11272 /* Is there a known alignment and is it less than 4? */
11273 if (align < 4)
11275 rtx scratch1 = gen_reg_rtx (Pmode);
11276 emit_move_insn (scratch1, out);
11277 /* Is there a known alignment and is it not 2? */
11278 if (align != 2)
11280 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11281 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11283 /* Leave just the 3 lower bits. */
11284 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11285 NULL_RTX, 0, OPTAB_WIDEN);
11287 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11288 Pmode, 1, align_4_label);
11289 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11290 Pmode, 1, align_2_label);
11291 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11292 Pmode, 1, align_3_label);
11294 else
11296 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11297 check whether it is aligned to 4 bytes. */
11299 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11300 NULL_RTX, 0, OPTAB_WIDEN);
11302 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11303 Pmode, 1, align_4_label);
11306 mem = gen_rtx_MEM (QImode, out);
11308 /* Now compare the bytes. */
11310 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11311 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11312 QImode, 1, end_0_label);
11314 /* Increment the address. */
11315 if (TARGET_64BIT)
11316 emit_insn (gen_adddi3 (out, out, const1_rtx));
11317 else
11318 emit_insn (gen_addsi3 (out, out, const1_rtx));
11320 /* Not needed with an alignment of 2 */
11321 if (align != 2)
11323 emit_label (align_2_label);
11325 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11326 end_0_label);
11328 if (TARGET_64BIT)
11329 emit_insn (gen_adddi3 (out, out, const1_rtx));
11330 else
11331 emit_insn (gen_addsi3 (out, out, const1_rtx));
11333 emit_label (align_3_label);
11336 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11337 end_0_label);
11339 if (TARGET_64BIT)
11340 emit_insn (gen_adddi3 (out, out, const1_rtx));
11341 else
11342 emit_insn (gen_addsi3 (out, out, const1_rtx));
11345 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11346 align this loop: it only makes the program bigger and does not help
11347 to speed it up. */
11348 emit_label (align_4_label);
11350 mem = gen_rtx_MEM (SImode, out);
11351 emit_move_insn (scratch, mem);
11352 if (TARGET_64BIT)
11353 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11354 else
11355 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11357 /* This formula yields a nonzero result iff one of the bytes is zero.
11358 This saves three branches inside the loop and many cycles. */
11360 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11361 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11362 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11363 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11364 gen_int_mode (0x80808080, SImode)));
11365 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11366 align_4_label);
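  /* Illustrative note (not in the original sources): the test above is the
     classic "word contains a zero byte" check,

         ((x - 0x01010101) & ~x & 0x80808080) != 0

     e.g. x = 0x11220033 gives (x - 0x01010101) = 0x1020ff32 and
     ~x = 0xeeddffcc, so the final AND is 0x00008000, flagging the zero
     byte; a word with no zero byte yields 0.  */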
11368 if (TARGET_CMOVE)
11370 rtx reg = gen_reg_rtx (SImode);
11371 rtx reg2 = gen_reg_rtx (Pmode);
11372 emit_move_insn (reg, tmpreg);
11373 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11375 /* If zero is not in the first two bytes, move two bytes forward. */
11376 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11377 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11378 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11379 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11380 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11381 reg,
11382 tmpreg)));
11383 /* Emit lea manually to avoid clobbering of flags. */
11384 emit_insn (gen_rtx_SET (SImode, reg2,
11385 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11387 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11388 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11389 emit_insn (gen_rtx_SET (VOIDmode, out,
11390 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11391 reg2,
11392 out)));
11395 else
11397 rtx end_2_label = gen_label_rtx ();
11398 /* Is zero in the first two bytes? */
11400 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11401 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11402 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11403 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11404 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11405 pc_rtx);
11406 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11407 JUMP_LABEL (tmp) = end_2_label;
11409 /* Not in the first two. Move two bytes forward. */
11410 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11411 if (TARGET_64BIT)
11412 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11413 else
11414 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11416 emit_label (end_2_label);
11420 /* Avoid branch in fixing the byte. */
11421 tmpreg = gen_lowpart (QImode, tmpreg);
11422 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11423 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11424 if (TARGET_64BIT)
11425 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11426 else
11427 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11429 emit_label (end_0_label);
11432 void
11433 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11434 rtx pop, int sibcall)
11436 rtx use = NULL, call;
11438 if (pop == const0_rtx)
11439 pop = NULL;
11440 if (TARGET_64BIT && pop)
11441 abort ();
11443 #if TARGET_MACHO
11444 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11445 fnaddr = machopic_indirect_call_target (fnaddr);
11446 #else
11447 /* Static functions and indirect calls don't need the pic register. */
11448 if (! TARGET_64BIT && flag_pic
11449 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11450 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11451 use_reg (&use, pic_offset_table_rtx);
11453 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11455 rtx al = gen_rtx_REG (QImode, 0);
11456 emit_move_insn (al, callarg2);
11457 use_reg (&use, al);
11459 #endif /* TARGET_MACHO */
11461 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11463 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11464 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11466 if (sibcall && TARGET_64BIT
11467 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11469 rtx addr;
11470 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11471 fnaddr = gen_rtx_REG (Pmode, 40);
11472 emit_move_insn (fnaddr, addr);
11473 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11476 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11477 if (retval)
11478 call = gen_rtx_SET (VOIDmode, retval, call);
11479 if (pop)
11481 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11482 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11483 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11486 call = emit_call_insn (call);
11487 if (use)
11488 CALL_INSN_FUNCTION_USAGE (call) = use;
11492 /* Clear stack slot assignments remembered from previous functions.
11493 This is called from INIT_EXPANDERS once before RTL is emitted for each
11494 function. */
11496 static struct machine_function *
11497 ix86_init_machine_status (void)
11499 struct machine_function *f;
11501 f = ggc_alloc_cleared (sizeof (struct machine_function));
11502 f->use_fast_prologue_epilogue_nregs = -1;
11504 return f;
11507 /* Return a MEM corresponding to a stack slot with mode MODE.
11508 Allocate a new slot if necessary.
11510 The RTL for a function can have several slots available: N is
11511 which slot to use. */
11514 assign_386_stack_local (enum machine_mode mode, int n)
11516 struct stack_local_entry *s;
11518 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11519 abort ();
11521 for (s = ix86_stack_locals; s; s = s->next)
11522 if (s->mode == mode && s->n == n)
11523 return s->rtl;
11525 s = (struct stack_local_entry *)
11526 ggc_alloc (sizeof (struct stack_local_entry));
11527 s->n = n;
11528 s->mode = mode;
11529 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11531 s->next = ix86_stack_locals;
11532 ix86_stack_locals = s;
11533 return s->rtl;
11536 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11538 static GTY(()) rtx ix86_tls_symbol;
11540 ix86_tls_get_addr (void)
11543 if (!ix86_tls_symbol)
11545 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11546 (TARGET_GNU_TLS && !TARGET_64BIT)
11547 ? "___tls_get_addr"
11548 : "__tls_get_addr");
11551 return ix86_tls_symbol;
11554 /* Calculate the length of the memory address in the instruction
11555 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11557 static int
11558 memory_address_length (rtx addr)
11560 struct ix86_address parts;
11561 rtx base, index, disp;
11562 int len;
11564 if (GET_CODE (addr) == PRE_DEC
11565 || GET_CODE (addr) == POST_INC
11566 || GET_CODE (addr) == PRE_MODIFY
11567 || GET_CODE (addr) == POST_MODIFY)
11568 return 0;
11570 if (! ix86_decompose_address (addr, &parts))
11571 abort ();
11573 base = parts.base;
11574 index = parts.index;
11575 disp = parts.disp;
11576 len = 0;
11578 /* Register Indirect. */
11579 if (base && !index && !disp)
11581 /* Special cases: ebp and esp need the two-byte modrm form. */
11582 if (addr == stack_pointer_rtx
11583 || addr == arg_pointer_rtx
11584 || addr == frame_pointer_rtx
11585 || addr == hard_frame_pointer_rtx)
11586 len = 1;
11589 /* Direct Addressing. */
11590 else if (disp && !base && !index)
11591 len = 4;
11593 else
11595 /* Find the length of the displacement constant. */
11596 if (disp)
11598 if (GET_CODE (disp) == CONST_INT
11599 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11600 && base)
11601 len = 1;
11602 else
11603 len = 4;
11606 /* An index requires the two-byte modrm form. */
11607 if (index)
11608 len += 1;
11611 return len;
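/* Illustrative note (not in the original sources): e.g. the address
   (%ebx) contributes 0 bytes, 8(%ebx) contributes 1 (an 8-bit
   displacement), a bare symbol or other 32-bit displacement contributes
   4, and any address with an index register (hence a SIB byte) adds 1
   more; the modrm byte itself is not included here.  */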
11614 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
11615 is set, expect that the insn has an 8-bit immediate alternative. */
11617 ix86_attr_length_immediate_default (rtx insn, int shortform)
11619 int len = 0;
11620 int i;
11621 extract_insn_cached (insn);
11622 for (i = recog_data.n_operands - 1; i >= 0; --i)
11623 if (CONSTANT_P (recog_data.operand[i]))
11625 if (len)
11626 abort ();
11627 if (shortform
11628 && GET_CODE (recog_data.operand[i]) == CONST_INT
11629 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11630 len = 1;
11631 else
11633 switch (get_attr_mode (insn))
11635 case MODE_QI:
11636 len+=1;
11637 break;
11638 case MODE_HI:
11639 len+=2;
11640 break;
11641 case MODE_SI:
11642 len+=4;
11643 break;
11644 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11645 case MODE_DI:
11646 len+=4;
11647 break;
11648 default:
11649 fatal_insn ("unknown insn mode", insn);
11653 return len;
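/* Illustrative note (not in the original sources): e.g. an SImode add
   with immediate 1000 contributes 4 bytes here, while with SHORTFORM set
   and an immediate that satisfies the 'K' constraint (signed 8-bit),
   such as 3, it contributes only 1 byte.  */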
11655 /* Compute default value for "length_address" attribute. */
11657 ix86_attr_length_address_default (rtx insn)
11659 int i;
11661 if (get_attr_type (insn) == TYPE_LEA)
11663 rtx set = PATTERN (insn);
11664 if (GET_CODE (set) == SET)
11666 else if (GET_CODE (set) == PARALLEL
11667 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11668 set = XVECEXP (set, 0, 0);
11669 else
11671 #ifdef ENABLE_CHECKING
11672 abort ();
11673 #endif
11674 return 0;
11677 return memory_address_length (SET_SRC (set));
11680 extract_insn_cached (insn);
11681 for (i = recog_data.n_operands - 1; i >= 0; --i)
11682 if (GET_CODE (recog_data.operand[i]) == MEM)
11684 return memory_address_length (XEXP (recog_data.operand[i], 0));
11685 break;
11687 return 0;
11690 /* Return the maximum number of instructions a cpu can issue. */
11692 static int
11693 ix86_issue_rate (void)
11695 switch (ix86_tune)
11697 case PROCESSOR_PENTIUM:
11698 case PROCESSOR_K6:
11699 return 2;
11701 case PROCESSOR_PENTIUMPRO:
11702 case PROCESSOR_PENTIUM4:
11703 case PROCESSOR_ATHLON:
11704 case PROCESSOR_K8:
11705 return 3;
11707 default:
11708 return 1;
11712 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11713 by DEP_INSN and nothing else set by DEP_INSN. */
11715 static int
11716 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11718 rtx set, set2;
11720 /* Simplify the test for uninteresting insns. */
11721 if (insn_type != TYPE_SETCC
11722 && insn_type != TYPE_ICMOV
11723 && insn_type != TYPE_FCMOV
11724 && insn_type != TYPE_IBR)
11725 return 0;
11727 if ((set = single_set (dep_insn)) != 0)
11729 set = SET_DEST (set);
11730 set2 = NULL_RTX;
11732 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11733 && XVECLEN (PATTERN (dep_insn), 0) == 2
11734 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11735 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11737 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11738 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11740 else
11741 return 0;
11743 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11744 return 0;
11746 /* This test is true if the dependent insn reads the flags but
11747 not any other potentially set register. */
11748 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11749 return 0;
11751 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11752 return 0;
11754 return 1;
11757 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11758 address with operands set by DEP_INSN. */
11760 static int
11761 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11763 rtx addr;
11765 if (insn_type == TYPE_LEA
11766 && TARGET_PENTIUM)
11768 addr = PATTERN (insn);
11769 if (GET_CODE (addr) == SET)
11771 else if (GET_CODE (addr) == PARALLEL
11772 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11773 addr = XVECEXP (addr, 0, 0);
11774 else
11775 abort ();
11776 addr = SET_SRC (addr);
11778 else
11780 int i;
11781 extract_insn_cached (insn);
11782 for (i = recog_data.n_operands - 1; i >= 0; --i)
11783 if (GET_CODE (recog_data.operand[i]) == MEM)
11785 addr = XEXP (recog_data.operand[i], 0);
11786 goto found;
11788 return 0;
11789 found:;
11792 return modified_in_p (addr, dep_insn);
11795 static int
11796 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11798 enum attr_type insn_type, dep_insn_type;
11799 enum attr_memory memory, dep_memory;
11800 rtx set, set2;
11801 int dep_insn_code_number;
11803 /* Anti and output dependencies have zero cost on all CPUs. */
11804 if (REG_NOTE_KIND (link) != 0)
11805 return 0;
11807 dep_insn_code_number = recog_memoized (dep_insn);
11809 /* If we can't recognize the insns, we can't really do anything. */
11810 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11811 return cost;
11813 insn_type = get_attr_type (insn);
11814 dep_insn_type = get_attr_type (dep_insn);
11816 switch (ix86_tune)
11818 case PROCESSOR_PENTIUM:
11819 /* Address Generation Interlock adds a cycle of latency. */
11820 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11821 cost += 1;
11823 /* ??? Compares pair with jump/setcc. */
11824 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11825 cost = 0;
11827 /* Floating point stores require value to be ready one cycle earlier. */
11828 if (insn_type == TYPE_FMOV
11829 && get_attr_memory (insn) == MEMORY_STORE
11830 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11831 cost += 1;
11832 break;
11834 case PROCESSOR_PENTIUMPRO:
11835 memory = get_attr_memory (insn);
11836 dep_memory = get_attr_memory (dep_insn);
11838 /* Since we can't represent delayed latencies of load+operation,
11839 increase the cost here for non-imov insns. */
11840 if (dep_insn_type != TYPE_IMOV
11841 && dep_insn_type != TYPE_FMOV
11842 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11843 cost += 1;
11845 /* INT->FP conversion is expensive. */
11846 if (get_attr_fp_int_src (dep_insn))
11847 cost += 5;
11849 /* There is one cycle extra latency between an FP op and a store. */
11850 if (insn_type == TYPE_FMOV
11851 && (set = single_set (dep_insn)) != NULL_RTX
11852 && (set2 = single_set (insn)) != NULL_RTX
11853 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11854 && GET_CODE (SET_DEST (set2)) == MEM)
11855 cost += 1;
11857 /* Show the ability of the reorder buffer to hide the latency of a load
11858 by executing it in parallel with the previous instruction, in case the
11859 previous instruction is not needed to compute the address. */
11860 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11861 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11863 /* Claim moves to take one cycle, as the core can issue one load
11864 at a time and the next load can start a cycle later. */
11865 if (dep_insn_type == TYPE_IMOV
11866 || dep_insn_type == TYPE_FMOV)
11867 cost = 1;
11868 else if (cost > 1)
11869 cost--;
11871 break;
11873 case PROCESSOR_K6:
11874 memory = get_attr_memory (insn);
11875 dep_memory = get_attr_memory (dep_insn);
11876 /* The esp dependency is resolved before the instruction is really
11877 finished. */
11878 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11879 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11880 return 1;
11882 /* Since we can't represent delayed latencies of load+operation,
11883 increase the cost here for non-imov insns. */
11884 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11885 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11887 /* INT->FP conversion is expensive. */
11888 if (get_attr_fp_int_src (dep_insn))
11889 cost += 5;
11891 /* Show the ability of the reorder buffer to hide the latency of a load
11892 by executing it in parallel with the previous instruction, in case the
11893 previous instruction is not needed to compute the address. */
11894 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11895 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11897 /* Claim moves to take one cycle, as the core can issue one load
11898 at a time and the next load can start a cycle later. */
11899 if (dep_insn_type == TYPE_IMOV
11900 || dep_insn_type == TYPE_FMOV)
11901 cost = 1;
11902 else if (cost > 2)
11903 cost -= 2;
11904 else
11905 cost = 1;
11907 break;
11909 case PROCESSOR_ATHLON:
11910 case PROCESSOR_K8:
11911 memory = get_attr_memory (insn);
11912 dep_memory = get_attr_memory (dep_insn);
11914 /* Show the ability of the reorder buffer to hide the latency of a load
11915 by executing it in parallel with the previous instruction, provided the
11916 previous instruction is not needed to compute the address. */
11917 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11918 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11920 enum attr_unit unit = get_attr_unit (insn);
11921 int loadcost = 3;
11923 /* Because of the difference between the length of integer and
11924 floating unit pipeline preparation stages, the memory operands
11925 for floating point are cheaper.
11927 ??? For Athlon the difference is most probably 2. */
11928 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
11929 loadcost = 3;
11930 else
11931 loadcost = TARGET_ATHLON ? 2 : 0;
11933 if (cost >= loadcost)
11934 cost -= loadcost;
11935 else
11936 cost = 0;
11939 default:
11940 break;
11943 return cost;
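/* Illustrative sketch (not part of GCC): the Athlon/K8 case above hides
   load latency by subtracting a unit-dependent load cost from the
   dependence cost, clamping at zero.  For example, a cost of 5 on an
   integer-unit consumer (loadcost 3) becomes 2, while a cost of 2 on a
   K8 floating-point consumer (loadcost 0) is left unchanged.  A minimal
   standalone model of the clamp, assuming the same constants:  */
#if 0
static int
example_hide_load_latency (int cost, int is_integer_unit, int is_athlon)
{
  /* Integer (or unknown) units pay the full 3 cycle load cost; the FP
     unit pays 2 on Athlon and 0 on K8, mirroring the code above.  */
  int loadcost = is_integer_unit ? 3 : (is_athlon ? 2 : 0);
  return cost >= loadcost ? cost - loadcost : 0;
}
#endif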
11946 static union
11948 struct ppro_sched_data
11950 rtx decode[3];
11951 int issued_this_cycle;
11952 } ppro;
11953 } ix86_sched_data;
11955 static enum attr_ppro_uops
11956 ix86_safe_ppro_uops (rtx insn)
11958 if (recog_memoized (insn) >= 0)
11959 return get_attr_ppro_uops (insn);
11960 else
11961 return PPRO_UOPS_MANY;
11964 static void
11965 ix86_dump_ppro_packet (FILE *dump)
11967 if (ix86_sched_data.ppro.decode[0])
11969 fprintf (dump, "PPRO packet: %d",
11970 INSN_UID (ix86_sched_data.ppro.decode[0]));
11971 if (ix86_sched_data.ppro.decode[1])
11972 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11973 if (ix86_sched_data.ppro.decode[2])
11974 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11975 fputc ('\n', dump);
11979 /* We're beginning a new block. Initialize data structures as necessary. */
11981 static void
11982 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
11983 int sched_verbose ATTRIBUTE_UNUSED,
11984 int veclen ATTRIBUTE_UNUSED)
11986 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11989 /* Shift INSN to SLOT, and shift everything else down. */
11991 static void
11992 ix86_reorder_insn (rtx *insnp, rtx *slot)
11994 if (insnp != slot)
11996 rtx insn = *insnp;
11998 insnp[0] = insnp[1];
11999 while (++insnp != slot);
12000 *insnp = insn;
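/* Illustrative sketch (not part of GCC): the loop above rotates the
   chosen insn up to SLOT and slides everything in between down one
   position.  With the ready queue {A, B, C, D}, INSNP pointing at B and
   SLOT pointing at D, the result is {A, C, D, B}.  The same rotation on
   plain integers, assuming slot >= insnp:  */
#if 0
static void
example_rotate_to_slot (int *insnp, int *slot)
{
  if (insnp != slot)
    {
      int val = *insnp;
      do
        insnp[0] = insnp[1];
      while (++insnp != slot);
      *insnp = val;
    }
}
#endif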
12004 static void
12005 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12007 rtx decode[3];
12008 enum attr_ppro_uops cur_uops;
12009 int issued_this_cycle;
12010 rtx *insnp;
12011 int i;
12013 /* At this point .ppro.decode contains the state of the three
12014 decoders from last "cycle". That is, those insns that were
12015 actually independent. But here we're scheduling for the
12016 decoder, and we may find things that are decodable in the
12017 same cycle. */
12019 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12020 issued_this_cycle = 0;
12022 insnp = e_ready;
12023 cur_uops = ix86_safe_ppro_uops (*insnp);
12025 /* If the decoders are empty, and we've a complex insn at the
12026 head of the priority queue, let it issue without complaint. */
12027 if (decode[0] == NULL)
12029 if (cur_uops == PPRO_UOPS_MANY)
12031 decode[0] = *insnp;
12032 goto ppro_done;
12035 /* Otherwise, search for a 2-4 uop insn to issue. */
12036 while (cur_uops != PPRO_UOPS_FEW)
12038 if (insnp == ready)
12039 break;
12040 cur_uops = ix86_safe_ppro_uops (*--insnp);
12043 /* If so, move it to the head of the line. */
12044 if (cur_uops == PPRO_UOPS_FEW)
12045 ix86_reorder_insn (insnp, e_ready);
12047 /* Issue the head of the queue. */
12048 issued_this_cycle = 1;
12049 decode[0] = *e_ready--;
12052 /* Look for simple insns to fill in the other two slots. */
12053 for (i = 1; i < 3; ++i)
12054 if (decode[i] == NULL)
12056 if (ready > e_ready)
12057 goto ppro_done;
12059 insnp = e_ready;
12060 cur_uops = ix86_safe_ppro_uops (*insnp);
12061 while (cur_uops != PPRO_UOPS_ONE)
12063 if (insnp == ready)
12064 break;
12065 cur_uops = ix86_safe_ppro_uops (*--insnp);
12068 /* Found one. Move it to the head of the queue and issue it. */
12069 if (cur_uops == PPRO_UOPS_ONE)
12071 ix86_reorder_insn (insnp, e_ready);
12072 decode[i] = *e_ready--;
12073 issued_this_cycle++;
12074 continue;
12077 /* ??? Didn't find one. Ideally, here we would do a lazy split
12078 of 2-uop insns, issue one and queue the other. */
12081 ppro_done:
12082 if (issued_this_cycle == 0)
12083 issued_this_cycle = 1;
12084 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
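/* Illustrative trace (assumed example, not from the sources): with empty
   decoders and a PPRO_UOPS_MANY insn at the head of the ready queue, that
   insn is issued alone into decode[0].  Otherwise a 2-4 uop insn is
   searched for and promoted to the head for decoder 0, after which up to
   two PPRO_UOPS_ONE insns are promoted to fill decoders 1 and 2,
   approximating the 4-1-1 decode template of the PentiumPro.  */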
12087 /* We are about to begin issuing insns for this clock cycle.
12088 Override the default sort algorithm to better slot instructions. */
12089 static int
12090 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12091 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12092 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12094 int n_ready = *n_readyp;
12095 rtx *e_ready = ready + n_ready - 1;
12097 /* Make sure to go ahead and initialize key items in
12098 ix86_sched_data if we are not going to bother trying to
12099 reorder the ready queue. */
12100 if (n_ready < 2)
12102 ix86_sched_data.ppro.issued_this_cycle = 1;
12103 goto out;
12106 switch (ix86_tune)
12108 default:
12109 break;
12111 case PROCESSOR_PENTIUMPRO:
12112 ix86_sched_reorder_ppro (ready, e_ready);
12113 break;
12116 out:
12117 return ix86_issue_rate ();
12120 /* We are about to issue INSN. Return the number of insns left on the
12121 ready queue that can be issued this cycle. */
12123 static int
12124 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12125 int can_issue_more)
12127 int i;
12128 switch (ix86_tune)
12130 default:
12131 return can_issue_more - 1;
12133 case PROCESSOR_PENTIUMPRO:
12135 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12137 if (uops == PPRO_UOPS_MANY)
12139 if (sched_verbose)
12140 ix86_dump_ppro_packet (dump);
12141 ix86_sched_data.ppro.decode[0] = insn;
12142 ix86_sched_data.ppro.decode[1] = NULL;
12143 ix86_sched_data.ppro.decode[2] = NULL;
12144 if (sched_verbose)
12145 ix86_dump_ppro_packet (dump);
12146 ix86_sched_data.ppro.decode[0] = NULL;
12148 else if (uops == PPRO_UOPS_FEW)
12150 if (sched_verbose)
12151 ix86_dump_ppro_packet (dump);
12152 ix86_sched_data.ppro.decode[0] = insn;
12153 ix86_sched_data.ppro.decode[1] = NULL;
12154 ix86_sched_data.ppro.decode[2] = NULL;
12156 else
12158 for (i = 0; i < 3; ++i)
12159 if (ix86_sched_data.ppro.decode[i] == NULL)
12161 ix86_sched_data.ppro.decode[i] = insn;
12162 break;
12164 if (i == 3)
12165 abort ();
12166 if (i == 2)
12168 if (sched_verbose)
12169 ix86_dump_ppro_packet (dump);
12170 ix86_sched_data.ppro.decode[0] = NULL;
12171 ix86_sched_data.ppro.decode[1] = NULL;
12172 ix86_sched_data.ppro.decode[2] = NULL;
12176 return --ix86_sched_data.ppro.issued_this_cycle;
12180 static int
12181 ia32_use_dfa_pipeline_interface (void)
12183 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12184 return 1;
12185 return 0;
12188 /* How many alternative schedules to try. This should be as wide as the
12189 scheduling freedom in the DFA, but no wider. Making this value too
12190 large results in extra work for the scheduler. */
12192 static int
12193 ia32_multipass_dfa_lookahead (void)
12195 if (ix86_tune == PROCESSOR_PENTIUM)
12196 return 2;
12197 else
12198 return 0;
12202 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12203 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
12204 appropriate. */
12206 void
12207 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12208 rtx srcreg)
12210 rtx insn;
12212 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12213 if (INSN_P (insn))
12214 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12215 dstreg, srcreg);
12218 /* Subroutine of above to actually do the updating by recursively walking
12219 the rtx. */
12221 static void
12222 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12223 rtx srcreg)
12225 enum rtx_code code = GET_CODE (x);
12226 const char *format_ptr = GET_RTX_FORMAT (code);
12227 int i, j;
12229 if (code == MEM && XEXP (x, 0) == dstreg)
12230 MEM_COPY_ATTRIBUTES (x, dstref);
12231 else if (code == MEM && XEXP (x, 0) == srcreg)
12232 MEM_COPY_ATTRIBUTES (x, srcref);
12234 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12236 if (*format_ptr == 'e')
12237 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12238 dstreg, srcreg);
12239 else if (*format_ptr == 'E')
12240 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12241 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12242 dstreg, srcreg);
12246 /* Compute the alignment given to a constant that is being placed in memory.
12247 EXP is the constant and ALIGN is the alignment that the object would
12248 ordinarily have.
12249 The value of this function is used instead of that alignment to align
12250 the object. */
12253 ix86_constant_alignment (tree exp, int align)
12255 if (TREE_CODE (exp) == REAL_CST)
12257 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12258 return 64;
12259 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12260 return 128;
12262 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12263 && align < 256)
12264 return 256;
12266 return align;
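/* Illustrative examples (assumed, not part of GCC): a DFmode literal such
   as the 1.0 in "x + 1.0" that must be placed in the constant pool is
   given at least 64-bit alignment, a constant whose mode satisfies
   ALIGN_MODE_128 gets 128 bits, and a string literal of 31 or more
   characters gets 256 bits; anything else keeps the alignment it would
   ordinarily have.  */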
12269 /* Compute the alignment for a static variable.
12270 TYPE is the data type, and ALIGN is the alignment that
12271 the object would ordinarily have. The value of this function is used
12272 instead of that alignment to align the object. */
12275 ix86_data_alignment (tree type, int align)
12277 if (AGGREGATE_TYPE_P (type)
12278 && TYPE_SIZE (type)
12279 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12280 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12281 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12282 return 256;
12284 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12285 to a 16 byte boundary. */
12286 if (TARGET_64BIT)
12288 if (AGGREGATE_TYPE_P (type)
12289 && TYPE_SIZE (type)
12290 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12291 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12292 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12293 return 128;
12296 if (TREE_CODE (type) == ARRAY_TYPE)
12298 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12299 return 64;
12300 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12301 return 128;
12303 else if (TREE_CODE (type) == COMPLEX_TYPE)
12306 if (TYPE_MODE (type) == DCmode && align < 64)
12307 return 64;
12308 if (TYPE_MODE (type) == XCmode && align < 128)
12309 return 128;
12311 else if ((TREE_CODE (type) == RECORD_TYPE
12312 || TREE_CODE (type) == UNION_TYPE
12313 || TREE_CODE (type) == QUAL_UNION_TYPE)
12314 && TYPE_FIELDS (type))
12316 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12317 return 64;
12318 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12319 return 128;
12321 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12322 || TREE_CODE (type) == INTEGER_TYPE)
12324 if (TYPE_MODE (type) == DFmode && align < 64)
12325 return 64;
12326 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12327 return 128;
12330 return align;
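/* Illustrative examples (assumed, not part of GCC): a 64-byte global
   struct is raised to 256-bit alignment by the first test; a 16-byte
   array of doubles gets 128 bits on x86-64 (ABI requirement) and at
   least 64 bits elsewhere because of its DFmode element type; a
   __complex__ double (DCmode) gets at least 64 bits.  */
#if 0
static struct { char buf[64]; } big;	/* aggregate of >= 256 bits: align 256.  */
static double pair[2];			/* 128 on x86-64, else 64 for DFmode.    */
static __complex__ double w;		/* DCmode: align >= 64.                  */
#endif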
12333 /* Compute the alignment for a local variable.
12334 TYPE is the data type, and ALIGN is the alignment that
12335 the object would ordinarily have. The value of this function is used
12336 instead of that alignment to align the object. */
12339 ix86_local_alignment (tree type, int align)
12341 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12342 to a 16 byte boundary. */
12343 if (TARGET_64BIT)
12345 if (AGGREGATE_TYPE_P (type)
12346 && TYPE_SIZE (type)
12347 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12348 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12349 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12350 return 128;
12352 if (TREE_CODE (type) == ARRAY_TYPE)
12354 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12355 return 64;
12356 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12357 return 128;
12359 else if (TREE_CODE (type) == COMPLEX_TYPE)
12361 if (TYPE_MODE (type) == DCmode && align < 64)
12362 return 64;
12363 if (TYPE_MODE (type) == XCmode && align < 128)
12364 return 128;
12366 else if ((TREE_CODE (type) == RECORD_TYPE
12367 || TREE_CODE (type) == UNION_TYPE
12368 || TREE_CODE (type) == QUAL_UNION_TYPE)
12369 && TYPE_FIELDS (type))
12371 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12372 return 64;
12373 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12374 return 128;
12376 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12377 || TREE_CODE (type) == INTEGER_TYPE)
12380 if (TYPE_MODE (type) == DFmode && align < 64)
12381 return 64;
12382 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12383 return 128;
12385 return align;
12388 /* Emit RTL insns to initialize the variable parts of a trampoline.
12389 FNADDR is an RTX for the address of the function's pure code.
12390 CXT is an RTX for the static chain value for the function. */
12391 void
12392 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12394 if (!TARGET_64BIT)
12396 /* Compute offset from the end of the jmp to the target function. */
12397 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12398 plus_constant (tramp, 10),
12399 NULL_RTX, 1, OPTAB_DIRECT);
12400 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12401 gen_int_mode (0xb9, QImode));
12402 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12403 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12404 gen_int_mode (0xe9, QImode));
12405 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12407 else
12409 int offset = 0;
12410 /* Try to load address using shorter movl instead of movabs.
12411 We may want to support movq for kernel mode, but the kernel does not use
12412 trampolines at the moment. */
12413 if (x86_64_zero_extended_value (fnaddr))
12415 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12416 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12417 gen_int_mode (0xbb41, HImode));
12418 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12419 gen_lowpart (SImode, fnaddr));
12420 offset += 6;
12422 else
12424 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12425 gen_int_mode (0xbb49, HImode));
12426 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12427 fnaddr);
12428 offset += 10;
12430 /* Load static chain using movabs to r10. */
12431 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12432 gen_int_mode (0xba49, HImode));
12433 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12434 cxt);
12435 offset += 10;
12436 /* Jump to r11. */
12437 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12438 gen_int_mode (0xff49, HImode));
12439 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12440 gen_int_mode (0xe3, QImode));
12441 offset += 3;
12442 if (offset > TRAMPOLINE_SIZE)
12443 abort ();
12446 #ifdef TRANSFER_FROM_TRAMPOLINE
12447 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12448 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12449 #endif
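/* Byte layout emitted above (illustrative summary, not from the sources):
   32-bit trampoline:
	0:  b9 <cxt:4>		movl  $cxt, %ecx	(static chain)
	5:  e9 <disp:4>		jmp   fnaddr		(ip-relative)
   64-bit trampoline when FNADDR is known to be zero-extended:
	0:  41 bb <fnaddr:4>	movl  $fnaddr, %r11d
	6:  49 ba <cxt:8>	movabs $cxt, %r10
       16:  49 ff e3		jmp   *%r11
   otherwise a full movabs of FNADDR into %r11 occupies offsets 0-9 and
   the two remaining instructions follow at offsets 10 and 20.  */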
12452 #define def_builtin(MASK, NAME, TYPE, CODE) \
12453 do { \
12454 if ((MASK) & target_flags \
12455 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12456 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12457 NULL, NULL_TREE); \
12458 } while (0)
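/* Illustrative reading of the gate above (not part of GCC): a builtin
   registered with MASK_SSE | MASK_64BIT is created only when one of
   those bits is present in target_flags and, because the mask contains
   MASK_64BIT, only when TARGET_64BIT holds; so, for example,
   __builtin_ia32_cvtss2si64 is never defined in a 32-bit compilation.  */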
12460 struct builtin_description
12462 const unsigned int mask;
12463 const enum insn_code icode;
12464 const char *const name;
12465 const enum ix86_builtins code;
12466 const enum rtx_code comparison;
12467 const unsigned int flag;
12470 static const struct builtin_description bdesc_comi[] =
12472 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12473 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12474 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12475 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12476 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12477 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12478 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12479 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12480 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12481 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12482 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12483 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12484 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12485 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12486 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12487 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12488 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12489 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12490 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12491 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12492 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12493 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12494 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12495 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
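/* A note on the comparison/flag columns (a reading of the tables,
   assumed rather than taken from the sources): the comi/ucomi entries
   above carry the unordered-aware RTX codes UNEQ/UNLT/UNLE/LTGT that the
   expander plugs into the comparison, while entries such as the
   cmpgtps/cmpgeps group in bdesc_2arg below reuse LT/LE with flag == 1
   to request swapped operands, since SSE encodes only the "less than"
   style comparisons directly.  */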
12498 static const struct builtin_description bdesc_2arg[] =
12500 /* SSE */
12501 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12502 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12503 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12504 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12505 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12506 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12507 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12508 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12510 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12511 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12512 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12513 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12514 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12515 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12516 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12517 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12518 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12519 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12520 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12521 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12522 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12523 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12524 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12525 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12526 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12527 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12528 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12529 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12531 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12532 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12533 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12534 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12536 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12537 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12538 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12539 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12541 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12542 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12543 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12544 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12545 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12547 /* MMX */
12548 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12549 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12550 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12551 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12552 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12553 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12554 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12555 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12557 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12558 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12559 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12560 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12561 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12562 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12563 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12564 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12566 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12567 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12568 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12570 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12571 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12572 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12573 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12575 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12576 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12578 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12579 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12580 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12581 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12582 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12583 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12585 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12586 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12587 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12588 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12590 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12591 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12592 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12593 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12594 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12595 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12597 /* Special. */
12598 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12599 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12600 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12602 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12603 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12604 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12606 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12607 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12608 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12609 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12610 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12611 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12613 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12614 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12615 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12616 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12617 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12618 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12620 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12621 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12622 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12623 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12625 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12628 /* SSE2 */
12629 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12630 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12631 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12632 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12634 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12635 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12638 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12639 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12640 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12641 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12642 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12643 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12644 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12645 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12646 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12647 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12648 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12649 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12650 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12651 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12652 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12653 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12654 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12655 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12656 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12657 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12659 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12660 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12670 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12673 /* SSE2 MMX */
12674 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12675 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12676 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12677 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12678 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12679 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12680 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12681 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12683 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12684 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12685 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12686 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12687 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12688 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12689 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12690 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12706 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12713 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12715 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12717 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12718 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12719 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12721 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12722 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12734 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12737 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12744 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12755 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12759 /* PNI MMX */
12760 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12761 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12762 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12763 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12764 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12765 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12768 static const struct builtin_description bdesc_1arg[] =
12770 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12771 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12773 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12774 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12775 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12777 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12778 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12779 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12780 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12781 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12782 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12787 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12796 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12798 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12802 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12804 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12805 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12813 /* PNI */
12814 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12815 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12816 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12819 void
12820 ix86_init_builtins (void)
12822 if (TARGET_MMX)
12823 ix86_init_mmx_sse_builtins ();
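/* Usage sketch (assumed example, not part of GCC): once the builtins
   below have been registered, user code compiled with -msse can call
   them directly on vector-typed values, which is how the xmmintrin.h
   intrinsics are implemented:  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

static v4sf
example_addps (v4sf a, v4sf b)
{
  /* Expands through the CODE_FOR_addv4sf3 entry in bdesc_2arg.  */
  return __builtin_ia32_addps (a, b);
}
#endif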
12826 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12827 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12828 builtins. */
12829 static void
12830 ix86_init_mmx_sse_builtins (void)
12832 const struct builtin_description * d;
12833 size_t i;
12835 tree pchar_type_node = build_pointer_type (char_type_node);
12836 tree pcchar_type_node = build_pointer_type (
12837 build_type_variant (char_type_node, 1, 0));
12838 tree pfloat_type_node = build_pointer_type (float_type_node);
12839 tree pcfloat_type_node = build_pointer_type (
12840 build_type_variant (float_type_node, 1, 0));
12841 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12842 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12843 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12845 /* Comparisons. */
12846 tree int_ftype_v4sf_v4sf
12847 = build_function_type_list (integer_type_node,
12848 V4SF_type_node, V4SF_type_node, NULL_TREE);
12849 tree v4si_ftype_v4sf_v4sf
12850 = build_function_type_list (V4SI_type_node,
12851 V4SF_type_node, V4SF_type_node, NULL_TREE);
12852 /* MMX/SSE/integer conversions. */
12853 tree int_ftype_v4sf
12854 = build_function_type_list (integer_type_node,
12855 V4SF_type_node, NULL_TREE);
12856 tree int64_ftype_v4sf
12857 = build_function_type_list (long_long_integer_type_node,
12858 V4SF_type_node, NULL_TREE);
12859 tree int_ftype_v8qi
12860 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12861 tree v4sf_ftype_v4sf_int
12862 = build_function_type_list (V4SF_type_node,
12863 V4SF_type_node, integer_type_node, NULL_TREE);
12864 tree v4sf_ftype_v4sf_int64
12865 = build_function_type_list (V4SF_type_node,
12866 V4SF_type_node, long_long_integer_type_node,
12867 NULL_TREE);
12868 tree v4sf_ftype_v4sf_v2si
12869 = build_function_type_list (V4SF_type_node,
12870 V4SF_type_node, V2SI_type_node, NULL_TREE);
12871 tree int_ftype_v4hi_int
12872 = build_function_type_list (integer_type_node,
12873 V4HI_type_node, integer_type_node, NULL_TREE);
12874 tree v4hi_ftype_v4hi_int_int
12875 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12876 integer_type_node, integer_type_node,
12877 NULL_TREE);
12878 /* Miscellaneous. */
12879 tree v8qi_ftype_v4hi_v4hi
12880 = build_function_type_list (V8QI_type_node,
12881 V4HI_type_node, V4HI_type_node, NULL_TREE);
12882 tree v4hi_ftype_v2si_v2si
12883 = build_function_type_list (V4HI_type_node,
12884 V2SI_type_node, V2SI_type_node, NULL_TREE);
12885 tree v4sf_ftype_v4sf_v4sf_int
12886 = build_function_type_list (V4SF_type_node,
12887 V4SF_type_node, V4SF_type_node,
12888 integer_type_node, NULL_TREE);
12889 tree v2si_ftype_v4hi_v4hi
12890 = build_function_type_list (V2SI_type_node,
12891 V4HI_type_node, V4HI_type_node, NULL_TREE);
12892 tree v4hi_ftype_v4hi_int
12893 = build_function_type_list (V4HI_type_node,
12894 V4HI_type_node, integer_type_node, NULL_TREE);
12895 tree v4hi_ftype_v4hi_di
12896 = build_function_type_list (V4HI_type_node,
12897 V4HI_type_node, long_long_unsigned_type_node,
12898 NULL_TREE);
12899 tree v2si_ftype_v2si_di
12900 = build_function_type_list (V2SI_type_node,
12901 V2SI_type_node, long_long_unsigned_type_node,
12902 NULL_TREE);
12903 tree void_ftype_void
12904 = build_function_type (void_type_node, void_list_node);
12905 tree void_ftype_unsigned
12906 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12907 tree void_ftype_unsigned_unsigned
12908 = build_function_type_list (void_type_node, unsigned_type_node,
12909 unsigned_type_node, NULL_TREE);
12910 tree void_ftype_pcvoid_unsigned_unsigned
12911 = build_function_type_list (void_type_node, const_ptr_type_node,
12912 unsigned_type_node, unsigned_type_node,
12913 NULL_TREE);
12914 tree unsigned_ftype_void
12915 = build_function_type (unsigned_type_node, void_list_node);
12916 tree di_ftype_void
12917 = build_function_type (long_long_unsigned_type_node, void_list_node);
12918 tree v4sf_ftype_void
12919 = build_function_type (V4SF_type_node, void_list_node);
12920 tree v2si_ftype_v4sf
12921 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12922 /* Loads/stores. */
12923 tree void_ftype_v8qi_v8qi_pchar
12924 = build_function_type_list (void_type_node,
12925 V8QI_type_node, V8QI_type_node,
12926 pchar_type_node, NULL_TREE);
12927 tree v4sf_ftype_pcfloat
12928 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12929 /* @@@ the type is bogus */
12930 tree v4sf_ftype_v4sf_pv2si
12931 = build_function_type_list (V4SF_type_node,
12932 V4SF_type_node, pv2si_type_node, NULL_TREE);
12933 tree void_ftype_pv2si_v4sf
12934 = build_function_type_list (void_type_node,
12935 pv2si_type_node, V4SF_type_node, NULL_TREE);
12936 tree void_ftype_pfloat_v4sf
12937 = build_function_type_list (void_type_node,
12938 pfloat_type_node, V4SF_type_node, NULL_TREE);
12939 tree void_ftype_pdi_di
12940 = build_function_type_list (void_type_node,
12941 pdi_type_node, long_long_unsigned_type_node,
12942 NULL_TREE);
12943 tree void_ftype_pv2di_v2di
12944 = build_function_type_list (void_type_node,
12945 pv2di_type_node, V2DI_type_node, NULL_TREE);
12946 /* Normal vector unops. */
12947 tree v4sf_ftype_v4sf
12948 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12950 /* Normal vector binops. */
12951 tree v4sf_ftype_v4sf_v4sf
12952 = build_function_type_list (V4SF_type_node,
12953 V4SF_type_node, V4SF_type_node, NULL_TREE);
12954 tree v8qi_ftype_v8qi_v8qi
12955 = build_function_type_list (V8QI_type_node,
12956 V8QI_type_node, V8QI_type_node, NULL_TREE);
12957 tree v4hi_ftype_v4hi_v4hi
12958 = build_function_type_list (V4HI_type_node,
12959 V4HI_type_node, V4HI_type_node, NULL_TREE);
12960 tree v2si_ftype_v2si_v2si
12961 = build_function_type_list (V2SI_type_node,
12962 V2SI_type_node, V2SI_type_node, NULL_TREE);
12963 tree di_ftype_di_di
12964 = build_function_type_list (long_long_unsigned_type_node,
12965 long_long_unsigned_type_node,
12966 long_long_unsigned_type_node, NULL_TREE);
12968 tree v2si_ftype_v2sf
12969 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12970 tree v2sf_ftype_v2si
12971 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12972 tree v2si_ftype_v2si
12973 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12974 tree v2sf_ftype_v2sf
12975 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12976 tree v2sf_ftype_v2sf_v2sf
12977 = build_function_type_list (V2SF_type_node,
12978 V2SF_type_node, V2SF_type_node, NULL_TREE);
12979 tree v2si_ftype_v2sf_v2sf
12980 = build_function_type_list (V2SI_type_node,
12981 V2SF_type_node, V2SF_type_node, NULL_TREE);
12982 tree pint_type_node = build_pointer_type (integer_type_node);
12983 tree pcint_type_node = build_pointer_type (
12984 build_type_variant (integer_type_node, 1, 0));
12985 tree pdouble_type_node = build_pointer_type (double_type_node);
12986 tree pcdouble_type_node = build_pointer_type (
12987 build_type_variant (double_type_node, 1, 0));
12988 tree int_ftype_v2df_v2df
12989 = build_function_type_list (integer_type_node,
12990 V2DF_type_node, V2DF_type_node, NULL_TREE);
12992 tree ti_ftype_void
12993 = build_function_type (intTI_type_node, void_list_node);
12994 tree v2di_ftype_void
12995 = build_function_type (V2DI_type_node, void_list_node);
12996 tree ti_ftype_ti_ti
12997 = build_function_type_list (intTI_type_node,
12998 intTI_type_node, intTI_type_node, NULL_TREE);
12999 tree void_ftype_pcvoid
13000 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13001 tree v2di_ftype_di
13002 = build_function_type_list (V2DI_type_node,
13003 long_long_unsigned_type_node, NULL_TREE);
13004 tree di_ftype_v2di
13005 = build_function_type_list (long_long_unsigned_type_node,
13006 V2DI_type_node, NULL_TREE);
13007 tree v4sf_ftype_v4si
13008 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13009 tree v4si_ftype_v4sf
13010 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13011 tree v2df_ftype_v4si
13012 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13013 tree v4si_ftype_v2df
13014 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13015 tree v2si_ftype_v2df
13016 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13017 tree v4sf_ftype_v2df
13018 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13019 tree v2df_ftype_v2si
13020 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13021 tree v2df_ftype_v4sf
13022 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13023 tree int_ftype_v2df
13024 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13025 tree int64_ftype_v2df
13026 = build_function_type_list (long_long_integer_type_node,
13027 V2DF_type_node, NULL_TREE);
13028 tree v2df_ftype_v2df_int
13029 = build_function_type_list (V2DF_type_node,
13030 V2DF_type_node, integer_type_node, NULL_TREE);
13031 tree v2df_ftype_v2df_int64
13032 = build_function_type_list (V2DF_type_node,
13033 V2DF_type_node, long_long_integer_type_node,
13034 NULL_TREE);
13035 tree v4sf_ftype_v4sf_v2df
13036 = build_function_type_list (V4SF_type_node,
13037 V4SF_type_node, V2DF_type_node, NULL_TREE);
13038 tree v2df_ftype_v2df_v4sf
13039 = build_function_type_list (V2DF_type_node,
13040 V2DF_type_node, V4SF_type_node, NULL_TREE);
13041 tree v2df_ftype_v2df_v2df_int
13042 = build_function_type_list (V2DF_type_node,
13043 V2DF_type_node, V2DF_type_node,
13044 integer_type_node,
13045 NULL_TREE);
13046 tree v2df_ftype_v2df_pv2si
13047 = build_function_type_list (V2DF_type_node,
13048 V2DF_type_node, pv2si_type_node, NULL_TREE);
13049 tree void_ftype_pv2si_v2df
13050 = build_function_type_list (void_type_node,
13051 pv2si_type_node, V2DF_type_node, NULL_TREE);
13052 tree void_ftype_pdouble_v2df
13053 = build_function_type_list (void_type_node,
13054 pdouble_type_node, V2DF_type_node, NULL_TREE);
13055 tree void_ftype_pint_int
13056 = build_function_type_list (void_type_node,
13057 pint_type_node, integer_type_node, NULL_TREE);
13058 tree void_ftype_v16qi_v16qi_pchar
13059 = build_function_type_list (void_type_node,
13060 V16QI_type_node, V16QI_type_node,
13061 pchar_type_node, NULL_TREE);
13062 tree v2df_ftype_pcdouble
13063 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13064 tree v2df_ftype_v2df_v2df
13065 = build_function_type_list (V2DF_type_node,
13066 V2DF_type_node, V2DF_type_node, NULL_TREE);
13067 tree v16qi_ftype_v16qi_v16qi
13068 = build_function_type_list (V16QI_type_node,
13069 V16QI_type_node, V16QI_type_node, NULL_TREE);
13070 tree v8hi_ftype_v8hi_v8hi
13071 = build_function_type_list (V8HI_type_node,
13072 V8HI_type_node, V8HI_type_node, NULL_TREE);
13073 tree v4si_ftype_v4si_v4si
13074 = build_function_type_list (V4SI_type_node,
13075 V4SI_type_node, V4SI_type_node, NULL_TREE);
13076 tree v2di_ftype_v2di_v2di
13077 = build_function_type_list (V2DI_type_node,
13078 V2DI_type_node, V2DI_type_node, NULL_TREE);
13079 tree v2di_ftype_v2df_v2df
13080 = build_function_type_list (V2DI_type_node,
13081 V2DF_type_node, V2DF_type_node, NULL_TREE);
13082 tree v2df_ftype_v2df
13083 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13084 tree v2df_ftype_double
13085 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13086 tree v2df_ftype_double_double
13087 = build_function_type_list (V2DF_type_node,
13088 double_type_node, double_type_node, NULL_TREE);
13089 tree int_ftype_v8hi_int
13090 = build_function_type_list (integer_type_node,
13091 V8HI_type_node, integer_type_node, NULL_TREE);
13092 tree v8hi_ftype_v8hi_int_int
13093 = build_function_type_list (V8HI_type_node,
13094 V8HI_type_node, integer_type_node,
13095 integer_type_node, NULL_TREE);
13096 tree v2di_ftype_v2di_int
13097 = build_function_type_list (V2DI_type_node,
13098 V2DI_type_node, integer_type_node, NULL_TREE);
13099 tree v4si_ftype_v4si_int
13100 = build_function_type_list (V4SI_type_node,
13101 V4SI_type_node, integer_type_node, NULL_TREE);
13102 tree v8hi_ftype_v8hi_int
13103 = build_function_type_list (V8HI_type_node,
13104 V8HI_type_node, integer_type_node, NULL_TREE);
13105 tree v8hi_ftype_v8hi_v2di
13106 = build_function_type_list (V8HI_type_node,
13107 V8HI_type_node, V2DI_type_node, NULL_TREE);
13108 tree v4si_ftype_v4si_v2di
13109 = build_function_type_list (V4SI_type_node,
13110 V4SI_type_node, V2DI_type_node, NULL_TREE);
13111 tree v4si_ftype_v8hi_v8hi
13112 = build_function_type_list (V4SI_type_node,
13113 V8HI_type_node, V8HI_type_node, NULL_TREE);
13114 tree di_ftype_v8qi_v8qi
13115 = build_function_type_list (long_long_unsigned_type_node,
13116 V8QI_type_node, V8QI_type_node, NULL_TREE);
13117 tree v2di_ftype_v16qi_v16qi
13118 = build_function_type_list (V2DI_type_node,
13119 V16QI_type_node, V16QI_type_node, NULL_TREE);
13120 tree int_ftype_v16qi
13121 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13122 tree v16qi_ftype_pcchar
13123 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13124 tree void_ftype_pchar_v16qi
13125 = build_function_type_list (void_type_node,
13126 pchar_type_node, V16QI_type_node, NULL_TREE);
13127 tree v4si_ftype_pcint
13128 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13129 tree void_ftype_pcint_v4si
13130 = build_function_type_list (void_type_node,
13131 pcint_type_node, V4SI_type_node, NULL_TREE);
13132 tree v2di_ftype_v2di
13133 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13135 /* Add all builtins that are more or less simple operations on two
13136 operands. */
13137 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13139 /* Use one of the operands; the target can have a different mode for
13140 mask-generating compares. */
13141 enum machine_mode mode;
13142 tree type;
13144 if (d->name == 0)
13145 continue;
13146 mode = insn_data[d->icode].operand[1].mode;
13148 switch (mode)
13150 case V16QImode:
13151 type = v16qi_ftype_v16qi_v16qi;
13152 break;
13153 case V8HImode:
13154 type = v8hi_ftype_v8hi_v8hi;
13155 break;
13156 case V4SImode:
13157 type = v4si_ftype_v4si_v4si;
13158 break;
13159 case V2DImode:
13160 type = v2di_ftype_v2di_v2di;
13161 break;
13162 case V2DFmode:
13163 type = v2df_ftype_v2df_v2df;
13164 break;
13165 case TImode:
13166 type = ti_ftype_ti_ti;
13167 break;
13168 case V4SFmode:
13169 type = v4sf_ftype_v4sf_v4sf;
13170 break;
13171 case V8QImode:
13172 type = v8qi_ftype_v8qi_v8qi;
13173 break;
13174 case V4HImode:
13175 type = v4hi_ftype_v4hi_v4hi;
13176 break;
13177 case V2SImode:
13178 type = v2si_ftype_v2si_v2si;
13179 break;
13180 case DImode:
13181 type = di_ftype_di_di;
13182 break;
13184 default:
13185 abort ();
13188 /* Override for comparisons. */
13189 if (d->icode == CODE_FOR_maskcmpv4sf3
13190 || d->icode == CODE_FOR_maskncmpv4sf3
13191 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13192 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13193 type = v4si_ftype_v4sf_v4sf;
13195 if (d->icode == CODE_FOR_maskcmpv2df3
13196 || d->icode == CODE_FOR_maskncmpv2df3
13197 || d->icode == CODE_FOR_vmmaskcmpv2df3
13198 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13199 type = v2di_ftype_v2df_v2df;
13201 def_builtin (d->mask, d->name, type, d->code);
13204 /* Add the remaining MMX insns with somewhat more complicated types. */
13205 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13206 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13207 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13208 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13209 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13211 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13212 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13213 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13215 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13216 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13218 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13219 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13221 /* comi/ucomi insns. */
13222 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13223 if (d->mask == MASK_SSE2)
13224 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13225 else
13226 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13228 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13229 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13230 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13232 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13233 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13234 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13235 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13236 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13237 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13238 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13239 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13240 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13241 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13242 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13244 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13245 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13247 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13249 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13250 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13251 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13252 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13253 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13254 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13256 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13257 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13258 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13259 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13261 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13262 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13263 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13264 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13266 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13268 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13270 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13271 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13272 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13273 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13274 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13275 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13277 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13279 /* Original 3DNow! */
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13289 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13290 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13291 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13292 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13293 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13294 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13295 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13296 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13297 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13298 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13299 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13301 /* 3DNow! extension as used in the Athlon CPU. */
13302 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13303 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13304 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13305 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13306 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13307 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13309 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13311 /* SSE2 */
13312 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13316 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13321 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13327 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13332 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13335 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13337 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13338 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13340 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13343 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13348 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13351 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13352 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13359 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13360 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13361 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13364 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13367 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13368 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13376 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13377 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13380 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13381 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13384 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13385 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13388 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13389 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13392 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13394 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13398 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13399 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13402 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13405 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13406 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13407 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13408 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13410 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13411 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13412 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13413 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13415 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13416 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13418 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13420 /* Prescott New Instructions. */
13421 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13422 void_ftype_pcvoid_unsigned_unsigned,
13423 IX86_BUILTIN_MONITOR);
13424 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13425 void_ftype_unsigned_unsigned,
13426 IX86_BUILTIN_MWAIT);
13427 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13428 v4sf_ftype_v4sf,
13429 IX86_BUILTIN_MOVSHDUP);
13430 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13431 v4sf_ftype_v4sf,
13432 IX86_BUILTIN_MOVSLDUP);
13433 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13434 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13435 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13436 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13437 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13438 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13441 /* Errors in the source file can cause expand_expr to return const0_rtx
13442 where we expect a vector. To avoid crashing, use one of the vector
13443 clear instructions. */
13444 static rtx
13445 safe_vector_operand (rtx x, enum machine_mode mode)
13447 if (x != const0_rtx)
13448 return x;
13449 x = gen_reg_rtx (mode);
13451 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13452 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13453 : gen_rtx_SUBREG (DImode, x, 0)));
13454 else
13455 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13456 : gen_rtx_SUBREG (V4SFmode, x, 0),
13457 CONST0_RTX (V4SFmode)));
13458 return x;
13461 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13463 static rtx
13464 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13466 rtx pat;
13467 tree arg0 = TREE_VALUE (arglist);
13468 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13469 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13470 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13471 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13472 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13473 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13475 if (VECTOR_MODE_P (mode0))
13476 op0 = safe_vector_operand (op0, mode0);
13477 if (VECTOR_MODE_P (mode1))
13478 op1 = safe_vector_operand (op1, mode1);
13480 if (! target
13481 || GET_MODE (target) != tmode
13482 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13483 target = gen_reg_rtx (tmode);
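  /* If the insn wants a TImode operand but we were handed an SImode
     value, load it into a V4SImode register and use the TImode low
     part of that register.  */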
13485 if (GET_MODE (op1) == SImode && mode1 == TImode)
13487 rtx x = gen_reg_rtx (V4SImode);
13488 emit_insn (gen_sse2_loadd (x, op1));
13489 op1 = gen_lowpart (TImode, x);
13492 /* In case the insn wants input operands in modes different from
13493 the result, abort. */
13494 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13495 abort ();
13497 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13498 op0 = copy_to_mode_reg (mode0, op0);
13499 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13500 op1 = copy_to_mode_reg (mode1, op1);
13502   /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13503      yet at most one of them may be a memory operand.  This is normally
13504      enforced by expanders, but we didn't bother to create one here.  */
13505 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13506 op0 = copy_to_mode_reg (mode0, op0);
13508 pat = GEN_FCN (icode) (target, op0, op1);
13509 if (! pat)
13510 return 0;
13511 emit_insn (pat);
13512 return target;
13515 /* Subroutine of ix86_expand_builtin to take care of stores. */
13517 static rtx
13518 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13520 rtx pat;
13521 tree arg0 = TREE_VALUE (arglist);
13522 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13523 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13524 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13525 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13526 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13528 if (VECTOR_MODE_P (mode1))
13529 op1 = safe_vector_operand (op1, mode1);
13531 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13532 op1 = copy_to_mode_reg (mode1, op1);
13534 pat = GEN_FCN (icode) (op0, op1);
13535 if (pat)
13536 emit_insn (pat);
13537 return 0;
13540 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
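/* DO_LOAD nonzero means the single operand is a pointer; dereference it
   as a MEM of the insn's operand mode instead of using the value
   directly.  */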
13542 static rtx
13543 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13544 rtx target, int do_load)
13546 rtx pat;
13547 tree arg0 = TREE_VALUE (arglist);
13548 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13549 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13550 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13552 if (! target
13553 || GET_MODE (target) != tmode
13554 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13555 target = gen_reg_rtx (tmode);
13556 if (do_load)
13557 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13558 else
13560 if (VECTOR_MODE_P (mode0))
13561 op0 = safe_vector_operand (op0, mode0);
13563 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13564 op0 = copy_to_mode_reg (mode0, op0);
13567 pat = GEN_FCN (icode) (target, op0);
13568 if (! pat)
13569 return 0;
13570 emit_insn (pat);
13571 return target;
13574 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13575 sqrtss, rsqrtss, rcpss. */
13577 static rtx
13578 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13580 rtx pat;
13581 tree arg0 = TREE_VALUE (arglist);
13582 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13583 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13584 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13586 if (! target
13587 || GET_MODE (target) != tmode
13588 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13589 target = gen_reg_rtx (tmode);
13591 if (VECTOR_MODE_P (mode0))
13592 op0 = safe_vector_operand (op0, mode0);
13594 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13595 op0 = copy_to_mode_reg (mode0, op0);
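  /* The vm* scalar patterns take the source twice: one operand is
     operated on and the other supplies the untouched upper elements of
     the result, so pass the same value for both.  */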
13597 op1 = op0;
13598 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13599 op1 = copy_to_mode_reg (mode0, op1);
13601 pat = GEN_FCN (icode) (target, op0, op1);
13602 if (! pat)
13603 return 0;
13604 emit_insn (pat);
13605 return target;
13608 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13610 static rtx
13611 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13612 rtx target)
13614 rtx pat;
13615 tree arg0 = TREE_VALUE (arglist);
13616 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13617 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13618 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13619 rtx op2;
13620 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13621 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13622 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13623 enum rtx_code comparison = d->comparison;
13625 if (VECTOR_MODE_P (mode0))
13626 op0 = safe_vector_operand (op0, mode0);
13627 if (VECTOR_MODE_P (mode1))
13628 op1 = safe_vector_operand (op1, mode1);
13630 /* Swap operands if we have a comparison that isn't available in
13631 hardware. */
13632 if (d->flag)
13634 rtx tmp = gen_reg_rtx (mode1);
13635 emit_move_insn (tmp, op1);
13636 op1 = op0;
13637 op0 = tmp;
13640 if (! target
13641 || GET_MODE (target) != tmode
13642 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13643 target = gen_reg_rtx (tmode);
13645 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13646 op0 = copy_to_mode_reg (mode0, op0);
13647 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13648 op1 = copy_to_mode_reg (mode1, op1);
13650 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13651 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13652 if (! pat)
13653 return 0;
13654 emit_insn (pat);
13655 return target;
13658 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13660 static rtx
13661 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13662 rtx target)
13664 rtx pat;
13665 tree arg0 = TREE_VALUE (arglist);
13666 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13667 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13668 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13669 rtx op2;
13670 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13671 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13672 enum rtx_code comparison = d->comparison;
13674 if (VECTOR_MODE_P (mode0))
13675 op0 = safe_vector_operand (op0, mode0);
13676 if (VECTOR_MODE_P (mode1))
13677 op1 = safe_vector_operand (op1, mode1);
13679 /* Swap operands if we have a comparison that isn't available in
13680 hardware. */
13681 if (d->flag)
13683 rtx tmp = op1;
13684 op1 = op0;
13685 op0 = tmp;
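  /* The comi/ucomi patterns only set the flags.  Zero an SImode
     register, set its low byte from the flags with a strict_low_part
     store of the requested comparison, and return the full register.  */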
13688 target = gen_reg_rtx (SImode);
13689 emit_move_insn (target, const0_rtx);
13690 target = gen_rtx_SUBREG (QImode, target, 0);
13692 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13693 op0 = copy_to_mode_reg (mode0, op0);
13694 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13695 op1 = copy_to_mode_reg (mode1, op1);
13697 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13698 pat = GEN_FCN (d->icode) (op0, op1);
13699 if (! pat)
13700 return 0;
13701 emit_insn (pat);
13702 emit_insn (gen_rtx_SET (VOIDmode,
13703 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13704 gen_rtx_fmt_ee (comparison, QImode,
13705 SET_DEST (pat),
13706 const0_rtx)));
13708 return SUBREG_REG (target);
13711 /* Expand an expression EXP that calls a built-in function,
13712 with result going to TARGET if that's convenient
13713 (and in mode MODE if that's convenient).
13714 SUBTARGET may be used as the target for computing one of EXP's operands.
13715 IGNORE is nonzero if the value is to be ignored. */
13718 rtx ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13719 enum machine_mode mode ATTRIBUTE_UNUSED,
13720 int ignore ATTRIBUTE_UNUSED)
13722 const struct builtin_description *d;
13723 size_t i;
13724 enum insn_code icode;
13725 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13726 tree arglist = TREE_OPERAND (exp, 1);
13727 tree arg0, arg1, arg2;
13728 rtx op0, op1, op2, pat;
13729 enum machine_mode tmode, mode0, mode1, mode2;
13730 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13732 switch (fcode)
13734 case IX86_BUILTIN_EMMS:
13735 emit_insn (gen_emms ());
13736 return 0;
13738 case IX86_BUILTIN_SFENCE:
13739 emit_insn (gen_sfence ());
13740 return 0;
13742 case IX86_BUILTIN_PEXTRW:
13743 case IX86_BUILTIN_PEXTRW128:
13744 icode = (fcode == IX86_BUILTIN_PEXTRW
13745 ? CODE_FOR_mmx_pextrw
13746 : CODE_FOR_sse2_pextrw);
13747 arg0 = TREE_VALUE (arglist);
13748 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13749 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13750 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13751 tmode = insn_data[icode].operand[0].mode;
13752 mode0 = insn_data[icode].operand[1].mode;
13753 mode1 = insn_data[icode].operand[2].mode;
13755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13756 op0 = copy_to_mode_reg (mode0, op0);
13757 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13759 /* @@@ better error message */
13760 error ("selector must be an immediate");
13761 return gen_reg_rtx (tmode);
13763 if (target == 0
13764 || GET_MODE (target) != tmode
13765 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13766 target = gen_reg_rtx (tmode);
13767 pat = GEN_FCN (icode) (target, op0, op1);
13768 if (! pat)
13769 return 0;
13770 emit_insn (pat);
13771 return target;
13773 case IX86_BUILTIN_PINSRW:
13774 case IX86_BUILTIN_PINSRW128:
13775 icode = (fcode == IX86_BUILTIN_PINSRW
13776 ? CODE_FOR_mmx_pinsrw
13777 : CODE_FOR_sse2_pinsrw);
13778 arg0 = TREE_VALUE (arglist);
13779 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13780 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13781 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13782 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13783 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13784 tmode = insn_data[icode].operand[0].mode;
13785 mode0 = insn_data[icode].operand[1].mode;
13786 mode1 = insn_data[icode].operand[2].mode;
13787 mode2 = insn_data[icode].operand[3].mode;
13789 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13790 op0 = copy_to_mode_reg (mode0, op0);
13791 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13792 op1 = copy_to_mode_reg (mode1, op1);
13793 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13795 /* @@@ better error message */
13796 error ("selector must be an immediate");
13797 return const0_rtx;
13799 if (target == 0
13800 || GET_MODE (target) != tmode
13801 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13802 target = gen_reg_rtx (tmode);
13803 pat = GEN_FCN (icode) (target, op0, op1, op2);
13804 if (! pat)
13805 return 0;
13806 emit_insn (pat);
13807 return target;
13809 case IX86_BUILTIN_MASKMOVQ:
13810 case IX86_BUILTIN_MASKMOVDQU:
13811 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13812 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13813 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13814 : CODE_FOR_sse2_maskmovdqu));
13815 /* Note the arg order is different from the operand order. */
13816 arg1 = TREE_VALUE (arglist);
13817 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13818 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13819 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13820 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13821 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13822 mode0 = insn_data[icode].operand[0].mode;
13823 mode1 = insn_data[icode].operand[1].mode;
13824 mode2 = insn_data[icode].operand[2].mode;
13826 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13827 op0 = copy_to_mode_reg (mode0, op0);
13828 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13829 op1 = copy_to_mode_reg (mode1, op1);
13830 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13831 op2 = copy_to_mode_reg (mode2, op2);
13832 pat = GEN_FCN (icode) (op0, op1, op2);
13833 if (! pat)
13834 return 0;
13835 emit_insn (pat);
13836 return 0;
13838 case IX86_BUILTIN_SQRTSS:
13839 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13840 case IX86_BUILTIN_RSQRTSS:
13841 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13842 case IX86_BUILTIN_RCPSS:
13843 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13845 case IX86_BUILTIN_LOADAPS:
13846 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13848 case IX86_BUILTIN_LOADUPS:
13849 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13851 case IX86_BUILTIN_STOREAPS:
13852 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13854 case IX86_BUILTIN_STOREUPS:
13855 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13857 case IX86_BUILTIN_LOADSS:
13858 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13860 case IX86_BUILTIN_STORESS:
13861 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13863 case IX86_BUILTIN_LOADHPS:
13864 case IX86_BUILTIN_LOADLPS:
13865 case IX86_BUILTIN_LOADHPD:
13866 case IX86_BUILTIN_LOADLPD:
13867 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13868 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13869 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13870 : CODE_FOR_sse2_movlpd);
13871 arg0 = TREE_VALUE (arglist);
13872 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13873 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13874 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13875 tmode = insn_data[icode].operand[0].mode;
13876 mode0 = insn_data[icode].operand[1].mode;
13877 mode1 = insn_data[icode].operand[2].mode;
13879 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13880 op0 = copy_to_mode_reg (mode0, op0);
13881 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13882 if (target == 0
13883 || GET_MODE (target) != tmode
13884 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13885 target = gen_reg_rtx (tmode);
13886 pat = GEN_FCN (icode) (target, op0, op1);
13887 if (! pat)
13888 return 0;
13889 emit_insn (pat);
13890 return target;
13892 case IX86_BUILTIN_STOREHPS:
13893 case IX86_BUILTIN_STORELPS:
13894 case IX86_BUILTIN_STOREHPD:
13895 case IX86_BUILTIN_STORELPD:
13896 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13897 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13898 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13899 : CODE_FOR_sse2_movlpd);
13900 arg0 = TREE_VALUE (arglist);
13901 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13902 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13903 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13904 mode0 = insn_data[icode].operand[1].mode;
13905 mode1 = insn_data[icode].operand[2].mode;
13907 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13908 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13909 op1 = copy_to_mode_reg (mode1, op1);
13911 pat = GEN_FCN (icode) (op0, op0, op1);
13912 if (! pat)
13913 return 0;
13914 emit_insn (pat);
13915 return 0;
13917 case IX86_BUILTIN_MOVNTPS:
13918 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13919 case IX86_BUILTIN_MOVNTQ:
13920 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
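    /* ldmxcsr and stmxcsr operate on a 32-bit memory operand, so go
       through a stack slot.  */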
13922 case IX86_BUILTIN_LDMXCSR:
13923 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13924 target = assign_386_stack_local (SImode, 0);
13925 emit_move_insn (target, op0);
13926 emit_insn (gen_ldmxcsr (target));
13927 return 0;
13929 case IX86_BUILTIN_STMXCSR:
13930 target = assign_386_stack_local (SImode, 0);
13931 emit_insn (gen_stmxcsr (target));
13932 return copy_to_mode_reg (SImode, target);
13934 case IX86_BUILTIN_SHUFPS:
13935 case IX86_BUILTIN_SHUFPD:
13936 icode = (fcode == IX86_BUILTIN_SHUFPS
13937 ? CODE_FOR_sse_shufps
13938 : CODE_FOR_sse2_shufpd);
13939 arg0 = TREE_VALUE (arglist);
13940 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13941 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13942 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13943 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13944 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13945 tmode = insn_data[icode].operand[0].mode;
13946 mode0 = insn_data[icode].operand[1].mode;
13947 mode1 = insn_data[icode].operand[2].mode;
13948 mode2 = insn_data[icode].operand[3].mode;
13950 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13951 op0 = copy_to_mode_reg (mode0, op0);
13952 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13953 op1 = copy_to_mode_reg (mode1, op1);
13954 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13956 /* @@@ better error message */
13957 error ("mask must be an immediate");
13958 return gen_reg_rtx (tmode);
13960 if (target == 0
13961 || GET_MODE (target) != tmode
13962 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13963 target = gen_reg_rtx (tmode);
13964 pat = GEN_FCN (icode) (target, op0, op1, op2);
13965 if (! pat)
13966 return 0;
13967 emit_insn (pat);
13968 return target;
13970 case IX86_BUILTIN_PSHUFW:
13971 case IX86_BUILTIN_PSHUFD:
13972 case IX86_BUILTIN_PSHUFHW:
13973 case IX86_BUILTIN_PSHUFLW:
13974 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13975 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13976 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13977 : CODE_FOR_mmx_pshufw);
13978 arg0 = TREE_VALUE (arglist);
13979 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13980 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13981 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13982 tmode = insn_data[icode].operand[0].mode;
13983 mode1 = insn_data[icode].operand[1].mode;
13984 mode2 = insn_data[icode].operand[2].mode;
13986 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13987 op0 = copy_to_mode_reg (mode1, op0);
13988 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13990 /* @@@ better error message */
13991 error ("mask must be an immediate");
13992 return const0_rtx;
13994 if (target == 0
13995 || GET_MODE (target) != tmode
13996 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13997 target = gen_reg_rtx (tmode);
13998 pat = GEN_FCN (icode) (target, op0, op1);
13999 if (! pat)
14000 return 0;
14001 emit_insn (pat);
14002 return target;
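    /* The pslldq/psrldq shifts of the whole 128-bit register are
       expressed as TImode shifts; generate the pattern on a TImode view
       of a V2DImode result register.  */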
14004 case IX86_BUILTIN_PSLLDQI128:
14005 case IX86_BUILTIN_PSRLDQI128:
14006 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14007 : CODE_FOR_sse2_lshrti3);
14008 arg0 = TREE_VALUE (arglist);
14009 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14010 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14011 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14012 tmode = insn_data[icode].operand[0].mode;
14013 mode1 = insn_data[icode].operand[1].mode;
14014 mode2 = insn_data[icode].operand[2].mode;
14016 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14018 op0 = copy_to_reg (op0);
14019 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14021 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14023 error ("shift must be an immediate");
14024 return const0_rtx;
14026 target = gen_reg_rtx (V2DImode);
14027 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14028 if (! pat)
14029 return 0;
14030 emit_insn (pat);
14031 return target;
14033 case IX86_BUILTIN_FEMMS:
14034 emit_insn (gen_femms ());
14035 return NULL_RTX;
14037 case IX86_BUILTIN_PAVGUSB:
14038 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14040 case IX86_BUILTIN_PF2ID:
14041 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14043 case IX86_BUILTIN_PFACC:
14044 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14046 case IX86_BUILTIN_PFADD:
14047 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14049 case IX86_BUILTIN_PFCMPEQ:
14050 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14052 case IX86_BUILTIN_PFCMPGE:
14053 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14055 case IX86_BUILTIN_PFCMPGT:
14056 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14058 case IX86_BUILTIN_PFMAX:
14059 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14061 case IX86_BUILTIN_PFMIN:
14062 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14064 case IX86_BUILTIN_PFMUL:
14065 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14067 case IX86_BUILTIN_PFRCP:
14068 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14070 case IX86_BUILTIN_PFRCPIT1:
14071 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14073 case IX86_BUILTIN_PFRCPIT2:
14074 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14076 case IX86_BUILTIN_PFRSQIT1:
14077 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14079 case IX86_BUILTIN_PFRSQRT:
14080 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14082 case IX86_BUILTIN_PFSUB:
14083 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14085 case IX86_BUILTIN_PFSUBR:
14086 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14088 case IX86_BUILTIN_PI2FD:
14089 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14091 case IX86_BUILTIN_PMULHRW:
14092 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14094 case IX86_BUILTIN_PF2IW:
14095 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14097 case IX86_BUILTIN_PFNACC:
14098 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14100 case IX86_BUILTIN_PFPNACC:
14101 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14103 case IX86_BUILTIN_PI2FW:
14104 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14106 case IX86_BUILTIN_PSWAPDSI:
14107 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14109 case IX86_BUILTIN_PSWAPDSF:
14110 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14112 case IX86_BUILTIN_SSE_ZERO:
14113 target = gen_reg_rtx (V4SFmode);
14114 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14115 return target;
14117 case IX86_BUILTIN_MMX_ZERO:
14118 target = gen_reg_rtx (DImode);
14119 emit_insn (gen_mmx_clrdi (target));
14120 return target;
14122 case IX86_BUILTIN_CLRTI:
14123 target = gen_reg_rtx (V2DImode);
14124 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14125 return target;
14128 case IX86_BUILTIN_SQRTSD:
14129 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14130 case IX86_BUILTIN_LOADAPD:
14131 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14132 case IX86_BUILTIN_LOADUPD:
14133 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14135 case IX86_BUILTIN_STOREAPD:
14136 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14137 case IX86_BUILTIN_STOREUPD:
14138 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14140 case IX86_BUILTIN_LOADSD:
14141 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14143 case IX86_BUILTIN_STORESD:
14144 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
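    /* __builtin_ia32_setpd1 spills its scalar argument to the stack,
       loads it into a V2DF register and duplicates it with shufpd;
       __builtin_ia32_setpd writes both halves of a stack temporary and
       loads the whole vector with movapd.  */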
14146 case IX86_BUILTIN_SETPD1:
14147 target = assign_386_stack_local (DFmode, 0);
14148 arg0 = TREE_VALUE (arglist);
14149 emit_move_insn (adjust_address (target, DFmode, 0),
14150 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14151 op0 = gen_reg_rtx (V2DFmode);
14152 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14153 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14154 return op0;
14156 case IX86_BUILTIN_SETPD:
14157 target = assign_386_stack_local (V2DFmode, 0);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 emit_move_insn (adjust_address (target, DFmode, 0),
14161 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14162 emit_move_insn (adjust_address (target, DFmode, 8),
14163 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14164 op0 = gen_reg_rtx (V2DFmode);
14165 emit_insn (gen_sse2_movapd (op0, target));
14166 return op0;
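    /* __builtin_ia32_loadrpd loads the vector and swaps its halves with
       shufpd; __builtin_ia32_loadpd1 loads one double and duplicates it
       into both halves.  */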
14168 case IX86_BUILTIN_LOADRPD:
14169 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14170 gen_reg_rtx (V2DFmode), 1);
14171 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14172 return target;
14174 case IX86_BUILTIN_LOADPD1:
14175 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14176 gen_reg_rtx (V2DFmode), 1);
14177 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14178 return target;
14180 case IX86_BUILTIN_STOREPD1:
14181 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14182 case IX86_BUILTIN_STORERPD:
14183 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14185 case IX86_BUILTIN_CLRPD:
14186 target = gen_reg_rtx (V2DFmode);
14187 emit_insn (gen_sse_clrv2df (target));
14188 return target;
14190 case IX86_BUILTIN_MFENCE:
14191 emit_insn (gen_sse2_mfence ());
14192 return 0;
14193 case IX86_BUILTIN_LFENCE:
14194 emit_insn (gen_sse2_lfence ());
14195 return 0;
14197 case IX86_BUILTIN_CLFLUSH:
14198 arg0 = TREE_VALUE (arglist);
14199 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14200 icode = CODE_FOR_sse2_clflush;
14201 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14202 op0 = copy_to_mode_reg (Pmode, op0);
14204 emit_insn (gen_sse2_clflush (op0));
14205 return 0;
14207 case IX86_BUILTIN_MOVNTPD:
14208 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14209 case IX86_BUILTIN_MOVNTDQ:
14210 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14211 case IX86_BUILTIN_MOVNTI:
14212 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14214 case IX86_BUILTIN_LOADDQA:
14215 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14216 case IX86_BUILTIN_LOADDQU:
14217 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14218 case IX86_BUILTIN_LOADD:
14219 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14221 case IX86_BUILTIN_STOREDQA:
14222 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14223 case IX86_BUILTIN_STOREDQU:
14224 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14225 case IX86_BUILTIN_STORED:
14226 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
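    /* The monitor instruction reads its operands from eax, ecx and edx,
       and mwait from eax and ecx; here we only force the values into
       registers and rely on the insn patterns to place them.  */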
14228 case IX86_BUILTIN_MONITOR:
14229 arg0 = TREE_VALUE (arglist);
14230 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14231 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14232 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14233 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14234 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14235 if (!REG_P (op0))
14236 op0 = copy_to_mode_reg (SImode, op0);
14237 if (!REG_P (op1))
14238 op1 = copy_to_mode_reg (SImode, op1);
14239 if (!REG_P (op2))
14240 op2 = copy_to_mode_reg (SImode, op2);
14241 emit_insn (gen_monitor (op0, op1, op2));
14242 return 0;
14244 case IX86_BUILTIN_MWAIT:
14245 arg0 = TREE_VALUE (arglist);
14246 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14247 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14248 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14249 if (!REG_P (op0))
14250 op0 = copy_to_mode_reg (SImode, op0);
14251 if (!REG_P (op1))
14252 op1 = copy_to_mode_reg (SImode, op1);
14253 emit_insn (gen_mwait (op0, op1));
14254 return 0;
14256 case IX86_BUILTIN_LOADDDUP:
14257 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14259 case IX86_BUILTIN_LDDQU:
14260 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14263 default:
14264 break;
14267 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14268 if (d->code == fcode)
14270 /* Compares are treated specially. */
14271 if (d->icode == CODE_FOR_maskcmpv4sf3
14272 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14273 || d->icode == CODE_FOR_maskncmpv4sf3
14274 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14275 || d->icode == CODE_FOR_maskcmpv2df3
14276 || d->icode == CODE_FOR_vmmaskcmpv2df3
14277 || d->icode == CODE_FOR_maskncmpv2df3
14278 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14279 return ix86_expand_sse_compare (d, arglist, target);
14281 return ix86_expand_binop_builtin (d->icode, arglist, target);
14284 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14285 if (d->code == fcode)
14286 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14288 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14289 if (d->code == fcode)
14290 return ix86_expand_sse_comi (d, arglist, target);
14292 /* @@@ Should really do something sensible here. */
14293 return 0;
14296 /* Store OPERAND to memory after reload is completed.  This means
14297 that we can't easily use assign_stack_local.  */
14299 rtx ix86_force_to_memory (enum machine_mode mode, rtx operand)
14301 rtx result;
14302 if (!reload_completed)
14303 abort ();
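  /* With a red zone (x86-64) we may use the area just below the stack
     pointer without adjusting the stack pointer itself.  */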
14304 if (TARGET_RED_ZONE)
14306 result = gen_rtx_MEM (mode,
14307 gen_rtx_PLUS (Pmode,
14308 stack_pointer_rtx,
14309 GEN_INT (-RED_ZONE_SIZE)));
14310 emit_move_insn (result, operand);
14312 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14314 switch (mode)
14316 case HImode:
14317 case SImode:
14318 operand = gen_lowpart (DImode, operand);
14319 /* FALLTHRU */
14320 case DImode:
14321 emit_insn (
14322 gen_rtx_SET (VOIDmode,
14323 gen_rtx_MEM (DImode,
14324 gen_rtx_PRE_DEC (DImode,
14325 stack_pointer_rtx)),
14326 operand));
14327 break;
14328 default:
14329 abort ();
14331 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14333 else
14335 switch (mode)
14337 case DImode:
14339 rtx operands[2];
14340 split_di (&operand, 1, operands, operands + 1);
14341 emit_insn (
14342 gen_rtx_SET (VOIDmode,
14343 gen_rtx_MEM (SImode,
14344 gen_rtx_PRE_DEC (Pmode,
14345 stack_pointer_rtx)),
14346 operands[1]));
14347 emit_insn (
14348 gen_rtx_SET (VOIDmode,
14349 gen_rtx_MEM (SImode,
14350 gen_rtx_PRE_DEC (Pmode,
14351 stack_pointer_rtx)),
14352 operands[0]));
14354 break;
14355 case HImode:
14356 /* It is better to store HImodes as SImodes. */
14357 if (!TARGET_PARTIAL_REG_STALL)
14358 operand = gen_lowpart (SImode, operand);
14359 /* FALLTHRU */
14360 case SImode:
14361 emit_insn (
14362 gen_rtx_SET (VOIDmode,
14363 gen_rtx_MEM (GET_MODE (operand),
14364 gen_rtx_PRE_DEC (SImode,
14365 stack_pointer_rtx)),
14366 operand));
14367 break;
14368 default:
14369 abort ();
14371 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14373 return result;
14376 /* Free the operand from memory.  */
14377 void
14378 ix86_free_from_memory (enum machine_mode mode)
14380 if (!TARGET_RED_ZONE)
14382 int size;
14384 if (mode == DImode || TARGET_64BIT)
14385 size = 8;
14386 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14387 size = 2;
14388 else
14389 size = 4;
14390 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14391 to a pop or add instruction if registers are available.  */
14392 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14393 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14394 GEN_INT (size))));
14398 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14399 QImode must go into class Q_REGS.
14400 Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
14401 movdf to do mem-to-mem moves through integer regs.  */
14402 enum reg_class
14403 ix86_preferred_reload_class (rtx x, enum reg_class class)
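  /* Only the zero vector constant can be materialized in a register
     directly; force all other vector constants to memory.  */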
14405 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14406 return NO_REGS;
14407 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14409 /* SSE can't load any constant directly yet. */
14410 if (SSE_CLASS_P (class))
14411 return NO_REGS;
14412 /* Floats can load 0 and 1. */
14413 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14415 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14416 if (MAYBE_SSE_CLASS_P (class))
14417 return (reg_class_subset_p (class, GENERAL_REGS)
14418 ? GENERAL_REGS : FLOAT_REGS);
14419 else
14420 return class;
14422 /* General regs can load everything. */
14423 if (reg_class_subset_p (class, GENERAL_REGS))
14424 return GENERAL_REGS;
14425 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14426 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14427 return NO_REGS;
14429 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14430 return NO_REGS;
14431 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14432 return Q_REGS;
14433 return class;
14436 /* If we are copying between general and FP registers, we need a memory
14437 location. The same is true for SSE and MMX registers.
14439 The macro can't work reliably when one of the CLASSES is a class containing
14440 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14441 combining those units in a single alternative in the machine description.
14442 Ensure that this constraint holds to avoid unexpected surprises.
14444 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14445 enforce these sanity checks. */
14447 int ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14448 enum machine_mode mode, int strict)
14450 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14451 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14452 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14453 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14454 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14455 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14457 if (strict)
14458 abort ();
14459 else
14460 return 1;
14462 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14463 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14464 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14465 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14466 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14468 /* Return the cost of moving data from a register in class CLASS1 to
14469 one in class CLASS2.
14471 It is not required that the cost always equal 2 when FROM is the same as TO;
14472 on some machines it is expensive to move between registers if they are not
14473 general registers. */
14475 int ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14476 enum reg_class class2)
14478 /* In case we require secondary memory, compute the cost of the store followed
14479 by the load.  In order to avoid bad register allocation choices, we need
14480 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14482 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14484 int cost = 1;
14486 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14487 MEMORY_MOVE_COST (mode, class1, 1));
14488 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14489 MEMORY_MOVE_COST (mode, class2, 1));
14491 /* When copying from a general purpose register we may emit multiple
14492 stores followed by a single load, causing a memory size mismatch stall.
14493 Count this as an arbitrarily high cost of 20.  */
14494 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14495 cost += 20;
14497 /* In the case of FP/MMX moves, the registers actually overlap, and we
14498 have to switch modes in order to treat them differently. */
14499 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14500 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14501 cost += 20;
14503 return cost;
14506 /* Moves between the SSE/MMX units and the integer unit are expensive.  */
14507 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14508 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14509 return ix86_cost->mmxsse_to_integer;
14510 if (MAYBE_FLOAT_CLASS_P (class1))
14511 return ix86_cost->fp_move;
14512 if (MAYBE_SSE_CLASS_P (class1))
14513 return ix86_cost->sse_move;
14514 if (MAYBE_MMX_CLASS_P (class1))
14515 return ix86_cost->mmx_move;
14516 return 2;
14519 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14521 int ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14523 /* The flags registers can hold only CCmode values, and no other register can hold them.  */
14524 if (CC_REGNO_P (regno))
14525 return GET_MODE_CLASS (mode) == MODE_CC;
14526 if (GET_MODE_CLASS (mode) == MODE_CC
14527 || GET_MODE_CLASS (mode) == MODE_RANDOM
14528 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14529 return 0;
14530 if (FP_REGNO_P (regno))
14531 return VALID_FP_MODE_P (mode);
14532 if (SSE_REGNO_P (regno))
14533 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14534 if (MMX_REGNO_P (regno))
14535 return (TARGET_MMX
14536 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14537 /* We handle both integer and float values in the general purpose registers.
14538 In the future we should be able to handle vector modes as well.  */
14539 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14540 return 0;
14541 /* Take care with QImode values - they can live in non-QI regs, but they
14542 then cause partial register stalls.  */
14543 if (regno < 4 || mode != QImode || TARGET_64BIT)
14544 return 1;
14545 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
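/* Illustrative note (not part of the original file): on 32-bit targets only
   the first four general registers (%eax, %edx, %ecx, %ebx, i.e. regno < 4)
   have QImode low parts, so the check above accepts a QImode value in, say,
   %esi only once reload has started or when TARGET_PARTIAL_REG_STALL is off;
   on 64-bit targets any general register qualifies.  */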
14548 /* Return the cost of moving data of mode M between a
14549 register and memory. A value of 2 is the default; this cost is
14550 relative to those in `REGISTER_MOVE_COST'.
14552 If moving between registers and memory is more expensive than
14553 between two registers, you should define this macro to express the
14554 relative cost.
14556 Also model the increased cost of moving QImode registers in non
14557 Q_REGS classes. */
14560 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14562 if (FLOAT_CLASS_P (class))
14564 int index;
14565 switch (mode)
14567 case SFmode:
14568 index = 0;
14569 break;
14570 case DFmode:
14571 index = 1;
14572 break;
14573 case XFmode:
14574 case TFmode:
14575 index = 2;
14576 break;
14577 default:
14578 return 100;
14580 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14582 if (SSE_CLASS_P (class))
14584 int index;
14585 switch (GET_MODE_SIZE (mode))
14587 case 4:
14588 index = 0;
14589 break;
14590 case 8:
14591 index = 1;
14592 break;
14593 case 16:
14594 index = 2;
14595 break;
14596 default:
14597 return 100;
14599 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14601 if (MMX_CLASS_P (class))
14603 int index;
14604 switch (GET_MODE_SIZE (mode))
14606 case 4:
14607 index = 0;
14608 break;
14609 case 8:
14610 index = 1;
14611 break;
14612 default:
14613 return 100;
14615 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14617 switch (GET_MODE_SIZE (mode))
14619 case 1:
14620 if (in)
14621 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14622 : ix86_cost->movzbl_load);
14623 else
14624 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14625 : ix86_cost->int_store[0] + 4);
14626 break;
14627 case 2:
14628 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14629 default:
14630 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14631 if (mode == TFmode)
14632 mode = XFmode;
14633 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14634 * (((int) GET_MODE_SIZE (mode)
14635 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
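/* Illustrative note (not part of the original file): for an integer class
   and a mode wider than two bytes the cost above scales with the number of
   word-sized moves, e.g. DImode on a 32-bit target costs
   2 * (in ? int_load[2] : int_store[2]) because GET_MODE_SIZE (DImode) is 8
   and UNITS_PER_WORD is 4 there.  */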
14639 /* Compute a (partial) cost for rtx X. Return true if the complete
14640 cost has been computed, and false if subexpressions should be
14641 scanned. In either case, *TOTAL contains the cost result. */
14643 static bool
14644 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14646 enum machine_mode mode = GET_MODE (x);
14648 switch (code)
14650 case CONST_INT:
14651 case CONST:
14652 case LABEL_REF:
14653 case SYMBOL_REF:
14654 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14655 *total = 3;
14656 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14657 *total = 2;
14658 else if (flag_pic && SYMBOLIC_CONST (x)
14659 && (!TARGET_64BIT
14660 || (GET_CODE (x) != LABEL_REF
14661 && (GET_CODE (x) != SYMBOL_REF
14662 || !SYMBOL_REF_LOCAL_P (x)))))
14663 *total = 1;
14664 else
14665 *total = 0;
14666 return true;
14668 case CONST_DOUBLE:
14669 if (mode == VOIDmode)
14670 *total = 0;
14671 else
14672 switch (standard_80387_constant_p (x))
14674 case 1: /* 0.0 */
14675 *total = 1;
14676 break;
14677 default: /* Other constants */
14678 *total = 2;
14679 break;
14680 case 0:
14681 case -1:
14682 /* Start with (MEM (SYMBOL_REF)), since that's where
14683 it'll probably end up. Add a penalty for size. */
14684 *total = (COSTS_N_INSNS (1)
14685 + (flag_pic != 0 && !TARGET_64BIT)
14686 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14687 break;
14689 return true;
14691 case ZERO_EXTEND:
14692 /* The zero extension is often completely free on x86_64, so make
14693 it as cheap as possible. */
14694 if (TARGET_64BIT && mode == DImode
14695 && GET_MODE (XEXP (x, 0)) == SImode)
14696 *total = 1;
14697 else if (TARGET_ZERO_EXTEND_WITH_AND)
14698 *total = COSTS_N_INSNS (ix86_cost->add);
14699 else
14700 *total = COSTS_N_INSNS (ix86_cost->movzx);
14701 return false;
14703 case SIGN_EXTEND:
14704 *total = COSTS_N_INSNS (ix86_cost->movsx);
14705 return false;
14707 case ASHIFT:
14708 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14709 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14711 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14712 if (value == 1)
14714 *total = COSTS_N_INSNS (ix86_cost->add);
14715 return false;
14717 if ((value == 2 || value == 3)
14718 && !TARGET_DECOMPOSE_LEA
14719 && ix86_cost->lea <= ix86_cost->shift_const)
14721 *total = COSTS_N_INSNS (ix86_cost->lea);
14722 return false;
14725 /* FALLTHRU */
14727 case ROTATE:
14728 case ASHIFTRT:
14729 case LSHIFTRT:
14730 case ROTATERT:
14731 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14733 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14735 if (INTVAL (XEXP (x, 1)) > 32)
14736 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14737 else
14738 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14740 else
14742 if (GET_CODE (XEXP (x, 1)) == AND)
14743 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14744 else
14745 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14748 else
14750 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14751 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14752 else
14753 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14755 return false;
14757 case MULT:
14758 if (FLOAT_MODE_P (mode))
14759 *total = COSTS_N_INSNS (ix86_cost->fmul);
14760 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14762 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14763 int nbits;
14765 for (nbits = 0; value != 0; value >>= 1)
14766 nbits++;
14768 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14769 + nbits * ix86_cost->mult_bit);
14771 else
14773 /* This is arbitrary */
14774 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14775 + 7 * ix86_cost->mult_bit);
14777 return false;
14779 case DIV:
14780 case UDIV:
14781 case MOD:
14782 case UMOD:
14783 if (FLOAT_MODE_P (mode))
14784 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14785 else
14786 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14787 return false;
14789 case PLUS:
14790 if (FLOAT_MODE_P (mode))
14791 *total = COSTS_N_INSNS (ix86_cost->fadd);
14792 else if (!TARGET_DECOMPOSE_LEA
14793 && GET_MODE_CLASS (mode) == MODE_INT
14794 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14796 if (GET_CODE (XEXP (x, 0)) == PLUS
14797 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14798 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14799 && CONSTANT_P (XEXP (x, 1)))
14801 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14802 if (val == 2 || val == 4 || val == 8)
14804 *total = COSTS_N_INSNS (ix86_cost->lea);
14805 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14806 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14807 outer_code);
14808 *total += rtx_cost (XEXP (x, 1), outer_code);
14809 return true;
14812 else if (GET_CODE (XEXP (x, 0)) == MULT
14813 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14815 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14816 if (val == 2 || val == 4 || val == 8)
14818 *total = COSTS_N_INSNS (ix86_cost->lea);
14819 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14820 *total += rtx_cost (XEXP (x, 1), outer_code);
14821 return true;
14824 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14826 *total = COSTS_N_INSNS (ix86_cost->lea);
14827 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14828 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14829 *total += rtx_cost (XEXP (x, 1), outer_code);
14830 return true;
14833 /* FALLTHRU */
14835 case MINUS:
14836 if (FLOAT_MODE_P (mode))
14838 *total = COSTS_N_INSNS (ix86_cost->fadd);
14839 return false;
14841 /* FALLTHRU */
14843 case AND:
14844 case IOR:
14845 case XOR:
14846 if (!TARGET_64BIT && mode == DImode)
14848 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14849 + (rtx_cost (XEXP (x, 0), outer_code)
14850 << (GET_MODE (XEXP (x, 0)) != DImode))
14851 + (rtx_cost (XEXP (x, 1), outer_code)
14852 << (GET_MODE (XEXP (x, 1)) != DImode)));
14853 return true;
14855 /* FALLTHRU */
14857 case NEG:
14858 if (FLOAT_MODE_P (mode))
14860 *total = COSTS_N_INSNS (ix86_cost->fchs);
14861 return false;
14863 /* FALLTHRU */
14865 case NOT:
14866 if (!TARGET_64BIT && mode == DImode)
14867 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14868 else
14869 *total = COSTS_N_INSNS (ix86_cost->add);
14870 return false;
14872 case FLOAT_EXTEND:
14873 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14874 *total = 0;
14875 return false;
14877 case ABS:
14878 if (FLOAT_MODE_P (mode))
14879 *total = COSTS_N_INSNS (ix86_cost->fabs);
14880 return false;
14882 case SQRT:
14883 if (FLOAT_MODE_P (mode))
14884 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14885 return false;
14887 case UNSPEC:
14888 if (XINT (x, 1) == UNSPEC_TP)
14889 *total = 0;
14890 return false;
14892 default:
14893 return false;
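/* Illustrative note (not part of the original file): in the MULT case above
   a multiply by a constant is charged mult_init[MODE_INDEX (mode)] plus
   mult_bit per bit up to the constant's highest set bit, so x * 10 (binary
   1010, four bits scanned) in SImode costs
   mult_init[MODE_INDEX (SImode)] + 4 * mult_bit; a multiply by a
   non-constant uses 7 as an arbitrary bit count.  */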
14897 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14898 static void
14899 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14901 init_section ();
14902 fputs ("\tpushl $", asm_out_file);
14903 assemble_name (asm_out_file, XSTR (symbol, 0));
14904 fputc ('\n', asm_out_file);
14906 #endif
14908 #if TARGET_MACHO
14910 static int current_machopic_label_num;
14912 /* Given a symbol name and its associated stub, write out the
14913 definition of the stub. */
14915 void
14916 machopic_output_stub (FILE *file, const char *symb, const char *stub)
14918 unsigned int length;
14919 char *binder_name, *symbol_name, lazy_ptr_name[32];
14920 int label = ++current_machopic_label_num;
14922 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14923 symb = (*targetm.strip_name_encoding) (symb);
14925 length = strlen (stub);
14926 binder_name = alloca (length + 32);
14927 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14929 length = strlen (symb);
14930 symbol_name = alloca (length + 32);
14931 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14933 sprintf (lazy_ptr_name, "L%d$lz", label);
14935 if (MACHOPIC_PURE)
14936 machopic_picsymbol_stub_section ();
14937 else
14938 machopic_symbol_stub_section ();
14940 fprintf (file, "%s:\n", stub);
14941 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14943 if (MACHOPIC_PURE)
14945 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14946 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14947 fprintf (file, "\tjmp %%edx\n");
14949 else
14950 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14952 fprintf (file, "%s:\n", binder_name);
14954 if (MACHOPIC_PURE)
14956 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14957 fprintf (file, "\tpushl %%eax\n");
14959 else
14960 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14962 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14964 machopic_lazy_symbol_ptr_section ();
14965 fprintf (file, "%s:\n", lazy_ptr_name);
14966 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14967 fprintf (file, "\t.long %s\n", binder_name);
14969 #endif /* TARGET_MACHO */
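/* Illustrative sketch (not part of the original file): for a hypothetical
   symbol whose stub is named "L_foo$stub" and whose label number is 1, the
   non-MACHOPIC_PURE path above emits roughly

	L_foo$stub:
		.indirect_symbol <symbol name>
		jmp	*L1$lz
	<binder name>:
		pushl	$L1$lz
		jmp	dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol <symbol name>
		.long	<binder name>

   where <symbol name> and <binder name> come from GEN_SYMBOL_NAME_FOR_SYMBOL
   and GEN_BINDER_NAME_FOR_STUB; the concrete names are assumptions here.  */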
14971 /* Order the registers for register allocator. */
14973 void
14974 x86_order_regs_for_local_alloc (void)
14976 int pos = 0;
14977 int i;
14979 /* First allocate the local general purpose registers. */
14980 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14981 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14982 reg_alloc_order [pos++] = i;
14984 /* Global general purpose registers. */
14985 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14986 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14987 reg_alloc_order [pos++] = i;
14989 /* x87 registers come first in case we are doing FP math
14990 using them. */
14991 if (!TARGET_SSE_MATH)
14992 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14993 reg_alloc_order [pos++] = i;
14995 /* SSE registers. */
14996 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14997 reg_alloc_order [pos++] = i;
14998 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14999 reg_alloc_order [pos++] = i;
15001 /* x87 registers. */
15002 if (TARGET_SSE_MATH)
15003 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15004 reg_alloc_order [pos++] = i;
15006 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15007 reg_alloc_order [pos++] = i;
15009 /* Initialize the rest of the array, as we do not allocate some registers
15010 at all. */
15011 while (pos < FIRST_PSEUDO_REGISTER)
15012 reg_alloc_order [pos++] = 0;
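/* Illustrative note (not part of the original file): the resulting order is
   call-clobbered general registers, call-saved general registers, the x87
   stack registers when they are used for FP math (!TARGET_SSE_MATH), SSE and
   REX SSE registers, the x87 registers otherwise, MMX registers, and finally
   zero-filled slots for registers that are never allocated.  */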
15015 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15016 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15017 #endif
15019 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15020 struct attribute_spec.handler. */
15021 static tree
15022 ix86_handle_struct_attribute (tree *node, tree name,
15023 tree args ATTRIBUTE_UNUSED,
15024 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15026 tree *type = NULL;
15027 if (DECL_P (*node))
15029 if (TREE_CODE (*node) == TYPE_DECL)
15030 type = &TREE_TYPE (*node);
15032 else
15033 type = node;
15035 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15036 || TREE_CODE (*type) == UNION_TYPE)))
15038 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15039 *no_add_attrs = true;
15042 else if ((is_attribute_p ("ms_struct", name)
15043 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15044 || ((is_attribute_p ("gcc_struct", name)
15045 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15047 warning ("`%s' incompatible attribute ignored",
15048 IDENTIFIER_POINTER (name));
15049 *no_add_attrs = true;
15052 return NULL_TREE;
15055 static bool
15056 ix86_ms_bitfield_layout_p (tree record_type)
15058 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15059 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15060 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15063 /* Returns an expression indicating where the this parameter is
15064 located on entry to the FUNCTION. */
15066 static rtx
15067 x86_this_parameter (tree function)
15069 tree type = TREE_TYPE (function);
15071 if (TARGET_64BIT)
15073 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15074 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15077 if (ix86_fntype_regparm (type) > 0)
15079 tree parm;
15081 parm = TYPE_ARG_TYPES (type);
15082 /* Figure out whether or not the function has a variable number of
15083 arguments. */
15084 for (; parm; parm = TREE_CHAIN (parm))
15085 if (TREE_VALUE (parm) == void_type_node)
15086 break;
15087 /* If not, the this parameter is in %eax. */
15088 if (parm)
15089 return gen_rtx_REG (SImode, 0);
15092 if (aggregate_value_p (TREE_TYPE (type)))
15093 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15094 else
15095 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
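/* Illustrative note (not part of the original file): for a 32-bit method
   compiled with a nonzero regparm and a fixed argument list, the code above
   places `this' in %eax; with a variable argument list, or with regparm 0,
   it lives on the stack at 4(%esp), or at 8(%esp) when the return value is
   an aggregate passed through a hidden return pointer.  */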
15098 /* Determine whether x86_output_mi_thunk can succeed. */
15100 static bool
15101 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15102 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15103 HOST_WIDE_INT vcall_offset, tree function)
15105 /* 64-bit can handle anything. */
15106 if (TARGET_64BIT)
15107 return true;
15109 /* For 32-bit, everything's fine if we have one free register. */
15110 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15111 return true;
15113 /* Need a free register for vcall_offset. */
15114 if (vcall_offset)
15115 return false;
15117 /* Need a free register for GOT references. */
15118 if (flag_pic && !(*targetm.binds_local_p) (function))
15119 return false;
15121 /* Otherwise ok. */
15122 return true;
15125 /* Output the assembler code for a thunk function. THUNK_DECL is the
15126 declaration for the thunk function itself, FUNCTION is the decl for
15127 the target function. DELTA is an immediate constant offset to be
15128 added to THIS. If VCALL_OFFSET is nonzero, the word at
15129 *(*this + vcall_offset) should be added to THIS. */
15131 static void
15132 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15133 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15134 HOST_WIDE_INT vcall_offset, tree function)
15136 rtx xops[3];
15137 rtx this = x86_this_parameter (function);
15138 rtx this_reg, tmp;
15140 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15141 pull it in now and let DELTA benefit. */
15142 if (REG_P (this))
15143 this_reg = this;
15144 else if (vcall_offset)
15146 /* Put the this parameter into %eax. */
15147 xops[0] = this;
15148 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15149 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15151 else
15152 this_reg = NULL_RTX;
15154 /* Adjust the this parameter by a fixed constant. */
15155 if (delta)
15157 xops[0] = GEN_INT (delta);
15158 xops[1] = this_reg ? this_reg : this;
15159 if (TARGET_64BIT)
15161 if (!x86_64_general_operand (xops[0], DImode))
15163 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15164 xops[1] = tmp;
15165 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15166 xops[0] = tmp;
15167 xops[1] = this;
15169 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15171 else
15172 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15175 /* Adjust the this parameter by a value stored in the vtable. */
15176 if (vcall_offset)
15178 if (TARGET_64BIT)
15179 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15180 else
15181 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15183 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15184 xops[1] = tmp;
15185 if (TARGET_64BIT)
15186 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15187 else
15188 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15190 /* Adjust the this parameter. */
15191 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15192 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15194 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15195 xops[0] = GEN_INT (vcall_offset);
15196 xops[1] = tmp2;
15197 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15198 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15200 xops[1] = this_reg;
15201 if (TARGET_64BIT)
15202 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15203 else
15204 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15207 /* If necessary, drop THIS back to its stack slot. */
15208 if (this_reg && this_reg != this)
15210 xops[0] = this_reg;
15211 xops[1] = this;
15212 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15215 xops[0] = XEXP (DECL_RTL (function), 0);
15216 if (TARGET_64BIT)
15218 if (!flag_pic || (*targetm.binds_local_p) (function))
15219 output_asm_insn ("jmp\t%P0", xops);
15220 else
15222 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15223 tmp = gen_rtx_CONST (Pmode, tmp);
15224 tmp = gen_rtx_MEM (QImode, tmp);
15225 xops[0] = tmp;
15226 output_asm_insn ("jmp\t%A0", xops);
15229 else
15231 if (!flag_pic || (*targetm.binds_local_p) (function))
15232 output_asm_insn ("jmp\t%P0", xops);
15233 else
15234 #if TARGET_MACHO
15235 if (TARGET_MACHO)
15237 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15238 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15239 tmp = gen_rtx_MEM (QImode, tmp);
15240 xops[0] = tmp;
15241 output_asm_insn ("jmp\t%0", xops);
15243 else
15244 #endif /* TARGET_MACHO */
15246 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15247 output_set_got (tmp);
15249 xops[1] = tmp;
15250 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15251 output_asm_insn ("jmp\t{*}%1", xops);
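/* Illustrative note (not part of the original file): the emitted thunk first
   adds DELTA to the incoming `this' (using a scratch register on 64-bit when
   DELTA is not a valid immediate), then, if VCALL_OFFSET is nonzero, adds the
   value loaded from *(*this + vcall_offset), stores `this' back to its stack
   slot when it did not arrive in a register, and finally tail-jumps to
   FUNCTION - directly, through the GOT, or through a Mach-O stub when the
   target does not bind locally.  */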
15256 static void
15257 x86_file_start (void)
15259 default_file_start ();
15260 if (X86_FILE_START_VERSION_DIRECTIVE)
15261 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15262 if (X86_FILE_START_FLTUSED)
15263 fputs ("\t.global\t__fltused\n", asm_out_file);
15264 if (ix86_asm_dialect == ASM_INTEL)
15265 fputs ("\t.intel_syntax\n", asm_out_file);
15269 x86_field_alignment (tree field, int computed)
15271 enum machine_mode mode;
15272 tree type = TREE_TYPE (field);
15274 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15275 return computed;
15276 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15277 ? get_inner_array_type (type) : type);
15278 if (mode == DFmode || mode == DCmode
15279 || GET_MODE_CLASS (mode) == MODE_INT
15280 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15281 return MIN (32, computed);
15282 return computed;
15285 /* Output assembler code to FILE to increment profiler label # LABELNO
15286 for profiling a function entry. */
15287 void
15288 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15290 if (TARGET_64BIT)
15291 if (flag_pic)
15293 #ifndef NO_PROFILE_COUNTERS
15294 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15295 #endif
15296 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15298 else
15300 #ifndef NO_PROFILE_COUNTERS
15301 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15302 #endif
15303 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15305 else if (flag_pic)
15307 #ifndef NO_PROFILE_COUNTERS
15308 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15309 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15310 #endif
15311 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15313 else
15315 #ifndef NO_PROFILE_COUNTERS
15316 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15317 PROFILE_COUNT_REGISTER);
15318 #endif
15319 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15323 /* We don't have exact information about the insn sizes, but we may assume
15324 quite safely that we are informed about all 1 byte insns and memory
15325 address sizes. This is enough to eliminate unnecessary padding in
15326 99% of cases. */
15328 static int
15329 min_insn_size (rtx insn)
15331 int l = 0;
15333 if (!INSN_P (insn) || !active_insn_p (insn))
15334 return 0;
15336 /* Discard alignments we've emitted and jump table insns. */
15337 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15338 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15339 return 0;
15340 if (GET_CODE (insn) == JUMP_INSN
15341 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15342 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15343 return 0;
15345 /* Important case - calls are always 5 bytes.
15346 It is common to have many calls in a row. */
15347 if (GET_CODE (insn) == CALL_INSN
15348 && symbolic_reference_mentioned_p (PATTERN (insn))
15349 && !SIBLING_CALL_P (insn))
15350 return 5;
15351 if (get_attr_length (insn) <= 1)
15352 return 1;
15354 /* For normal instructions we may rely on the sizes of addresses
15355 and the presence of a symbol to require 4 bytes of encoding.
15356 This is not the case for jumps, where references are PC relative. */
15357 if (GET_CODE (insn) != JUMP_INSN)
15359 l = get_attr_length_address (insn);
15360 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15361 l = 4;
15363 if (l)
15364 return 1+l;
15365 else
15366 return 2;
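/* Illustrative note (not part of the original file): a direct call to a
   symbol is counted as exactly 5 bytes above, a known 1-byte insn as 1, and
   other insns as one byte plus their recorded address length (raised to at
   least 4 for non-jump insns that mention a symbol), or 2 when no address
   length is available.  */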
15369 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15370 16-byte window. */
15372 static void
15373 k8_avoid_jump_misspredicts (void)
15375 rtx insn, start = get_insns ();
15376 int nbytes = 0, njumps = 0;
15377 int isjump = 0;
15379 /* Look for all minimal intervals of instructions containing 4 jumps.
15380 The intervals are bounded by START and INSN. NBYTES is the total
15381 size of the instructions in the interval, including INSN and not including
15382 START. When NBYTES is smaller than 16 bytes, it is possible
15383 that the ends of START and INSN fall within the same 16-byte page.
15385 The smallest offset in the page at which INSN can start is the case where
15386 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15387 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
15389 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15392 nbytes += min_insn_size (insn);
15393 if (rtl_dump_file)
15394 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15395 INSN_UID (insn), min_insn_size (insn));
15396 if ((GET_CODE (insn) == JUMP_INSN
15397 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15398 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15399 || GET_CODE (insn) == CALL_INSN)
15400 njumps++;
15401 else
15402 continue;
15404 while (njumps > 3)
15406 start = NEXT_INSN (start);
15407 if ((GET_CODE (start) == JUMP_INSN
15408 && GET_CODE (PATTERN (start)) != ADDR_VEC
15409 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15410 || GET_CODE (start) == CALL_INSN)
15411 njumps--, isjump = 1;
15412 else
15413 isjump = 0;
15414 nbytes -= min_insn_size (start);
15416 if (njumps < 0)
15417 abort ();
15418 if (rtl_dump_file)
15419 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15420 INSN_UID (start), INSN_UID (insn), nbytes);
15422 if (njumps == 3 && isjump && nbytes < 16)
15424 int padsize = 15 - nbytes + min_insn_size (insn);
15426 if (rtl_dump_file)
15427 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15428 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
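/* Illustrative note (not part of the original file): if three jumps have
   already been seen and the current insn is a fourth jump while the window
   holds, say, nbytes == 12 with a 2-byte insn, the code above pads by
   15 - 12 + 2 = 5 bytes so the four jumps can no longer share one 16-byte
   window.  The concrete sizes are assumptions for illustration.  */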
15433 /* Implement machine specific optimizations.
15434 At the moment we implement a single transformation: the AMD Athlon works
15435 faster when RET is not the destination of a conditional jump or directly
15436 preceded by another jump instruction. We avoid the penalty by inserting a
15437 NOP just before the RET instruction in such cases. */
15438 static void
15439 ix86_reorg (void)
15441 edge e;
15443 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15444 return;
15445 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15447 basic_block bb = e->src;
15448 rtx ret = bb->end;
15449 rtx prev;
15450 bool replace = false;
15452 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15453 || !maybe_hot_bb_p (bb))
15454 continue;
15455 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15456 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15457 break;
15458 if (prev && GET_CODE (prev) == CODE_LABEL)
15460 edge e;
15461 for (e = bb->pred; e; e = e->pred_next)
15462 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15463 && !(e->flags & EDGE_FALLTHRU))
15464 replace = true;
15466 if (!replace)
15468 prev = prev_active_insn (ret);
15469 if (prev
15470 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15471 || GET_CODE (prev) == CALL_INSN))
15472 replace = true;
15473 /* Empty functions get a branch mispredict even when the jump destination
15474 is not visible to us. */
15475 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15476 replace = true;
15478 if (replace)
15480 emit_insn_before (gen_return_internal_long (), ret);
15481 delete_insn (ret);
15484 k8_avoid_jump_misspredicts ();
15487 /* Return nonzero when a QImode register that must be represented via a
15488 REX prefix is used. */
15489 bool
15490 x86_extended_QIreg_mentioned_p (rtx insn)
15492 int i;
15493 extract_insn_cached (insn);
15494 for (i = 0; i < recog_data.n_operands; i++)
15495 if (REG_P (recog_data.operand[i])
15496 && REGNO (recog_data.operand[i]) >= 4)
15497 return true;
15498 return false;
15501 /* Return nonzero when P points to a register encoded via a REX prefix.
15502 Called via for_each_rtx. */
15503 static int
15504 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15506 unsigned int regno;
15507 if (!REG_P (*p))
15508 return 0;
15509 regno = REGNO (*p);
15510 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15513 /* Return true when INSN mentions a register that must be encoded using a
15514 REX prefix. */
15515 bool
15516 x86_extended_reg_mentioned_p (rtx insn)
15518 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15521 /* Generate an unsigned DImode to FP conversion. This is the same code
15522 optabs would emit if we didn't have TFmode patterns. */
15524 void
15525 x86_emit_floatuns (rtx operands[2])
15527 rtx neglab, donelab, i0, i1, f0, in, out;
15528 enum machine_mode mode;
15530 out = operands[0];
15531 in = force_reg (DImode, operands[1]);
15532 mode = GET_MODE (out);
15533 neglab = gen_label_rtx ();
15534 donelab = gen_label_rtx ();
15535 i1 = gen_reg_rtx (Pmode);
15536 f0 = gen_reg_rtx (mode);
15538 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15540 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15541 emit_jump_insn (gen_jump (donelab));
15542 emit_barrier ();
15544 emit_label (neglab);
15546 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15547 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15548 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15549 expand_float (f0, i0, 0);
15550 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15552 emit_label (donelab);
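/* Illustrative note (not part of the original file): for inputs with the top
   bit set the code above shifts the value right by one with the low bit ORed
   back in (so the discarded bit still influences rounding), converts the
   halved value as a signed number, and then doubles the result with f0 + f0,
   the usual halve-and-double trick for unsigned-to-float conversion.  */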
15555 /* Return true if we do not know how to pass TYPE solely in registers. */
15556 bool
15557 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15559 if (default_must_pass_in_stack (mode, type))
15560 return true;
15561 return (!TARGET_64BIT && type && mode == TImode);
15564 #include "gt-i386.h"