* i386.c (ix86_expand_int_movcc): Fix setcc sign bit case.
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
51 #endif
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
59 : 4)
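/* For example, MODE_INDEX (SImode) is 2, so indexing a per-mode cost array
   below (such as the "cost of starting a multiply" entries of a struct
   processor_costs) with MODE_INDEX (SImode) selects the SImode column.  */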
61 /* Processor costs (relative to an add) */
62 static const
63 struct processor_costs size_cost = { /* costs for tuning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
73 0, /* "large" insn */
74 2, /* MOVE_RATIO */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
97 1, /* Branch cost */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
107 static const
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
119 3, /* MOVE_RATIO */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
142 1, /* Branch cost */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
151 static const
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
163 3, /* MOVE_RATIO */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
186 1, /* Branch cost */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
195 static const
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
207 6, /* MOVE_RATIO */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
230 2, /* Branch cost */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
239 static const
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
251 6, /* MOVE_RATIO */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
274 2, /* Branch cost */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
283 static const
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
295 4, /* MOVE_RATIO */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
318 1, /* Branch cost */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
327 static const
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
339 9, /* MOVE_RATIO */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
362 2, /* Branch cost */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
371 static const
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
383 9, /* MOVE_RATIO */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
406 2, /* Branch cost */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
415 static const
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
427 6, /* MOVE_RATIO */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
450 2, /* Branch cost */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
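/* A tuning mask below applies to the CPU being tuned for when its bit is set,
   e.g. a feature guarded by x86_use_leave is active when
   (x86_use_leave & (1 << ix86_cpu)) is nonzero; the same kind of test against
   ix86_arch, or the CPUMASK shorthand, appears later in this file.  */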
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just the lower part of
515 scalar values in the proper format, leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 /* If the average insn count for a single function invocation is
526 lower than this constant, emit fast (but longer) prologue and
527 epilogue code. */
528 #define FAST_PROLOGUE_INSN_COUNT 20
530 /* Set by prologue expander and used by epilogue expander to determine
531 the style used. */
532 static int use_fast_prologue_epilogue;
534 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
535 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
536 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
537 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
539 /* Array of the smallest class containing reg number REGNO, indexed by
540 REGNO. Used by REGNO_REG_CLASS in i386.h. */
542 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 /* ax, dx, cx, bx */
545 AREG, DREG, CREG, BREG,
546 /* si, di, bp, sp */
547 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 /* FP registers */
549 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
550 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
551 /* arg pointer */
552 NON_Q_REGS,
553 /* flags, fpsr, dirflag, frame */
554 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
555 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 SSE_REGS, SSE_REGS,
557 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 MMX_REGS, MMX_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
562 SSE_REGS, SSE_REGS,
565 /* The "default" register map used in 32bit mode. */
567 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
569 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
570 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
571 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
572 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
573 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
578 static int const x86_64_int_parameter_registers[6] =
580 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
581 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
584 static int const x86_64_int_return_registers[4] =
586 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
589 /* The "default" register map used in 64bit mode. */
590 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
592 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
593 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
594 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
595 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
596 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
597 8,9,10,11,12,13,14,15, /* extended integer registers */
598 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
601 /* Define the register numbers to be used in Dwarf debugging information.
602 The SVR4 reference port C compiler uses the following register numbers
603 in its Dwarf output code:
604 0 for %eax (gcc regno = 0)
605 1 for %ecx (gcc regno = 2)
606 2 for %edx (gcc regno = 1)
607 3 for %ebx (gcc regno = 3)
608 4 for %esp (gcc regno = 7)
609 5 for %ebp (gcc regno = 6)
610 6 for %esi (gcc regno = 4)
611 7 for %edi (gcc regno = 5)
612 The following three DWARF register numbers are never generated by
613 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
614 believes these numbers have these meanings.
615 8 for %eip (no gcc equivalent)
616 9 for %eflags (gcc regno = 17)
617 10 for %trapno (no gcc equivalent)
618 It is not at all clear how we should number the FP stack registers
619 for the x86 architecture. If the version of SDB on x86/svr4 were
620 a bit less brain dead with respect to floating-point then we would
621 have a precedent to follow with respect to DWARF register numbers
622 for x86 FP registers, but the SDB on x86/svr4 is so completely
623 broken with respect to FP registers that it is hardly worth thinking
624 of it as something to strive for compatibility with.
625 The version of x86/svr4 SDB I have at the moment does (partially)
626 seem to believe that DWARF register number 11 is associated with
627 the x86 register %st(0), but that's about all. Higher DWARF
628 register numbers don't seem to be associated with anything in
629 particular, and even for DWARF regno 11, SDB only seems to under-
630 stand that it should say that a variable lives in %st(0) (when
631 asked via an `=' command) if we said it was in DWARF regno 11,
632 but SDB still prints garbage when asked for the value of the
633 variable in question (via a `/' command).
634 (Also note that the labels SDB prints for various FP stack regs
635 when doing an `x' command are all wrong.)
636 Note that these problems generally don't affect the native SVR4
637 C compiler because it doesn't allow the use of -O with -g and
638 because when it is *not* optimizing, it allocates a memory
639 location for each floating-point variable, and the memory
640 location is what gets described in the DWARF AT_location
641 attribute for the variable in question.
642 Regardless of the severe mental illness of the x86/svr4 SDB, we
643 do something sensible here and we use the following DWARF
644 register numbers. Note that these are all stack-top-relative
645 numbers.
646 11 for %st(0) (gcc regno = 8)
647 12 for %st(1) (gcc regno = 9)
648 13 for %st(2) (gcc regno = 10)
649 14 for %st(3) (gcc regno = 11)
650 15 for %st(4) (gcc regno = 12)
651 16 for %st(5) (gcc regno = 13)
652 17 for %st(6) (gcc regno = 14)
653 18 for %st(7) (gcc regno = 15)
655 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
657 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
658 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
659 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
660 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
661 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
666 /* Test and compare insns in i386.md store the information needed to
667 generate branch and scc insns here. */
669 rtx ix86_compare_op0 = NULL_RTX;
670 rtx ix86_compare_op1 = NULL_RTX;
672 /* The encoding characters for the four TLS models present in ELF. */
674 static char const tls_model_chars[] = " GLil";
676 #define MAX_386_STACK_LOCALS 3
677 /* Size of the register save area. */
678 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
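/* With the 64-bit values of REGPARM_MAX (6), UNITS_PER_WORD (8) and
   SSE_REGPARM_MAX (8) defined in i386.h, this comes to 6*8 + 8*16 = 176 bytes
   of register save area.  */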
680 /* Define the structure for the machine field in struct function. */
681 struct machine_function GTY(())
683 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
684 const char *some_ld_name;
685 int save_varrargs_registers;
686 int accesses_prev_frame;
689 #define ix86_stack_locals (cfun->machine->stack_locals)
690 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
692 /* Structure describing stack frame layout.
693 Stack grows downward:
695 [arguments]
696 <- ARG_POINTER
697 saved pc
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
701 [saved regs]
703 [padding1] \
705 [va_arg registers] (
706 > to_allocate <- FRAME_POINTER
707 [frame] (
709 [padding2] /
711 struct ix86_frame
713 int nregs;
714 int padding1;
715 int va_arg_size;
716 HOST_WIDE_INT frame;
717 int padding2;
718 int outgoing_arguments_size;
719 int red_zone_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
732 /* Parsed value. */
733 enum cmodel ix86_cmodel;
734 /* Asm dialect. */
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
737 /* TLS dialect. */
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_cpu;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
761 int ix86_regparm;
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
789 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
790 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
792 int, int, FILE *));
793 static const char *get_some_local_dynamic_name PARAMS ((void));
794 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795 static rtx maybe_get_pool_constant PARAMS ((rtx));
796 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
797 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
798 rtx *, rtx *));
799 static rtx get_thread_pointer PARAMS ((void));
800 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
801 static rtx gen_push PARAMS ((rtx));
802 static int memory_address_length PARAMS ((rtx addr));
803 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806 static void ix86_dump_ppro_packet PARAMS ((FILE *));
807 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
808 static struct machine_function * ix86_init_machine_status PARAMS ((void));
809 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
810 static int ix86_nsaved_regs PARAMS ((void));
811 static void ix86_emit_save_regs PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
813 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
814 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
815 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
816 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
817 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
818 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
819 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
821 static int ix86_issue_rate PARAMS ((void));
822 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823 static void ix86_sched_init PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
826 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins PARAMS ((void));
829 static rtx x86_this_parameter PARAMS ((tree));
830 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
836 struct ix86_address
838 rtx base, index, disp;
839 HOST_WIDE_INT scale;
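/* For instance, the address 12(%ebx,%ecx,4) decomposes into base = %ebx,
   index = %ecx, scale = 4 and disp = 12.  */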
842 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
843 static int ix86_address_cost PARAMS ((rtx));
844 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
847 static const char *ix86_strip_name_encoding PARAMS ((const char *))
848 ATTRIBUTE_UNUSED;
850 struct builtin_description;
851 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
852 tree, rtx));
853 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
854 tree, rtx));
855 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
856 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
857 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
858 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
859 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
860 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
861 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
862 enum rtx_code *,
863 enum rtx_code *,
864 enum rtx_code *));
865 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
866 rtx *, rtx *));
867 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
869 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
871 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
872 static int ix86_save_reg PARAMS ((unsigned int, int));
873 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
874 static int ix86_comp_type_attributes PARAMS ((tree, tree));
875 static int ix86_fntype_regparm PARAMS ((tree));
876 const struct attribute_spec ix86_attribute_table[];
877 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
878 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
879 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
880 static int ix86_value_regno PARAMS ((enum machine_mode));
881 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
882 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
883 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
884 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
886 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
887 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
888 #endif
890 /* Register class used for passing the given 64-bit part of the argument.
891 These represent the classes documented by the psABI, with the exception of
892 the SSESF and SSEDF classes, which are basically the SSE class, except that gcc
893 uses SF or DFmode moves instead of DImode moves to avoid reformatting penalties.
895 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
896 whenever possible (the upper half then contains only padding). */
898 enum x86_64_reg_class
900 X86_64_NO_CLASS,
901 X86_64_INTEGER_CLASS,
902 X86_64_INTEGERSI_CLASS,
903 X86_64_SSE_CLASS,
904 X86_64_SSESF_CLASS,
905 X86_64_SSEDF_CLASS,
906 X86_64_SSEUP_CLASS,
907 X86_64_X87_CLASS,
908 X86_64_X87UP_CLASS,
909 X86_64_MEMORY_CLASS
911 static const char * const x86_64_reg_class_name[] =
912 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
914 #define MAX_CLASSES 4
915 static int classify_argument PARAMS ((enum machine_mode, tree,
916 enum x86_64_reg_class [MAX_CLASSES],
917 int));
918 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
919 int *));
920 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
921 const int *, int));
922 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
923 enum x86_64_reg_class));
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
931 #endif
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
954 #ifdef ASM_QUAD
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
957 #endif
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
986 #ifdef HAVE_AS_TLS
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
989 #endif
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_MS_BITFIELD_LAYOUT_P
994 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
996 #undef TARGET_ASM_OUTPUT_MI_THUNK
997 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
998 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
999 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1001 #undef TARGET_RTX_COSTS
1002 #define TARGET_RTX_COSTS ix86_rtx_costs
1003 #undef TARGET_ADDRESS_COST
1004 #define TARGET_ADDRESS_COST ix86_address_cost
1006 struct gcc_target targetm = TARGET_INITIALIZER;
1008 /* Sometimes certain combinations of command options do not make
1009 sense on a particular target machine. You can define a macro
1010 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1011 defined, is executed once just after all the command options have
1012 been parsed.
1014 Don't use this macro to turn on various extra optimizations for
1015 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1017 void
1018 override_options ()
1020 int i;
1021 /* Comes from final.c -- no real reason to change it. */
1022 #define MAX_CODE_ALIGN 16
1024 static struct ptt
1026 const struct processor_costs *cost; /* Processor costs */
1027 const int target_enable; /* Target flags to enable. */
1028 const int target_disable; /* Target flags to disable. */
1029 const int align_loop; /* Default alignments. */
1030 const int align_loop_max_skip;
1031 const int align_jump;
1032 const int align_jump_max_skip;
1033 const int align_func;
1035 const processor_target_table[PROCESSOR_max] =
1037 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1038 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1039 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1040 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1041 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1042 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1043 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1044 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1047 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1048 static struct pta
1050 const char *const name; /* processor name or nickname. */
1051 const enum processor_type processor;
1052 const enum pta_flags
1054 PTA_SSE = 1,
1055 PTA_SSE2 = 2,
1056 PTA_MMX = 4,
1057 PTA_PREFETCH_SSE = 8,
1058 PTA_3DNOW = 16,
1059 PTA_3DNOW_A = 64,
1060 PTA_64BIT = 128
1061 } flags;
1063 const processor_alias_table[] =
1065 {"i386", PROCESSOR_I386, 0},
1066 {"i486", PROCESSOR_I486, 0},
1067 {"i586", PROCESSOR_PENTIUM, 0},
1068 {"pentium", PROCESSOR_PENTIUM, 0},
1069 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1070 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1071 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1072 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1073 {"i686", PROCESSOR_PENTIUMPRO, 0},
1074 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1075 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1076 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1077 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1078 PTA_MMX | PTA_PREFETCH_SSE},
1079 {"k6", PROCESSOR_K6, PTA_MMX},
1080 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1081 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1082 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1083 | PTA_3DNOW_A},
1084 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1085 | PTA_3DNOW | PTA_3DNOW_A},
1086 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1087 | PTA_3DNOW_A | PTA_SSE},
1088 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1089 | PTA_3DNOW_A | PTA_SSE},
1090 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1091 | PTA_3DNOW_A | PTA_SSE},
1092 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1093 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1096 int const pta_size = ARRAY_SIZE (processor_alias_table);
1098 /* By default our XFmode is the 80-bit extended format. If we are asked to
1099 use TFmode instead, it's also the 80-bit format, but with padding. */
1100 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1101 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1103 /* Set the default values for switches whose default depends on TARGET_64BIT
1104 in case they weren't overwritten by command line options. */
1105 if (TARGET_64BIT)
1107 if (flag_omit_frame_pointer == 2)
1108 flag_omit_frame_pointer = 1;
1109 if (flag_asynchronous_unwind_tables == 2)
1110 flag_asynchronous_unwind_tables = 1;
1111 if (flag_pcc_struct_return == 2)
1112 flag_pcc_struct_return = 0;
1114 else
1116 if (flag_omit_frame_pointer == 2)
1117 flag_omit_frame_pointer = 0;
1118 if (flag_asynchronous_unwind_tables == 2)
1119 flag_asynchronous_unwind_tables = 0;
1120 if (flag_pcc_struct_return == 2)
1121 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1124 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1125 SUBTARGET_OVERRIDE_OPTIONS;
1126 #endif
1128 if (!ix86_cpu_string && ix86_arch_string)
1129 ix86_cpu_string = ix86_arch_string;
1130 if (!ix86_cpu_string)
1131 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1132 if (!ix86_arch_string)
1133 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1135 if (ix86_cmodel_string != 0)
1137 if (!strcmp (ix86_cmodel_string, "small"))
1138 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1139 else if (flag_pic)
1140 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1141 else if (!strcmp (ix86_cmodel_string, "32"))
1142 ix86_cmodel = CM_32;
1143 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1144 ix86_cmodel = CM_KERNEL;
1145 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1146 ix86_cmodel = CM_MEDIUM;
1147 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1148 ix86_cmodel = CM_LARGE;
1149 else
1150 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1152 else
1154 ix86_cmodel = CM_32;
1155 if (TARGET_64BIT)
1156 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1158 if (ix86_asm_string != 0)
1160 if (!strcmp (ix86_asm_string, "intel"))
1161 ix86_asm_dialect = ASM_INTEL;
1162 else if (!strcmp (ix86_asm_string, "att"))
1163 ix86_asm_dialect = ASM_ATT;
1164 else
1165 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1167 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1168 error ("code model `%s' not supported in the %s bit mode",
1169 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1170 if (ix86_cmodel == CM_LARGE)
1171 sorry ("code model `large' not supported yet");
1172 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1173 sorry ("%i-bit mode not compiled in",
1174 (target_flags & MASK_64BIT) ? 64 : 32);
1176 for (i = 0; i < pta_size; i++)
1177 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1179 ix86_arch = processor_alias_table[i].processor;
1180 /* Default cpu tuning to the architecture. */
1181 ix86_cpu = ix86_arch;
1182 if (processor_alias_table[i].flags & PTA_MMX
1183 && !(target_flags_explicit & MASK_MMX))
1184 target_flags |= MASK_MMX;
1185 if (processor_alias_table[i].flags & PTA_3DNOW
1186 && !(target_flags_explicit & MASK_3DNOW))
1187 target_flags |= MASK_3DNOW;
1188 if (processor_alias_table[i].flags & PTA_3DNOW_A
1189 && !(target_flags_explicit & MASK_3DNOW_A))
1190 target_flags |= MASK_3DNOW_A;
1191 if (processor_alias_table[i].flags & PTA_SSE
1192 && !(target_flags_explicit & MASK_SSE))
1193 target_flags |= MASK_SSE;
1194 if (processor_alias_table[i].flags & PTA_SSE2
1195 && !(target_flags_explicit & MASK_SSE2))
1196 target_flags |= MASK_SSE2;
1197 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1198 x86_prefetch_sse = true;
1199 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1200 error ("CPU you selected does not support x86-64 instruction set");
1201 break;
1204 if (i == pta_size)
1205 error ("bad value (%s) for -march= switch", ix86_arch_string);
1207 for (i = 0; i < pta_size; i++)
1208 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1210 ix86_cpu = processor_alias_table[i].processor;
1211 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1212 error ("CPU you selected does not support x86-64 instruction set");
1213 break;
1215 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1216 x86_prefetch_sse = true;
1217 if (i == pta_size)
1218 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1220 if (optimize_size)
1221 ix86_cost = &size_cost;
1222 else
1223 ix86_cost = processor_target_table[ix86_cpu].cost;
1224 target_flags |= processor_target_table[ix86_cpu].target_enable;
1225 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1227 /* Arrange to set up i386_stack_locals for all functions. */
1228 init_machine_status = ix86_init_machine_status;
1230 /* Validate -mregparm= value. */
1231 if (ix86_regparm_string)
1233 i = atoi (ix86_regparm_string);
1234 if (i < 0 || i > REGPARM_MAX)
1235 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1236 else
1237 ix86_regparm = i;
1239 else
1240 if (TARGET_64BIT)
1241 ix86_regparm = REGPARM_MAX;
1243 /* If the user has provided any of the -malign-* options,
1244 warn and use that value only if -falign-* is not set.
1245 Remove this code in GCC 3.2 or later. */
1246 if (ix86_align_loops_string)
1248 warning ("-malign-loops is obsolete, use -falign-loops");
1249 if (align_loops == 0)
1251 i = atoi (ix86_align_loops_string);
1252 if (i < 0 || i > MAX_CODE_ALIGN)
1253 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1254 else
1255 align_loops = 1 << i;
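/* The option value is a power of two, so e.g. -malign-loops=4 requests
   1 << 4 = 16 byte loop alignment.  */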
1259 if (ix86_align_jumps_string)
1261 warning ("-malign-jumps is obsolete, use -falign-jumps");
1262 if (align_jumps == 0)
1264 i = atoi (ix86_align_jumps_string);
1265 if (i < 0 || i > MAX_CODE_ALIGN)
1266 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1267 else
1268 align_jumps = 1 << i;
1272 if (ix86_align_funcs_string)
1274 warning ("-malign-functions is obsolete, use -falign-functions");
1275 if (align_functions == 0)
1277 i = atoi (ix86_align_funcs_string);
1278 if (i < 0 || i > MAX_CODE_ALIGN)
1279 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1280 else
1281 align_functions = 1 << i;
1285 /* Default align_* from the processor table. */
1286 if (align_loops == 0)
1288 align_loops = processor_target_table[ix86_cpu].align_loop;
1289 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1291 if (align_jumps == 0)
1293 align_jumps = processor_target_table[ix86_cpu].align_jump;
1294 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1296 if (align_functions == 0)
1298 align_functions = processor_target_table[ix86_cpu].align_func;
1301 /* Validate -mpreferred-stack-boundary= value, or provide default.
1302 The default of 128 bits is for Pentium III's SSE __m128, but we
1303 don't want additional code to keep the stack aligned when
1304 optimizing for code size. */
1305 ix86_preferred_stack_boundary = (optimize_size
1306 ? TARGET_64BIT ? 128 : 32
1307 : 128);
1308 if (ix86_preferred_stack_boundary_string)
1310 i = atoi (ix86_preferred_stack_boundary_string);
1311 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1312 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1313 TARGET_64BIT ? 4 : 2);
1314 else
1315 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
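/* E.g. -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits,
   i.e. a 16-byte aligned stack.  */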
1318 /* Validate -mbranch-cost= value, or provide default. */
1319 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1320 if (ix86_branch_cost_string)
1322 i = atoi (ix86_branch_cost_string);
1323 if (i < 0 || i > 5)
1324 error ("-mbranch-cost=%d is not between 0 and 5", i);
1325 else
1326 ix86_branch_cost = i;
1329 if (ix86_tls_dialect_string)
1331 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1332 ix86_tls_dialect = TLS_DIALECT_GNU;
1333 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1334 ix86_tls_dialect = TLS_DIALECT_SUN;
1335 else
1336 error ("bad value (%s) for -mtls-dialect= switch",
1337 ix86_tls_dialect_string);
1340 /* Keep nonleaf frame pointers. */
1341 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1342 flag_omit_frame_pointer = 1;
1344 /* If we're doing fast math, we don't care about comparison order
1345 wrt NaNs. This lets us use a shorter comparison sequence. */
1346 if (flag_unsafe_math_optimizations)
1347 target_flags &= ~MASK_IEEE_FP;
1349 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1350 since the insns won't need emulation. */
1351 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1352 target_flags &= ~MASK_NO_FANCY_MATH_387;
1354 if (TARGET_64BIT)
1356 if (TARGET_ALIGN_DOUBLE)
1357 error ("-malign-double makes no sense in the 64bit mode");
1358 if (TARGET_RTD)
1359 error ("-mrtd calling convention not supported in the 64bit mode");
1360 /* Enable by default the SSE and MMX builtins. */
1361 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1362 ix86_fpmath = FPMATH_SSE;
1364 else
1365 ix86_fpmath = FPMATH_387;
1367 if (ix86_fpmath_string != 0)
1369 if (! strcmp (ix86_fpmath_string, "387"))
1370 ix86_fpmath = FPMATH_387;
1371 else if (! strcmp (ix86_fpmath_string, "sse"))
1373 if (!TARGET_SSE)
1375 warning ("SSE instruction set disabled, using 387 arithmetics");
1376 ix86_fpmath = FPMATH_387;
1378 else
1379 ix86_fpmath = FPMATH_SSE;
1381 else if (! strcmp (ix86_fpmath_string, "387,sse")
1382 || ! strcmp (ix86_fpmath_string, "sse,387"))
1384 if (!TARGET_SSE)
1386 warning ("SSE instruction set disabled, using 387 arithmetics");
1387 ix86_fpmath = FPMATH_387;
1389 else if (!TARGET_80387)
1391 warning ("387 instruction set disabled, using SSE arithmetics");
1392 ix86_fpmath = FPMATH_SSE;
1394 else
1395 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1397 else
1398 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1401 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1402 on by -msse. */
1403 if (TARGET_SSE)
1405 target_flags |= MASK_MMX;
1406 x86_prefetch_sse = true;
1409 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1410 if (TARGET_3DNOW)
1412 target_flags |= MASK_MMX;
1413 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1414 extensions it adds. */
1415 if (x86_3dnow_a & (1 << ix86_arch))
1416 target_flags |= MASK_3DNOW_A;
1418 if ((x86_accumulate_outgoing_args & CPUMASK)
1419 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1420 && !optimize_size)
1421 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1423 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1425 char *p;
1426 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1427 p = strchr (internal_label_prefix, 'X');
1428 internal_label_prefix_len = p - internal_label_prefix;
1429 *p = '\0';
1433 void
1434 optimization_options (level, size)
1435 int level;
1436 int size ATTRIBUTE_UNUSED;
1438 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1439 make the problem with not enough registers even worse. */
1440 #ifdef INSN_SCHEDULING
1441 if (level > 1)
1442 flag_schedule_insns = 0;
1443 #endif
1445 /* The default values of these switches depend on TARGET_64BIT, which is
1446 not known at this moment. Mark these values with 2 and let the user
1447 override them. If there is no command line option specifying them,
1448 we will set the defaults in override_options. */
1449 if (optimize >= 1)
1450 flag_omit_frame_pointer = 2;
1451 flag_pcc_struct_return = 2;
1452 flag_asynchronous_unwind_tables = 2;
1455 /* Table of valid machine attributes. */
1456 const struct attribute_spec ix86_attribute_table[] =
1458 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1459 /* Stdcall attribute says callee is responsible for popping arguments
1460 if they are not variable. */
1461 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1462 /* Fastcall attribute says callee is responsible for popping arguments
1463 if they are not variable. */
1464 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1465 /* Cdecl attribute says the callee is a normal C declaration */
1466 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1467 /* Regparm attribute specifies how many integer arguments are to be
1468 passed in registers. */
1469 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1470 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1471 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1472 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1473 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1474 #endif
1475 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1476 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1477 { NULL, 0, 0, false, false, false, NULL }
1480 /* If PIC, we cannot make sibling calls to global functions
1481 because the PLT requires %ebx live.
1482 If we are returning floats on the register stack, we cannot make
1483 sibling calls to functions that return floats. (The stack adjust
1484 instruction will wind up after the sibcall jump, and not be executed.) */
1486 static bool
1487 ix86_function_ok_for_sibcall (decl, exp)
1488 tree decl;
1489 tree exp;
1491 /* If we are generating position-independent code, we cannot sibcall
1492 optimize any indirect call, or a direct call to a global function,
1493 as the PLT requires %ebx be live. */
1494 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1495 return false;
1497 /* If we are returning floats on the 80387 register stack, we cannot
1498 make a sibcall from a function that doesn't return a float to a
1499 function that does; the necessary stack adjustment will not be
1500 executed. */
1501 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1502 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1503 return false;
1505 /* If this call is indirect, we'll need to be able to use a call-clobbered
1506 register for the address of the target function. Make sure that all
1507 such registers are not used for passing parameters. */
1508 if (!decl && !TARGET_64BIT)
1510 int regparm = ix86_regparm;
1511 tree attr, type;
1513 /* We're looking at the CALL_EXPR; we need the type of the function. */
1514 type = TREE_OPERAND (exp, 0); /* pointer expression */
1515 type = TREE_TYPE (type); /* pointer type */
1516 type = TREE_TYPE (type); /* function type */
1518 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1519 if (attr)
1520 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1522 if (regparm >= 3)
1524 /* ??? Need to count the actual number of registers to be used,
1525 not the possible number of registers. Fix later. */
1526 return false;
1530 /* Otherwise okay. That also includes certain types of indirect calls. */
1531 return true;
1534 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1535 arguments as in struct attribute_spec.handler. */
1536 static tree
1537 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1538 tree *node;
1539 tree name;
1540 tree args ATTRIBUTE_UNUSED;
1541 int flags ATTRIBUTE_UNUSED;
1542 bool *no_add_attrs;
1544 if (TREE_CODE (*node) != FUNCTION_TYPE
1545 && TREE_CODE (*node) != METHOD_TYPE
1546 && TREE_CODE (*node) != FIELD_DECL
1547 && TREE_CODE (*node) != TYPE_DECL)
1549 warning ("`%s' attribute only applies to functions",
1550 IDENTIFIER_POINTER (name));
1551 *no_add_attrs = true;
1553 else
1555 if (is_attribute_p ("fastcall", name))
1557 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1559 error ("fastcall and stdcall attributes are not compatible");
1561 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1563 error ("fastcall and regparm attributes are not compatible");
1566 else if (is_attribute_p ("stdcall", name))
1568 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1570 error ("fastcall and stdcall attributes are not compatible");
1575 if (TARGET_64BIT)
1577 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1578 *no_add_attrs = true;
1581 return NULL_TREE;
1584 /* Handle a "regparm" attribute;
1585 arguments as in struct attribute_spec.handler. */
1586 static tree
1587 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1588 tree *node;
1589 tree name;
1590 tree args;
1591 int flags ATTRIBUTE_UNUSED;
1592 bool *no_add_attrs;
1594 if (TREE_CODE (*node) != FUNCTION_TYPE
1595 && TREE_CODE (*node) != METHOD_TYPE
1596 && TREE_CODE (*node) != FIELD_DECL
1597 && TREE_CODE (*node) != TYPE_DECL)
1599 warning ("`%s' attribute only applies to functions",
1600 IDENTIFIER_POINTER (name));
1601 *no_add_attrs = true;
1603 else
1605 tree cst;
1607 cst = TREE_VALUE (args);
1608 if (TREE_CODE (cst) != INTEGER_CST)
1610 warning ("`%s' attribute requires an integer constant argument",
1611 IDENTIFIER_POINTER (name));
1612 *no_add_attrs = true;
1614 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1616 warning ("argument to `%s' attribute larger than %d",
1617 IDENTIFIER_POINTER (name), REGPARM_MAX);
1618 *no_add_attrs = true;
1621 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1623 error ("fastcall and regparm attributes are not compatible");
1627 return NULL_TREE;
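/* Illustrative example (not part of the original source): how the regparm and
   fastcall attributes handled above are typically written in user code.  The
   function names are hypothetical.

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);
     int __attribute__ ((fastcall)) scale (int x, int y);

   add3 receives a, b and c in EAX, EDX and ECX; scale receives x and y in ECX
   and EDX.  Combining fastcall with stdcall or regparm is rejected by the
   handlers above.  */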
1630 /* Return 0 if the attributes for two types are incompatible, 1 if they
1631 are compatible, and 2 if they are nearly compatible (which causes a
1632 warning to be generated). */
1634 static int
1635 ix86_comp_type_attributes (type1, type2)
1636 tree type1;
1637 tree type2;
1639 /* Check for mismatch of non-default calling convention. */
1640 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1642 if (TREE_CODE (type1) != FUNCTION_TYPE)
1643 return 1;
1645 /* Check for mismatched fastcall types */
1646 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1647 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1648 return 0;
1650 /* Check for mismatched return types (cdecl vs stdcall). */
1651 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1652 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1653 return 0;
1654 return 1;
1657 /* Return the regparm value for a function with the indicated TYPE. */
1659 static int
1660 ix86_fntype_regparm (type)
1661 tree type;
1663 tree attr;
1665 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1666 if (attr)
1667 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1668 else
1669 return ix86_regparm;
1672 /* Value is the number of bytes of arguments automatically
1673 popped when returning from a subroutine call.
1674 FUNDECL is the declaration node of the function (as a tree),
1675 FUNTYPE is the data type of the function (as a tree),
1676 or for a library call it is an identifier node for the subroutine name.
1677 SIZE is the number of bytes of arguments passed on the stack.
1679 On the 80386, the RTD insn may be used to pop them if the number
1680 of args is fixed, but if the number is variable then the caller
1681 must pop them all. RTD can't be used for library calls now
1682 because the library is compiled with the Unix compiler.
1683 Use of RTD is a selectable option, since it is incompatible with
1684 standard Unix calling sequences. If the option is not selected,
1685 the caller must always pop the args.
1687 The attribute stdcall is equivalent to RTD on a per module basis. */
1690 ix86_return_pops_args (fundecl, funtype, size)
1691 tree fundecl;
1692 tree funtype;
1693 int size;
1695 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1697 /* Cdecl functions override -mrtd, and never pop the stack. */
1698 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1700 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1701 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1702 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1703 rtd = 1;
1705 if (rtd
1706 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1707 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1708 == void_type_node)))
1709 return size;
1712 /* Lose any fake structure return argument if it is passed on the stack. */
1713 if (aggregate_value_p (TREE_TYPE (funtype))
1714 && !TARGET_64BIT)
1716 int nregs = ix86_fntype_regparm (funtype);
1718 if (!nregs)
1719 return GET_MODE_SIZE (Pmode);
1722 return 0;
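/* Illustrative example (not part of the original source): the effect of the
   popping rules implemented above, under hypothetical declarations.

     void __attribute__ ((stdcall)) f (int a, int b);  /* callee pops 8 bytes    */
     void g (int a, ...);                              /* variable args: caller pops */
     void __attribute__ ((cdecl)) h (int a);           /* caller pops even with -mrtd */

   With -mrtd, plain prototyped functions behave like f; the cdecl attribute
   restores the default caller-pops convention.  */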
1725 /* Argument support functions. */
1727 /* Return true when register may be used to pass function parameters. */
1728 bool
1729 ix86_function_arg_regno_p (regno)
1730 int regno;
1732 int i;
1733 if (!TARGET_64BIT)
1734 return (regno < REGPARM_MAX
1735 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1736 if (SSE_REGNO_P (regno) && TARGET_SSE)
1737 return true;
1738 /* RAX is used as hidden argument to va_arg functions. */
1739 if (!regno)
1740 return true;
1741 for (i = 0; i < REGPARM_MAX; i++)
1742 if (regno == x86_64_int_parameter_registers[i])
1743 return true;
1744 return false;
1747 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1748 for a call to a function whose data type is FNTYPE.
1749 For a library call, FNTYPE is 0. */
1751 void
1752 init_cumulative_args (cum, fntype, libname)
1753 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1754 tree fntype; /* tree ptr for function decl */
1755 rtx libname; /* SYMBOL_REF of library name or 0 */
1757 static CUMULATIVE_ARGS zero_cum;
1758 tree param, next_param;
1760 if (TARGET_DEBUG_ARG)
1762 fprintf (stderr, "\ninit_cumulative_args (");
1763 if (fntype)
1764 fprintf (stderr, "fntype code = %s, ret code = %s",
1765 tree_code_name[(int) TREE_CODE (fntype)],
1766 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1767 else
1768 fprintf (stderr, "no fntype");
1770 if (libname)
1771 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1774 *cum = zero_cum;
1776 /* Set up the number of registers to use for passing arguments. */
1777 cum->nregs = ix86_regparm;
1778 cum->sse_nregs = SSE_REGPARM_MAX;
1779 if (fntype && !TARGET_64BIT)
1781 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1783 if (attr)
1784 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1786 cum->maybe_vaarg = false;
1788 /* Use ecx and edx registers if function has fastcall attribute */
1789 if (fntype && !TARGET_64BIT)
1791 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1793 cum->nregs = 2;
1794 cum->fastcall = 1;
1799 /* Determine if this function has variable arguments. This is
1800 indicated by the last argument being 'void_type_node' if there
1801 are no variable arguments. If there are variable arguments, then
1802 we won't pass anything in registers */
1804 if (cum->nregs)
1806 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1807 param != 0; param = next_param)
1809 next_param = TREE_CHAIN (param);
1810 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1812 if (!TARGET_64BIT)
1814 cum->nregs = 0;
1815 cum->fastcall = 0;
1817 cum->maybe_vaarg = true;
1821 if ((!fntype && !libname)
1822 || (fntype && !TYPE_ARG_TYPES (fntype)))
1823 cum->maybe_vaarg = 1;
1825 if (TARGET_DEBUG_ARG)
1826 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1828 return;
1831 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
1832 goal of this code is to classify each 8 bytes of an incoming argument by the
1833 register class and assign registers accordingly. */
1835 /* Return the union class of CLASS1 and CLASS2.
1836 See the x86-64 PS ABI for details. */
1838 static enum x86_64_reg_class
1839 merge_classes (class1, class2)
1840 enum x86_64_reg_class class1, class2;
1842 /* Rule #1: If both classes are equal, this is the resulting class. */
1843 if (class1 == class2)
1844 return class1;
1846 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1847 the other class. */
1848 if (class1 == X86_64_NO_CLASS)
1849 return class2;
1850 if (class2 == X86_64_NO_CLASS)
1851 return class1;
1853 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1854 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1855 return X86_64_MEMORY_CLASS;
1857 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1858 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1859 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1860 return X86_64_INTEGERSI_CLASS;
1861 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1862 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1863 return X86_64_INTEGER_CLASS;
1865 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1866 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1867 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1868 return X86_64_MEMORY_CLASS;
1870 /* Rule #6: Otherwise class SSE is used. */
1871 return X86_64_SSE_CLASS;
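/* Worked example (illustrative, not part of the original source): for a
   hypothetical struct { float a; float b; } both fields land in the same
   eightbyte; merging SSESF with SSE falls through rules #1-#5 and yields SSE
   (rule #6), so the whole struct is passed in a single SSE register.  Had one
   field been an int, rule #4 would give an INTEGER class instead.  */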
1874 /* Classify the argument of type TYPE and mode MODE.
1875 CLASSES will be filled by the register class used to pass each word
1876 of the operand. The number of words is returned. In case the parameter
1877 should be passed in memory, 0 is returned. As a special case for zero
1878 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1880 BIT_OFFSET is used internally for handling records and specifies the
1881 offset in bits modulo 256 to avoid overflow cases.
1883 See the x86-64 PS ABI for details.
1886 static int
1887 classify_argument (mode, type, classes, bit_offset)
1888 enum machine_mode mode;
1889 tree type;
1890 enum x86_64_reg_class classes[MAX_CLASSES];
1891 int bit_offset;
1893 int bytes =
1894 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1895 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1897 /* Variable sized entities are always passed/returned in memory. */
1898 if (bytes < 0)
1899 return 0;
1901 if (type && AGGREGATE_TYPE_P (type))
1903 int i;
1904 tree field;
1905 enum x86_64_reg_class subclasses[MAX_CLASSES];
1907 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1908 if (bytes > 16)
1909 return 0;
1911 for (i = 0; i < words; i++)
1912 classes[i] = X86_64_NO_CLASS;
1914 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1915 signal the memory class, so handle this as a special case. */
1916 if (!words)
1918 classes[0] = X86_64_NO_CLASS;
1919 return 1;
1922 /* Classify each field of record and merge classes. */
1923 if (TREE_CODE (type) == RECORD_TYPE)
1925 /* For C++ classes, first merge in the classes of the base types. */
1926 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1928 tree bases = TYPE_BINFO_BASETYPES (type);
1929 int n_bases = TREE_VEC_LENGTH (bases);
1930 int i;
1932 for (i = 0; i < n_bases; ++i)
1934 tree binfo = TREE_VEC_ELT (bases, i);
1935 int num;
1936 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1937 tree type = BINFO_TYPE (binfo);
1939 num = classify_argument (TYPE_MODE (type),
1940 type, subclasses,
1941 (offset + bit_offset) % 256);
1942 if (!num)
1943 return 0;
1944 for (i = 0; i < num; i++)
1946 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1947 classes[i + pos] =
1948 merge_classes (subclasses[i], classes[i + pos]);
1952 /* And now merge the fields of structure. */
1953 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1955 if (TREE_CODE (field) == FIELD_DECL)
1957 int num;
1959 /* Bitfields are always classified as integer. Handle them
1960 early, since later code would consider them to be
1961 misaligned integers. */
1962 if (DECL_BIT_FIELD (field))
1964 for (i = int_bit_position (field) / 8 / 8;
1965 i < (int_bit_position (field)
1966 + tree_low_cst (DECL_SIZE (field), 0)
1967 + 63) / 8 / 8; i++)
1968 classes[i] =
1969 merge_classes (X86_64_INTEGER_CLASS,
1970 classes[i]);
1972 else
1974 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1975 TREE_TYPE (field), subclasses,
1976 (int_bit_position (field)
1977 + bit_offset) % 256);
1978 if (!num)
1979 return 0;
1980 for (i = 0; i < num; i++)
1982 int pos =
1983 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1984 classes[i + pos] =
1985 merge_classes (subclasses[i], classes[i + pos]);
1991 /* Arrays are handled as small records. */
1992 else if (TREE_CODE (type) == ARRAY_TYPE)
1994 int num;
1995 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1996 TREE_TYPE (type), subclasses, bit_offset);
1997 if (!num)
1998 return 0;
2000 /* The partial classes are now full classes. */
2001 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2002 subclasses[0] = X86_64_SSE_CLASS;
2003 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2004 subclasses[0] = X86_64_INTEGER_CLASS;
2006 for (i = 0; i < words; i++)
2007 classes[i] = subclasses[i % num];
2009 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2010 else if (TREE_CODE (type) == UNION_TYPE
2011 || TREE_CODE (type) == QUAL_UNION_TYPE)
2013 /* For C++ classes, first merge in the classes of the base types. */
2014 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2016 tree bases = TYPE_BINFO_BASETYPES (type);
2017 int n_bases = TREE_VEC_LENGTH (bases);
2018 int i;
2020 for (i = 0; i < n_bases; ++i)
2022 tree binfo = TREE_VEC_ELT (bases, i);
2023 int num;
2024 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2025 tree type = BINFO_TYPE (binfo);
2027 num = classify_argument (TYPE_MODE (type),
2028 type, subclasses,
2029 (offset + (bit_offset % 64)) % 256);
2030 if (!num)
2031 return 0;
2032 for (i = 0; i < num; i++)
2034 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2035 classes[i + pos] =
2036 merge_classes (subclasses[i], classes[i + pos]);
2040 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2042 if (TREE_CODE (field) == FIELD_DECL)
2044 int num;
2045 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2046 TREE_TYPE (field), subclasses,
2047 bit_offset);
2048 if (!num)
2049 return 0;
2050 for (i = 0; i < num; i++)
2051 classes[i] = merge_classes (subclasses[i], classes[i]);
2055 else
2056 abort ();
2058 /* Final merger cleanup. */
2059 for (i = 0; i < words; i++)
2061 /* If one class is MEMORY, everything should be passed in
2062 memory. */
2063 if (classes[i] == X86_64_MEMORY_CLASS)
2064 return 0;
2066 /* The X86_64_SSEUP_CLASS should always be preceded by
2067 X86_64_SSE_CLASS. */
2068 if (classes[i] == X86_64_SSEUP_CLASS
2069 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2070 classes[i] = X86_64_SSE_CLASS;
2072 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2073 if (classes[i] == X86_64_X87UP_CLASS
2074 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2075 classes[i] = X86_64_SSE_CLASS;
2077 return words;
2080 /* Compute alignment needed. We align all types to natural boundaries with
2081 exception of XFmode that is aligned to 64bits. */
2082 if (mode != VOIDmode && mode != BLKmode)
2084 int mode_alignment = GET_MODE_BITSIZE (mode);
2086 if (mode == XFmode)
2087 mode_alignment = 128;
2088 else if (mode == XCmode)
2089 mode_alignment = 256;
2090 /* Misaligned fields are always returned in memory. */
2091 if (bit_offset % mode_alignment)
2092 return 0;
2095 /* Classification of atomic types. */
2096 switch (mode)
2098 case DImode:
2099 case SImode:
2100 case HImode:
2101 case QImode:
2102 case CSImode:
2103 case CHImode:
2104 case CQImode:
2105 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2106 classes[0] = X86_64_INTEGERSI_CLASS;
2107 else
2108 classes[0] = X86_64_INTEGER_CLASS;
2109 return 1;
2110 case CDImode:
2111 case TImode:
2112 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2113 return 2;
2114 case CTImode:
2115 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2116 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2117 return 4;
2118 case SFmode:
2119 if (!(bit_offset % 64))
2120 classes[0] = X86_64_SSESF_CLASS;
2121 else
2122 classes[0] = X86_64_SSE_CLASS;
2123 return 1;
2124 case DFmode:
2125 classes[0] = X86_64_SSEDF_CLASS;
2126 return 1;
2127 case TFmode:
2128 classes[0] = X86_64_X87_CLASS;
2129 classes[1] = X86_64_X87UP_CLASS;
2130 return 2;
2131 case TCmode:
2132 classes[0] = X86_64_X87_CLASS;
2133 classes[1] = X86_64_X87UP_CLASS;
2134 classes[2] = X86_64_X87_CLASS;
2135 classes[3] = X86_64_X87UP_CLASS;
2136 return 4;
2137 case DCmode:
2138 classes[0] = X86_64_SSEDF_CLASS;
2139 classes[1] = X86_64_SSEDF_CLASS;
2140 return 2;
2141 case SCmode:
2142 classes[0] = X86_64_SSE_CLASS;
2143 return 1;
2144 case V4SFmode:
2145 case V4SImode:
2146 case V16QImode:
2147 case V8HImode:
2148 case V2DFmode:
2149 case V2DImode:
2150 classes[0] = X86_64_SSE_CLASS;
2151 classes[1] = X86_64_SSEUP_CLASS;
2152 return 2;
2153 case V2SFmode:
2154 case V2SImode:
2155 case V4HImode:
2156 case V8QImode:
2157 return 0;
2158 case BLKmode:
2159 case VOIDmode:
2160 return 0;
2161 default:
2162 abort ();
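/* Worked example (illustrative, not part of the original source): classifying
   a hypothetical struct { int i; double d; } (16 bytes, two eightbytes).
   Field i gives classes[0] = INTEGERSI and field d gives classes[1] = SSEDF,
   so examine_argument below reports one integer and one SSE register.  A
   20-byte struct, by contrast, exceeds the 16-byte limit above and yields 0,
   i.e. it is passed in memory.  */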
2166 /* Examine the argument and set the number of registers required in each
2167 class. Return 0 iff the parameter should be passed in memory. */
2168 static int
2169 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2170 enum machine_mode mode;
2171 tree type;
2172 int *int_nregs, *sse_nregs;
2173 int in_return;
2175 enum x86_64_reg_class class[MAX_CLASSES];
2176 int n = classify_argument (mode, type, class, 0);
2178 *int_nregs = 0;
2179 *sse_nregs = 0;
2180 if (!n)
2181 return 0;
2182 for (n--; n >= 0; n--)
2183 switch (class[n])
2185 case X86_64_INTEGER_CLASS:
2186 case X86_64_INTEGERSI_CLASS:
2187 (*int_nregs)++;
2188 break;
2189 case X86_64_SSE_CLASS:
2190 case X86_64_SSESF_CLASS:
2191 case X86_64_SSEDF_CLASS:
2192 (*sse_nregs)++;
2193 break;
2194 case X86_64_NO_CLASS:
2195 case X86_64_SSEUP_CLASS:
2196 break;
2197 case X86_64_X87_CLASS:
2198 case X86_64_X87UP_CLASS:
2199 if (!in_return)
2200 return 0;
2201 break;
2202 case X86_64_MEMORY_CLASS:
2203 abort ();
2205 return 1;
2207 /* Construct container for the argument used by GCC interface. See
2208 FUNCTION_ARG for the detailed description. */
2209 static rtx
2210 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2211 enum machine_mode mode;
2212 tree type;
2213 int in_return;
2214 int nintregs, nsseregs;
2215 const int * intreg;
2216 int sse_regno;
2218 enum machine_mode tmpmode;
2219 int bytes =
2220 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2221 enum x86_64_reg_class class[MAX_CLASSES];
2222 int n;
2223 int i;
2224 int nexps = 0;
2225 int needed_sseregs, needed_intregs;
2226 rtx exp[MAX_CLASSES];
2227 rtx ret;
2229 n = classify_argument (mode, type, class, 0);
2230 if (TARGET_DEBUG_ARG)
2232 if (!n)
2233 fprintf (stderr, "Memory class\n");
2234 else
2236 fprintf (stderr, "Classes:");
2237 for (i = 0; i < n; i++)
2239 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2241 fprintf (stderr, "\n");
2244 if (!n)
2245 return NULL;
2246 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2247 return NULL;
2248 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2249 return NULL;
2251 /* First construct simple cases. Avoid SCmode, since we want to use
2252 single register to pass this type. */
2253 if (n == 1 && mode != SCmode)
2254 switch (class[0])
2256 case X86_64_INTEGER_CLASS:
2257 case X86_64_INTEGERSI_CLASS:
2258 return gen_rtx_REG (mode, intreg[0]);
2259 case X86_64_SSE_CLASS:
2260 case X86_64_SSESF_CLASS:
2261 case X86_64_SSEDF_CLASS:
2262 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2263 case X86_64_X87_CLASS:
2264 return gen_rtx_REG (mode, FIRST_STACK_REG);
2265 case X86_64_NO_CLASS:
2266 /* Zero sized array, struct or class. */
2267 return NULL;
2268 default:
2269 abort ();
2271 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2272 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2273 if (n == 2
2274 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2275 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2276 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2277 && class[1] == X86_64_INTEGER_CLASS
2278 && (mode == CDImode || mode == TImode)
2279 && intreg[0] + 1 == intreg[1])
2280 return gen_rtx_REG (mode, intreg[0]);
2281 if (n == 4
2282 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2283 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2284 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2286 /* Otherwise figure out the entries of the PARALLEL. */
2287 for (i = 0; i < n; i++)
2289 switch (class[i])
2291 case X86_64_NO_CLASS:
2292 break;
2293 case X86_64_INTEGER_CLASS:
2294 case X86_64_INTEGERSI_CLASS:
2295 /* Merge TImodes on aligned occasions here too. */
2296 if (i * 8 + 8 > bytes)
2297 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2298 else if (class[i] == X86_64_INTEGERSI_CLASS)
2299 tmpmode = SImode;
2300 else
2301 tmpmode = DImode;
2302 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2303 if (tmpmode == BLKmode)
2304 tmpmode = DImode;
2305 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2306 gen_rtx_REG (tmpmode, *intreg),
2307 GEN_INT (i*8));
2308 intreg++;
2309 break;
2310 case X86_64_SSESF_CLASS:
2311 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2312 gen_rtx_REG (SFmode,
2313 SSE_REGNO (sse_regno)),
2314 GEN_INT (i*8));
2315 sse_regno++;
2316 break;
2317 case X86_64_SSEDF_CLASS:
2318 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2319 gen_rtx_REG (DFmode,
2320 SSE_REGNO (sse_regno)),
2321 GEN_INT (i*8));
2322 sse_regno++;
2323 break;
2324 case X86_64_SSE_CLASS:
2325 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2326 tmpmode = TImode;
2327 else
2328 tmpmode = DImode;
2329 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2330 gen_rtx_REG (tmpmode,
2331 SSE_REGNO (sse_regno)),
2332 GEN_INT (i*8));
2333 if (tmpmode == TImode)
2334 i++;
2335 sse_regno++;
2336 break;
2337 default:
2338 abort ();
2341 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2342 for (i = 0; i < nexps; i++)
2343 XVECEXP (ret, 0, i) = exp [i];
2344 return ret;
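/* Illustrative example (not part of the original source): for a hypothetical
   argument of type struct { int i; double d; } classified as
   { INTEGERSI, SSEDF }, the PARALLEL built above contains (reg:SI di) at
   offset 0 and (reg:DF xmm0) at offset 8, assuming the first integer and SSE
   argument registers are still free.  */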
2347 /* Update the data in CUM to advance over an argument
2348 of mode MODE and data type TYPE.
2349 (TYPE is null for libcalls where that information may not be available.) */
2351 void
2352 function_arg_advance (cum, mode, type, named)
2353 CUMULATIVE_ARGS *cum; /* current arg information */
2354 enum machine_mode mode; /* current arg mode */
2355 tree type; /* type of the argument or 0 if lib support */
2356 int named; /* whether or not the argument was named */
2358 int bytes =
2359 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2360 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2362 if (TARGET_DEBUG_ARG)
2363 fprintf (stderr,
2364 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2365 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2366 if (TARGET_64BIT)
2368 int int_nregs, sse_nregs;
2369 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2370 cum->words += words;
2371 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2373 cum->nregs -= int_nregs;
2374 cum->sse_nregs -= sse_nregs;
2375 cum->regno += int_nregs;
2376 cum->sse_regno += sse_nregs;
2378 else
2379 cum->words += words;
2381 else
2383 if (TARGET_SSE && mode == TImode)
2385 cum->sse_words += words;
2386 cum->sse_nregs -= 1;
2387 cum->sse_regno += 1;
2388 if (cum->sse_nregs <= 0)
2390 cum->sse_nregs = 0;
2391 cum->sse_regno = 0;
2394 else
2396 cum->words += words;
2397 cum->nregs -= words;
2398 cum->regno += words;
2400 if (cum->nregs <= 0)
2402 cum->nregs = 0;
2403 cum->regno = 0;
2407 return;
2410 /* Define where to put the arguments to a function.
2411 Value is zero to push the argument on the stack,
2412 or a hard register in which to store the argument.
2414 MODE is the argument's machine mode.
2415 TYPE is the data type of the argument (as a tree).
2416 This is null for libcalls where that information may
2417 not be available.
2418 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2419 the preceding args and about the function being called.
2420 NAMED is nonzero if this argument is a named parameter
2421 (otherwise it is an extra parameter matching an ellipsis). */
2424 function_arg (cum, mode, type, named)
2425 CUMULATIVE_ARGS *cum; /* current arg information */
2426 enum machine_mode mode; /* current arg mode */
2427 tree type; /* type of the argument or 0 if lib support */
2428 int named; /* != 0 for normal args, == 0 for ... args */
2430 rtx ret = NULL_RTX;
2431 int bytes =
2432 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2433 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2435 /* Handle a hidden AL argument containing the number of registers for varargs
2436 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2437 any AL settings. */
2438 if (mode == VOIDmode)
2440 if (TARGET_64BIT)
2441 return GEN_INT (cum->maybe_vaarg
2442 ? (cum->sse_nregs < 0
2443 ? SSE_REGPARM_MAX
2444 : cum->sse_regno)
2445 : -1);
2446 else
2447 return constm1_rtx;
2449 if (TARGET_64BIT)
2450 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2451 &x86_64_int_parameter_registers [cum->regno],
2452 cum->sse_regno);
2453 else
2454 switch (mode)
2456 /* For now, pass fp/complex values on the stack. */
2457 default:
2458 break;
2460 case BLKmode:
2461 case DImode:
2462 case SImode:
2463 case HImode:
2464 case QImode:
2465 if (words <= cum->nregs)
2467 int regno = cum->regno;
2469 /* Fastcall allocates the first two DWORD (SImode) or
2470 smaller arguments to ECX and EDX. */
2471 if (cum->fastcall)
2473 if (mode == BLKmode || mode == DImode)
2474 break;
2476 /* ECX not EAX is the first allocated register. */
2477 if (regno == 0)
2478 regno = 2;
2480 ret = gen_rtx_REG (mode, regno);
2482 break;
2483 case TImode:
2484 if (cum->sse_nregs)
2485 ret = gen_rtx_REG (mode, cum->sse_regno);
2486 break;
2489 if (TARGET_DEBUG_ARG)
2491 fprintf (stderr,
2492 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2493 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2495 if (ret)
2496 print_simple_rtl (stderr, ret);
2497 else
2498 fprintf (stderr, ", stack");
2500 fprintf (stderr, " )\n");
2503 return ret;
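/* Illustrative example (not part of the original source): for a hypothetical
   void __attribute__ ((fastcall)) f (int a, int b, int c), the code above
   places a in ECX (regno 0 is remapped to 2 for fastcall) and b in EDX, while
   c goes to the stack once cum->nregs is exhausted.  DImode and BLKmode
   arguments are never passed in registers under fastcall.  */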
2506 /* A C expression that indicates when an argument must be passed by
2507 reference. If nonzero for an argument, a copy of that argument is
2508 made in memory and a pointer to the argument is passed instead of
2509 the argument itself. The pointer is passed in whatever way is
2510 appropriate for passing a pointer to that type. */
2513 function_arg_pass_by_reference (cum, mode, type, named)
2514 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2515 enum machine_mode mode ATTRIBUTE_UNUSED;
2516 tree type;
2517 int named ATTRIBUTE_UNUSED;
2519 if (!TARGET_64BIT)
2520 return 0;
2522 if (type && int_size_in_bytes (type) == -1)
2524 if (TARGET_DEBUG_ARG)
2525 fprintf (stderr, "function_arg_pass_by_reference\n");
2526 return 1;
2529 return 0;
2532 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2533 and type. */
2536 ix86_function_arg_boundary (mode, type)
2537 enum machine_mode mode;
2538 tree type;
2540 int align;
2541 if (!TARGET_64BIT)
2542 return PARM_BOUNDARY;
2543 if (type)
2544 align = TYPE_ALIGN (type);
2545 else
2546 align = GET_MODE_ALIGNMENT (mode);
2547 if (align < PARM_BOUNDARY)
2548 align = PARM_BOUNDARY;
2549 if (align > 128)
2550 align = 128;
2551 return align;
2554 /* Return true if N is a possible register number of function value. */
2555 bool
2556 ix86_function_value_regno_p (regno)
2557 int regno;
2559 if (!TARGET_64BIT)
2561 return ((regno) == 0
2562 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2563 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2565 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2566 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2567 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2570 /* Define how to find the value returned by a function.
2571 VALTYPE is the data type of the value (as a tree).
2572 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2573 otherwise, FUNC is 0. */
2575 ix86_function_value (valtype)
2576 tree valtype;
2578 if (TARGET_64BIT)
2580 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2581 REGPARM_MAX, SSE_REGPARM_MAX,
2582 x86_64_int_return_registers, 0);
2583 /* For zero sized structures, construct_container returns NULL, but we need
2584 to keep the rest of the compiler happy by returning a meaningful value. */
2585 if (!ret)
2586 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2587 return ret;
2589 else
2590 return gen_rtx_REG (TYPE_MODE (valtype),
2591 ix86_value_regno (TYPE_MODE (valtype)));
2594 /* Return false iff type is returned in memory. */
2596 ix86_return_in_memory (type)
2597 tree type;
2599 int needed_intregs, needed_sseregs;
2600 if (TARGET_64BIT)
2602 return !examine_argument (TYPE_MODE (type), type, 1,
2603 &needed_intregs, &needed_sseregs);
2605 else
2607 if (TYPE_MODE (type) == BLKmode
2608 || (VECTOR_MODE_P (TYPE_MODE (type))
2609 && int_size_in_bytes (type) == 8)
2610 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2611 && TYPE_MODE (type) != TFmode
2612 && !VECTOR_MODE_P (TYPE_MODE (type))))
2613 return 1;
2614 return 0;
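/* Illustrative example (not part of the original source): under the 32-bit
   rules above, a type whose mode is BLKmode (e.g. a 16-byte struct) is
   returned in memory, as is an 8-byte vector such as the MMX __m64 type,
   while TImode and TFmode values and 16-byte SSE vectors stay in registers.  */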
2618 /* Define how to find the value returned by a library function
2619 assuming the value has mode MODE. */
2621 ix86_libcall_value (mode)
2622 enum machine_mode mode;
2624 if (TARGET_64BIT)
2626 switch (mode)
2628 case SFmode:
2629 case SCmode:
2630 case DFmode:
2631 case DCmode:
2632 return gen_rtx_REG (mode, FIRST_SSE_REG);
2633 case TFmode:
2634 case TCmode:
2635 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2636 default:
2637 return gen_rtx_REG (mode, 0);
2640 else
2641 return gen_rtx_REG (mode, ix86_value_regno (mode));
2644 /* Given a mode, return the register to use for a return value. */
2646 static int
2647 ix86_value_regno (mode)
2648 enum machine_mode mode;
2650 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2651 return FIRST_FLOAT_REG;
2652 if (mode == TImode || VECTOR_MODE_P (mode))
2653 return FIRST_SSE_REG;
2654 return 0;
2657 /* Create the va_list data type. */
2659 tree
2660 ix86_build_va_list ()
2662 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2664 /* For i386 we use plain pointer to argument area. */
2665 if (!TARGET_64BIT)
2666 return build_pointer_type (char_type_node);
2668 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2669 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2671 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2672 unsigned_type_node);
2673 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2674 unsigned_type_node);
2675 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2676 ptr_type_node);
2677 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2678 ptr_type_node);
2680 DECL_FIELD_CONTEXT (f_gpr) = record;
2681 DECL_FIELD_CONTEXT (f_fpr) = record;
2682 DECL_FIELD_CONTEXT (f_ovf) = record;
2683 DECL_FIELD_CONTEXT (f_sav) = record;
2685 TREE_CHAIN (record) = type_decl;
2686 TYPE_NAME (record) = type_decl;
2687 TYPE_FIELDS (record) = f_gpr;
2688 TREE_CHAIN (f_gpr) = f_fpr;
2689 TREE_CHAIN (f_fpr) = f_ovf;
2690 TREE_CHAIN (f_ovf) = f_sav;
2692 layout_type (record);
2694 /* The correct type is an array type of one element. */
2695 return build_array_type (record, build_index_type (size_zero_node));
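/* For reference (illustrative comment, not part of the original source), the
   record built above corresponds to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   matching the layout required by the x86-64 psABI.  */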
2698 /* Perform any actions needed for a function that is receiving a
2699 variable number of arguments.
2701 CUM is as above.
2703 MODE and TYPE are the mode and type of the current parameter.
2705 PRETEND_SIZE is a variable that should be set to the amount of stack
2706 that must be pushed by the prolog to pretend that our caller pushed it.
2709 Normally, this macro will push all remaining incoming registers on the
2710 stack and set PRETEND_SIZE to the length of the registers pushed. */
2712 void
2713 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2714 CUMULATIVE_ARGS *cum;
2715 enum machine_mode mode;
2716 tree type;
2717 int *pretend_size ATTRIBUTE_UNUSED;
2718 int no_rtl;
2721 CUMULATIVE_ARGS next_cum;
2722 rtx save_area = NULL_RTX, mem;
2723 rtx label;
2724 rtx label_ref;
2725 rtx tmp_reg;
2726 rtx nsse_reg;
2727 int set;
2728 tree fntype;
2729 int stdarg_p;
2730 int i;
2732 if (!TARGET_64BIT)
2733 return;
2735 /* Indicate to allocate space on the stack for varargs save area. */
2736 ix86_save_varrargs_registers = 1;
2738 fntype = TREE_TYPE (current_function_decl);
2739 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2740 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2741 != void_type_node));
2743 /* For varargs, we do not want to skip the dummy va_dcl argument.
2744 For stdargs, we do want to skip the last named argument. */
2745 next_cum = *cum;
2746 if (stdarg_p)
2747 function_arg_advance (&next_cum, mode, type, 1);
2749 if (!no_rtl)
2750 save_area = frame_pointer_rtx;
2752 set = get_varargs_alias_set ();
2754 for (i = next_cum.regno; i < ix86_regparm; i++)
2756 mem = gen_rtx_MEM (Pmode,
2757 plus_constant (save_area, i * UNITS_PER_WORD));
2758 set_mem_alias_set (mem, set);
2759 emit_move_insn (mem, gen_rtx_REG (Pmode,
2760 x86_64_int_parameter_registers[i]));
2763 if (next_cum.sse_nregs)
2765 /* Now emit code to save SSE registers. The AX parameter contains the number
2766 of SSE parameter registers used to call this function. We use the
2767 sse_prologue_save insn template that produces a computed jump across the
2768 SSE saves. We need some preparation work to get this working. */
2770 label = gen_label_rtx ();
2771 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2773 /* Compute the address to jump to:
2774 label - 4*eax + nnamed_sse_arguments*4 */
2775 tmp_reg = gen_reg_rtx (Pmode);
2776 nsse_reg = gen_reg_rtx (Pmode);
2777 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2778 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2779 gen_rtx_MULT (Pmode, nsse_reg,
2780 GEN_INT (4))));
2781 if (next_cum.sse_regno)
2782 emit_move_insn
2783 (nsse_reg,
2784 gen_rtx_CONST (DImode,
2785 gen_rtx_PLUS (DImode,
2786 label_ref,
2787 GEN_INT (next_cum.sse_regno * 4))));
2788 else
2789 emit_move_insn (nsse_reg, label_ref);
2790 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2792 /* Compute the address of the memory block we save into. We always use a
2793 pointer pointing 127 bytes after the first byte to store - this is needed
2794 to keep the instruction size limited to 4 bytes. */
2795 tmp_reg = gen_reg_rtx (Pmode);
2796 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2797 plus_constant (save_area,
2798 8 * REGPARM_MAX + 127)));
2799 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2800 set_mem_alias_set (mem, set);
2801 set_mem_align (mem, BITS_PER_WORD);
2803 /* And finally do the dirty job! */
2804 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2805 GEN_INT (next_cum.sse_regno), label));
2810 /* Implement va_start. */
2812 void
2813 ix86_va_start (valist, nextarg)
2814 tree valist;
2815 rtx nextarg;
2817 HOST_WIDE_INT words, n_gpr, n_fpr;
2818 tree f_gpr, f_fpr, f_ovf, f_sav;
2819 tree gpr, fpr, ovf, sav, t;
2821 /* Only 64bit target needs something special. */
2822 if (!TARGET_64BIT)
2824 std_expand_builtin_va_start (valist, nextarg);
2825 return;
2828 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2829 f_fpr = TREE_CHAIN (f_gpr);
2830 f_ovf = TREE_CHAIN (f_fpr);
2831 f_sav = TREE_CHAIN (f_ovf);
2833 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2834 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2835 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2836 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2837 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2839 /* Count number of gp and fp argument registers used. */
2840 words = current_function_args_info.words;
2841 n_gpr = current_function_args_info.regno;
2842 n_fpr = current_function_args_info.sse_regno;
2844 if (TARGET_DEBUG_ARG)
2845 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2846 (int) words, (int) n_gpr, (int) n_fpr);
2848 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2849 build_int_2 (n_gpr * 8, 0));
2850 TREE_SIDE_EFFECTS (t) = 1;
2851 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2853 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2854 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2855 TREE_SIDE_EFFECTS (t) = 1;
2856 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2858 /* Find the overflow area. */
2859 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2860 if (words != 0)
2861 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2862 build_int_2 (words * UNITS_PER_WORD, 0));
2863 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2864 TREE_SIDE_EFFECTS (t) = 1;
2865 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2867 /* Find the register save area.
2868 The function prologue saves it right above the stack frame. */
2869 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2870 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2871 TREE_SIDE_EFFECTS (t) = 1;
2872 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
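/* Illustrative example (not part of the original source): for a hypothetical
   int f (int a, ...) the code above initializes gp_offset to 8 (one GP
   register consumed by the named argument a), fp_offset to REGPARM_MAX * 8
   (no SSE registers consumed), overflow_arg_area to just past any named
   arguments on the stack, and reg_save_area to the block saved by the
   prologue.  */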
2875 /* Implement va_arg. */
2877 ix86_va_arg (valist, type)
2878 tree valist, type;
2880 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2881 tree f_gpr, f_fpr, f_ovf, f_sav;
2882 tree gpr, fpr, ovf, sav, t;
2883 int size, rsize;
2884 rtx lab_false, lab_over = NULL_RTX;
2885 rtx addr_rtx, r;
2886 rtx container;
2887 int indirect_p = 0;
2889 /* Only 64bit target needs something special. */
2890 if (!TARGET_64BIT)
2892 return std_expand_builtin_va_arg (valist, type);
2895 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2896 f_fpr = TREE_CHAIN (f_gpr);
2897 f_ovf = TREE_CHAIN (f_fpr);
2898 f_sav = TREE_CHAIN (f_ovf);
2900 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2901 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2902 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2903 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2904 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2906 size = int_size_in_bytes (type);
2907 if (size == -1)
2909 /* Passed by reference. */
2910 indirect_p = 1;
2911 type = build_pointer_type (type);
2912 size = int_size_in_bytes (type);
2914 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2916 container = construct_container (TYPE_MODE (type), type, 0,
2917 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2919 * Pull the value out of the saved registers ...
2922 addr_rtx = gen_reg_rtx (Pmode);
2924 if (container)
2926 rtx int_addr_rtx, sse_addr_rtx;
2927 int needed_intregs, needed_sseregs;
2928 int need_temp;
2930 lab_over = gen_label_rtx ();
2931 lab_false = gen_label_rtx ();
2933 examine_argument (TYPE_MODE (type), type, 0,
2934 &needed_intregs, &needed_sseregs);
2937 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2938 || TYPE_ALIGN (type) > 128);
2940 /* In case we are passing a structure, verify that it is a consecutive block
2941 in the register save area. If not, we need to do moves. */
2942 if (!need_temp && !REG_P (container))
2944 /* Verify that all registers are strictly consecutive */
2945 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2947 int i;
2949 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2951 rtx slot = XVECEXP (container, 0, i);
2952 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2953 || INTVAL (XEXP (slot, 1)) != i * 16)
2954 need_temp = 1;
2957 else
2959 int i;
2961 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2963 rtx slot = XVECEXP (container, 0, i);
2964 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2965 || INTVAL (XEXP (slot, 1)) != i * 8)
2966 need_temp = 1;
2970 if (!need_temp)
2972 int_addr_rtx = addr_rtx;
2973 sse_addr_rtx = addr_rtx;
2975 else
2977 int_addr_rtx = gen_reg_rtx (Pmode);
2978 sse_addr_rtx = gen_reg_rtx (Pmode);
2980 /* First ensure that we fit completely in registers. */
2981 if (needed_intregs)
2983 emit_cmp_and_jump_insns (expand_expr
2984 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2985 GEN_INT ((REGPARM_MAX - needed_intregs +
2986 1) * 8), GE, const1_rtx, SImode,
2987 1, lab_false);
2989 if (needed_sseregs)
2991 emit_cmp_and_jump_insns (expand_expr
2992 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2993 GEN_INT ((SSE_REGPARM_MAX -
2994 needed_sseregs + 1) * 16 +
2995 REGPARM_MAX * 8), GE, const1_rtx,
2996 SImode, 1, lab_false);
2999 /* Compute index to start of area used for integer regs. */
3000 if (needed_intregs)
3002 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3003 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3004 if (r != int_addr_rtx)
3005 emit_move_insn (int_addr_rtx, r);
3007 if (needed_sseregs)
3009 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3010 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3011 if (r != sse_addr_rtx)
3012 emit_move_insn (sse_addr_rtx, r);
3014 if (need_temp)
3016 int i;
3017 rtx mem;
3019 /* Never use the memory itself, as it has the alias set. */
3020 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3021 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3022 set_mem_alias_set (mem, get_varargs_alias_set ());
3023 set_mem_align (mem, BITS_PER_UNIT);
3025 for (i = 0; i < XVECLEN (container, 0); i++)
3027 rtx slot = XVECEXP (container, 0, i);
3028 rtx reg = XEXP (slot, 0);
3029 enum machine_mode mode = GET_MODE (reg);
3030 rtx src_addr;
3031 rtx src_mem;
3032 int src_offset;
3033 rtx dest_mem;
3035 if (SSE_REGNO_P (REGNO (reg)))
3037 src_addr = sse_addr_rtx;
3038 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3040 else
3042 src_addr = int_addr_rtx;
3043 src_offset = REGNO (reg) * 8;
3045 src_mem = gen_rtx_MEM (mode, src_addr);
3046 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3047 src_mem = adjust_address (src_mem, mode, src_offset);
3048 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3049 emit_move_insn (dest_mem, src_mem);
3053 if (needed_intregs)
3056 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3057 build_int_2 (needed_intregs * 8, 0));
3058 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3059 TREE_SIDE_EFFECTS (t) = 1;
3060 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3062 if (needed_sseregs)
3065 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3066 build_int_2 (needed_sseregs * 16, 0));
3067 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3068 TREE_SIDE_EFFECTS (t) = 1;
3069 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3072 emit_jump_insn (gen_jump (lab_over));
3073 emit_barrier ();
3074 emit_label (lab_false);
3077 /* ... otherwise out of the overflow area. */
3079 /* Care for on-stack alignment if needed. */
3080 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3081 t = ovf;
3082 else
3084 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3085 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3086 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3088 t = save_expr (t);
3090 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3091 if (r != addr_rtx)
3092 emit_move_insn (addr_rtx, r);
3095 build (PLUS_EXPR, TREE_TYPE (t), t,
3096 build_int_2 (rsize * UNITS_PER_WORD, 0));
3097 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3098 TREE_SIDE_EFFECTS (t) = 1;
3099 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3101 if (container)
3102 emit_label (lab_over);
3104 if (indirect_p)
3106 r = gen_rtx_MEM (Pmode, addr_rtx);
3107 set_mem_alias_set (r, get_varargs_alias_set ());
3108 emit_move_insn (addr_rtx, r);
3111 return addr_rtx;
3114 /* Return nonzero if OP is either an i387 or SSE fp register. */
3116 any_fp_register_operand (op, mode)
3117 rtx op;
3118 enum machine_mode mode ATTRIBUTE_UNUSED;
3120 return ANY_FP_REG_P (op);
3123 /* Return nonzero if OP is an i387 fp register. */
3125 fp_register_operand (op, mode)
3126 rtx op;
3127 enum machine_mode mode ATTRIBUTE_UNUSED;
3129 return FP_REG_P (op);
3132 /* Return nonzero if OP is a non-fp register_operand. */
3134 register_and_not_any_fp_reg_operand (op, mode)
3135 rtx op;
3136 enum machine_mode mode;
3138 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3141 /* Return nonzero if OP is a register operand other than an
3142 i387 fp register. */
3144 register_and_not_fp_reg_operand (op, mode)
3145 rtx op;
3146 enum machine_mode mode;
3148 return register_operand (op, mode) && !FP_REG_P (op);
3151 /* Return nonzero if OP is general operand representable on x86_64. */
3154 x86_64_general_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode;
3158 if (!TARGET_64BIT)
3159 return general_operand (op, mode);
3160 if (nonimmediate_operand (op, mode))
3161 return 1;
3162 return x86_64_sign_extended_value (op);
3165 /* Return nonzero if OP is general operand representable on x86_64
3166 as either sign extended or zero extended constant. */
3169 x86_64_szext_general_operand (op, mode)
3170 rtx op;
3171 enum machine_mode mode;
3173 if (!TARGET_64BIT)
3174 return general_operand (op, mode);
3175 if (nonimmediate_operand (op, mode))
3176 return 1;
3177 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3180 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3183 x86_64_nonmemory_operand (op, mode)
3184 rtx op;
3185 enum machine_mode mode;
3187 if (!TARGET_64BIT)
3188 return nonmemory_operand (op, mode);
3189 if (register_operand (op, mode))
3190 return 1;
3191 return x86_64_sign_extended_value (op);
3194 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3197 x86_64_movabs_operand (op, mode)
3198 rtx op;
3199 enum machine_mode mode;
3201 if (!TARGET_64BIT || !flag_pic)
3202 return nonmemory_operand (op, mode);
3203 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3204 return 1;
3205 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3206 return 1;
3207 return 0;
3210 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3213 x86_64_szext_nonmemory_operand (op, mode)
3214 rtx op;
3215 enum machine_mode mode;
3217 if (!TARGET_64BIT)
3218 return nonmemory_operand (op, mode);
3219 if (register_operand (op, mode))
3220 return 1;
3221 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3224 /* Return nonzero if OP is immediate operand representable on x86_64. */
3227 x86_64_immediate_operand (op, mode)
3228 rtx op;
3229 enum machine_mode mode;
3231 if (!TARGET_64BIT)
3232 return immediate_operand (op, mode);
3233 return x86_64_sign_extended_value (op);
3236 /* Return nonzero if OP is immediate operand representable on x86_64. */
3239 x86_64_zext_immediate_operand (op, mode)
3240 rtx op;
3241 enum machine_mode mode ATTRIBUTE_UNUSED;
3243 return x86_64_zero_extended_value (op);
3246 /* Return nonzero if OP is (const_int 1), else return zero. */
3249 const_int_1_operand (op, mode)
3250 rtx op;
3251 enum machine_mode mode ATTRIBUTE_UNUSED;
3253 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3256 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3257 for shift & compare patterns, as shifting by 0 does not change flags),
3258 else return zero. */
3261 const_int_1_31_operand (op, mode)
3262 rtx op;
3263 enum machine_mode mode ATTRIBUTE_UNUSED;
3265 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3268 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3269 reference and a constant. */
3272 symbolic_operand (op, mode)
3273 register rtx op;
3274 enum machine_mode mode ATTRIBUTE_UNUSED;
3276 switch (GET_CODE (op))
3278 case SYMBOL_REF:
3279 case LABEL_REF:
3280 return 1;
3282 case CONST:
3283 op = XEXP (op, 0);
3284 if (GET_CODE (op) == SYMBOL_REF
3285 || GET_CODE (op) == LABEL_REF
3286 || (GET_CODE (op) == UNSPEC
3287 && (XINT (op, 1) == UNSPEC_GOT
3288 || XINT (op, 1) == UNSPEC_GOTOFF
3289 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3290 return 1;
3291 if (GET_CODE (op) != PLUS
3292 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3293 return 0;
3295 op = XEXP (op, 0);
3296 if (GET_CODE (op) == SYMBOL_REF
3297 || GET_CODE (op) == LABEL_REF)
3298 return 1;
3299 /* Only @GOTOFF gets offsets. */
3300 if (GET_CODE (op) != UNSPEC
3301 || XINT (op, 1) != UNSPEC_GOTOFF)
3302 return 0;
3304 op = XVECEXP (op, 0, 0);
3305 if (GET_CODE (op) == SYMBOL_REF
3306 || GET_CODE (op) == LABEL_REF)
3307 return 1;
3308 return 0;
3310 default:
3311 return 0;
3315 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3318 pic_symbolic_operand (op, mode)
3319 register rtx op;
3320 enum machine_mode mode ATTRIBUTE_UNUSED;
3322 if (GET_CODE (op) != CONST)
3323 return 0;
3324 op = XEXP (op, 0);
3325 if (TARGET_64BIT)
3327 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3328 return 1;
3330 else
3332 if (GET_CODE (op) == UNSPEC)
3333 return 1;
3334 if (GET_CODE (op) != PLUS
3335 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3336 return 0;
3337 op = XEXP (op, 0);
3338 if (GET_CODE (op) == UNSPEC)
3339 return 1;
3341 return 0;
3344 /* Return true if OP is a symbolic operand that resolves locally. */
3346 static int
3347 local_symbolic_operand (op, mode)
3348 rtx op;
3349 enum machine_mode mode ATTRIBUTE_UNUSED;
3351 if (GET_CODE (op) == CONST
3352 && GET_CODE (XEXP (op, 0)) == PLUS
3353 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3354 op = XEXP (XEXP (op, 0), 0);
3356 if (GET_CODE (op) == LABEL_REF)
3357 return 1;
3359 if (GET_CODE (op) != SYMBOL_REF)
3360 return 0;
3362 /* These we've been told are local by varasm and encode_section_info
3363 respectively. */
3364 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3365 return 1;
3367 /* There is, however, a not insubstantial body of code in the rest of
3368 the compiler that assumes it can just stick the results of
3369 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3370 /* ??? This is a hack. Should update the body of the compiler to
3371 always create a DECL and invoke targetm.encode_section_info. */
3372 if (strncmp (XSTR (op, 0), internal_label_prefix,
3373 internal_label_prefix_len) == 0)
3374 return 1;
3376 return 0;
3379 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3382 tls_symbolic_operand (op, mode)
3383 register rtx op;
3384 enum machine_mode mode ATTRIBUTE_UNUSED;
3386 const char *symbol_str;
3388 if (GET_CODE (op) != SYMBOL_REF)
3389 return 0;
3390 symbol_str = XSTR (op, 0);
3392 if (symbol_str[0] != '%')
3393 return 0;
3394 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3397 static int
3398 tls_symbolic_operand_1 (op, kind)
3399 rtx op;
3400 enum tls_model kind;
3402 const char *symbol_str;
3404 if (GET_CODE (op) != SYMBOL_REF)
3405 return 0;
3406 symbol_str = XSTR (op, 0);
3408 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3412 global_dynamic_symbolic_operand (op, mode)
3413 register rtx op;
3414 enum machine_mode mode ATTRIBUTE_UNUSED;
3416 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3420 local_dynamic_symbolic_operand (op, mode)
3421 register rtx op;
3422 enum machine_mode mode ATTRIBUTE_UNUSED;
3424 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3428 initial_exec_symbolic_operand (op, mode)
3429 register rtx op;
3430 enum machine_mode mode ATTRIBUTE_UNUSED;
3432 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3436 local_exec_symbolic_operand (op, mode)
3437 register rtx op;
3438 enum machine_mode mode ATTRIBUTE_UNUSED;
3440 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3443 /* Test for a valid operand for a call instruction. Don't allow the
3444 arg pointer register or virtual regs since they may decay into
3445 reg + const, which the patterns can't handle. */
3448 call_insn_operand (op, mode)
3449 rtx op;
3450 enum machine_mode mode ATTRIBUTE_UNUSED;
3452 /* Disallow indirect through a virtual register. This leads to
3453 compiler aborts when trying to eliminate them. */
3454 if (GET_CODE (op) == REG
3455 && (op == arg_pointer_rtx
3456 || op == frame_pointer_rtx
3457 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3458 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3459 return 0;
3461 /* Disallow `call 1234'. Due to varying assembler lameness this
3462 gets either rejected or translated to `call .+1234'. */
3463 if (GET_CODE (op) == CONST_INT)
3464 return 0;
3466 /* Explicitly allow SYMBOL_REF even if pic. */
3467 if (GET_CODE (op) == SYMBOL_REF)
3468 return 1;
3470 /* Otherwise we can allow any general_operand in the address. */
3471 return general_operand (op, Pmode);
3474 /* Test for a valid operand for a call instruction. Don't allow the
3475 arg pointer register or virtual regs since they may decay into
3476 reg + const, which the patterns can't handle. */
3479 sibcall_insn_operand (op, mode)
3480 rtx op;
3481 enum machine_mode mode ATTRIBUTE_UNUSED;
3483 /* Disallow indirect through a virtual register. This leads to
3484 compiler aborts when trying to eliminate them. */
3485 if (GET_CODE (op) == REG
3486 && (op == arg_pointer_rtx
3487 || op == frame_pointer_rtx
3488 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3489 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3490 return 0;
3492 /* Explicitly allow SYMBOL_REF even if pic. */
3493 if (GET_CODE (op) == SYMBOL_REF)
3494 return 1;
3496 /* Otherwise we can only allow register operands. */
3497 return register_operand (op, Pmode);
3501 constant_call_address_operand (op, mode)
3502 rtx op;
3503 enum machine_mode mode ATTRIBUTE_UNUSED;
3505 if (GET_CODE (op) == CONST
3506 && GET_CODE (XEXP (op, 0)) == PLUS
3507 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3508 op = XEXP (XEXP (op, 0), 0);
3509 return GET_CODE (op) == SYMBOL_REF;
3512 /* Match exactly zero and one. */
3515 const0_operand (op, mode)
3516 register rtx op;
3517 enum machine_mode mode;
3519 return op == CONST0_RTX (mode);
3523 const1_operand (op, mode)
3524 register rtx op;
3525 enum machine_mode mode ATTRIBUTE_UNUSED;
3527 return op == const1_rtx;
3530 /* Match 2, 4, or 8. Used for leal multiplicands. */
3533 const248_operand (op, mode)
3534 register rtx op;
3535 enum machine_mode mode ATTRIBUTE_UNUSED;
3537 return (GET_CODE (op) == CONST_INT
3538 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3541 /* True if this is a constant appropriate for an increment or decrement. */
3544 incdec_operand (op, mode)
3545 register rtx op;
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3548 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3549 flags register, since the carry flag is not set. */
3550 if (TARGET_PENTIUM4 && !optimize_size)
3551 return 0;
3552 return op == const1_rtx || op == constm1_rtx;
3555 /* Return nonzero if OP is acceptable as operand of DImode shift
3556 expander. */
3559 shiftdi_operand (op, mode)
3560 rtx op;
3561 enum machine_mode mode ATTRIBUTE_UNUSED;
3563 if (TARGET_64BIT)
3564 return nonimmediate_operand (op, mode);
3565 else
3566 return register_operand (op, mode);
3569 /* Return false if this is the stack pointer, or any other fake
3570 register eliminable to the stack pointer. Otherwise, this is
3571 a register operand.
3573 This is used to prevent esp from being used as an index reg,
3574 which would only happen in pathological cases. */
3577 reg_no_sp_operand (op, mode)
3578 register rtx op;
3579 enum machine_mode mode;
3581 rtx t = op;
3582 if (GET_CODE (t) == SUBREG)
3583 t = SUBREG_REG (t);
3584 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3585 return 0;
3587 return register_operand (op, mode);
3591 mmx_reg_operand (op, mode)
3592 register rtx op;
3593 enum machine_mode mode ATTRIBUTE_UNUSED;
3595 return MMX_REG_P (op);
3598 /* Return false if this is any eliminable register. Otherwise
3599 general_operand. */
3602 general_no_elim_operand (op, mode)
3603 register rtx op;
3604 enum machine_mode mode;
3606 rtx t = op;
3607 if (GET_CODE (t) == SUBREG)
3608 t = SUBREG_REG (t);
3609 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3610 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3611 || t == virtual_stack_dynamic_rtx)
3612 return 0;
3613 if (REG_P (t)
3614 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3615 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3616 return 0;
3618 return general_operand (op, mode);
3621 /* Return false if this is any eliminable register. Otherwise
3622 register_operand or const_int. */
3625 nonmemory_no_elim_operand (op, mode)
3626 register rtx op;
3627 enum machine_mode mode;
3629 rtx t = op;
3630 if (GET_CODE (t) == SUBREG)
3631 t = SUBREG_REG (t);
3632 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3633 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3634 || t == virtual_stack_dynamic_rtx)
3635 return 0;
3637 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3640 /* Return false if this is any eliminable register or stack register,
3641 otherwise work like register_operand. */
3644 index_register_operand (op, mode)
3645 register rtx op;
3646 enum machine_mode mode;
3648 rtx t = op;
3649 if (GET_CODE (t) == SUBREG)
3650 t = SUBREG_REG (t);
3651 if (!REG_P (t))
3652 return 0;
3653 if (t == arg_pointer_rtx
3654 || t == frame_pointer_rtx
3655 || t == virtual_incoming_args_rtx
3656 || t == virtual_stack_vars_rtx
3657 || t == virtual_stack_dynamic_rtx
3658 || REGNO (t) == STACK_POINTER_REGNUM)
3659 return 0;
3661 return general_operand (op, mode);
3664 /* Return true if op is a Q_REGS class register. */
3667 q_regs_operand (op, mode)
3668 register rtx op;
3669 enum machine_mode mode;
3671 if (mode != VOIDmode && GET_MODE (op) != mode)
3672 return 0;
3673 if (GET_CODE (op) == SUBREG)
3674 op = SUBREG_REG (op);
3675 return ANY_QI_REG_P (op);
3678 /* Return true if op is a flags register. */
3681 flags_reg_operand (op, mode)
3682 register rtx op;
3683 enum machine_mode mode;
3685 if (mode != VOIDmode && GET_MODE (op) != mode)
3686 return 0;
3687 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3690 /* Return true if op is a NON_Q_REGS class register. */
3693 non_q_regs_operand (op, mode)
3694 register rtx op;
3695 enum machine_mode mode;
3697 if (mode != VOIDmode && GET_MODE (op) != mode)
3698 return 0;
3699 if (GET_CODE (op) == SUBREG)
3700 op = SUBREG_REG (op);
3701 return NON_QI_REG_P (op);
3705 zero_extended_scalar_load_operand (op, mode)
3706 rtx op;
3707 enum machine_mode mode ATTRIBUTE_UNUSED;
3709 unsigned n_elts;
3710 if (GET_CODE (op) != MEM)
3711 return 0;
3712 op = maybe_get_pool_constant (op);
3713 if (!op)
3714 return 0;
3715 if (GET_CODE (op) != CONST_VECTOR)
3716 return 0;
3717 n_elts =
3718 (GET_MODE_SIZE (GET_MODE (op)) /
3719 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3720 for (n_elts--; n_elts > 0; n_elts--)
3722 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3723 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3724 return 0;
3726 return 1;
3729 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3730 insns. */
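/* For illustration: the eight codes accepted unconditionally below match the
   eight predicates the SSE compare instructions encode directly (eq, lt, le,
   unord, neq, nlt, nle, ord); the remaining codes are usable only when we may
   ignore the IEEE distinction between them and those eight.  */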
3732 sse_comparison_operator (op, mode)
3733 rtx op;
3734 enum machine_mode mode ATTRIBUTE_UNUSED;
3736 enum rtx_code code = GET_CODE (op);
3737 switch (code)
3739 /* Operations supported directly. */
3740 case EQ:
3741 case LT:
3742 case LE:
3743 case UNORDERED:
3744 case NE:
3745 case UNGE:
3746 case UNGT:
3747 case ORDERED:
3748 return 1;
3749 /* These are equivalent to the ones above for non-IEEE comparisons. */
3750 case UNEQ:
3751 case UNLT:
3752 case UNLE:
3753 case LTGT:
3754 case GE:
3755 case GT:
3756 return !TARGET_IEEE_FP;
3757 default:
3758 return 0;
3761 /* Return 1 if OP is a valid comparison operator in a valid mode. */
3763 ix86_comparison_operator (op, mode)
3764 register rtx op;
3765 enum machine_mode mode;
3767 enum machine_mode inmode;
3768 enum rtx_code code = GET_CODE (op);
3769 if (mode != VOIDmode && GET_MODE (op) != mode)
3770 return 0;
3771 if (GET_RTX_CLASS (code) != '<')
3772 return 0;
3773 inmode = GET_MODE (XEXP (op, 0));
3775 if (inmode == CCFPmode || inmode == CCFPUmode)
3777 enum rtx_code second_code, bypass_code;
3778 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3779 return (bypass_code == NIL && second_code == NIL);
3781 switch (code)
3783 case EQ: case NE:
3784 return 1;
3785 case LT: case GE:
3786 if (inmode == CCmode || inmode == CCGCmode
3787 || inmode == CCGOCmode || inmode == CCNOmode)
3788 return 1;
3789 return 0;
3790 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3791 if (inmode == CCmode)
3792 return 1;
3793 return 0;
3794 case GT: case LE:
3795 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3796 return 1;
3797 return 0;
3798 default:
3799 return 0;
3803 /* Return 1 if OP is a valid comparison operator testing whether the
3804 carry flag is set. */
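/* For example, an operand of the form (ltu (reg:CC flags) (const_int 0))
   is accepted; comparisons of this shape are how patterns such as
   add-with-carry and subtract-with-borrow consume the carry flag.
   (Illustrative note.)  */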
3806 ix86_carry_flag_operator (op, mode)
3807 register rtx op;
3808 enum machine_mode mode;
3810 enum machine_mode inmode;
3811 enum rtx_code code = GET_CODE (op);
3813 if (mode != VOIDmode && GET_MODE (op) != mode)
3814 return 0;
3815 if (GET_RTX_CLASS (code) != '<')
3816 return 0;
3817 inmode = GET_MODE (XEXP (op, 0));
3818 if (GET_CODE (XEXP (op, 0)) != REG
3819 || REGNO (XEXP (op, 0)) != 17
3820 || XEXP (op, 1) != const0_rtx)
3821 return 0;
3823 if (inmode == CCFPmode || inmode == CCFPUmode)
3825 enum rtx_code second_code, bypass_code;
3827 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3828 if (bypass_code != NIL || second_code != NIL)
3829 return 0;
3830 code = ix86_fp_compare_code_to_integer (code);
3832 else if (inmode != CCmode)
3833 return 0;
3834 return code == LTU;
3837 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3840 fcmov_comparison_operator (op, mode)
3841 register rtx op;
3842 enum machine_mode mode;
3844 enum machine_mode inmode;
3845 enum rtx_code code = GET_CODE (op);
3847 if (mode != VOIDmode && GET_MODE (op) != mode)
3848 return 0;
3849 if (GET_RTX_CLASS (code) != '<')
3850 return 0;
3851 inmode = GET_MODE (XEXP (op, 0));
3852 if (inmode == CCFPmode || inmode == CCFPUmode)
3854 enum rtx_code second_code, bypass_code;
3856 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3857 if (bypass_code != NIL || second_code != NIL)
3858 return 0;
3859 code = ix86_fp_compare_code_to_integer (code);
3861 /* The i387 supports only a limited set of condition codes. */
3862 switch (code)
3864 case LTU: case GTU: case LEU: case GEU:
3865 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3866 return 1;
3867 return 0;
3868 case ORDERED: case UNORDERED:
3869 case EQ: case NE:
3870 return 1;
3871 default:
3872 return 0;
3876 /* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
3879 promotable_binary_operator (op, mode)
3880 register rtx op;
3881 enum machine_mode mode ATTRIBUTE_UNUSED;
3883 switch (GET_CODE (op))
3885 case MULT:
3886 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3887 but the 386 and 486 do HImode multiplies faster. */
3888 return ix86_cpu > PROCESSOR_I486;
3889 case PLUS:
3890 case AND:
3891 case IOR:
3892 case XOR:
3893 case ASHIFT:
3894 return 1;
3895 default:
3896 return 0;
3900 /* Nearly general operand, but accept any const_double, since we wish
3901 to be able to drop them into memory rather than have them get pulled
3902 into registers. */
3905 cmp_fp_expander_operand (op, mode)
3906 register rtx op;
3907 enum machine_mode mode;
3909 if (mode != VOIDmode && mode != GET_MODE (op))
3910 return 0;
3911 if (GET_CODE (op) == CONST_DOUBLE)
3912 return 1;
3913 return general_operand (op, mode);
3916 /* Match an SI or HImode register for a zero_extract. */
3919 ext_register_operand (op, mode)
3920 register rtx op;
3921 enum machine_mode mode ATTRIBUTE_UNUSED;
3923 int regno;
3924 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3925 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3926 return 0;
3928 if (!register_operand (op, VOIDmode))
3929 return 0;
3931 /* Be careful to accept only registers having upper parts. */
3932 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3933 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3936 /* Return 1 if this is a valid binary floating-point operation.
3937 OP is the expression matched, and MODE is its mode. */
3940 binary_fp_operator (op, mode)
3941 register rtx op;
3942 enum machine_mode mode;
3944 if (mode != VOIDmode && mode != GET_MODE (op))
3945 return 0;
3947 switch (GET_CODE (op))
3949 case PLUS:
3950 case MINUS:
3951 case MULT:
3952 case DIV:
3953 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3955 default:
3956 return 0;
3961 mult_operator (op, mode)
3962 register rtx op;
3963 enum machine_mode mode ATTRIBUTE_UNUSED;
3965 return GET_CODE (op) == MULT;
3969 div_operator (op, mode)
3970 register rtx op;
3971 enum machine_mode mode ATTRIBUTE_UNUSED;
3973 return GET_CODE (op) == DIV;
3977 arith_or_logical_operator (op, mode)
3978 rtx op;
3979 enum machine_mode mode;
3981 return ((mode == VOIDmode || GET_MODE (op) == mode)
3982 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3983 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3986 /* Returns 1 if OP is a memory operand with a displacement. */
3989 memory_displacement_operand (op, mode)
3990 register rtx op;
3991 enum machine_mode mode;
3993 struct ix86_address parts;
3995 if (! memory_operand (op, mode))
3996 return 0;
3998 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3999 abort ();
4001 return parts.disp != NULL_RTX;
4004 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4005 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4007 ??? It seems likely that this will only work because cmpsi is an
4008 expander, and no actual insns use this. */
4011 cmpsi_operand (op, mode)
4012 rtx op;
4013 enum machine_mode mode;
4015 if (nonimmediate_operand (op, mode))
4016 return 1;
4018 if (GET_CODE (op) == AND
4019 && GET_MODE (op) == SImode
4020 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4021 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4022 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4023 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4024 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4025 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4026 return 1;
4028 return 0;
4031 /* Returns 1 if OP is a memory operand that cannot be represented by the
4032 modRM array. */
4035 long_memory_operand (op, mode)
4036 register rtx op;
4037 enum machine_mode mode;
4039 if (! memory_operand (op, mode))
4040 return 0;
4042 return memory_address_length (op) != 0;
4045 /* Return nonzero if the rtx is known to be aligned. */
4048 aligned_operand (op, mode)
4049 rtx op;
4050 enum machine_mode mode;
4052 struct ix86_address parts;
4054 if (!general_operand (op, mode))
4055 return 0;
4057 /* Registers and immediate operands are always "aligned". */
4058 if (GET_CODE (op) != MEM)
4059 return 1;
4061 /* Don't even try to do any aligned optimizations with volatiles. */
4062 if (MEM_VOLATILE_P (op))
4063 return 0;
4065 op = XEXP (op, 0);
4067 /* Pushes and pops are only valid on the stack pointer. */
4068 if (GET_CODE (op) == PRE_DEC
4069 || GET_CODE (op) == POST_INC)
4070 return 1;
4072 /* Decode the address. */
4073 if (! ix86_decompose_address (op, &parts))
4074 abort ();
4076 if (parts.base && GET_CODE (parts.base) == SUBREG)
4077 parts.base = SUBREG_REG (parts.base);
4078 if (parts.index && GET_CODE (parts.index) == SUBREG)
4079 parts.index = SUBREG_REG (parts.index);
4081 /* Look for some component that isn't known to be aligned. */
4082 if (parts.index)
4084 if (parts.scale < 4
4085 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4086 return 0;
4088 if (parts.base)
4090 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4091 return 0;
4093 if (parts.disp)
4095 if (GET_CODE (parts.disp) != CONST_INT
4096 || (INTVAL (parts.disp) & 3) != 0)
4097 return 0;
4100 /* Didn't find one -- this must be an aligned address. */
4101 return 1;
4104 /* Return true if the constant is something that can be loaded with
4105 a special instruction. Only handle 0.0 and 1.0; others are less
4106 worthwhile. */
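/* Illustrative note: 0.0 and 1.0 are special because the 387 can materialize
   them directly with the fldz and fld1 instructions; the distinct return
   values 1 and 2 below let callers pick between the two.  */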
4109 standard_80387_constant_p (x)
4110 rtx x;
4112 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4113 return -1;
4114 /* Note that on the 80387 there are other constants, such as pi, that we
4115 could support too. On some machines, these are much slower to load as a
4116 standard constant than to load from a double in memory. */
4117 if (x == CONST0_RTX (GET_MODE (x)))
4118 return 1;
4119 if (x == CONST1_RTX (GET_MODE (x)))
4120 return 2;
4121 return 0;
4124 /* Return 1 if X is an FP constant we can load into an SSE register w/o using memory.
4127 standard_sse_constant_p (x)
4128 rtx x;
4130 if (x == const0_rtx)
4131 return 1;
4132 return (x == CONST0_RTX (GET_MODE (x)));
4135 /* Returns 1 if OP contains a symbol reference */
4138 symbolic_reference_mentioned_p (op)
4139 rtx op;
4141 register const char *fmt;
4142 register int i;
4144 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4145 return 1;
4147 fmt = GET_RTX_FORMAT (GET_CODE (op));
4148 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4150 if (fmt[i] == 'E')
4152 register int j;
4154 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4155 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4156 return 1;
4159 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4160 return 1;
4163 return 0;
4166 /* Return 1 if it is appropriate to emit `ret' instructions in the
4167 body of a function. Do this only if the epilogue is simple, needing a
4168 couple of insns. Prior to reloading, we can't tell how many registers
4169 must be saved, so return 0 then. Return 0 if there is no frame
4170 marker to de-allocate.
4172 If NON_SAVING_SETJMP is defined and true, then it is not possible
4173 for the epilogue to be simple, so return 0. This is a special case
4174 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4175 until final, but jump_optimize may need to know sooner if a
4176 `return' is OK. */
4179 ix86_can_use_return_insn_p ()
4181 struct ix86_frame frame;
4183 #ifdef NON_SAVING_SETJMP
4184 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4185 return 0;
4186 #endif
4188 if (! reload_completed || frame_pointer_needed)
4189 return 0;
4191 /* Don't allow more than 32K bytes of popped arguments, since that's all
4192 we can do with one instruction. */
4193 if (current_function_pops_args
4194 && current_function_args_size >= 32768)
4195 return 0;
4197 ix86_compute_frame_layout (&frame);
4198 return frame.to_allocate == 0 && frame.nregs == 0;
4201 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
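/* Worked example (for illustration): -1 and 0x7fffffff both fit, since
   sign-extending their low 32 bits reproduces the original value, while
   0x80000000 does not -- as a 32-bit immediate it would sign-extend to
   0xffffffff80000000.  */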
4203 x86_64_sign_extended_value (value)
4204 rtx value;
4206 switch (GET_CODE (value))
4208 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4209 to be at least 32, so all acceptable constants are
4210 represented as CONST_INT. */
4211 case CONST_INT:
4212 if (HOST_BITS_PER_WIDE_INT == 32)
4213 return 1;
4214 else
4216 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4217 return trunc_int_for_mode (val, SImode) == val;
4219 break;
4221 /* For certain code models, the symbolic references are known to fit.
4222 In the CM_SMALL_PIC model we know a reference fits if it is local to
4223 the shared library. Don't count TLS SYMBOL_REFs here, since they
4224 should fit only when inside an UNSPEC handled below. */
4225 case SYMBOL_REF:
4226 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4228 /* For certain code models, the code is near as well. */
4229 case LABEL_REF:
4230 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4231 || ix86_cmodel == CM_KERNEL);
4233 /* We also may accept the offsetted memory references in certain special
4234 cases. */
4235 case CONST:
4236 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4237 switch (XINT (XEXP (value, 0), 1))
4239 case UNSPEC_GOTPCREL:
4240 case UNSPEC_DTPOFF:
4241 case UNSPEC_GOTNTPOFF:
4242 case UNSPEC_NTPOFF:
4243 return 1;
4244 default:
4245 break;
4247 if (GET_CODE (XEXP (value, 0)) == PLUS)
4249 rtx op1 = XEXP (XEXP (value, 0), 0);
4250 rtx op2 = XEXP (XEXP (value, 0), 1);
4251 HOST_WIDE_INT offset;
4253 if (ix86_cmodel == CM_LARGE)
4254 return 0;
4255 if (GET_CODE (op2) != CONST_INT)
4256 return 0;
4257 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4258 switch (GET_CODE (op1))
4260 case SYMBOL_REF:
4261 /* For CM_SMALL assume that the latest object is 16MB before
4262 the end of the 31-bit boundary. We may also accept pretty
4263 large negative constants, knowing that all objects are
4264 in the positive half of the address space. */
4265 if (ix86_cmodel == CM_SMALL
4266 && offset < 16*1024*1024
4267 && trunc_int_for_mode (offset, SImode) == offset)
4268 return 1;
4269 /* For CM_KERNEL we know that all objects reside in the
4270 negative half of the 32-bit address space. We may not
4271 accept negative offsets, since they may take the reference
4272 just out of range, but we may accept pretty large positive ones. */
4273 if (ix86_cmodel == CM_KERNEL
4274 && offset > 0
4275 && trunc_int_for_mode (offset, SImode) == offset)
4276 return 1;
4277 break;
4278 case LABEL_REF:
4279 /* These conditions are similar to SYMBOL_REF ones, just the
4280 constraints for code models differ. */
4281 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4282 && offset < 16*1024*1024
4283 && trunc_int_for_mode (offset, SImode) == offset)
4284 return 1;
4285 if (ix86_cmodel == CM_KERNEL
4286 && offset > 0
4287 && trunc_int_for_mode (offset, SImode) == offset)
4288 return 1;
4289 break;
4290 case UNSPEC:
4291 switch (XINT (op1, 1))
4293 case UNSPEC_DTPOFF:
4294 case UNSPEC_NTPOFF:
4295 if (offset > 0
4296 && trunc_int_for_mode (offset, SImode) == offset)
4297 return 1;
4299 break;
4300 default:
4301 return 0;
4304 return 0;
4305 default:
4306 return 0;
4310 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
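/* Worked example (for illustration): 0xffffffff fits, since its upper 32 bits
   are simply zero, while -1 does not -- zero-extending its low 32 bits yields
   0xffffffff rather than the original value.  */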
4312 x86_64_zero_extended_value (value)
4313 rtx value;
4315 switch (GET_CODE (value))
4317 case CONST_DOUBLE:
4318 if (HOST_BITS_PER_WIDE_INT == 32)
4319 return (GET_MODE (value) == VOIDmode
4320 && !CONST_DOUBLE_HIGH (value));
4321 else
4322 return 0;
4323 case CONST_INT:
4324 if (HOST_BITS_PER_WIDE_INT == 32)
4325 return INTVAL (value) >= 0;
4326 else
4327 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4328 break;
4330 /* For certain code models, the symbolic references are known to fit. */
4331 case SYMBOL_REF:
4332 return ix86_cmodel == CM_SMALL;
4334 /* For certain code models, the code is near as well. */
4335 case LABEL_REF:
4336 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4338 /* We also may accept the offsetted memory references in certain special
4339 cases. */
4340 case CONST:
4341 if (GET_CODE (XEXP (value, 0)) == PLUS)
4343 rtx op1 = XEXP (XEXP (value, 0), 0);
4344 rtx op2 = XEXP (XEXP (value, 0), 1);
4346 if (ix86_cmodel == CM_LARGE)
4347 return 0;
4348 switch (GET_CODE (op1))
4350 case SYMBOL_REF:
4351 return 0;
4352 /* For small code model we may accept pretty large positive
4353 offsets, since one bit is available for free. Negative
4354 offsets are limited by the size of NULL pointer area
4355 specified by the ABI. */
4356 if (ix86_cmodel == CM_SMALL
4357 && GET_CODE (op2) == CONST_INT
4358 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4359 && (trunc_int_for_mode (INTVAL (op2), SImode)
4360 == INTVAL (op2)))
4361 return 1;
4362 /* ??? For the kernel, we may accept adjustment of
4363 -0x10000000, since we know that it will just convert
4364 negative address space to positive, but perhaps this
4365 is not worthwhile. */
4366 break;
4367 case LABEL_REF:
4368 /* These conditions are similar to SYMBOL_REF ones, just the
4369 constraints for code models differ. */
4370 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4371 && GET_CODE (op2) == CONST_INT
4372 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4373 && (trunc_int_for_mode (INTVAL (op2), SImode)
4374 == INTVAL (op2)))
4375 return 1;
4376 break;
4377 default:
4378 return 0;
4381 return 0;
4382 default:
4383 return 0;
4387 /* Value should be nonzero if functions must have frame pointers.
4388 Zero means the frame pointer need not be set up (and parms may
4389 be accessed via the stack pointer) in functions that seem suitable. */
4392 ix86_frame_pointer_required ()
4394 /* If we accessed previous frames, then the generated code expects
4395 to be able to access the saved ebp value in our frame. */
4396 if (cfun->machine->accesses_prev_frame)
4397 return 1;
4399 /* Several x86 os'es need a frame pointer for other reasons,
4400 usually pertaining to setjmp. */
4401 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4402 return 1;
4404 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4405 the frame pointer by default. Turn it back on now if we've not
4406 got a leaf function. */
4407 if (TARGET_OMIT_LEAF_FRAME_POINTER
4408 && (!current_function_is_leaf))
4409 return 1;
4411 if (current_function_profile)
4412 return 1;
4414 return 0;
4417 /* Record that the current function accesses previous call frames. */
4419 void
4420 ix86_setup_frame_addresses ()
4422 cfun->machine->accesses_prev_frame = 1;
4425 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4426 # define USE_HIDDEN_LINKONCE 1
4427 #else
4428 # define USE_HIDDEN_LINKONCE 0
4429 #endif
4431 static int pic_labels_used;
4433 /* Fills in the label name that should be used for a pc thunk for
4434 the given register. */
4436 static void
4437 get_pc_thunk_name (name, regno)
4438 char name[32];
4439 unsigned int regno;
4441 if (USE_HIDDEN_LINKONCE)
4442 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4443 else
4444 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4448 /* This function emits the -fpic pc thunks: for each register used, a thunk
4449 that loads that register with the return address of the caller and returns. */
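/* For example (illustrative only), with USE_HIDDEN_LINKONCE the thunk emitted
   for %ebx is essentially:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
*/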
4451 void
4452 ix86_asm_file_end (file)
4453 FILE *file;
4455 rtx xops[2];
4456 int regno;
4458 for (regno = 0; regno < 8; ++regno)
4460 char name[32];
4462 if (! ((pic_labels_used >> regno) & 1))
4463 continue;
4465 get_pc_thunk_name (name, regno);
4467 if (USE_HIDDEN_LINKONCE)
4469 tree decl;
4471 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4472 error_mark_node);
4473 TREE_PUBLIC (decl) = 1;
4474 TREE_STATIC (decl) = 1;
4475 DECL_ONE_ONLY (decl) = 1;
4477 (*targetm.asm_out.unique_section) (decl, 0);
4478 named_section (decl, NULL, 0);
4480 (*targetm.asm_out.globalize_label) (file, name);
4481 fputs ("\t.hidden\t", file);
4482 assemble_name (file, name);
4483 fputc ('\n', file);
4484 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4486 else
4488 text_section ();
4489 ASM_OUTPUT_LABEL (file, name);
4492 xops[0] = gen_rtx_REG (SImode, regno);
4493 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4494 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4495 output_asm_insn ("ret", xops);
4499 /* Emit code for the SET_GOT patterns. */
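/* Roughly (illustrative sketch), for -fpic this emits either

       call    .L2
   .L2: popl    %ebx
        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   or, when TARGET_DEEP_BRANCH_PREDICTION is set,

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   with the exact operands filled in by the code below.  */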
4501 const char *
4502 output_set_got (dest)
4503 rtx dest;
4505 rtx xops[3];
4507 xops[0] = dest;
4508 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4510 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4512 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4514 if (!flag_pic)
4515 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4516 else
4517 output_asm_insn ("call\t%a2", xops);
4519 #if TARGET_MACHO
4520 /* Output the "canonical" label name ("Lxx$pb") here too. This
4521 is what will be referred to by the Mach-O PIC subsystem. */
4522 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4523 #endif
4524 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4525 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4527 if (flag_pic)
4528 output_asm_insn ("pop{l}\t%0", xops);
4530 else
4532 char name[32];
4533 get_pc_thunk_name (name, REGNO (dest));
4534 pic_labels_used |= 1 << REGNO (dest);
4536 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4537 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4538 output_asm_insn ("call\t%X2", xops);
4541 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4542 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4543 else if (!TARGET_MACHO)
4544 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4546 return "";
4549 /* Generate a "push" pattern for input ARG. */
4551 static rtx
4552 gen_push (arg)
4553 rtx arg;
4555 return gen_rtx_SET (VOIDmode,
4556 gen_rtx_MEM (Pmode,
4557 gen_rtx_PRE_DEC (Pmode,
4558 stack_pointer_rtx)),
4559 arg);
4562 /* Return the number of an unused call-clobbered register if one is
4563 available for the entire function, or INVALID_REGNUM if none is. */
4565 static unsigned int
4566 ix86_select_alt_pic_regnum ()
4568 if (current_function_is_leaf && !current_function_profile)
4570 int i;
4571 for (i = 2; i >= 0; --i)
4572 if (!regs_ever_live[i])
4573 return i;
4576 return INVALID_REGNUM;
4579 /* Return 1 if we need to save REGNO. */
4580 static int
4581 ix86_save_reg (regno, maybe_eh_return)
4582 unsigned int regno;
4583 int maybe_eh_return;
4585 if (pic_offset_table_rtx
4586 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4587 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4588 || current_function_profile
4589 || current_function_calls_eh_return))
4591 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4592 return 0;
4593 return 1;
4596 if (current_function_calls_eh_return && maybe_eh_return)
4598 unsigned i;
4599 for (i = 0; ; i++)
4601 unsigned test = EH_RETURN_DATA_REGNO (i);
4602 if (test == INVALID_REGNUM)
4603 break;
4604 if (test == regno)
4605 return 1;
4609 return (regs_ever_live[regno]
4610 && !call_used_regs[regno]
4611 && !fixed_regs[regno]
4612 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4615 /* Return number of registers to be saved on the stack. */
4617 static int
4618 ix86_nsaved_regs ()
4620 int nregs = 0;
4621 int regno;
4623 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4624 if (ix86_save_reg (regno, true))
4625 nregs++;
4626 return nregs;
4629 /* Return the offset between two registers, one to be eliminated, and the other
4630 its replacement, at the start of a routine. */
4632 HOST_WIDE_INT
4633 ix86_initial_elimination_offset (from, to)
4634 int from;
4635 int to;
4637 struct ix86_frame frame;
4638 ix86_compute_frame_layout (&frame);
4640 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4641 return frame.hard_frame_pointer_offset;
4642 else if (from == FRAME_POINTER_REGNUM
4643 && to == HARD_FRAME_POINTER_REGNUM)
4644 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4645 else
4647 if (to != STACK_POINTER_REGNUM)
4648 abort ();
4649 else if (from == ARG_POINTER_REGNUM)
4650 return frame.stack_pointer_offset;
4651 else if (from != FRAME_POINTER_REGNUM)
4652 abort ();
4653 else
4654 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4658 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
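/* A rough sketch of the layout computed below, from higher toward lower
   addresses (illustrative only; the code that follows is authoritative, and
   64-bit red-zone handling is omitted):

       return address
       saved %ebp, if frame_pointer_needed    <- hard frame pointer
       register save area
       va-arg save area
       padding1
       local variables (get_frame_size ())    <- (soft) frame pointer
       outgoing arguments area
       padding2                                <- stack pointer
*/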
4660 static void
4661 ix86_compute_frame_layout (frame)
4662 struct ix86_frame *frame;
4664 HOST_WIDE_INT total_size;
4665 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4666 int offset;
4667 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4668 HOST_WIDE_INT size = get_frame_size ();
4670 frame->nregs = ix86_nsaved_regs ();
4671 total_size = size;
4673 /* Skip return address and saved base pointer. */
4674 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4676 frame->hard_frame_pointer_offset = offset;
4678 /* Do some sanity checking of stack_alignment_needed and
4679 preferred_alignment, since the i386 port is the only one using these
4680 features and they may break easily. */
4682 if (size && !stack_alignment_needed)
4683 abort ();
4684 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4685 abort ();
4686 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4687 abort ();
4688 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4689 abort ();
4691 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4692 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4694 /* Register save area */
4695 offset += frame->nregs * UNITS_PER_WORD;
4697 /* Va-arg area */
4698 if (ix86_save_varrargs_registers)
4700 offset += X86_64_VARARGS_SIZE;
4701 frame->va_arg_size = X86_64_VARARGS_SIZE;
4703 else
4704 frame->va_arg_size = 0;
4706 /* Align start of frame for local function. */
4707 frame->padding1 = ((offset + stack_alignment_needed - 1)
4708 & -stack_alignment_needed) - offset;
4710 offset += frame->padding1;
4712 /* Frame pointer points here. */
4713 frame->frame_pointer_offset = offset;
4715 offset += size;
4717 /* Add outgoing arguments area. Can be skipped if we eliminated
4718 all the function calls as dead code. */
4719 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4721 offset += current_function_outgoing_args_size;
4722 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4724 else
4725 frame->outgoing_arguments_size = 0;
4727 /* Align stack boundary. Only needed if we're calling another function
4728 or using alloca. */
4729 if (!current_function_is_leaf || current_function_calls_alloca)
4730 frame->padding2 = ((offset + preferred_alignment - 1)
4731 & -preferred_alignment) - offset;
4732 else
4733 frame->padding2 = 0;
4735 offset += frame->padding2;
4737 /* We've reached end of stack frame. */
4738 frame->stack_pointer_offset = offset;
4740 /* Size prologue needs to allocate. */
4741 frame->to_allocate =
4742 (size + frame->padding1 + frame->padding2
4743 + frame->outgoing_arguments_size + frame->va_arg_size);
4745 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4746 && current_function_is_leaf)
4748 frame->red_zone_size = frame->to_allocate;
4749 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4750 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4752 else
4753 frame->red_zone_size = 0;
4754 frame->to_allocate -= frame->red_zone_size;
4755 frame->stack_pointer_offset -= frame->red_zone_size;
4756 #if 0
4757 fprintf (stderr, "nregs: %i\n", frame->nregs);
4758 fprintf (stderr, "size: %i\n", size);
4759 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4760 fprintf (stderr, "padding1: %i\n", frame->padding1);
4761 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4762 fprintf (stderr, "padding2: %i\n", frame->padding2);
4763 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4764 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4765 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4766 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4767 frame->hard_frame_pointer_offset);
4768 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4769 #endif
4772 /* Emit code to save registers in the prologue. */
4774 static void
4775 ix86_emit_save_regs ()
4777 register int regno;
4778 rtx insn;
4780 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4781 if (ix86_save_reg (regno, true))
4783 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4784 RTX_FRAME_RELATED_P (insn) = 1;
4788 /* Emit code to save registers using MOV insns. First register
4789 is saved at POINTER + OFFSET. */
4790 static void
4791 ix86_emit_save_regs_using_mov (pointer, offset)
4792 rtx pointer;
4793 HOST_WIDE_INT offset;
4795 int regno;
4796 rtx insn;
4798 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4799 if (ix86_save_reg (regno, true))
4801 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4802 Pmode, offset),
4803 gen_rtx_REG (Pmode, regno));
4804 RTX_FRAME_RELATED_P (insn) = 1;
4805 offset += UNITS_PER_WORD;
4809 /* Expand the prologue into a bunch of separate insns. */
4811 void
4812 ix86_expand_prologue ()
4814 rtx insn;
4815 bool pic_reg_used;
4816 struct ix86_frame frame;
4817 int use_mov = 0;
4818 HOST_WIDE_INT allocate;
4820 ix86_compute_frame_layout (&frame);
4821 if (!optimize_size)
4823 int count = frame.nregs;
4825 /* The fast prologue uses moves instead of pushes to save registers. This
4826 is significantly longer, but also executes faster, as modern hardware
4827 can execute the moves in parallel but can't do that for push/pop.
4829 Be careful about choosing which prologue to emit: when the function takes
4830 many instructions to execute, we may as well use the slow version, and
4831 likewise when the function is known to be outside a hot spot (known with
4832 feedback only). Weight the size of the function by the number of registers
4833 to save, as it is cheap to use one or two push instructions but very
4834 slow to use many of them. */
4835 if (count)
4836 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4837 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4838 || (flag_branch_probabilities
4839 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4840 use_fast_prologue_epilogue = 0;
4841 else
4842 use_fast_prologue_epilogue = !expensive_function_p (count);
4843 if (TARGET_PROLOGUE_USING_MOVE)
4844 use_mov = use_fast_prologue_epilogue;
4847 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4848 slower on all targets. Also sdb doesn't like it. */
4850 if (frame_pointer_needed)
4852 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4853 RTX_FRAME_RELATED_P (insn) = 1;
4855 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4856 RTX_FRAME_RELATED_P (insn) = 1;
4859 allocate = frame.to_allocate;
4860 /* In case we are dealing with only a single register and an empty frame,
4861 a push is equivalent to the mov+add sequence. */
4862 if (allocate == 0 && frame.nregs <= 1)
4863 use_mov = 0;
4865 if (!use_mov)
4866 ix86_emit_save_regs ();
4867 else
4868 allocate += frame.nregs * UNITS_PER_WORD;
4870 if (allocate == 0)
4872 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4874 insn = emit_insn (gen_pro_epilogue_adjust_stack
4875 (stack_pointer_rtx, stack_pointer_rtx,
4876 GEN_INT (-allocate)));
4877 RTX_FRAME_RELATED_P (insn) = 1;
4879 else
4881 /* ??? Is this only valid for Win32? */
4883 rtx arg0, sym;
4885 if (TARGET_64BIT)
4886 abort ();
4888 arg0 = gen_rtx_REG (SImode, 0);
4889 emit_move_insn (arg0, GEN_INT (allocate));
4891 sym = gen_rtx_MEM (FUNCTION_MODE,
4892 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4893 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4895 CALL_INSN_FUNCTION_USAGE (insn)
4896 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4897 CALL_INSN_FUNCTION_USAGE (insn));
4899 /* Don't allow the scheduling pass to move insns across the __alloca
4900 call. */
4901 emit_insn (gen_blockage (const0_rtx));
4903 if (use_mov)
4905 if (!frame_pointer_needed || !frame.to_allocate)
4906 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4907 else
4908 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4909 -frame.nregs * UNITS_PER_WORD);
4912 #ifdef SUBTARGET_PROLOGUE
4913 SUBTARGET_PROLOGUE;
4914 #endif
4916 pic_reg_used = false;
4917 if (pic_offset_table_rtx
4918 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4919 || current_function_profile))
4921 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4923 if (alt_pic_reg_used != INVALID_REGNUM)
4924 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4926 pic_reg_used = true;
4929 if (pic_reg_used)
4931 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4933 /* Even with accurate pre-reload life analysis, we can wind up
4934 deleting all references to the pic register after reload.
4935 Consider if cross-jumping unifies two sides of a branch
4936 controlled by a comparison vs the only read from a global,
4937 in which case we allow the set_got to be deleted, though we're
4938 too late to do anything about the ebx save in the prologue. */
4939 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4942 /* Prevent function calls from being scheduled before the call to mcount.
4943 In the pic_reg_used case, make sure that the got load isn't deleted. */
4944 if (current_function_profile)
4945 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4948 /* Emit code to restore saved registers using MOV insns. First register
4949 is restored from POINTER + OFFSET. */
4950 static void
4951 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4952 rtx pointer;
4953 int offset;
4954 int maybe_eh_return;
4956 int regno;
4958 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4959 if (ix86_save_reg (regno, maybe_eh_return))
4961 emit_move_insn (gen_rtx_REG (Pmode, regno),
4962 adjust_address (gen_rtx_MEM (Pmode, pointer),
4963 Pmode, offset));
4964 offset += UNITS_PER_WORD;
4968 /* Restore function stack, frame, and registers. */
4970 void
4971 ix86_expand_epilogue (style)
4972 int style;
4974 int regno;
4975 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4976 struct ix86_frame frame;
4977 HOST_WIDE_INT offset;
4979 ix86_compute_frame_layout (&frame);
4981 /* Calculate start of saved registers relative to ebp. Special care
4982 must be taken for the normal return case of a function using
4983 eh_return: the eax and edx registers are marked as saved, but not
4984 restored along this path. */
4985 offset = frame.nregs;
4986 if (current_function_calls_eh_return && style != 2)
4987 offset -= 2;
4988 offset *= -UNITS_PER_WORD;
4990 /* If we're only restoring one register and sp is not valid, then
4991 use a move instruction to restore the register, since it's
4992 less work than reloading sp and popping the register.
4994 The default code results in a stack adjustment using an add/lea instruction,
4995 while this code results in a LEAVE instruction (or discrete equivalent),
4996 so it is profitable in some other cases as well, especially when there
4997 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4998 and there is exactly one register to pop. This heuristic may need some
4999 tuning in the future. */
5000 if ((!sp_valid && frame.nregs <= 1)
5001 || (TARGET_EPILOGUE_USING_MOVE
5002 && use_fast_prologue_epilogue
5003 && (frame.nregs > 1 || frame.to_allocate))
5004 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5005 || (frame_pointer_needed && TARGET_USE_LEAVE
5006 && use_fast_prologue_epilogue && frame.nregs == 1)
5007 || current_function_calls_eh_return)
5009 /* Restore registers. We can use ebp or esp to address the memory
5010 locations. If both are available, default to ebp, since offsets
5011 are known to be small. The only exception is esp pointing directly to the
5012 end of the block of saved registers, where we may simplify the addressing
5013 mode. */
5015 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5016 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5017 frame.to_allocate, style == 2);
5018 else
5019 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5020 offset, style == 2);
5022 /* eh_return epilogues need %ecx added to the stack pointer. */
5023 if (style == 2)
5025 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5027 if (frame_pointer_needed)
5029 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5030 tmp = plus_constant (tmp, UNITS_PER_WORD);
5031 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5033 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5034 emit_move_insn (hard_frame_pointer_rtx, tmp);
5036 emit_insn (gen_pro_epilogue_adjust_stack
5037 (stack_pointer_rtx, sa, const0_rtx));
5039 else
5041 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5042 tmp = plus_constant (tmp, (frame.to_allocate
5043 + frame.nregs * UNITS_PER_WORD));
5044 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5047 else if (!frame_pointer_needed)
5048 emit_insn (gen_pro_epilogue_adjust_stack
5049 (stack_pointer_rtx, stack_pointer_rtx,
5050 GEN_INT (frame.to_allocate
5051 + frame.nregs * UNITS_PER_WORD)));
5052 /* If not an i386, mov & pop is faster than "leave". */
5053 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5054 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5055 else
5057 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5058 hard_frame_pointer_rtx,
5059 const0_rtx));
5060 if (TARGET_64BIT)
5061 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5062 else
5063 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5066 else
5068 /* First step is to deallocate the stack frame so that we can
5069 pop the registers. */
5070 if (!sp_valid)
5072 if (!frame_pointer_needed)
5073 abort ();
5074 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5075 hard_frame_pointer_rtx,
5076 GEN_INT (offset)));
5078 else if (frame.to_allocate)
5079 emit_insn (gen_pro_epilogue_adjust_stack
5080 (stack_pointer_rtx, stack_pointer_rtx,
5081 GEN_INT (frame.to_allocate)));
5083 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5084 if (ix86_save_reg (regno, false))
5086 if (TARGET_64BIT)
5087 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5088 else
5089 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5091 if (frame_pointer_needed)
5093 /* Leave results in shorter dependency chains on CPUs that are
5094 able to grok it fast. */
5095 if (TARGET_USE_LEAVE)
5096 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5097 else if (TARGET_64BIT)
5098 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5099 else
5100 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5104 /* Sibcall epilogues don't want a return instruction. */
5105 if (style == 0)
5106 return;
5108 if (current_function_pops_args && current_function_args_size)
5110 rtx popc = GEN_INT (current_function_pops_args);
5112 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
5113 return address, do an explicit add, and jump indirectly to the
5114 caller. */
5116 if (current_function_pops_args >= 65536)
5118 rtx ecx = gen_rtx_REG (SImode, 2);
5120 /* There is no "pascal" calling convention in the 64-bit ABI. */
5121 if (TARGET_64BIT)
5122 abort ();
5124 emit_insn (gen_popsi1 (ecx));
5125 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5126 emit_jump_insn (gen_return_indirect_internal (ecx));
5128 else
5129 emit_jump_insn (gen_return_pop_internal (popc));
5131 else
5132 emit_jump_insn (gen_return_internal ());
5135 /* Reset from the function's potential modifications. */
5137 static void
5138 ix86_output_function_epilogue (file, size)
5139 FILE *file ATTRIBUTE_UNUSED;
5140 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5142 if (pic_offset_table_rtx)
5143 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5146 /* Extract the parts of an RTL expression that is a valid memory address
5147 for an instruction. Return 0 if the structure of the address is
5148 grossly off. Return -1 if the address contains ASHIFT, so it is not
5149 strictly valid, but is still used for computing the length of an lea instruction. */
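/* For example (illustrative), the address of "movl 8(%ebx,%esi,4), %eax",

       (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx)) (const_int 8))

   decomposes into base = %ebx, index = %esi, scale = 4, disp = 8.  */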
5152 static int
5153 ix86_decompose_address (addr, out)
5154 register rtx addr;
5155 struct ix86_address *out;
5157 rtx base = NULL_RTX;
5158 rtx index = NULL_RTX;
5159 rtx disp = NULL_RTX;
5160 HOST_WIDE_INT scale = 1;
5161 rtx scale_rtx = NULL_RTX;
5162 int retval = 1;
5164 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5165 base = addr;
5166 else if (GET_CODE (addr) == PLUS)
5168 rtx op0 = XEXP (addr, 0);
5169 rtx op1 = XEXP (addr, 1);
5170 enum rtx_code code0 = GET_CODE (op0);
5171 enum rtx_code code1 = GET_CODE (op1);
5173 if (code0 == REG || code0 == SUBREG)
5175 if (code1 == REG || code1 == SUBREG)
5176 index = op0, base = op1; /* index + base */
5177 else
5178 base = op0, disp = op1; /* base + displacement */
5180 else if (code0 == MULT)
5182 index = XEXP (op0, 0);
5183 scale_rtx = XEXP (op0, 1);
5184 if (code1 == REG || code1 == SUBREG)
5185 base = op1; /* index*scale + base */
5186 else
5187 disp = op1; /* index*scale + disp */
5189 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5191 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5192 scale_rtx = XEXP (XEXP (op0, 0), 1);
5193 base = XEXP (op0, 1);
5194 disp = op1;
5196 else if (code0 == PLUS)
5198 index = XEXP (op0, 0); /* index + base + disp */
5199 base = XEXP (op0, 1);
5200 disp = op1;
5202 else
5203 return 0;
5205 else if (GET_CODE (addr) == MULT)
5207 index = XEXP (addr, 0); /* index*scale */
5208 scale_rtx = XEXP (addr, 1);
5210 else if (GET_CODE (addr) == ASHIFT)
5212 rtx tmp;
5214 /* We're called for lea too, which implements ashift on occasion. */
5215 index = XEXP (addr, 0);
5216 tmp = XEXP (addr, 1);
5217 if (GET_CODE (tmp) != CONST_INT)
5218 return 0;
5219 scale = INTVAL (tmp);
5220 if ((unsigned HOST_WIDE_INT) scale > 3)
5221 return 0;
5222 scale = 1 << scale;
5223 retval = -1;
5225 else
5226 disp = addr; /* displacement */
5228 /* Extract the integral value of scale. */
5229 if (scale_rtx)
5231 if (GET_CODE (scale_rtx) != CONST_INT)
5232 return 0;
5233 scale = INTVAL (scale_rtx);
5236 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5237 if (base && index && scale == 1
5238 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5239 || index == stack_pointer_rtx))
5241 rtx tmp = base;
5242 base = index;
5243 index = tmp;
5246 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5247 if ((base == hard_frame_pointer_rtx
5248 || base == frame_pointer_rtx
5249 || base == arg_pointer_rtx) && !disp)
5250 disp = const0_rtx;
5252 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5253 Avoid this by transforming to [%esi+0]. */
5254 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5255 && base && !index && !disp
5256 && REG_P (base)
5257 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5258 disp = const0_rtx;
5260 /* Special case: encode reg+reg instead of reg*2. */
5261 if (!base && index && scale && scale == 2)
5262 base = index, scale = 1;
5264 /* Special case: scaling cannot be encoded without base or displacement. */
5265 if (!base && !disp && index && scale != 1)
5266 disp = const0_rtx;
5268 out->base = base;
5269 out->index = index;
5270 out->disp = disp;
5271 out->scale = scale;
5273 return retval;
5276 /* Return cost of the memory address x.
5277 For i386, it is better to use a complex address than let gcc copy
5278 the address into a reg and make a new pseudo. But not if the address
5279 requires two regs - that would mean more pseudos with longer
5280 lifetimes. */
5281 static int
5282 ix86_address_cost (x)
5283 rtx x;
5285 struct ix86_address parts;
5286 int cost = 1;
5288 if (!ix86_decompose_address (x, &parts))
5289 abort ();
5291 if (parts.base && GET_CODE (parts.base) == SUBREG)
5292 parts.base = SUBREG_REG (parts.base);
5293 if (parts.index && GET_CODE (parts.index) == SUBREG)
5294 parts.index = SUBREG_REG (parts.index);
5296 /* More complex memory references are better. */
5297 if (parts.disp && parts.disp != const0_rtx)
5298 cost--;
5300 /* Attempt to minimize number of registers in the address. */
5301 if ((parts.base
5302 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5303 || (parts.index
5304 && (!REG_P (parts.index)
5305 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5306 cost++;
5308 if (parts.base
5309 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5310 && parts.index
5311 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5312 && parts.base != parts.index)
5313 cost++;
5315 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5316 since its predecode logic can't detect the length of such instructions
5317 and decoding degenerates to the vector decoder. Increase the cost of such
5318 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5319 to split such addresses or even refuse them altogether.
5321 The following addressing modes are affected:
5322 [base+scale*index]
5323 [scale*index+disp]
5324 [base+index]
5326 The first and last cases may be avoidable by explicitly coding a zero into
5327 the memory address, but I don't have an AMD-K6 machine handy to check this
5328 theory. */
5330 if (TARGET_K6
5331 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5332 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5333 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5334 cost += 10;
5336 return cost;
5339 /* If X is a machine specific address (i.e. a symbol or label being
5340 referenced as a displacement from the GOT implemented using an
5341 UNSPEC), then return the base term. Otherwise return X. */
5344 ix86_find_base_term (x)
5345 rtx x;
5347 rtx term;
5349 if (TARGET_64BIT)
5351 if (GET_CODE (x) != CONST)
5352 return x;
5353 term = XEXP (x, 0);
5354 if (GET_CODE (term) == PLUS
5355 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5356 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5357 term = XEXP (term, 0);
5358 if (GET_CODE (term) != UNSPEC
5359 || XINT (term, 1) != UNSPEC_GOTPCREL)
5360 return x;
5362 term = XVECEXP (term, 0, 0);
5364 if (GET_CODE (term) != SYMBOL_REF
5365 && GET_CODE (term) != LABEL_REF)
5366 return x;
5368 return term;
5371 if (GET_CODE (x) != PLUS
5372 || XEXP (x, 0) != pic_offset_table_rtx
5373 || GET_CODE (XEXP (x, 1)) != CONST)
5374 return x;
5376 term = XEXP (XEXP (x, 1), 0);
5378 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5379 term = XEXP (term, 0);
5381 if (GET_CODE (term) != UNSPEC
5382 || XINT (term, 1) != UNSPEC_GOTOFF)
5383 return x;
5385 term = XVECEXP (term, 0, 0);
5387 if (GET_CODE (term) != SYMBOL_REF
5388 && GET_CODE (term) != LABEL_REF)
5389 return x;
5391 return term;
5394 /* Determine if a given RTX is a valid constant. We already know this
5395 satisfies CONSTANT_P. */
5397 bool
5398 legitimate_constant_p (x)
5399 rtx x;
5401 rtx inner;
5403 switch (GET_CODE (x))
5405 case SYMBOL_REF:
5406 /* TLS symbols are not constant. */
5407 if (tls_symbolic_operand (x, Pmode))
5408 return false;
5409 break;
5411 case CONST:
5412 inner = XEXP (x, 0);
5414 /* Offsets of TLS symbols are never valid.
5415 Discourage CSE from creating them. */
5416 if (GET_CODE (inner) == PLUS
5417 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5418 return false;
5420 /* Only some unspecs are valid as "constants". */
5421 if (GET_CODE (inner) == UNSPEC)
5422 switch (XINT (inner, 1))
5424 case UNSPEC_TPOFF:
5425 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5426 default:
5427 return false;
5429 break;
5431 default:
5432 break;
5435 /* Otherwise we handle everything else in the move patterns. */
5436 return true;
5439 /* Determine if it's legal to put X into the constant pool. This
5440 is not possible for the address of thread-local symbols, which
5441 is checked above. */
5443 static bool
5444 ix86_cannot_force_const_mem (x)
5445 rtx x;
5447 return !legitimate_constant_p (x);
5450 /* Determine if a given RTX is a valid constant address. */
5452 bool
5453 constant_address_p (x)
5454 rtx x;
5456 switch (GET_CODE (x))
5458 case LABEL_REF:
5459 case CONST_INT:
5460 return true;
5462 case CONST_DOUBLE:
5463 return TARGET_64BIT;
5465 case CONST:
5466 /* For Mach-O, really believe the CONST. */
5467 if (TARGET_MACHO)
5468 return true;
5469 /* Otherwise fall through. */
5470 case SYMBOL_REF:
5471 return !flag_pic && legitimate_constant_p (x);
5473 default:
5474 return false;
5478 /* Nonzero if the constant value X is a legitimate general operand
5479 when generating PIC code. It is given that flag_pic is on and
5480 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5482 bool
5483 legitimate_pic_operand_p (x)
5484 rtx x;
5486 rtx inner;
5488 switch (GET_CODE (x))
5490 case CONST:
5491 inner = XEXP (x, 0);
5493 /* Only some unspecs are valid as "constants". */
5494 if (GET_CODE (inner) == UNSPEC)
5495 switch (XINT (inner, 1))
5497 case UNSPEC_TPOFF:
5498 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5499 default:
5500 return false;
5502 /* FALLTHRU */
5504 case SYMBOL_REF:
5505 case LABEL_REF:
5506 return legitimate_pic_address_disp_p (x);
5508 default:
5509 return true;
5513 /* Determine if a given CONST RTX is a valid memory displacement
5514 in PIC mode. */
5517 legitimate_pic_address_disp_p (disp)
5518 register rtx disp;
5520 bool saw_plus;
5522 /* In 64bit mode we can allow direct addresses of symbols and labels
5523 when they are not dynamic symbols. */
5524 if (TARGET_64BIT)
5526 /* TLS references should always be enclosed in UNSPEC. */
5527 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5528 return 0;
5529 if (GET_CODE (disp) == SYMBOL_REF
5530 && ix86_cmodel == CM_SMALL_PIC
5531 && (CONSTANT_POOL_ADDRESS_P (disp)
5532 || SYMBOL_REF_FLAG (disp)))
5533 return 1;
5534 if (GET_CODE (disp) == LABEL_REF)
5535 return 1;
5536 if (GET_CODE (disp) == CONST
5537 && GET_CODE (XEXP (disp, 0)) == PLUS
5538 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5539 && ix86_cmodel == CM_SMALL_PIC
5540 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5541 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5542 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5543 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5544 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5545 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5546 return 1;
5548 if (GET_CODE (disp) != CONST)
5549 return 0;
5550 disp = XEXP (disp, 0);
5552 if (TARGET_64BIT)
5554 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
5555 of GOT tables. We should not need these anyway. */
5556 if (GET_CODE (disp) != UNSPEC
5557 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5558 return 0;
5560 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5561 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5562 return 0;
5563 return 1;
5566 saw_plus = false;
5567 if (GET_CODE (disp) == PLUS)
5569 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5570 return 0;
5571 disp = XEXP (disp, 0);
5572 saw_plus = true;
5575 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5576 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5578 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5579 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5580 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5582 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5583 if (strstr (sym_name, "$pb") != 0)
5584 return 1;
5588 if (GET_CODE (disp) != UNSPEC)
5589 return 0;
5591 switch (XINT (disp, 1))
5593 case UNSPEC_GOT:
5594 if (saw_plus)
5595 return false;
5596 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5597 case UNSPEC_GOTOFF:
5598 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5599 case UNSPEC_GOTTPOFF:
5600 case UNSPEC_GOTNTPOFF:
5601 case UNSPEC_INDNTPOFF:
5602 if (saw_plus)
5603 return false;
5604 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5605 case UNSPEC_NTPOFF:
5606 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5607 case UNSPEC_DTPOFF:
5608 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5611 return 0;
5614 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5615 memory address for an instruction. The MODE argument is the machine mode
5616 for the MEM expression that wants to use this address.
5618 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5619 convert common non-canonical forms to canonical form so that they will
5620 be recognized. */
5623 legitimate_address_p (mode, addr, strict)
5624 enum machine_mode mode;
5625 register rtx addr;
5626 int strict;
5628 struct ix86_address parts;
5629 rtx base, index, disp;
5630 HOST_WIDE_INT scale;
5631 const char *reason = NULL;
5632 rtx reason_rtx = NULL_RTX;
5634 if (TARGET_DEBUG_ADDR)
5636 fprintf (stderr,
5637 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5638 GET_MODE_NAME (mode), strict);
5639 debug_rtx (addr);
5642 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5644 if (TARGET_DEBUG_ADDR)
5645 fprintf (stderr, "Success.\n");
5646 return TRUE;
5649 if (ix86_decompose_address (addr, &parts) <= 0)
5651 reason = "decomposition failed";
5652 goto report_error;
5655 base = parts.base;
5656 index = parts.index;
5657 disp = parts.disp;
5658 scale = parts.scale;
5660 /* Validate base register.
5662 Don't allow SUBREGs here; they can lead to spill failures when the base
5663 is one word out of a two-word structure, which is represented internally
5664 as a DImode int.
5666 if (base)
5668 rtx reg;
5669 reason_rtx = base;
5671 if (GET_CODE (base) == SUBREG)
5672 reg = SUBREG_REG (base);
5673 else
5674 reg = base;
5676 if (GET_CODE (reg) != REG)
5678 reason = "base is not a register";
5679 goto report_error;
5682 if (GET_MODE (base) != Pmode)
5684 reason = "base is not in Pmode";
5685 goto report_error;
5688 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5689 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5691 reason = "base is not valid";
5692 goto report_error;
5696 /* Validate index register.
5698 Don't allow SUBREGs here; they can lead to spill failures when the index
5699 is one word out of a two-word structure, which is represented internally
5700 as a DImode int.
5702 if (index)
5704 rtx reg;
5705 reason_rtx = index;
5707 if (GET_CODE (index) == SUBREG)
5708 reg = SUBREG_REG (index);
5709 else
5710 reg = index;
5712 if (GET_CODE (reg) != REG)
5714 reason = "index is not a register";
5715 goto report_error;
5718 if (GET_MODE (index) != Pmode)
5720 reason = "index is not in Pmode";
5721 goto report_error;
5724 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5725 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5727 reason = "index is not valid";
5728 goto report_error;
5732 /* Validate scale factor. */
5733 if (scale != 1)
5735 reason_rtx = GEN_INT (scale);
5736 if (!index)
5738 reason = "scale without index";
5739 goto report_error;
5742 if (scale != 2 && scale != 4 && scale != 8)
5744 reason = "scale is not a valid multiplier";
5745 goto report_error;
5749 /* Validate displacement. */
5750 if (disp)
5752 reason_rtx = disp;
5754 if (GET_CODE (disp) == CONST
5755 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5756 switch (XINT (XEXP (disp, 0), 1))
5758 case UNSPEC_GOT:
5759 case UNSPEC_GOTOFF:
5760 case UNSPEC_GOTPCREL:
5761 if (!flag_pic)
5762 abort ();
5763 goto is_legitimate_pic;
5765 case UNSPEC_GOTTPOFF:
5766 case UNSPEC_GOTNTPOFF:
5767 case UNSPEC_INDNTPOFF:
5768 case UNSPEC_NTPOFF:
5769 case UNSPEC_DTPOFF:
5770 break;
5772 default:
5773 reason = "invalid address unspec";
5774 goto report_error;
5777 else if (flag_pic && (SYMBOLIC_CONST (disp)
5778 #if TARGET_MACHO
5779 && !machopic_operand_p (disp)
5780 #endif
5783 is_legitimate_pic:
5784 if (TARGET_64BIT && (index || base))
5786 /* foo@dtpoff(%rX) is ok. */
5787 if (GET_CODE (disp) != CONST
5788 || GET_CODE (XEXP (disp, 0)) != PLUS
5789 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5790 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5791 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5792 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5794 reason = "non-constant pic memory reference";
5795 goto report_error;
5798 else if (! legitimate_pic_address_disp_p (disp))
5800 reason = "displacement is an invalid pic construct";
5801 goto report_error;
5804 /* This code used to verify that a symbolic pic displacement
5805 includes the pic_offset_table_rtx register.
5807 While this is a good idea, unfortunately these constructs may
5808 be created by the "adds using lea" optimization for incorrect
5809 code like:
5811 int a;
5812 int foo(int i)
5814 return *(&a+i);
5817 This code is nonsensical, but results in addressing the
5818 GOT table with pic_offset_table_rtx as the base. We can't
5819 just reject it easily, since it gets matched by the
5820 "addsi3" pattern, which later gets split to an lea when the
5821 output register differs from the input. While this
5822 could be handled by a separate addsi pattern for this case
5823 that never results in an lea, disabling this test seems to be
5824 the easier and correct fix for the crash. */
5826 else if (!CONSTANT_ADDRESS_P (disp))
5828 reason = "displacement is not constant";
5829 goto report_error;
5831 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5833 reason = "displacement is out of range";
5834 goto report_error;
5836 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5838 reason = "displacement is a const_double";
5839 goto report_error;
5843 /* Everything looks valid. */
5844 if (TARGET_DEBUG_ADDR)
5845 fprintf (stderr, "Success.\n");
5846 return TRUE;
5848 report_error:
5849 if (TARGET_DEBUG_ADDR)
5851 fprintf (stderr, "Error: %s\n", reason);
5852 debug_rtx (reason_rtx);
5854 return FALSE;
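/* For illustration only, a sketch of how a few addresses fare in the
   validation above; the registers are placeholders and the RTL is
   abbreviated.

     (plus (reg ebx) (const_int 8))                    base + disp: valid
     (plus (mult (reg esi) (const_int 4)) (reg ebx))   base + index*4: valid
     (plus (mult (reg esi) (const_int 3)) (reg ebx))   rejected, "scale is not
                                                       a valid multiplier"  */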
5857 /* Return a unique alias set for the GOT. */
5859 static HOST_WIDE_INT
5860 ix86_GOT_alias_set ()
5862 static HOST_WIDE_INT set = -1;
5863 if (set == -1)
5864 set = new_alias_set ();
5865 return set;
5868 /* Return a legitimate reference for ORIG (an address) using the
5869 register REG. If REG is 0, a new pseudo is generated.
5871 There are two types of references that must be handled:
5873 1. Global data references must load the address from the GOT, via
5874 the PIC reg. An insn is emitted to do this load, and the reg is
5875 returned.
5877 2. Static data references, constant pool addresses, and code labels
5878 compute the address as an offset from the GOT, whose base is in
5879 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5880 differentiate them from global data objects. The returned
5881 address is the PIC reg + an unspec constant.
5883 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5884 reg also appears in the address. */
5887 legitimize_pic_address (orig, reg)
5888 rtx orig;
5889 rtx reg;
5891 rtx addr = orig;
5892 rtx new = orig;
5893 rtx base;
5895 #if TARGET_MACHO
5896 if (reg == 0)
5897 reg = gen_reg_rtx (Pmode);
5898 /* Use the generic Mach-O PIC machinery. */
5899 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5900 #endif
5902 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5903 new = addr;
5904 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5906 /* This symbol may be referenced via a displacement from the PIC
5907 base address (@GOTOFF). */
5909 if (reload_in_progress)
5910 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5911 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5912 new = gen_rtx_CONST (Pmode, new);
5913 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5915 if (reg != 0)
5917 emit_move_insn (reg, new);
5918 new = reg;
5921 else if (GET_CODE (addr) == SYMBOL_REF)
5923 if (TARGET_64BIT)
5925 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5926 new = gen_rtx_CONST (Pmode, new);
5927 new = gen_rtx_MEM (Pmode, new);
5928 RTX_UNCHANGING_P (new) = 1;
5929 set_mem_alias_set (new, ix86_GOT_alias_set ());
5931 if (reg == 0)
5932 reg = gen_reg_rtx (Pmode);
5933 /* Use gen_movsi directly; otherwise the address is loaded
5934 into a register for CSE. We don't want to CSE these addresses;
5935 instead we CSE addresses from the GOT table, so skip this. */
5936 emit_insn (gen_movsi (reg, new));
5937 new = reg;
5939 else
5941 /* This symbol must be referenced via a load from the
5942 Global Offset Table (@GOT). */
5944 if (reload_in_progress)
5945 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5946 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5947 new = gen_rtx_CONST (Pmode, new);
5948 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5949 new = gen_rtx_MEM (Pmode, new);
5950 RTX_UNCHANGING_P (new) = 1;
5951 set_mem_alias_set (new, ix86_GOT_alias_set ());
5953 if (reg == 0)
5954 reg = gen_reg_rtx (Pmode);
5955 emit_move_insn (reg, new);
5956 new = reg;
5959 else
5961 if (GET_CODE (addr) == CONST)
5963 addr = XEXP (addr, 0);
5965 /* We must match stuff we generated before. Assume the only
5966 unspecs that can get here are ours. Not that we could do
5967 anything with them anyway... */
5968 if (GET_CODE (addr) == UNSPEC
5969 || (GET_CODE (addr) == PLUS
5970 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5971 return orig;
5972 if (GET_CODE (addr) != PLUS)
5973 abort ();
5975 if (GET_CODE (addr) == PLUS)
5977 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5979 /* Check first to see if this is a constant offset from a @GOTOFF
5980 symbol reference. */
5981 if (local_symbolic_operand (op0, Pmode)
5982 && GET_CODE (op1) == CONST_INT)
5984 if (!TARGET_64BIT)
5986 if (reload_in_progress)
5987 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5988 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5989 UNSPEC_GOTOFF);
5990 new = gen_rtx_PLUS (Pmode, new, op1);
5991 new = gen_rtx_CONST (Pmode, new);
5992 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5994 if (reg != 0)
5996 emit_move_insn (reg, new);
5997 new = reg;
6000 else
6002 if (INTVAL (op1) < -16*1024*1024
6003 || INTVAL (op1) >= 16*1024*1024)
6004 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6007 else
6009 base = legitimize_pic_address (XEXP (addr, 0), reg);
6010 new = legitimize_pic_address (XEXP (addr, 1),
6011 base == reg ? NULL_RTX : reg);
6013 if (GET_CODE (new) == CONST_INT)
6014 new = plus_constant (base, INTVAL (new));
6015 else
6017 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6019 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6020 new = XEXP (new, 1);
6022 new = gen_rtx_PLUS (Pmode, base, new);
6027 return new;
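/* For illustration only, the rough shape of the addresses produced above;
   "sym" is a placeholder symbol and the RTL is abbreviated.

     local symbol, 32-bit PIC:
       (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)))
     global symbol, 32-bit PIC:
       (mem (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOT))))
     global symbol, 64-bit PIC:
       (mem (const (unspec [sym] UNSPEC_GOTPCREL)))

   and the last two are loaded into a register rather than used in place.  */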
6030 static void
6031 ix86_encode_section_info (decl, first)
6032 tree decl;
6033 int first ATTRIBUTE_UNUSED;
6035 bool local_p = (*targetm.binds_local_p) (decl);
6036 rtx rtl, symbol;
6038 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6039 if (GET_CODE (rtl) != MEM)
6040 return;
6041 symbol = XEXP (rtl, 0);
6042 if (GET_CODE (symbol) != SYMBOL_REF)
6043 return;
6045 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6046 symbol so that we may access it directly in the GOT. */
6048 if (flag_pic)
6049 SYMBOL_REF_FLAG (symbol) = local_p;
6051 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6052 "local dynamic", "initial exec" or "local exec" TLS models
6053 respectively. */
6055 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6057 const char *symbol_str;
6058 char *newstr;
6059 size_t len;
6060 enum tls_model kind = decl_tls_model (decl);
6062 if (TARGET_64BIT && ! flag_pic)
6064 /* x86-64 doesn't allow non-pic code for shared libraries,
6065 so don't generate GD/LD TLS models for non-pic code. */
6066 switch (kind)
6068 case TLS_MODEL_GLOBAL_DYNAMIC:
6069 kind = TLS_MODEL_INITIAL_EXEC; break;
6070 case TLS_MODEL_LOCAL_DYNAMIC:
6071 kind = TLS_MODEL_LOCAL_EXEC; break;
6072 default:
6073 break;
6077 symbol_str = XSTR (symbol, 0);
6079 if (symbol_str[0] == '%')
6081 if (symbol_str[1] == tls_model_chars[kind])
6082 return;
6083 symbol_str += 2;
6085 len = strlen (symbol_str) + 1;
6086 newstr = alloca (len + 2);
6088 newstr[0] = '%';
6089 newstr[1] = tls_model_chars[kind];
6090 memcpy (newstr + 2, symbol_str, len);
6092 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6096 /* Undo the above when printing symbol names. */
6098 static const char *
6099 ix86_strip_name_encoding (str)
6100 const char *str;
6102 if (str[0] == '%')
6103 str += 2;
6104 if (str [0] == '*')
6105 str += 1;
6106 return str;
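/* For illustration only, assuming the encodings produced by
   ix86_encode_section_info above: a thread-local symbol encoded for the
   initial-exec model might be spelled "%ifoo"; stripping removes the
   two-character "%i" prefix, and a leading '*' (used to suppress the user
   label prefix) is removed as well:

     "%ifoo"  ->  "foo"
     "*bar"   ->  "bar"     */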
6109 /* Load the thread pointer into a register. */
6111 static rtx
6112 get_thread_pointer ()
6114 rtx tp;
6116 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6117 tp = gen_rtx_MEM (Pmode, tp);
6118 RTX_UNCHANGING_P (tp) = 1;
6119 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6120 tp = force_reg (Pmode, tp);
6122 return tp;
6125 /* Try machine-dependent ways of modifying an illegitimate address
6126 to be legitimate. If we find one, return the new, valid address.
6127 This macro is used in only one place: `memory_address' in explow.c.
6129 OLDX is the address as it was before break_out_memory_refs was called.
6130 In some cases it is useful to look at this to decide what needs to be done.
6132 MODE and WIN are passed so that this macro can use
6133 GO_IF_LEGITIMATE_ADDRESS.
6135 It is always safe for this macro to do nothing. It exists to recognize
6136 opportunities to optimize the output.
6138 For the 80386, we handle X+REG by loading X into a register R and
6139 using R+REG. R will go in a general reg and indexing will be used.
6140 However, if REG is a broken-out memory address or multiplication,
6141 nothing needs to be done because REG can certainly go in a general reg.
6143 When -fpic is used, special handling is needed for symbolic references.
6144 See comments by legitimize_pic_address in i386.c for details. */
6147 legitimize_address (x, oldx, mode)
6148 register rtx x;
6149 register rtx oldx ATTRIBUTE_UNUSED;
6150 enum machine_mode mode;
6152 int changed = 0;
6153 unsigned log;
6155 if (TARGET_DEBUG_ADDR)
6157 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6158 GET_MODE_NAME (mode));
6159 debug_rtx (x);
6162 log = tls_symbolic_operand (x, mode);
6163 if (log)
6165 rtx dest, base, off, pic;
6166 int type;
6168 switch (log)
6170 case TLS_MODEL_GLOBAL_DYNAMIC:
6171 dest = gen_reg_rtx (Pmode);
6172 if (TARGET_64BIT)
6174 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6176 start_sequence ();
6177 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6178 insns = get_insns ();
6179 end_sequence ();
6181 emit_libcall_block (insns, dest, rax, x);
6183 else
6184 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6185 break;
6187 case TLS_MODEL_LOCAL_DYNAMIC:
6188 base = gen_reg_rtx (Pmode);
6189 if (TARGET_64BIT)
6191 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6193 start_sequence ();
6194 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6195 insns = get_insns ();
6196 end_sequence ();
6198 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6199 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6200 emit_libcall_block (insns, base, rax, note);
6202 else
6203 emit_insn (gen_tls_local_dynamic_base_32 (base));
6205 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6206 off = gen_rtx_CONST (Pmode, off);
6208 return gen_rtx_PLUS (Pmode, base, off);
6210 case TLS_MODEL_INITIAL_EXEC:
6211 if (TARGET_64BIT)
6213 pic = NULL;
6214 type = UNSPEC_GOTNTPOFF;
6216 else if (flag_pic)
6218 if (reload_in_progress)
6219 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6220 pic = pic_offset_table_rtx;
6221 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6223 else if (!TARGET_GNU_TLS)
6225 pic = gen_reg_rtx (Pmode);
6226 emit_insn (gen_set_got (pic));
6227 type = UNSPEC_GOTTPOFF;
6229 else
6231 pic = NULL;
6232 type = UNSPEC_INDNTPOFF;
6235 base = get_thread_pointer ();
6237 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6238 off = gen_rtx_CONST (Pmode, off);
6239 if (pic)
6240 off = gen_rtx_PLUS (Pmode, pic, off);
6241 off = gen_rtx_MEM (Pmode, off);
6242 RTX_UNCHANGING_P (off) = 1;
6243 set_mem_alias_set (off, ix86_GOT_alias_set ());
6244 dest = gen_reg_rtx (Pmode);
6246 if (TARGET_64BIT || TARGET_GNU_TLS)
6248 emit_move_insn (dest, off);
6249 return gen_rtx_PLUS (Pmode, base, dest);
6251 else
6252 emit_insn (gen_subsi3 (dest, base, off));
6253 break;
6255 case TLS_MODEL_LOCAL_EXEC:
6256 base = get_thread_pointer ();
6258 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6259 (TARGET_64BIT || TARGET_GNU_TLS)
6260 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6261 off = gen_rtx_CONST (Pmode, off);
6263 if (TARGET_64BIT || TARGET_GNU_TLS)
6264 return gen_rtx_PLUS (Pmode, base, off);
6265 else
6267 dest = gen_reg_rtx (Pmode);
6268 emit_insn (gen_subsi3 (dest, base, off));
6270 break;
6272 default:
6273 abort ();
6276 return dest;
6279 if (flag_pic && SYMBOLIC_CONST (x))
6280 return legitimize_pic_address (x, 0);
6282 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6283 if (GET_CODE (x) == ASHIFT
6284 && GET_CODE (XEXP (x, 1)) == CONST_INT
6285 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6287 changed = 1;
6288 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6289 GEN_INT (1 << log));
6292 if (GET_CODE (x) == PLUS)
6294 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6296 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6297 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6298 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6300 changed = 1;
6301 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6302 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6303 GEN_INT (1 << log));
6306 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6307 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6308 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6310 changed = 1;
6311 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6312 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6313 GEN_INT (1 << log));
6316 /* Put multiply first if it isn't already. */
6317 if (GET_CODE (XEXP (x, 1)) == MULT)
6319 rtx tmp = XEXP (x, 0);
6320 XEXP (x, 0) = XEXP (x, 1);
6321 XEXP (x, 1) = tmp;
6322 changed = 1;
6325 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6326 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6327 created by virtual register instantiation, register elimination, and
6328 similar optimizations. */
6329 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6331 changed = 1;
6332 x = gen_rtx_PLUS (Pmode,
6333 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6334 XEXP (XEXP (x, 1), 0)),
6335 XEXP (XEXP (x, 1), 1));
6338 /* Canonicalize
6339 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6340 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6341 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6342 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6343 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6344 && CONSTANT_P (XEXP (x, 1)))
6346 rtx constant;
6347 rtx other = NULL_RTX;
6349 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6351 constant = XEXP (x, 1);
6352 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6354 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6356 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6357 other = XEXP (x, 1);
6359 else
6360 constant = 0;
6362 if (constant)
6364 changed = 1;
6365 x = gen_rtx_PLUS (Pmode,
6366 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6367 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6368 plus_constant (other, INTVAL (constant)));
6372 if (changed && legitimate_address_p (mode, x, FALSE))
6373 return x;
6375 if (GET_CODE (XEXP (x, 0)) == MULT)
6377 changed = 1;
6378 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6381 if (GET_CODE (XEXP (x, 1)) == MULT)
6383 changed = 1;
6384 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6387 if (changed
6388 && GET_CODE (XEXP (x, 1)) == REG
6389 && GET_CODE (XEXP (x, 0)) == REG)
6390 return x;
6392 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6394 changed = 1;
6395 x = legitimize_pic_address (x, 0);
6398 if (changed && legitimate_address_p (mode, x, FALSE))
6399 return x;
6401 if (GET_CODE (XEXP (x, 0)) == REG)
6403 register rtx temp = gen_reg_rtx (Pmode);
6404 register rtx val = force_operand (XEXP (x, 1), temp);
6405 if (val != temp)
6406 emit_move_insn (temp, val);
6408 XEXP (x, 1) = temp;
6409 return x;
6412 else if (GET_CODE (XEXP (x, 1)) == REG)
6414 register rtx temp = gen_reg_rtx (Pmode);
6415 register rtx val = force_operand (XEXP (x, 0), temp);
6416 if (val != temp)
6417 emit_move_insn (temp, val);
6419 XEXP (x, 0) = temp;
6420 return x;
6424 return x;
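/* For illustration only, two of the rewrites performed above, with
   placeholder registers and abbreviated RTL:

     (plus (ashift (reg a) (const_int 2)) (reg b))
       -> (plus (mult (reg a) (const_int 4)) (reg b))

     (plus (mult (reg a) (const_int 4)) (plus (reg b) (const_int 8)))
       -> (plus (plus (mult (reg a) (const_int 4)) (reg b)) (const_int 8))  */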
6427 /* Print an integer constant expression in assembler syntax. Addition
6428 and subtraction are the only arithmetic that may appear in these
6429 expressions. FILE is the stdio stream to write to, X is the rtx, and
6430 CODE is the operand print code from the output string. */
6432 static void
6433 output_pic_addr_const (file, x, code)
6434 FILE *file;
6435 rtx x;
6436 int code;
6438 char buf[256];
6440 switch (GET_CODE (x))
6442 case PC:
6443 if (flag_pic)
6444 putc ('.', file);
6445 else
6446 abort ();
6447 break;
6449 case SYMBOL_REF:
6450 assemble_name (file, XSTR (x, 0));
6451 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6452 fputs ("@PLT", file);
6453 break;
6455 case LABEL_REF:
6456 x = XEXP (x, 0);
6457 /* FALLTHRU */
6458 case CODE_LABEL:
6459 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6460 assemble_name (asm_out_file, buf);
6461 break;
6463 case CONST_INT:
6464 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6465 break;
6467 case CONST:
6468 /* This used to output parentheses around the expression,
6469 but that does not work on the 386 (either ATT or BSD assembler). */
6470 output_pic_addr_const (file, XEXP (x, 0), code);
6471 break;
6473 case CONST_DOUBLE:
6474 if (GET_MODE (x) == VOIDmode)
6476 /* We can use %d if the number is <32 bits and positive. */
6477 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6478 fprintf (file, "0x%lx%08lx",
6479 (unsigned long) CONST_DOUBLE_HIGH (x),
6480 (unsigned long) CONST_DOUBLE_LOW (x));
6481 else
6482 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6484 else
6485 /* We can't handle floating point constants;
6486 PRINT_OPERAND must handle them. */
6487 output_operand_lossage ("floating constant misused");
6488 break;
6490 case PLUS:
6491 /* Some assemblers need integer constants to appear first. */
6492 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6494 output_pic_addr_const (file, XEXP (x, 0), code);
6495 putc ('+', file);
6496 output_pic_addr_const (file, XEXP (x, 1), code);
6498 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6500 output_pic_addr_const (file, XEXP (x, 1), code);
6501 putc ('+', file);
6502 output_pic_addr_const (file, XEXP (x, 0), code);
6504 else
6505 abort ();
6506 break;
6508 case MINUS:
6509 if (!TARGET_MACHO)
6510 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6511 output_pic_addr_const (file, XEXP (x, 0), code);
6512 putc ('-', file);
6513 output_pic_addr_const (file, XEXP (x, 1), code);
6514 if (!TARGET_MACHO)
6515 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6516 break;
6518 case UNSPEC:
6519 if (XVECLEN (x, 0) != 1)
6520 abort ();
6521 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6522 switch (XINT (x, 1))
6524 case UNSPEC_GOT:
6525 fputs ("@GOT", file);
6526 break;
6527 case UNSPEC_GOTOFF:
6528 fputs ("@GOTOFF", file);
6529 break;
6530 case UNSPEC_GOTPCREL:
6531 fputs ("@GOTPCREL(%rip)", file);
6532 break;
6533 case UNSPEC_GOTTPOFF:
6534 /* FIXME: This might be @TPOFF in Sun ld too. */
6535 fputs ("@GOTTPOFF", file);
6536 break;
6537 case UNSPEC_TPOFF:
6538 fputs ("@TPOFF", file);
6539 break;
6540 case UNSPEC_NTPOFF:
6541 if (TARGET_64BIT)
6542 fputs ("@TPOFF", file);
6543 else
6544 fputs ("@NTPOFF", file);
6545 break;
6546 case UNSPEC_DTPOFF:
6547 fputs ("@DTPOFF", file);
6548 break;
6549 case UNSPEC_GOTNTPOFF:
6550 if (TARGET_64BIT)
6551 fputs ("@GOTTPOFF(%rip)", file);
6552 else
6553 fputs ("@GOTNTPOFF", file);
6554 break;
6555 case UNSPEC_INDNTPOFF:
6556 fputs ("@INDNTPOFF", file);
6557 break;
6558 default:
6559 output_operand_lossage ("invalid UNSPEC as operand");
6560 break;
6562 break;
6564 default:
6565 output_operand_lossage ("invalid expression as operand");
6569 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6570 We need to handle our special PIC relocations. */
6572 void
6573 i386_dwarf_output_addr_const (file, x)
6574 FILE *file;
6575 rtx x;
6577 #ifdef ASM_QUAD
6578 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6579 #else
6580 if (TARGET_64BIT)
6581 abort ();
6582 fprintf (file, "%s", ASM_LONG);
6583 #endif
6584 if (flag_pic)
6585 output_pic_addr_const (file, x, '\0');
6586 else
6587 output_addr_const (file, x);
6588 fputc ('\n', file);
6591 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6592 We need to emit DTP-relative relocations. */
6594 void
6595 i386_output_dwarf_dtprel (file, size, x)
6596 FILE *file;
6597 int size;
6598 rtx x;
6600 fputs (ASM_LONG, file);
6601 output_addr_const (file, x);
6602 fputs ("@DTPOFF", file);
6603 switch (size)
6605 case 4:
6606 break;
6607 case 8:
6608 fputs (", 0", file);
6609 break;
6610 default:
6611 abort ();
6615 /* In the name of slightly smaller debug output, and to cater to
6616 general assembler lossage, recognize PIC+GOTOFF and turn it back
6617 into a direct symbol reference. */
6620 i386_simplify_dwarf_addr (orig_x)
6621 rtx orig_x;
6623 rtx x = orig_x, y;
6625 if (GET_CODE (x) == MEM)
6626 x = XEXP (x, 0);
6628 if (TARGET_64BIT)
6630 if (GET_CODE (x) != CONST
6631 || GET_CODE (XEXP (x, 0)) != UNSPEC
6632 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6633 || GET_CODE (orig_x) != MEM)
6634 return orig_x;
6635 return XVECEXP (XEXP (x, 0), 0, 0);
6638 if (GET_CODE (x) != PLUS
6639 || GET_CODE (XEXP (x, 1)) != CONST)
6640 return orig_x;
6642 if (GET_CODE (XEXP (x, 0)) == REG
6643 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6644 /* %ebx + GOT/GOTOFF */
6645 y = NULL;
6646 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6648 /* %ebx + %reg * scale + GOT/GOTOFF */
6649 y = XEXP (x, 0);
6650 if (GET_CODE (XEXP (y, 0)) == REG
6651 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6652 y = XEXP (y, 1);
6653 else if (GET_CODE (XEXP (y, 1)) == REG
6654 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6655 y = XEXP (y, 0);
6656 else
6657 return orig_x;
6658 if (GET_CODE (y) != REG
6659 && GET_CODE (y) != MULT
6660 && GET_CODE (y) != ASHIFT)
6661 return orig_x;
6663 else
6664 return orig_x;
6666 x = XEXP (XEXP (x, 1), 0);
6667 if (GET_CODE (x) == UNSPEC
6668 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6669 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6671 if (y)
6672 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6673 return XVECEXP (x, 0, 0);
6676 if (GET_CODE (x) == PLUS
6677 && GET_CODE (XEXP (x, 0)) == UNSPEC
6678 && GET_CODE (XEXP (x, 1)) == CONST_INT
6679 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6680 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6681 && GET_CODE (orig_x) != MEM)))
6683 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6684 if (y)
6685 return gen_rtx_PLUS (Pmode, y, x);
6686 return x;
6689 return orig_x;
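/* For illustration only, the kind of simplification performed above,
   with a placeholder symbol:

     (mem (plus (reg ebx) (const (unspec [sym] UNSPEC_GOT))))   ->  sym
     (plus (reg ebx) (const (unspec [sym] UNSPEC_GOTOFF)))      ->  sym

   where %ebx stands for the PIC register.  */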
6692 static void
6693 put_condition_code (code, mode, reverse, fp, file)
6694 enum rtx_code code;
6695 enum machine_mode mode;
6696 int reverse, fp;
6697 FILE *file;
6699 const char *suffix;
6701 if (mode == CCFPmode || mode == CCFPUmode)
6703 enum rtx_code second_code, bypass_code;
6704 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6705 if (bypass_code != NIL || second_code != NIL)
6706 abort ();
6707 code = ix86_fp_compare_code_to_integer (code);
6708 mode = CCmode;
6710 if (reverse)
6711 code = reverse_condition (code);
6713 switch (code)
6715 case EQ:
6716 suffix = "e";
6717 break;
6718 case NE:
6719 suffix = "ne";
6720 break;
6721 case GT:
6722 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6723 abort ();
6724 suffix = "g";
6725 break;
6726 case GTU:
6727 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6728 Those same assemblers have the same but opposite lossage on cmov. */
6729 if (mode != CCmode)
6730 abort ();
6731 suffix = fp ? "nbe" : "a";
6732 break;
6733 case LT:
6734 if (mode == CCNOmode || mode == CCGOCmode)
6735 suffix = "s";
6736 else if (mode == CCmode || mode == CCGCmode)
6737 suffix = "l";
6738 else
6739 abort ();
6740 break;
6741 case LTU:
6742 if (mode != CCmode)
6743 abort ();
6744 suffix = "b";
6745 break;
6746 case GE:
6747 if (mode == CCNOmode || mode == CCGOCmode)
6748 suffix = "ns";
6749 else if (mode == CCmode || mode == CCGCmode)
6750 suffix = "ge";
6751 else
6752 abort ();
6753 break;
6754 case GEU:
6755 /* ??? As above. */
6756 if (mode != CCmode)
6757 abort ();
6758 suffix = fp ? "nb" : "ae";
6759 break;
6760 case LE:
6761 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6762 abort ();
6763 suffix = "le";
6764 break;
6765 case LEU:
6766 if (mode != CCmode)
6767 abort ();
6768 suffix = "be";
6769 break;
6770 case UNORDERED:
6771 suffix = fp ? "u" : "p";
6772 break;
6773 case ORDERED:
6774 suffix = fp ? "nu" : "np";
6775 break;
6776 default:
6777 abort ();
6779 fputs (suffix, file);
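/* For illustration only, the suffixes chosen above for a plain CCmode
   comparison: EQ "e", NE "ne", GT "g", GTU "a", LT "l", LTU "b", GE "ge",
   GEU "ae", LE "le", LEU "be"; with REVERSE set the reversed condition is
   used instead, so EQ becomes "ne".  In CCNOmode a signed LT/GE test only
   needs the sign flag, giving "s" and "ns".  */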
6782 void
6783 print_reg (x, code, file)
6784 rtx x;
6785 int code;
6786 FILE *file;
6788 if (REGNO (x) == ARG_POINTER_REGNUM
6789 || REGNO (x) == FRAME_POINTER_REGNUM
6790 || REGNO (x) == FLAGS_REG
6791 || REGNO (x) == FPSR_REG)
6792 abort ();
6794 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6795 putc ('%', file);
6797 if (code == 'w' || MMX_REG_P (x))
6798 code = 2;
6799 else if (code == 'b')
6800 code = 1;
6801 else if (code == 'k')
6802 code = 4;
6803 else if (code == 'q')
6804 code = 8;
6805 else if (code == 'y')
6806 code = 3;
6807 else if (code == 'h')
6808 code = 0;
6809 else
6810 code = GET_MODE_SIZE (GET_MODE (x));
6812 /* Irritatingly, AMD extended registers use a different naming convention
6813 from the normal registers. */
6814 if (REX_INT_REG_P (x))
6816 if (!TARGET_64BIT)
6817 abort ();
6818 switch (code)
6820 case 0:
6821 error ("extended registers have no high halves");
6822 break;
6823 case 1:
6824 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6825 break;
6826 case 2:
6827 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6828 break;
6829 case 4:
6830 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6831 break;
6832 case 8:
6833 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6834 break;
6835 default:
6836 error ("unsupported operand size for extended register");
6837 break;
6839 return;
6841 switch (code)
6843 case 3:
6844 if (STACK_TOP_P (x))
6846 fputs ("st(0)", file);
6847 break;
6849 /* FALLTHRU */
6850 case 8:
6851 case 4:
6852 case 12:
6853 if (! ANY_FP_REG_P (x))
6854 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6855 /* FALLTHRU */
6856 case 16:
6857 case 2:
6858 fputs (hi_reg_name[REGNO (x)], file);
6859 break;
6860 case 1:
6861 fputs (qi_reg_name[REGNO (x)], file);
6862 break;
6863 case 0:
6864 fputs (qi_high_reg_name[REGNO (x)], file);
6865 break;
6866 default:
6867 abort ();
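/* For illustration only, assuming AT&T syntax: for the register AX,
   code 'b' prints "%al", 'h' prints "%ah", 'w' prints "%ax", 'k' prints
   "%eax" and 'q' prints "%rax" (in 64-bit mode); for the extended register
   R8, code 'k' prints "%r8d" and 'q' prints "%r8".  */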
6871 /* Locate some local-dynamic symbol still in use by this function
6872 so that we can print its name in some tls_local_dynamic_base
6873 pattern. */
6875 static const char *
6876 get_some_local_dynamic_name ()
6878 rtx insn;
6880 if (cfun->machine->some_ld_name)
6881 return cfun->machine->some_ld_name;
6883 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6884 if (INSN_P (insn)
6885 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6886 return cfun->machine->some_ld_name;
6888 abort ();
6891 static int
6892 get_some_local_dynamic_name_1 (px, data)
6893 rtx *px;
6894 void *data ATTRIBUTE_UNUSED;
6896 rtx x = *px;
6898 if (GET_CODE (x) == SYMBOL_REF
6899 && local_dynamic_symbolic_operand (x, Pmode))
6901 cfun->machine->some_ld_name = XSTR (x, 0);
6902 return 1;
6905 return 0;
6908 /* Meaning of CODE:
6909 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6910 C -- print opcode suffix for set/cmov insn.
6911 c -- like C, but print reversed condition
6912 F,f -- likewise, but for floating-point.
6913 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6914 nothing
6915 R -- print the prefix for register names.
6916 z -- print the opcode suffix for the size of the current operand.
6917 * -- print a star (in certain assembler syntax)
6918 A -- print an absolute memory reference.
6919 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6920 s -- print a shift double count, followed by the assembler's argument
6921 delimiter.
6922 b -- print the QImode name of the register for the indicated operand.
6923 %b0 would print %al if operands[0] is reg 0.
6924 w -- likewise, print the HImode name of the register.
6925 k -- likewise, print the SImode name of the register.
6926 q -- likewise, print the DImode name of the register.
6927 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6928 y -- print "st(0)" instead of "st" as a register.
6929 D -- print condition for SSE cmp instruction.
6930 P -- if PIC, print an @PLT suffix.
6931 X -- don't print any sort of PIC '@' suffix for a symbol.
6932 & -- print some in-use local-dynamic symbol name.
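/* For illustration only, assuming operand 0 is register AX: in a template,
   "%b0" prints "%al", "%w0" prints "%ax", "%k0" prints "%eax", "%h0" prints
   "%ah", and "%z1" appended to a 387 opcode picks the memory size suffix
   from the mode of operand 1.  */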
6935 void
6936 print_operand (file, x, code)
6937 FILE *file;
6938 rtx x;
6939 int code;
6941 if (code)
6943 switch (code)
6945 case '*':
6946 if (ASSEMBLER_DIALECT == ASM_ATT)
6947 putc ('*', file);
6948 return;
6950 case '&':
6951 assemble_name (file, get_some_local_dynamic_name ());
6952 return;
6954 case 'A':
6955 if (ASSEMBLER_DIALECT == ASM_ATT)
6956 putc ('*', file);
6957 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6959 /* Intel syntax. For absolute addresses, registers should not
6960 be surrounded by brackets. */
6961 if (GET_CODE (x) != REG)
6963 putc ('[', file);
6964 PRINT_OPERAND (file, x, 0);
6965 putc (']', file);
6966 return;
6969 else
6970 abort ();
6972 PRINT_OPERAND (file, x, 0);
6973 return;
6976 case 'L':
6977 if (ASSEMBLER_DIALECT == ASM_ATT)
6978 putc ('l', file);
6979 return;
6981 case 'W':
6982 if (ASSEMBLER_DIALECT == ASM_ATT)
6983 putc ('w', file);
6984 return;
6986 case 'B':
6987 if (ASSEMBLER_DIALECT == ASM_ATT)
6988 putc ('b', file);
6989 return;
6991 case 'Q':
6992 if (ASSEMBLER_DIALECT == ASM_ATT)
6993 putc ('l', file);
6994 return;
6996 case 'S':
6997 if (ASSEMBLER_DIALECT == ASM_ATT)
6998 putc ('s', file);
6999 return;
7001 case 'T':
7002 if (ASSEMBLER_DIALECT == ASM_ATT)
7003 putc ('t', file);
7004 return;
7006 case 'z':
7007 /* 387 opcodes don't get size suffixes if the operands are
7008 registers. */
7009 if (STACK_REG_P (x))
7010 return;
7012 /* Likewise if using Intel opcodes. */
7013 if (ASSEMBLER_DIALECT == ASM_INTEL)
7014 return;
7016 /* Derive the opcode suffix from the size of the operand. */
7017 switch (GET_MODE_SIZE (GET_MODE (x)))
7019 case 2:
7020 #ifdef HAVE_GAS_FILDS_FISTS
7021 putc ('s', file);
7022 #endif
7023 return;
7025 case 4:
7026 if (GET_MODE (x) == SFmode)
7028 putc ('s', file);
7029 return;
7031 else
7032 putc ('l', file);
7033 return;
7035 case 12:
7036 case 16:
7037 putc ('t', file);
7038 return;
7040 case 8:
7041 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7043 #ifdef GAS_MNEMONICS
7044 putc ('q', file);
7045 #else
7046 putc ('l', file);
7047 putc ('l', file);
7048 #endif
7050 else
7051 putc ('l', file);
7052 return;
7054 default:
7055 abort ();
7058 case 'b':
7059 case 'w':
7060 case 'k':
7061 case 'q':
7062 case 'h':
7063 case 'y':
7064 case 'X':
7065 case 'P':
7066 break;
7068 case 's':
7069 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7071 PRINT_OPERAND (file, x, 0);
7072 putc (',', file);
7074 return;
7076 case 'D':
7077 /* A little bit of brain damage here. The SSE compare instructions
7078 use completely different names for the comparisons than the
7079 fp conditional moves do. */
7080 switch (GET_CODE (x))
7082 case EQ:
7083 case UNEQ:
7084 fputs ("eq", file);
7085 break;
7086 case LT:
7087 case UNLT:
7088 fputs ("lt", file);
7089 break;
7090 case LE:
7091 case UNLE:
7092 fputs ("le", file);
7093 break;
7094 case UNORDERED:
7095 fputs ("unord", file);
7096 break;
7097 case NE:
7098 case LTGT:
7099 fputs ("neq", file);
7100 break;
7101 case UNGE:
7102 case GE:
7103 fputs ("nlt", file);
7104 break;
7105 case UNGT:
7106 case GT:
7107 fputs ("nle", file);
7108 break;
7109 case ORDERED:
7110 fputs ("ord", file);
7111 break;
7112 default:
7113 abort ();
7114 break;
7116 return;
7117 case 'O':
7118 #ifdef CMOV_SUN_AS_SYNTAX
7119 if (ASSEMBLER_DIALECT == ASM_ATT)
7121 switch (GET_MODE (x))
7123 case HImode: putc ('w', file); break;
7124 case SImode:
7125 case SFmode: putc ('l', file); break;
7126 case DImode:
7127 case DFmode: putc ('q', file); break;
7128 default: abort ();
7130 putc ('.', file);
7132 #endif
7133 return;
7134 case 'C':
7135 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7136 return;
7137 case 'F':
7138 #ifdef CMOV_SUN_AS_SYNTAX
7139 if (ASSEMBLER_DIALECT == ASM_ATT)
7140 putc ('.', file);
7141 #endif
7142 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7143 return;
7145 /* Like above, but reverse condition */
7146 case 'c':
7147 /* Check to see if argument to %c is really a constant
7148 and not a condition code which needs to be reversed. */
7149 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7151 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7152 return;
7154 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7155 return;
7156 case 'f':
7157 #ifdef CMOV_SUN_AS_SYNTAX
7158 if (ASSEMBLER_DIALECT == ASM_ATT)
7159 putc ('.', file);
7160 #endif
7161 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7162 return;
7163 case '+':
7165 rtx x;
7167 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7168 return;
7170 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7171 if (x)
7173 int pred_val = INTVAL (XEXP (x, 0));
7175 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7176 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7178 int taken = pred_val > REG_BR_PROB_BASE / 2;
7179 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7181 /* Emit hints only in the case where the default branch prediction
7182 heuristics would fail. */
7183 if (taken != cputaken)
7185 /* We use 3e (DS) prefix for taken branches and
7186 2e (CS) prefix for not taken branches. */
7187 if (taken)
7188 fputs ("ds ; ", file);
7189 else
7190 fputs ("cs ; ", file);
7194 return;
7196 default:
7197 output_operand_lossage ("invalid operand code `%c'", code);
7201 if (GET_CODE (x) == REG)
7203 PRINT_REG (x, code, file);
7206 else if (GET_CODE (x) == MEM)
7208 /* No `byte ptr' prefix for call instructions. */
7209 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7211 const char * size;
7212 switch (GET_MODE_SIZE (GET_MODE (x)))
7214 case 1: size = "BYTE"; break;
7215 case 2: size = "WORD"; break;
7216 case 4: size = "DWORD"; break;
7217 case 8: size = "QWORD"; break;
7218 case 12: size = "XWORD"; break;
7219 case 16: size = "XMMWORD"; break;
7220 default:
7221 abort ();
7224 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7225 if (code == 'b')
7226 size = "BYTE";
7227 else if (code == 'w')
7228 size = "WORD";
7229 else if (code == 'k')
7230 size = "DWORD";
7232 fputs (size, file);
7233 fputs (" PTR ", file);
7236 x = XEXP (x, 0);
7237 if (flag_pic && CONSTANT_ADDRESS_P (x))
7238 output_pic_addr_const (file, x, code);
7239 /* Avoid (%rip) for call operands. */
7240 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7241 && GET_CODE (x) != CONST_INT)
7242 output_addr_const (file, x);
7243 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7244 output_operand_lossage ("invalid constraints for operand");
7245 else
7246 output_address (x);
7249 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7251 REAL_VALUE_TYPE r;
7252 long l;
7254 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7255 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7257 if (ASSEMBLER_DIALECT == ASM_ATT)
7258 putc ('$', file);
7259 fprintf (file, "0x%lx", l);
7262 /* These float cases don't actually occur as immediate operands. */
7263 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7265 char dstr[30];
7267 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7268 fprintf (file, "%s", dstr);
7271 else if (GET_CODE (x) == CONST_DOUBLE
7272 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7274 char dstr[30];
7276 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7277 fprintf (file, "%s", dstr);
7280 else
7282 if (code != 'P')
7284 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7286 if (ASSEMBLER_DIALECT == ASM_ATT)
7287 putc ('$', file);
7289 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7290 || GET_CODE (x) == LABEL_REF)
7292 if (ASSEMBLER_DIALECT == ASM_ATT)
7293 putc ('$', file);
7294 else
7295 fputs ("OFFSET FLAT:", file);
7298 if (GET_CODE (x) == CONST_INT)
7299 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7300 else if (flag_pic)
7301 output_pic_addr_const (file, x, code);
7302 else
7303 output_addr_const (file, x);
7307 /* Print a memory operand whose address is ADDR. */
7309 void
7310 print_operand_address (file, addr)
7311 FILE *file;
7312 register rtx addr;
7314 struct ix86_address parts;
7315 rtx base, index, disp;
7316 int scale;
7318 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7320 if (ASSEMBLER_DIALECT == ASM_INTEL)
7321 fputs ("DWORD PTR ", file);
7322 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7323 putc ('%', file);
7324 if (TARGET_64BIT)
7325 fputs ("fs:0", file);
7326 else
7327 fputs ("gs:0", file);
7328 return;
7331 if (! ix86_decompose_address (addr, &parts))
7332 abort ();
7334 base = parts.base;
7335 index = parts.index;
7336 disp = parts.disp;
7337 scale = parts.scale;
7339 if (!base && !index)
7341 /* A displacement-only address requires special attention. */
7343 if (GET_CODE (disp) == CONST_INT)
7345 if (ASSEMBLER_DIALECT == ASM_INTEL)
7347 if (USER_LABEL_PREFIX[0] == 0)
7348 putc ('%', file);
7349 fputs ("ds:", file);
7351 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7353 else if (flag_pic)
7354 output_pic_addr_const (file, addr, 0);
7355 else
7356 output_addr_const (file, addr);
7358 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
7359 if (TARGET_64BIT
7360 && ((GET_CODE (addr) == SYMBOL_REF
7361 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7362 || GET_CODE (addr) == LABEL_REF
7363 || (GET_CODE (addr) == CONST
7364 && GET_CODE (XEXP (addr, 0)) == PLUS
7365 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7366 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7367 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7368 fputs ("(%rip)", file);
7370 else
7372 if (ASSEMBLER_DIALECT == ASM_ATT)
7374 if (disp)
7376 if (flag_pic)
7377 output_pic_addr_const (file, disp, 0);
7378 else if (GET_CODE (disp) == LABEL_REF)
7379 output_asm_label (disp);
7380 else
7381 output_addr_const (file, disp);
7384 putc ('(', file);
7385 if (base)
7386 PRINT_REG (base, 0, file);
7387 if (index)
7389 putc (',', file);
7390 PRINT_REG (index, 0, file);
7391 if (scale != 1)
7392 fprintf (file, ",%d", scale);
7394 putc (')', file);
7396 else
7398 rtx offset = NULL_RTX;
7400 if (disp)
7402 /* Pull out the offset of a symbol; print any symbol itself. */
7403 if (GET_CODE (disp) == CONST
7404 && GET_CODE (XEXP (disp, 0)) == PLUS
7405 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7407 offset = XEXP (XEXP (disp, 0), 1);
7408 disp = gen_rtx_CONST (VOIDmode,
7409 XEXP (XEXP (disp, 0), 0));
7412 if (flag_pic)
7413 output_pic_addr_const (file, disp, 0);
7414 else if (GET_CODE (disp) == LABEL_REF)
7415 output_asm_label (disp);
7416 else if (GET_CODE (disp) == CONST_INT)
7417 offset = disp;
7418 else
7419 output_addr_const (file, disp);
7422 putc ('[', file);
7423 if (base)
7425 PRINT_REG (base, 0, file);
7426 if (offset)
7428 if (INTVAL (offset) >= 0)
7429 putc ('+', file);
7430 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7433 else if (offset)
7434 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7435 else
7436 putc ('0', file);
7438 if (index)
7440 putc ('+', file);
7441 PRINT_REG (index, 0, file);
7442 if (scale != 1)
7443 fprintf (file, "*%d", scale);
7445 putc (']', file);
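/* For illustration only, the same address printed in the two dialects,
   assuming base %ebx, index %esi, scale 4 and displacement 8:

     AT&T:   8(%ebx,%esi,4)
     Intel:  [ebx+8+esi*4]

   (any "DWORD PTR" style size prefix is added by print_operand, not here).  */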
7450 bool
7451 output_addr_const_extra (file, x)
7452 FILE *file;
7453 rtx x;
7455 rtx op;
7457 if (GET_CODE (x) != UNSPEC)
7458 return false;
7460 op = XVECEXP (x, 0, 0);
7461 switch (XINT (x, 1))
7463 case UNSPEC_GOTTPOFF:
7464 output_addr_const (file, op);
7465 /* FIXME: This might be @TPOFF in Sun ld. */
7466 fputs ("@GOTTPOFF", file);
7467 break;
7468 case UNSPEC_TPOFF:
7469 output_addr_const (file, op);
7470 fputs ("@TPOFF", file);
7471 break;
7472 case UNSPEC_NTPOFF:
7473 output_addr_const (file, op);
7474 if (TARGET_64BIT)
7475 fputs ("@TPOFF", file);
7476 else
7477 fputs ("@NTPOFF", file);
7478 break;
7479 case UNSPEC_DTPOFF:
7480 output_addr_const (file, op);
7481 fputs ("@DTPOFF", file);
7482 break;
7483 case UNSPEC_GOTNTPOFF:
7484 output_addr_const (file, op);
7485 if (TARGET_64BIT)
7486 fputs ("@GOTTPOFF(%rip)", file);
7487 else
7488 fputs ("@GOTNTPOFF", file);
7489 break;
7490 case UNSPEC_INDNTPOFF:
7491 output_addr_const (file, op);
7492 fputs ("@INDNTPOFF", file);
7493 break;
7495 default:
7496 return false;
7499 return true;
7502 /* Split one or more DImode RTL references into pairs of SImode
7503 references. The RTL can be REG, offsettable MEM, integer constant, or
7504 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7505 split and "num" is its length. lo_half and hi_half are output arrays
7506 that parallel "operands". */
7508 void
7509 split_di (operands, num, lo_half, hi_half)
7510 rtx operands[];
7511 int num;
7512 rtx lo_half[], hi_half[];
7514 while (num--)
7516 rtx op = operands[num];
7518 /* simplify_subreg refuses to split volatile memory references,
7519 but we still have to handle them. */
7520 if (GET_CODE (op) == MEM)
7522 lo_half[num] = adjust_address (op, SImode, 0);
7523 hi_half[num] = adjust_address (op, SImode, 4);
7525 else
7527 lo_half[num] = simplify_gen_subreg (SImode, op,
7528 GET_MODE (op) == VOIDmode
7529 ? DImode : GET_MODE (op), 0);
7530 hi_half[num] = simplify_gen_subreg (SImode, op,
7531 GET_MODE (op) == VOIDmode
7532 ? DImode : GET_MODE (op), 4);
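/* A minimal usage sketch, assuming two DImode operands in a splitter; the
   names below are placeholders and this fragment is not taken from the
   surrounding code.  On this little-endian target the low SImode half of a
   constant like 0x1_00000002 is 2 and the high half is 1, and a DImode MEM
   splits into SImode MEMs at offsets 0 and 4.  */
#if 0
  rtx lo[2], hi[2];
  split_di (operands, 2, lo, hi);    /* lo[i]/hi[i] are the SImode halves.  */
  emit_move_insn (lo[0], lo[1]);     /* Move the low word first...          */
  emit_move_insn (hi[0], hi[1]);     /* ...then the high word.              */
#endif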
7536 /* Split one or more TImode RTL references into pairs of DImode
7537 references. The RTL can be REG, offsettable MEM, integer constant, or
7538 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7539 split and "num" is its length. lo_half and hi_half are output arrays
7540 that parallel "operands". */
7542 void
7543 split_ti (operands, num, lo_half, hi_half)
7544 rtx operands[];
7545 int num;
7546 rtx lo_half[], hi_half[];
7548 while (num--)
7550 rtx op = operands[num];
7552 /* simplify_subreg refuses to split volatile memory references, but we
7553 still have to handle them. */
7554 if (GET_CODE (op) == MEM)
7556 lo_half[num] = adjust_address (op, DImode, 0);
7557 hi_half[num] = adjust_address (op, DImode, 8);
7559 else
7561 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7562 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7567 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7568 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7569 is the expression of the binary operation. The output may either be
7570 emitted here, or returned to the caller, like all output_* functions.
7572 There is no guarantee that the operands are the same mode, as they
7573 might be within FLOAT or FLOAT_EXTEND expressions. */
7575 #ifndef SYSV386_COMPAT
7576 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7577 wants to fix the assemblers because that causes incompatibility
7578 with gcc. No-one wants to fix gcc because that causes
7579 incompatibility with assemblers... You can use the option of
7580 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7581 #define SYSV386_COMPAT 1
7582 #endif
7584 const char *
7585 output_387_binary_op (insn, operands)
7586 rtx insn;
7587 rtx *operands;
7589 static char buf[30];
7590 const char *p;
7591 const char *ssep;
7592 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7594 #ifdef ENABLE_CHECKING
7595 /* Even if we do not want to check the inputs, this documents the input
7596 constraints, which helps in understanding the following code. */
7597 if (STACK_REG_P (operands[0])
7598 && ((REG_P (operands[1])
7599 && REGNO (operands[0]) == REGNO (operands[1])
7600 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7601 || (REG_P (operands[2])
7602 && REGNO (operands[0]) == REGNO (operands[2])
7603 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7604 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7605 ; /* ok */
7606 else if (!is_sse)
7607 abort ();
7608 #endif
7610 switch (GET_CODE (operands[3]))
7612 case PLUS:
7613 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7614 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7615 p = "fiadd";
7616 else
7617 p = "fadd";
7618 ssep = "add";
7619 break;
7621 case MINUS:
7622 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7623 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7624 p = "fisub";
7625 else
7626 p = "fsub";
7627 ssep = "sub";
7628 break;
7630 case MULT:
7631 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7632 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7633 p = "fimul";
7634 else
7635 p = "fmul";
7636 ssep = "mul";
7637 break;
7639 case DIV:
7640 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7641 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7642 p = "fidiv";
7643 else
7644 p = "fdiv";
7645 ssep = "div";
7646 break;
7648 default:
7649 abort ();
7652 if (is_sse)
7654 strcpy (buf, ssep);
7655 if (GET_MODE (operands[0]) == SFmode)
7656 strcat (buf, "ss\t{%2, %0|%0, %2}");
7657 else
7658 strcat (buf, "sd\t{%2, %0|%0, %2}");
7659 return buf;
7661 strcpy (buf, p);
7663 switch (GET_CODE (operands[3]))
7665 case MULT:
7666 case PLUS:
7667 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7669 rtx temp = operands[2];
7670 operands[2] = operands[1];
7671 operands[1] = temp;
7674 /* We now know operands[0] == operands[1]. */
7676 if (GET_CODE (operands[2]) == MEM)
7678 p = "%z2\t%2";
7679 break;
7682 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7684 if (STACK_TOP_P (operands[0]))
7685 /* How is it that we are storing to a dead operand[2]?
7686 Well, presumably operands[1] is dead too. We can't
7687 store the result to st(0) as st(0) gets popped on this
7688 instruction. Instead store to operands[2] (which I
7689 think has to be st(1)). st(1) will be popped later.
7690 gcc <= 2.8.1 didn't have this check and generated
7691 assembly code that the Unixware assembler rejected. */
7692 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7693 else
7694 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7695 break;
7698 if (STACK_TOP_P (operands[0]))
7699 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7700 else
7701 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7702 break;
7704 case MINUS:
7705 case DIV:
7706 if (GET_CODE (operands[1]) == MEM)
7708 p = "r%z1\t%1";
7709 break;
7712 if (GET_CODE (operands[2]) == MEM)
7714 p = "%z2\t%2";
7715 break;
7718 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7720 #if SYSV386_COMPAT
7721 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7722 derived assemblers, confusingly reverse the direction of
7723 the operation for fsub{r} and fdiv{r} when the
7724 destination register is not st(0). The Intel assembler
7725 doesn't have this brain damage. Read !SYSV386_COMPAT to
7726 figure out what the hardware really does. */
7727 if (STACK_TOP_P (operands[0]))
7728 p = "{p\t%0, %2|rp\t%2, %0}";
7729 else
7730 p = "{rp\t%2, %0|p\t%0, %2}";
7731 #else
7732 if (STACK_TOP_P (operands[0]))
7733 /* As above for fmul/fadd, we can't store to st(0). */
7734 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7735 else
7736 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7737 #endif
7738 break;
7741 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7743 #if SYSV386_COMPAT
7744 if (STACK_TOP_P (operands[0]))
7745 p = "{rp\t%0, %1|p\t%1, %0}";
7746 else
7747 p = "{p\t%1, %0|rp\t%0, %1}";
7748 #else
7749 if (STACK_TOP_P (operands[0]))
7750 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7751 else
7752 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7753 #endif
7754 break;
7757 if (STACK_TOP_P (operands[0]))
7759 if (STACK_TOP_P (operands[1]))
7760 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7761 else
7762 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7763 break;
7765 else if (STACK_TOP_P (operands[1]))
7767 #if SYSV386_COMPAT
7768 p = "{\t%1, %0|r\t%0, %1}";
7769 #else
7770 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7771 #endif
7773 else
7775 #if SYSV386_COMPAT
7776 p = "{r\t%2, %0|\t%0, %2}";
7777 #else
7778 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7779 #endif
7781 break;
7783 default:
7784 abort ();
7787 strcat (buf, p);
7788 return buf;
7791 /* Output code to initialize control word copies used by
7792 trunc?f?i patterns. NORMAL is set to the current control word, while
7793 ROUND_DOWN is set to a control word that rounds towards zero (truncates). */
7794 void
7795 emit_i387_cw_initialization (normal, round_down)
7796 rtx normal, round_down;
7798 rtx reg = gen_reg_rtx (HImode);
7800 emit_insn (gen_x86_fnstcw_1 (normal));
7801 emit_move_insn (reg, normal);
7802 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7803 && !TARGET_64BIT)
7804 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7805 else
7806 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7807 emit_move_insn (round_down, reg);
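/* For illustration only: bits 10 and 11 of the 387 control word are the
   rounding-control field (00 = nearest, 01 = down, 10 = up, 11 = toward
   zero), so OR-ing in 0xc00 above selects round-toward-zero, which is what
   the trunc?f?i patterns need.  */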
7810 /* Output code for INSN to convert a float to a signed int. OPERANDS
7811 are the insn operands. The output may be [HSD]Imode and the input
7812 operand may be [SDX]Fmode. */
7814 const char *
7815 output_fix_trunc (insn, operands)
7816 rtx insn;
7817 rtx *operands;
7819 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7820 int dimode_p = GET_MODE (operands[0]) == DImode;
7822 /* Jump through a hoop or two for DImode, since the hardware has no
7823 non-popping instruction. We used to do this a different way, but
7824 that was somewhat fragile and broke with post-reload splitters. */
7825 if (dimode_p && !stack_top_dies)
7826 output_asm_insn ("fld\t%y1", operands);
7828 if (!STACK_TOP_P (operands[1]))
7829 abort ();
7831 if (GET_CODE (operands[0]) != MEM)
7832 abort ();
7834 output_asm_insn ("fldcw\t%3", operands);
7835 if (stack_top_dies || dimode_p)
7836 output_asm_insn ("fistp%z0\t%0", operands);
7837 else
7838 output_asm_insn ("fist%z0\t%0", operands);
7839 output_asm_insn ("fldcw\t%2", operands);
7841 return "";
7844 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7845 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7846 when fucom should be used. */
7848 const char *
7849 output_fp_compare (insn, operands, eflags_p, unordered_p)
7850 rtx insn;
7851 rtx *operands;
7852 int eflags_p, unordered_p;
7854 int stack_top_dies;
7855 rtx cmp_op0 = operands[0];
7856 rtx cmp_op1 = operands[1];
7857 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7859 if (eflags_p == 2)
7861 cmp_op0 = cmp_op1;
7862 cmp_op1 = operands[2];
7864 if (is_sse)
7866 if (GET_MODE (operands[0]) == SFmode)
7867 if (unordered_p)
7868 return "ucomiss\t{%1, %0|%0, %1}";
7869 else
7870 return "comiss\t{%1, %0|%0, %1}";
7871 else
7872 if (unordered_p)
7873 return "ucomisd\t{%1, %0|%0, %1}";
7874 else
7875 return "comisd\t{%1, %0|%0, %1}";
7878 if (! STACK_TOP_P (cmp_op0))
7879 abort ();
7881 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7883 if (STACK_REG_P (cmp_op1)
7884 && stack_top_dies
7885 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7886 && REGNO (cmp_op1) != FIRST_STACK_REG)
7888 /* If both the top of the 387 stack dies and the other operand
7889 is also a stack register that dies, then this must be an
7890 `fcompp' float compare. */
7892 if (eflags_p == 1)
7894 /* There is no double popping fcomi variant. Fortunately,
7895 eflags is immune from the fstp's cc clobbering. */
7896 if (unordered_p)
7897 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7898 else
7899 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7900 return "fstp\t%y0";
7902 else
7904 if (eflags_p == 2)
7906 if (unordered_p)
7907 return "fucompp\n\tfnstsw\t%0";
7908 else
7909 return "fcompp\n\tfnstsw\t%0";
7911 else
7913 if (unordered_p)
7914 return "fucompp";
7915 else
7916 return "fcompp";
7920 else
7922 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7924 static const char * const alt[24] =
7926 "fcom%z1\t%y1",
7927 "fcomp%z1\t%y1",
7928 "fucom%z1\t%y1",
7929 "fucomp%z1\t%y1",
7931 "ficom%z1\t%y1",
7932 "ficomp%z1\t%y1",
7933 NULL,
7934 NULL,
7936 "fcomi\t{%y1, %0|%0, %y1}",
7937 "fcomip\t{%y1, %0|%0, %y1}",
7938 "fucomi\t{%y1, %0|%0, %y1}",
7939 "fucomip\t{%y1, %0|%0, %y1}",
7941 NULL,
7942 NULL,
7943 NULL,
7944 NULL,
7946 "fcom%z2\t%y2\n\tfnstsw\t%0",
7947 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7948 "fucom%z2\t%y2\n\tfnstsw\t%0",
7949 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7951 "ficom%z2\t%y2\n\tfnstsw\t%0",
7952 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7953 NULL,
7954 NULL
7957 int mask;
7958 const char *ret;
7960 mask = eflags_p << 3;
7961 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7962 mask |= unordered_p << 1;
7963 mask |= stack_top_dies;
7965 if (mask >= 24)
7966 abort ();
7967 ret = alt[mask];
7968 if (ret == NULL)
7969 abort ();
7971 return ret;
7975 void
7976 ix86_output_addr_vec_elt (file, value)
7977 FILE *file;
7978 int value;
7980 const char *directive = ASM_LONG;
7982 if (TARGET_64BIT)
7984 #ifdef ASM_QUAD
7985 directive = ASM_QUAD;
7986 #else
7987 abort ();
7988 #endif
7991 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7994 void
7995 ix86_output_addr_diff_elt (file, value, rel)
7996 FILE *file;
7997 int value, rel;
7999 if (TARGET_64BIT)
8000 fprintf (file, "%s%s%d-%s%d\n",
8001 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8002 else if (HAVE_AS_GOTOFF_IN_DATA)
8003 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8004 #if TARGET_MACHO
8005 else if (TARGET_MACHO)
8006 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8007 machopic_function_base_name () + 1);
8008 #endif
8009 else
8010 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8011 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8014 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8015 for the target. */
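/* Illustrative note (an assumption about typical output, not from the
   original sources): for SImode this usually becomes "xorl %reg, %reg"
   plus a flags clobber, which is shorter than "movl $0, %reg"; the mov
   form is kept only when TARGET_USE_MOV0 says the xor idiom is slower
   and we are not optimizing for size.  */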
8017 void
8018 ix86_expand_clear (dest)
8019 rtx dest;
8021 rtx tmp;
8023 /* We play register width games, which are only valid after reload. */
8024 if (!reload_completed)
8025 abort ();
8027 /* Avoid HImode and its attendant prefix byte. */
8028 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8029 dest = gen_rtx_REG (SImode, REGNO (dest));
8031 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8033 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8034 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8036 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8037 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8040 emit_insn (tmp);
8043 /* X is an unchanging MEM. If it is a constant pool reference, return
8044 the constant pool rtx, else NULL. */
8046 static rtx
8047 maybe_get_pool_constant (x)
8048 rtx x;
8050 x = XEXP (x, 0);
8052 if (flag_pic && ! TARGET_64BIT)
8054 if (GET_CODE (x) != PLUS)
8055 return NULL_RTX;
8056 if (XEXP (x, 0) != pic_offset_table_rtx)
8057 return NULL_RTX;
8058 x = XEXP (x, 1);
8059 if (GET_CODE (x) != CONST)
8060 return NULL_RTX;
8061 x = XEXP (x, 0);
8062 if (GET_CODE (x) != UNSPEC)
8063 return NULL_RTX;
8064 if (XINT (x, 1) != UNSPEC_GOTOFF)
8065 return NULL_RTX;
8066 x = XVECEXP (x, 0, 0);
8069 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8070 return get_pool_constant (x);
8072 return NULL_RTX;
8075 void
8076 ix86_expand_move (mode, operands)
8077 enum machine_mode mode;
8078 rtx operands[];
8080 int strict = (reload_in_progress || reload_completed);
8081 rtx insn, op0, op1, tmp;
8083 op0 = operands[0];
8084 op1 = operands[1];
8086 if (tls_symbolic_operand (op1, Pmode))
8088 op1 = legitimize_address (op1, op1, VOIDmode);
8089 if (GET_CODE (op0) == MEM)
8091 tmp = gen_reg_rtx (mode);
8092 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8093 op1 = tmp;
8096 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8098 #if TARGET_MACHO
8099 if (MACHOPIC_PURE)
8101 rtx temp = ((reload_in_progress
8102 || ((op0 && GET_CODE (op0) == REG)
8103 && mode == Pmode))
8104 ? op0 : gen_reg_rtx (Pmode));
8105 op1 = machopic_indirect_data_reference (op1, temp);
8106 op1 = machopic_legitimize_pic_address (op1, mode,
8107 temp == op1 ? 0 : temp);
8109 else
8111 if (MACHOPIC_INDIRECT)
8112 op1 = machopic_indirect_data_reference (op1, 0);
8114 if (op0 != op1)
8116 insn = gen_rtx_SET (VOIDmode, op0, op1);
8117 emit_insn (insn);
8119 return;
8120 #endif /* TARGET_MACHO */
8121 if (GET_CODE (op0) == MEM)
8122 op1 = force_reg (Pmode, op1);
8123 else
8125 rtx temp = op0;
8126 if (GET_CODE (temp) != REG)
8127 temp = gen_reg_rtx (Pmode);
8128 temp = legitimize_pic_address (op1, temp);
8129 if (temp == op0)
8130 return;
8131 op1 = temp;
8134 else
8136 if (GET_CODE (op0) == MEM
8137 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8138 || !push_operand (op0, mode))
8139 && GET_CODE (op1) == MEM)
8140 op1 = force_reg (mode, op1);
8142 if (push_operand (op0, mode)
8143 && ! general_no_elim_operand (op1, mode))
8144 op1 = copy_to_mode_reg (mode, op1);
8146 /* Force large constants in 64bit compilation into register
8147 to get them CSEed. */
8148 if (TARGET_64BIT && mode == DImode
8149 && immediate_operand (op1, mode)
8150 && !x86_64_zero_extended_value (op1)
8151 && !register_operand (op0, mode)
8152 && optimize && !reload_completed && !reload_in_progress)
8153 op1 = copy_to_mode_reg (mode, op1);
8155 if (FLOAT_MODE_P (mode))
8157 /* If we are loading a floating point constant to a register,
8158 force the value to memory now, since we'll get better code
8159 out the back end. */
8161 if (strict)
8163 else if (GET_CODE (op1) == CONST_DOUBLE
8164 && register_operand (op0, mode))
8165 op1 = validize_mem (force_const_mem (mode, op1));
8169 insn = gen_rtx_SET (VOIDmode, op0, op1);
8171 emit_insn (insn);
8174 void
8175 ix86_expand_vector_move (mode, operands)
8176 enum machine_mode mode;
8177 rtx operands[];
8179 /* Force constants other than zero into memory. We do not know how
8180 the instructions used to build constants modify the upper 64 bits
8181 of the register, once we have that information we may be able
8182 to handle some of them more efficiently. */
8183 if ((reload_in_progress | reload_completed) == 0
8184 && register_operand (operands[0], mode)
8185 && CONSTANT_P (operands[1]))
8186 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8188 /* Make operand1 a register if it isn't already. */
8189 if (!no_new_pseudos
8190 && !register_operand (operands[0], mode)
8191 && !register_operand (operands[1], mode))
8193 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8194 emit_move_insn (operands[0], temp);
8195 return;
8198 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8201 /* Attempt to expand a binary operator. Make the expansion closer to the
8202 actual machine than just general_operand, which would allow 3 separate
8203 memory references (one output, two inputs) in a single insn. */
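/* Small illustrative example (assumed): for "mem = mem + reg" the
   memory destination matches source 1, so the addition can be emitted
   directly against memory; for "mem1 = mem2 + reg" the sum is instead
   computed in a fresh register and stored back to mem1 afterwards.  */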
8205 void
8206 ix86_expand_binary_operator (code, mode, operands)
8207 enum rtx_code code;
8208 enum machine_mode mode;
8209 rtx operands[];
8211 int matching_memory;
8212 rtx src1, src2, dst, op, clob;
8214 dst = operands[0];
8215 src1 = operands[1];
8216 src2 = operands[2];
8218 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8219 if (GET_RTX_CLASS (code) == 'c'
8220 && (rtx_equal_p (dst, src2)
8221 || immediate_operand (src1, mode)))
8223 rtx temp = src1;
8224 src1 = src2;
8225 src2 = temp;
8228 /* If the destination is memory, and we do not have matching source
8229 operands, do things in registers. */
8230 matching_memory = 0;
8231 if (GET_CODE (dst) == MEM)
8233 if (rtx_equal_p (dst, src1))
8234 matching_memory = 1;
8235 else if (GET_RTX_CLASS (code) == 'c'
8236 && rtx_equal_p (dst, src2))
8237 matching_memory = 2;
8238 else
8239 dst = gen_reg_rtx (mode);
8242 /* Both source operands cannot be in memory. */
8243 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8245 if (matching_memory != 2)
8246 src2 = force_reg (mode, src2);
8247 else
8248 src1 = force_reg (mode, src1);
8251 /* If the operation is not commutative, source 1 cannot be a constant
8252 or non-matching memory. */
8253 if ((CONSTANT_P (src1)
8254 || (!matching_memory && GET_CODE (src1) == MEM))
8255 && GET_RTX_CLASS (code) != 'c')
8256 src1 = force_reg (mode, src1);
8258 /* If optimizing, copy to regs to improve CSE */
8259 if (optimize && ! no_new_pseudos)
8261 if (GET_CODE (dst) == MEM)
8262 dst = gen_reg_rtx (mode);
8263 if (GET_CODE (src1) == MEM)
8264 src1 = force_reg (mode, src1);
8265 if (GET_CODE (src2) == MEM)
8266 src2 = force_reg (mode, src2);
8269 /* Emit the instruction. */
8271 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8272 if (reload_in_progress)
8274 /* Reload doesn't know about the flags register, and doesn't know that
8275 it doesn't want to clobber it. We can only do this with PLUS. */
8276 if (code != PLUS)
8277 abort ();
8278 emit_insn (op);
8280 else
8282 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8283 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8286 /* Fix up the destination if needed. */
8287 if (dst != operands[0])
8288 emit_move_insn (operands[0], dst);
8291 /* Return TRUE or FALSE depending on whether the binary operator meets the
8292 appropriate constraints. */
8295 ix86_binary_operator_ok (code, mode, operands)
8296 enum rtx_code code;
8297 enum machine_mode mode ATTRIBUTE_UNUSED;
8298 rtx operands[3];
8300 /* Both source operands cannot be in memory. */
8301 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8302 return 0;
8303 /* If the operation is not commutative, source 1 cannot be a constant. */
8304 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8305 return 0;
8306 /* If the destination is memory, we must have a matching source operand. */
8307 if (GET_CODE (operands[0]) == MEM
8308 && ! (rtx_equal_p (operands[0], operands[1])
8309 || (GET_RTX_CLASS (code) == 'c'
8310 && rtx_equal_p (operands[0], operands[2]))))
8311 return 0;
8312 /* If the operation is not commutative and source 1 is memory, we must
8313 have a matching destination. */
8314 if (GET_CODE (operands[1]) == MEM
8315 && GET_RTX_CLASS (code) != 'c'
8316 && ! rtx_equal_p (operands[0], operands[1]))
8317 return 0;
8318 return 1;
8321 /* Attempt to expand a unary operator. Make the expansion closer to the
8322 actual machine than just general_operand, which would allow 2 separate
8323 memory references (one output, one input) in a single insn. */
8325 void
8326 ix86_expand_unary_operator (code, mode, operands)
8327 enum rtx_code code;
8328 enum machine_mode mode;
8329 rtx operands[];
8331 int matching_memory;
8332 rtx src, dst, op, clob;
8334 dst = operands[0];
8335 src = operands[1];
8337 /* If the destination is memory, and we do not have matching source
8338 operands, do things in registers. */
8339 matching_memory = 0;
8340 if (GET_CODE (dst) == MEM)
8342 if (rtx_equal_p (dst, src))
8343 matching_memory = 1;
8344 else
8345 dst = gen_reg_rtx (mode);
8348 /* When source operand is memory, destination must match. */
8349 if (!matching_memory && GET_CODE (src) == MEM)
8350 src = force_reg (mode, src);
8352 /* If optimizing, copy to regs to improve CSE */
8353 if (optimize && ! no_new_pseudos)
8355 if (GET_CODE (dst) == MEM)
8356 dst = gen_reg_rtx (mode);
8357 if (GET_CODE (src) == MEM)
8358 src = force_reg (mode, src);
8361 /* Emit the instruction. */
8363 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8364 if (reload_in_progress || code == NOT)
8366 /* Reload doesn't know about the flags register, and doesn't know that
8367 it doesn't want to clobber it. */
8368 if (code != NOT)
8369 abort ();
8370 emit_insn (op);
8372 else
8374 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8375 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8378 /* Fix up the destination if needed. */
8379 if (dst != operands[0])
8380 emit_move_insn (operands[0], dst);
8383 /* Return TRUE or FALSE depending on whether the unary operator meets the
8384 appropriate constraints. */
8387 ix86_unary_operator_ok (code, mode, operands)
8388 enum rtx_code code ATTRIBUTE_UNUSED;
8389 enum machine_mode mode ATTRIBUTE_UNUSED;
8390 rtx operands[2] ATTRIBUTE_UNUSED;
8392 /* If one of operands is memory, source and destination must match. */
8393 if ((GET_CODE (operands[0]) == MEM
8394 || GET_CODE (operands[1]) == MEM)
8395 && ! rtx_equal_p (operands[0], operands[1]))
8396 return FALSE;
8397 return TRUE;
8400 /* Return TRUE or FALSE depending on whether the first SET in INSN
8401 has source and destination with matching CC modes, and that the
8402 CC mode is at least as constrained as REQ_MODE. */
8405 ix86_match_ccmode (insn, req_mode)
8406 rtx insn;
8407 enum machine_mode req_mode;
8409 rtx set;
8410 enum machine_mode set_mode;
8412 set = PATTERN (insn);
8413 if (GET_CODE (set) == PARALLEL)
8414 set = XVECEXP (set, 0, 0);
8415 if (GET_CODE (set) != SET)
8416 abort ();
8417 if (GET_CODE (SET_SRC (set)) != COMPARE)
8418 abort ();
8420 set_mode = GET_MODE (SET_DEST (set));
8421 switch (set_mode)
8423 case CCNOmode:
8424 if (req_mode != CCNOmode
8425 && (req_mode != CCmode
8426 || XEXP (SET_SRC (set), 1) != const0_rtx))
8427 return 0;
8428 break;
8429 case CCmode:
8430 if (req_mode == CCGCmode)
8431 return 0;
8432 /* FALLTHRU */
8433 case CCGCmode:
8434 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8435 return 0;
8436 /* FALLTHRU */
8437 case CCGOCmode:
8438 if (req_mode == CCZmode)
8439 return 0;
8440 /* FALLTHRU */
8441 case CCZmode:
8442 break;
8444 default:
8445 abort ();
8448 return (GET_MODE (SET_SRC (set)) == set_mode);
8451 /* Generate insn patterns to do an integer compare of OPERANDS. */
8453 static rtx
8454 ix86_expand_int_compare (code, op0, op1)
8455 enum rtx_code code;
8456 rtx op0, op1;
8458 enum machine_mode cmpmode;
8459 rtx tmp, flags;
8461 cmpmode = SELECT_CC_MODE (code, op0, op1);
8462 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8464 /* This is very simple, but making the interface the same as in the
8465 FP case makes the rest of the code easier. */
8466 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8467 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8469 /* Return the test that should be put into the flags user, i.e.
8470 the bcc, scc, or cmov instruction. */
8471 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8474 /* Figure out whether to use ordered or unordered fp comparisons.
8475 Return the appropriate mode to use. */
8477 enum machine_mode
8478 ix86_fp_compare_mode (code)
8479 enum rtx_code code ATTRIBUTE_UNUSED;
8481 /* ??? In order to make all comparisons reversible, we do all comparisons
8482 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8483 all forms of trapping and nontrapping comparisons, we can make inequality
8484 comparisons trapping again, since it results in better code when using
8485 FCOM based compares. */
8486 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8489 enum machine_mode
8490 ix86_cc_mode (code, op0, op1)
8491 enum rtx_code code;
8492 rtx op0, op1;
8494 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8495 return ix86_fp_compare_mode (code);
8496 switch (code)
8498 /* Only zero flag is needed. */
8499 case EQ: /* ZF=0 */
8500 case NE: /* ZF!=0 */
8501 return CCZmode;
8502 /* Codes needing carry flag. */
8503 case GEU: /* CF=0 */
8504 case GTU: /* CF=0 & ZF=0 */
8505 case LTU: /* CF=1 */
8506 case LEU: /* CF=1 | ZF=1 */
8507 return CCmode;
8508 /* Codes possibly doable only with sign flag when
8509 comparing against zero. */
8510 case GE: /* SF=OF or SF=0 */
8511 case LT: /* SF<>OF or SF=1 */
8512 if (op1 == const0_rtx)
8513 return CCGOCmode;
8514 else
8515 /* For other cases Carry flag is not required. */
8516 return CCGCmode;
8517 /* Codes doable only with sign flag when comparing
8518 against zero, but we miss jump instruction for it
8519 so we need to use relational tests against overflow
8520 that thus needs to be zero. */
8521 case GT: /* ZF=0 & SF=OF */
8522 case LE: /* ZF=1 | SF<>OF */
8523 if (op1 == const0_rtx)
8524 return CCNOmode;
8525 else
8526 return CCGCmode;
8527 /* The strcmp pattern does (use flags) and combine may ask us for the
8528 proper mode. */
8529 case USE:
8530 return CCmode;
8531 default:
8532 abort ();
8536 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8539 ix86_use_fcomi_compare (code)
8540 enum rtx_code code ATTRIBUTE_UNUSED;
8542 enum rtx_code swapped_code = swap_condition (code);
8543 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8544 || (ix86_fp_comparison_cost (swapped_code)
8545 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8548 /* Swap, force into registers, or otherwise massage the two operands
8549 to a fp comparison. The operands are updated in place; the new
8550 comparison code is returned. */
8552 static enum rtx_code
8553 ix86_prepare_fp_compare_args (code, pop0, pop1)
8554 enum rtx_code code;
8555 rtx *pop0, *pop1;
8557 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8558 rtx op0 = *pop0, op1 = *pop1;
8559 enum machine_mode op_mode = GET_MODE (op0);
8560 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8562 /* All of the unordered compare instructions only work on registers.
8563 The same is true of the XFmode compare instructions. The same is
8564 true of the fcomi compare instructions. */
8566 if (!is_sse
8567 && (fpcmp_mode == CCFPUmode
8568 || op_mode == XFmode
8569 || op_mode == TFmode
8570 || ix86_use_fcomi_compare (code)))
8572 op0 = force_reg (op_mode, op0);
8573 op1 = force_reg (op_mode, op1);
8575 else
8577 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8578 things around if they appear profitable, otherwise force op0
8579 into a register. */
8581 if (standard_80387_constant_p (op0) == 0
8582 || (GET_CODE (op0) == MEM
8583 && ! (standard_80387_constant_p (op1) == 0
8584 || GET_CODE (op1) == MEM)))
8586 rtx tmp;
8587 tmp = op0, op0 = op1, op1 = tmp;
8588 code = swap_condition (code);
8591 if (GET_CODE (op0) != REG)
8592 op0 = force_reg (op_mode, op0);
8594 if (CONSTANT_P (op1))
8596 if (standard_80387_constant_p (op1))
8597 op1 = force_reg (op_mode, op1);
8598 else
8599 op1 = validize_mem (force_const_mem (op_mode, op1));
8603 /* Try to rearrange the comparison to make it cheaper. */
8604 if (ix86_fp_comparison_cost (code)
8605 > ix86_fp_comparison_cost (swap_condition (code))
8606 && (GET_CODE (op1) == REG || !no_new_pseudos))
8608 rtx tmp;
8609 tmp = op0, op0 = op1, op1 = tmp;
8610 code = swap_condition (code);
8611 if (GET_CODE (op0) != REG)
8612 op0 = force_reg (op_mode, op0);
8615 *pop0 = op0;
8616 *pop1 = op1;
8617 return code;
8620 /* Convert comparison codes we use to represent FP comparison to integer
8621 code that will result in proper branch. Return UNKNOWN if no such code
8622 is available. */
8623 static enum rtx_code
8624 ix86_fp_compare_code_to_integer (code)
8625 enum rtx_code code;
8627 switch (code)
8629 case GT:
8630 return GTU;
8631 case GE:
8632 return GEU;
8633 case ORDERED:
8634 case UNORDERED:
8635 return code;
8636 break;
8637 case UNEQ:
8638 return EQ;
8639 break;
8640 case UNLT:
8641 return LTU;
8642 break;
8643 case UNLE:
8644 return LEU;
8645 break;
8646 case LTGT:
8647 return NE;
8648 break;
8649 default:
8650 return UNKNOWN;
8654 /* Split comparison code CODE into comparisons we can do using branch
8655 instructions. BYPASS_CODE is the comparison code for the branch that
8656 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8657 is not required, its value is set to NIL.
8658 We never require more than two branches. */
8659 static void
8660 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8661 enum rtx_code code, *bypass_code, *first_code, *second_code;
8663 *first_code = code;
8664 *bypass_code = NIL;
8665 *second_code = NIL;
8667 /* The fcomi comparison sets flags as follows:
8669 cmp ZF PF CF
8670 > 0 0 0
8671 < 0 0 1
8672 = 1 0 0
8673 un 1 1 1 */
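/* Example (illustrative): under IEEE math a plain "a < b" cannot be
   tested with a single flag condition because CF is also set for
   unordered operands, so it is split below into a bypass branch on
   UNORDERED (PF set) followed by the real test as UNLT (CF set);
   NE instead needs a second branch, LTGT first and then UNORDERED.  */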
8675 switch (code)
8677 case GT: /* GTU - CF=0 & ZF=0 */
8678 case GE: /* GEU - CF=0 */
8679 case ORDERED: /* PF=0 */
8680 case UNORDERED: /* PF=1 */
8681 case UNEQ: /* EQ - ZF=1 */
8682 case UNLT: /* LTU - CF=1 */
8683 case UNLE: /* LEU - CF=1 | ZF=1 */
8684 case LTGT: /* EQ - ZF=0 */
8685 break;
8686 case LT: /* LTU - CF=1 - fails on unordered */
8687 *first_code = UNLT;
8688 *bypass_code = UNORDERED;
8689 break;
8690 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8691 *first_code = UNLE;
8692 *bypass_code = UNORDERED;
8693 break;
8694 case EQ: /* EQ - ZF=1 - fails on unordered */
8695 *first_code = UNEQ;
8696 *bypass_code = UNORDERED;
8697 break;
8698 case NE: /* NE - ZF=0 - fails on unordered */
8699 *first_code = LTGT;
8700 *second_code = UNORDERED;
8701 break;
8702 case UNGE: /* GEU - CF=0 - fails on unordered */
8703 *first_code = GE;
8704 *second_code = UNORDERED;
8705 break;
8706 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8707 *first_code = GT;
8708 *second_code = UNORDERED;
8709 break;
8710 default:
8711 abort ();
8713 if (!TARGET_IEEE_FP)
8715 *second_code = NIL;
8716 *bypass_code = NIL;
8720 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8721 All of the following functions use the number of instructions as the cost metric.
8722 In the future this should be tweaked to compute bytes for optimize_size and
8723 take into account performance of various instructions on various CPUs. */
8724 static int
8725 ix86_fp_comparison_arithmetics_cost (code)
8726 enum rtx_code code;
8728 if (!TARGET_IEEE_FP)
8729 return 4;
8730 /* The cost of code output by ix86_expand_fp_compare. */
8731 switch (code)
8733 case UNLE:
8734 case UNLT:
8735 case LTGT:
8736 case GT:
8737 case GE:
8738 case UNORDERED:
8739 case ORDERED:
8740 case UNEQ:
8741 return 4;
8742 break;
8743 case LT:
8744 case NE:
8745 case EQ:
8746 case UNGE:
8747 return 5;
8748 break;
8749 case LE:
8750 case UNGT:
8751 return 6;
8752 break;
8753 default:
8754 abort ();
8758 /* Return cost of comparison done using fcomi operation.
8759 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8760 static int
8761 ix86_fp_comparison_fcomi_cost (code)
8762 enum rtx_code code;
8764 enum rtx_code bypass_code, first_code, second_code;
8765 /* Return arbitrarily high cost when instruction is not supported - this
8766 prevents gcc from using it. */
8767 if (!TARGET_CMOVE)
8768 return 1024;
8769 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8770 return (bypass_code != NIL || second_code != NIL) + 2;
8773 /* Return cost of comparison done using sahf operation.
8774 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8775 static int
8776 ix86_fp_comparison_sahf_cost (code)
8777 enum rtx_code code;
8779 enum rtx_code bypass_code, first_code, second_code;
8780 /* Return arbitrarily high cost when instruction is not preferred - this
8781 prevents gcc from using it. */
8782 if (!TARGET_USE_SAHF && !optimize_size)
8783 return 1024;
8784 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8785 return (bypass_code != NIL || second_code != NIL) + 3;
8788 /* Compute cost of the comparison done using any method.
8789 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8790 static int
8791 ix86_fp_comparison_cost (code)
8792 enum rtx_code code;
8794 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8795 int min;
8797 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8798 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8800 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8801 if (min > sahf_cost)
8802 min = sahf_cost;
8803 if (min > fcomi_cost)
8804 min = fcomi_cost;
8805 return min;
8808 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8810 static rtx
8811 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8812 enum rtx_code code;
8813 rtx op0, op1, scratch;
8814 rtx *second_test;
8815 rtx *bypass_test;
8817 enum machine_mode fpcmp_mode, intcmp_mode;
8818 rtx tmp, tmp2;
8819 int cost = ix86_fp_comparison_cost (code);
8820 enum rtx_code bypass_code, first_code, second_code;
8822 fpcmp_mode = ix86_fp_compare_mode (code);
8823 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8825 if (second_test)
8826 *second_test = NULL_RTX;
8827 if (bypass_test)
8828 *bypass_test = NULL_RTX;
8830 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8832 /* Do fcomi/sahf based test when profitable. */
8833 if ((bypass_code == NIL || bypass_test)
8834 && (second_code == NIL || second_test)
8835 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8837 if (TARGET_CMOVE)
8839 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8840 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8841 tmp);
8842 emit_insn (tmp);
8844 else
8846 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8847 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8848 if (!scratch)
8849 scratch = gen_reg_rtx (HImode);
8850 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8851 emit_insn (gen_x86_sahf_1 (scratch));
8854 /* The FP codes work out to act like unsigned. */
8855 intcmp_mode = fpcmp_mode;
8856 code = first_code;
8857 if (bypass_code != NIL)
8858 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8859 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8860 const0_rtx);
8861 if (second_code != NIL)
8862 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8863 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8864 const0_rtx);
8866 else
8868 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8869 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8870 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8871 if (!scratch)
8872 scratch = gen_reg_rtx (HImode);
8873 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8875 /* In the unordered case, we have to check C2 for NaN's, which
8876 doesn't happen to work out to anything nice combination-wise.
8877 So do some bit twiddling on the value we've got in AH to come
8878 up with an appropriate set of condition codes. */
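/* Background for the masks used below (illustrative): in the high byte
   of the fnstsw result the FPU condition codes appear as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40, so testing 0x45 checks C0|C2|C3 (all clear
   only for a true "greater than") and 0x04 alone detects the
   unordered case.  */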
8880 intcmp_mode = CCNOmode;
8881 switch (code)
8883 case GT:
8884 case UNGT:
8885 if (code == GT || !TARGET_IEEE_FP)
8887 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8888 code = EQ;
8890 else
8892 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8893 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8894 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8895 intcmp_mode = CCmode;
8896 code = GEU;
8898 break;
8899 case LT:
8900 case UNLT:
8901 if (code == LT && TARGET_IEEE_FP)
8903 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8904 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8905 intcmp_mode = CCmode;
8906 code = EQ;
8908 else
8910 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8911 code = NE;
8913 break;
8914 case GE:
8915 case UNGE:
8916 if (code == GE || !TARGET_IEEE_FP)
8918 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8919 code = EQ;
8921 else
8923 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8924 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8925 GEN_INT (0x01)));
8926 code = NE;
8928 break;
8929 case LE:
8930 case UNLE:
8931 if (code == LE && TARGET_IEEE_FP)
8933 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8934 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8935 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8936 intcmp_mode = CCmode;
8937 code = LTU;
8939 else
8941 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8942 code = NE;
8944 break;
8945 case EQ:
8946 case UNEQ:
8947 if (code == EQ && TARGET_IEEE_FP)
8949 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8950 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8951 intcmp_mode = CCmode;
8952 code = EQ;
8954 else
8956 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8957 code = NE;
8958 break;
8960 break;
8961 case NE:
8962 case LTGT:
8963 if (code == NE && TARGET_IEEE_FP)
8965 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8966 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8967 GEN_INT (0x40)));
8968 code = NE;
8970 else
8972 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8973 code = EQ;
8975 break;
8977 case UNORDERED:
8978 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8979 code = NE;
8980 break;
8981 case ORDERED:
8982 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8983 code = EQ;
8984 break;
8986 default:
8987 abort ();
8991 /* Return the test that should be put into the flags user, i.e.
8992 the bcc, scc, or cmov instruction. */
8993 return gen_rtx_fmt_ee (code, VOIDmode,
8994 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8995 const0_rtx);
8999 ix86_expand_compare (code, second_test, bypass_test)
9000 enum rtx_code code;
9001 rtx *second_test, *bypass_test;
9003 rtx op0, op1, ret;
9004 op0 = ix86_compare_op0;
9005 op1 = ix86_compare_op1;
9007 if (second_test)
9008 *second_test = NULL_RTX;
9009 if (bypass_test)
9010 *bypass_test = NULL_RTX;
9012 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9013 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9014 second_test, bypass_test);
9015 else
9016 ret = ix86_expand_int_compare (code, op0, op1);
9018 return ret;
9021 /* Return true if the CODE will result in nontrivial jump sequence. */
9022 bool
9023 ix86_fp_jump_nontrivial_p (code)
9024 enum rtx_code code;
9026 enum rtx_code bypass_code, first_code, second_code;
9027 if (!TARGET_CMOVE)
9028 return true;
9029 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9030 return bypass_code != NIL || second_code != NIL;
9033 void
9034 ix86_expand_branch (code, label)
9035 enum rtx_code code;
9036 rtx label;
9038 rtx tmp;
9040 switch (GET_MODE (ix86_compare_op0))
9042 case QImode:
9043 case HImode:
9044 case SImode:
9045 simple:
9046 tmp = ix86_expand_compare (code, NULL, NULL);
9047 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9048 gen_rtx_LABEL_REF (VOIDmode, label),
9049 pc_rtx);
9050 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9051 return;
9053 case SFmode:
9054 case DFmode:
9055 case XFmode:
9056 case TFmode:
9058 rtvec vec;
9059 int use_fcomi;
9060 enum rtx_code bypass_code, first_code, second_code;
9062 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9063 &ix86_compare_op1);
9065 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9067 /* Check whether we will use the natural sequence with one jump. If
9068 so, we can expand jump early. Otherwise delay expansion by
9069 creating compound insn to not confuse optimizers. */
9070 if (bypass_code == NIL && second_code == NIL
9071 && TARGET_CMOVE)
9073 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9074 gen_rtx_LABEL_REF (VOIDmode, label),
9075 pc_rtx, NULL_RTX);
9077 else
9079 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9080 ix86_compare_op0, ix86_compare_op1);
9081 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9082 gen_rtx_LABEL_REF (VOIDmode, label),
9083 pc_rtx);
9084 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9086 use_fcomi = ix86_use_fcomi_compare (code);
9087 vec = rtvec_alloc (3 + !use_fcomi);
9088 RTVEC_ELT (vec, 0) = tmp;
9089 RTVEC_ELT (vec, 1)
9090 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9091 RTVEC_ELT (vec, 2)
9092 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9093 if (! use_fcomi)
9094 RTVEC_ELT (vec, 3)
9095 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9097 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9099 return;
9102 case DImode:
9103 if (TARGET_64BIT)
9104 goto simple;
9105 /* Expand DImode branch into multiple compare+branch. */
9107 rtx lo[2], hi[2], label2;
9108 enum rtx_code code1, code2, code3;
9110 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9112 tmp = ix86_compare_op0;
9113 ix86_compare_op0 = ix86_compare_op1;
9114 ix86_compare_op1 = tmp;
9115 code = swap_condition (code);
9117 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9118 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9120 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9121 avoid two branches. This costs one extra insn, so disable when
9122 optimizing for size. */
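/* Illustrative sketch (assumed register assignment): comparing two
   DImode values for equality this way costs roughly

	xorl	hi1, hi0
	xorl	lo1, lo0
	orl	lo0, hi0
	je/jne	label

   i.e. one extra arithmetic insn instead of a second compare and
   branch.  */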
9124 if ((code == EQ || code == NE)
9125 && (!optimize_size
9126 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9128 rtx xor0, xor1;
9130 xor1 = hi[0];
9131 if (hi[1] != const0_rtx)
9132 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9133 NULL_RTX, 0, OPTAB_WIDEN);
9135 xor0 = lo[0];
9136 if (lo[1] != const0_rtx)
9137 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9138 NULL_RTX, 0, OPTAB_WIDEN);
9140 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9141 NULL_RTX, 0, OPTAB_WIDEN);
9143 ix86_compare_op0 = tmp;
9144 ix86_compare_op1 = const0_rtx;
9145 ix86_expand_branch (code, label);
9146 return;
9149 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9150 op1 is a constant and the low word is zero, then we can just
9151 examine the high word. */
9153 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9154 switch (code)
9156 case LT: case LTU: case GE: case GEU:
9157 ix86_compare_op0 = hi[0];
9158 ix86_compare_op1 = hi[1];
9159 ix86_expand_branch (code, label);
9160 return;
9161 default:
9162 break;
9165 /* Otherwise, we need two or three jumps. */
9167 label2 = gen_label_rtx ();
9169 code1 = code;
9170 code2 = swap_condition (code);
9171 code3 = unsigned_condition (code);
9173 switch (code)
9175 case LT: case GT: case LTU: case GTU:
9176 break;
9178 case LE: code1 = LT; code2 = GT; break;
9179 case GE: code1 = GT; code2 = LT; break;
9180 case LEU: code1 = LTU; code2 = GTU; break;
9181 case GEU: code1 = GTU; code2 = LTU; break;
9183 case EQ: code1 = NIL; code2 = NE; break;
9184 case NE: code2 = NIL; break;
9186 default:
9187 abort ();
9191 * a < b =>
9192 * if (hi(a) < hi(b)) goto true;
9193 * if (hi(a) > hi(b)) goto false;
9194 * if (lo(a) < lo(b)) goto true;
9195 * false:
9198 ix86_compare_op0 = hi[0];
9199 ix86_compare_op1 = hi[1];
9201 if (code1 != NIL)
9202 ix86_expand_branch (code1, label);
9203 if (code2 != NIL)
9204 ix86_expand_branch (code2, label2);
9206 ix86_compare_op0 = lo[0];
9207 ix86_compare_op1 = lo[1];
9208 ix86_expand_branch (code3, label);
9210 if (code2 != NIL)
9211 emit_label (label2);
9212 return;
9215 default:
9216 abort ();
9220 /* Split branch based on floating point condition. */
9221 void
9222 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9223 enum rtx_code code;
9224 rtx op1, op2, target1, target2, tmp;
9226 rtx second, bypass;
9227 rtx label = NULL_RTX;
9228 rtx condition;
9229 int bypass_probability = -1, second_probability = -1, probability = -1;
9230 rtx i;
9232 if (target2 != pc_rtx)
9234 rtx tmp = target2;
9235 code = reverse_condition_maybe_unordered (code);
9236 target2 = target1;
9237 target1 = tmp;
9240 condition = ix86_expand_fp_compare (code, op1, op2,
9241 tmp, &second, &bypass);
9243 if (split_branch_probability >= 0)
9245 /* Distribute the probabilities across the jumps.
9246 Assume the BYPASS and SECOND to be always test
9247 for UNORDERED. */
9248 probability = split_branch_probability;
9250 /* A value of 1 is low enough that the probability does not need
9251 to be updated. Later we may run some experiments and see
9252 if unordered values are more frequent in practice. */
9253 if (bypass)
9254 bypass_probability = 1;
9255 if (second)
9256 second_probability = 1;
9258 if (bypass != NULL_RTX)
9260 label = gen_label_rtx ();
9261 i = emit_jump_insn (gen_rtx_SET
9262 (VOIDmode, pc_rtx,
9263 gen_rtx_IF_THEN_ELSE (VOIDmode,
9264 bypass,
9265 gen_rtx_LABEL_REF (VOIDmode,
9266 label),
9267 pc_rtx)));
9268 if (bypass_probability >= 0)
9269 REG_NOTES (i)
9270 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9271 GEN_INT (bypass_probability),
9272 REG_NOTES (i));
9274 i = emit_jump_insn (gen_rtx_SET
9275 (VOIDmode, pc_rtx,
9276 gen_rtx_IF_THEN_ELSE (VOIDmode,
9277 condition, target1, target2)));
9278 if (probability >= 0)
9279 REG_NOTES (i)
9280 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9281 GEN_INT (probability),
9282 REG_NOTES (i));
9283 if (second != NULL_RTX)
9285 i = emit_jump_insn (gen_rtx_SET
9286 (VOIDmode, pc_rtx,
9287 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9288 target2)));
9289 if (second_probability >= 0)
9290 REG_NOTES (i)
9291 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9292 GEN_INT (second_probability),
9293 REG_NOTES (i));
9295 if (label != NULL_RTX)
9296 emit_label (label);
9300 ix86_expand_setcc (code, dest)
9301 enum rtx_code code;
9302 rtx dest;
9304 rtx ret, tmp, tmpreg;
9305 rtx second_test, bypass_test;
9307 if (GET_MODE (ix86_compare_op0) == DImode
9308 && !TARGET_64BIT)
9309 return 0; /* FAIL */
9311 if (GET_MODE (dest) != QImode)
9312 abort ();
9314 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9315 PUT_MODE (ret, QImode);
9317 tmp = dest;
9318 tmpreg = dest;
9320 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9321 if (bypass_test || second_test)
9323 rtx test = second_test;
9324 int bypass = 0;
9325 rtx tmp2 = gen_reg_rtx (QImode);
9326 if (bypass_test)
9328 if (second_test)
9329 abort ();
9330 test = bypass_test;
9331 bypass = 1;
9332 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9334 PUT_MODE (test, QImode);
9335 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9337 if (bypass)
9338 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9339 else
9340 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9343 return 1; /* DONE */
9346 /* Expand a comparison setting or clearing the carry flag. Return true when
9347 successful and set *POP to the comparison operation. */
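/* Example (for illustration): "a == 0" is rewritten below as the
   unsigned test "a < 1", so a single "cmpl $1, a" leaves the answer in
   the carry flag where the sbb/adc based sequences can consume it
   directly.  */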
9348 bool
9349 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9350 rtx op0, op1, *pop;
9351 enum rtx_code code;
9353 enum machine_mode mode =
9354 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9356 /* Do not handle DImode compares that go through a special path. Also we can't
9357 deal with FP compares yet. This is possible to add. */
9358 if ((mode == DImode && !TARGET_64BIT))
9359 return false;
9360 if (FLOAT_MODE_P (mode))
9362 rtx second_test = NULL, bypass_test = NULL;
9363 rtx compare_op, compare_seq;
9365 /* Shortcut: following common codes never translate into carry flag compares. */
9366 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9367 || code == ORDERED || code == UNORDERED)
9368 return false;
9370 /* These comparisons require the zero flag; swap the operands so they don't. */
9371 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9372 && !TARGET_IEEE_FP)
9374 rtx tmp = op0;
9375 op0 = op1;
9376 op1 = tmp;
9377 code = swap_condition (code);
9380 /* Try to expand the comparison and verify that we end up with a carry flag
9381 based comparison. This fails to be true only when we decide to expand the
9382 comparison using arithmetic, which is not a common scenario. */
9383 start_sequence ();
9384 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9385 &second_test, &bypass_test);
9386 compare_seq = get_insns ();
9387 end_sequence ();
9389 if (second_test || bypass_test)
9390 return false;
9391 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9392 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9393 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9394 else
9395 code = GET_CODE (compare_op);
9396 if (code != LTU && code != GEU)
9397 return false;
9398 emit_insn (compare_seq);
9399 *pop = compare_op;
9400 return true;
9402 if (!INTEGRAL_MODE_P (mode))
9403 return false;
9404 switch (code)
9406 case LTU:
9407 case GEU:
9408 break;
9410 /* Convert a==0 into (unsigned)a<1. */
9411 case EQ:
9412 case NE:
9413 if (op1 != const0_rtx)
9414 return false;
9415 op1 = const1_rtx;
9416 code = (code == EQ ? LTU : GEU);
9417 break;
9419 /* Convert a>b into b<a or a>=b+1. */
9420 case GTU:
9421 case LEU:
9422 if (GET_CODE (op1) == CONST_INT)
9424 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9425 /* Bail out on overflow. We still can swap operands but that
9426 would force loading of the constant into a register. */
9427 if (op1 == const0_rtx
9428 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9429 return false;
9430 code = (code == GTU ? GEU : LTU);
9432 else
9434 rtx tmp = op1;
9435 op1 = op0;
9436 op0 = tmp;
9437 code = (code == GTU ? LTU : GEU);
9439 break;
9441 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9442 case LT:
9443 case GE:
9444 if (mode == DImode || op1 != const0_rtx)
9445 return false;
9446 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9447 code = (code == LT ? GEU : LTU);
9448 break;
9449 case LE:
9450 case GT:
9451 if (mode == DImode || op1 != constm1_rtx)
9452 return false;
9453 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9454 code = (code == LE ? GEU : LTU);
9455 break;
9457 default:
9458 return false;
9460 ix86_compare_op0 = op0;
9461 ix86_compare_op1 = op1;
9462 *pop = ix86_expand_compare (code, NULL, NULL);
9463 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9464 abort ();
9465 return true;
9469 ix86_expand_int_movcc (operands)
9470 rtx operands[];
9472 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9473 rtx compare_seq, compare_op;
9474 rtx second_test, bypass_test;
9475 enum machine_mode mode = GET_MODE (operands[0]);
9476 bool sign_bit_compare_p = false;
9478 start_sequence ();
9479 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9480 compare_seq = get_insns ();
9481 end_sequence ();
9483 compare_code = GET_CODE (compare_op);
9485 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9486 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9487 sign_bit_compare_p = true;
9489 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9490 HImode insns, we'd be swallowed in word prefix ops. */
9492 if ((mode != HImode || TARGET_FAST_PREFIX)
9493 && (mode != DImode || TARGET_64BIT)
9494 && GET_CODE (operands[2]) == CONST_INT
9495 && GET_CODE (operands[3]) == CONST_INT)
9497 rtx out = operands[0];
9498 HOST_WIDE_INT ct = INTVAL (operands[2]);
9499 HOST_WIDE_INT cf = INTVAL (operands[3]);
9500 HOST_WIDE_INT diff;
9502 diff = ct - cf;
9503 /* Sign bit compares are better done using shifts than by
9504 using sbb. */
9505 if (sign_bit_compare_p
9506 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9507 ix86_compare_op1, &compare_op))
9509 /* Detect overlap between destination and compare sources. */
9510 rtx tmp = out;
9512 if (!sign_bit_compare_p)
9514 bool fpcmp = false;
9516 compare_code = GET_CODE (compare_op);
9518 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9519 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9521 fpcmp = true;
9522 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9525 /* To simplify the rest of the code, restrict to the GEU case. */
9526 if (compare_code == LTU)
9528 HOST_WIDE_INT tmp = ct;
9529 ct = cf;
9530 cf = tmp;
9531 compare_code = reverse_condition (compare_code);
9532 code = reverse_condition (code);
9534 else
9536 if (fpcmp)
9537 PUT_CODE (compare_op,
9538 reverse_condition_maybe_unordered
9539 (GET_CODE (compare_op)));
9540 else
9541 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9543 diff = ct - cf;
9545 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9546 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9547 tmp = gen_reg_rtx (mode);
9549 if (mode == DImode)
9550 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9551 else
9552 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9554 else
9556 if (code == GT || code == GE)
9557 code = reverse_condition (code);
9558 else
9560 HOST_WIDE_INT tmp = ct;
9561 ct = cf;
9562 cf = tmp;
9563 diff = ct - cf;
9565 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9566 ix86_compare_op1, VOIDmode, 0, -1);
9569 if (diff == 1)
9572 * cmpl op0,op1
9573 * sbbl dest,dest
9574 * [addl dest, ct]
9576 * Size 5 - 8.
9578 if (ct)
9579 tmp = expand_simple_binop (mode, PLUS,
9580 tmp, GEN_INT (ct),
9581 copy_rtx (tmp), 1, OPTAB_DIRECT);
9583 else if (cf == -1)
9586 * cmpl op0,op1
9587 * sbbl dest,dest
9588 * orl $ct, dest
9590 * Size 8.
9592 tmp = expand_simple_binop (mode, IOR,
9593 tmp, GEN_INT (ct),
9594 copy_rtx (tmp), 1, OPTAB_DIRECT);
9596 else if (diff == -1 && ct)
9599 * cmpl op0,op1
9600 * sbbl dest,dest
9601 * notl dest
9602 * [addl dest, cf]
9604 * Size 8 - 11.
9606 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9607 if (cf)
9608 tmp = expand_simple_binop (mode, PLUS,
9609 copy_rtx (tmp), GEN_INT (cf),
9610 copy_rtx (tmp), 1, OPTAB_DIRECT);
9612 else
9615 * cmpl op0,op1
9616 * sbbl dest,dest
9617 * [notl dest]
9618 * andl cf - ct, dest
9619 * [addl dest, ct]
9621 * Size 8 - 11.
9624 if (cf == 0)
9626 cf = ct;
9627 ct = 0;
9628 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9631 tmp = expand_simple_binop (mode, AND,
9632 copy_rtx (tmp),
9633 gen_int_mode (cf - ct, mode),
9634 copy_rtx (tmp), 1, OPTAB_DIRECT);
9635 if (ct)
9636 tmp = expand_simple_binop (mode, PLUS,
9637 copy_rtx (tmp), GEN_INT (ct),
9638 copy_rtx (tmp), 1, OPTAB_DIRECT);
9641 if (!rtx_equal_p (tmp, out))
9642 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9644 return 1; /* DONE */
9647 if (diff < 0)
9649 HOST_WIDE_INT tmp;
9650 tmp = ct, ct = cf, cf = tmp;
9651 diff = -diff;
9652 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9654 /* We may be reversing an unordered compare to a normal compare, which
9655 is not valid in general (we may convert a non-trapping condition
9656 to a trapping one), however on i386 we currently emit all
9657 comparisons unordered. */
9658 compare_code = reverse_condition_maybe_unordered (compare_code);
9659 code = reverse_condition_maybe_unordered (code);
9661 else
9663 compare_code = reverse_condition (compare_code);
9664 code = reverse_condition (code);
9668 compare_code = NIL;
9669 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9670 && GET_CODE (ix86_compare_op1) == CONST_INT)
9672 if (ix86_compare_op1 == const0_rtx
9673 && (code == LT || code == GE))
9674 compare_code = code;
9675 else if (ix86_compare_op1 == constm1_rtx)
9677 if (code == LE)
9678 compare_code = LT;
9679 else if (code == GT)
9680 compare_code = GE;
9684 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9685 if (compare_code != NIL
9686 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9687 && (cf == -1 || ct == -1))
9689 /* If lea code below could be used, only optimize
9690 if it results in a 2 insn sequence. */
9692 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9693 || diff == 3 || diff == 5 || diff == 9)
9694 || (compare_code == LT && ct == -1)
9695 || (compare_code == GE && cf == -1))
9698 * notl op1 (if necessary)
9699 * sarl $31, op1
9700 * orl cf, op1
9702 if (ct != -1)
9704 cf = ct;
9705 ct = -1;
9706 code = reverse_condition (code);
9709 out = emit_store_flag (out, code, ix86_compare_op0,
9710 ix86_compare_op1, VOIDmode, 0, -1);
9712 out = expand_simple_binop (mode, IOR,
9713 out, GEN_INT (cf),
9714 out, 1, OPTAB_DIRECT);
9715 if (out != operands[0])
9716 emit_move_insn (operands[0], out);
9718 return 1; /* DONE */
9723 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9724 || diff == 3 || diff == 5 || diff == 9)
9725 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9726 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9729 * xorl dest,dest
9730 * cmpl op1,op2
9731 * setcc dest
9732 * lea cf(dest*(ct-cf)),dest
9734 * Size 14.
9736 * This also catches the degenerate setcc-only case.
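/* Worked example (illustrative, assuming dest lives in %eax): with
   ct = 5 and cf = 2 we have diff = 3, so the 0/1 setcc result is
   scaled as "leal 2(%eax,%eax,2), %eax", i.e. dest = dest * 3 + 2,
   yielding 5 when the condition held and 2 otherwise.  */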
9739 rtx tmp;
9740 int nops;
9742 out = emit_store_flag (out, code, ix86_compare_op0,
9743 ix86_compare_op1, VOIDmode, 0, 1);
9745 nops = 0;
9746 /* On x86_64 the lea instruction operates on Pmode, so we need
9747 to get the arithmetic done in the proper mode to match. */
9748 if (diff == 1)
9749 tmp = copy_rtx (out);
9750 else
9752 rtx out1;
9753 out1 = copy_rtx (out);
9754 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9755 nops++;
9756 if (diff & 1)
9758 tmp = gen_rtx_PLUS (mode, tmp, out1);
9759 nops++;
9762 if (cf != 0)
9764 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9765 nops++;
9767 if (!rtx_equal_p (tmp, out))
9769 if (nops == 1)
9770 out = force_operand (tmp, copy_rtx (out));
9771 else
9772 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9774 if (!rtx_equal_p (out, operands[0]))
9775 emit_move_insn (operands[0], copy_rtx (out));
9777 return 1; /* DONE */
9781 * General case: Jumpful:
9782 * xorl dest,dest cmpl op1, op2
9783 * cmpl op1, op2 movl ct, dest
9784 * setcc dest jcc 1f
9785 * decl dest movl cf, dest
9786 * andl (cf-ct),dest 1:
9787 * addl ct,dest
9789 * Size 20. Size 14.
9791 * This is reasonably steep, but branch mispredict costs are
9792 * high on modern cpus, so consider failing only if optimizing
9793 * for space.
9796 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9797 && BRANCH_COST >= 2)
9799 if (cf == 0)
9801 cf = ct;
9802 ct = 0;
9803 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9804 /* We may be reversing an unordered compare to a normal compare,
9805 which is not valid in general (we may convert a non-trapping
9806 condition to a trapping one), however on i386 we currently
9807 emit all comparisons unordered. */
9808 code = reverse_condition_maybe_unordered (code);
9809 else
9811 code = reverse_condition (code);
9812 if (compare_code != NIL)
9813 compare_code = reverse_condition (compare_code);
9817 if (compare_code != NIL)
9819 /* notl op1 (if needed)
9820 sarl $31, op1
9821 andl (cf-ct), op1
9822 addl ct, op1
9824 For x < 0 (resp. x <= -1) there will be no notl,
9825 so if possible swap the constants to get rid of the
9826 complement.
9827 True/false will be -1/0 while code below (store flag
9828 followed by decrement) is 0/-1, so the constants need
9829 to be exchanged once more. */
9831 if (compare_code == GE || !cf)
9833 code = reverse_condition (code);
9834 compare_code = LT;
9836 else
9838 HOST_WIDE_INT tmp = cf;
9839 cf = ct;
9840 ct = tmp;
9843 out = emit_store_flag (out, code, ix86_compare_op0,
9844 ix86_compare_op1, VOIDmode, 0, -1);
9846 else
9848 out = emit_store_flag (out, code, ix86_compare_op0,
9849 ix86_compare_op1, VOIDmode, 0, 1);
9851 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9852 copy_rtx (out), 1, OPTAB_DIRECT);
9855 out = expand_simple_binop (mode, AND, copy_rtx (out),
9856 gen_int_mode (cf - ct, mode),
9857 copy_rtx (out), 1, OPTAB_DIRECT);
9858 if (ct)
9859 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9860 copy_rtx (out), 1, OPTAB_DIRECT);
9861 if (!rtx_equal_p (out, operands[0]))
9862 emit_move_insn (operands[0], copy_rtx (out));
9864 return 1; /* DONE */
9868 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9870 /* Try a few things more with specific constants and a variable. */
9872 optab op;
9873 rtx var, orig_out, out, tmp;
9875 if (BRANCH_COST <= 2)
9876 return 0; /* FAIL */
9878 /* If one of the two operands is an interesting constant, load a
9879 constant with the above and mask it in with a logical operation. */
9881 if (GET_CODE (operands[2]) == CONST_INT)
9883 var = operands[3];
9884 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9885 operands[3] = constm1_rtx, op = and_optab;
9886 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9887 operands[3] = const0_rtx, op = ior_optab;
9888 else
9889 return 0; /* FAIL */
9891 else if (GET_CODE (operands[3]) == CONST_INT)
9893 var = operands[2];
9894 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9895 operands[2] = constm1_rtx, op = and_optab;
9896 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9897 operands[2] = const0_rtx, op = ior_optab;
9898 else
9899 return 0; /* FAIL */
9901 else
9902 return 0; /* FAIL */
9904 orig_out = operands[0];
9905 tmp = gen_reg_rtx (mode);
9906 operands[0] = tmp;
9908 /* Recurse to get the constant loaded. */
9909 if (ix86_expand_int_movcc (operands) == 0)
9910 return 0; /* FAIL */
9912 /* Mask in the interesting variable. */
9913 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9914 OPTAB_WIDEN);
9915 if (!rtx_equal_p (out, orig_out))
9916 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9918 return 1; /* DONE */
9922 * For comparison with above,
9924 * movl cf,dest
9925 * movl ct,tmp
9926 * cmpl op1,op2
9927 * cmovcc tmp,dest
9929 * Size 15.
9932 if (! nonimmediate_operand (operands[2], mode))
9933 operands[2] = force_reg (mode, operands[2]);
9934 if (! nonimmediate_operand (operands[3], mode))
9935 operands[3] = force_reg (mode, operands[3]);
9937 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9939 rtx tmp = gen_reg_rtx (mode);
9940 emit_move_insn (tmp, operands[3]);
9941 operands[3] = tmp;
9943 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9945 rtx tmp = gen_reg_rtx (mode);
9946 emit_move_insn (tmp, operands[2]);
9947 operands[2] = tmp;
9950 if (! register_operand (operands[2], VOIDmode)
9951 && (mode == QImode
9952 || ! register_operand (operands[3], VOIDmode)))
9953 operands[2] = force_reg (mode, operands[2]);
9955 if (mode == QImode
9956 && ! register_operand (operands[3], VOIDmode))
9957 operands[3] = force_reg (mode, operands[3]);
9959 emit_insn (compare_seq);
9960 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9961 gen_rtx_IF_THEN_ELSE (mode,
9962 compare_op, operands[2],
9963 operands[3])));
9964 if (bypass_test)
9965 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9966 gen_rtx_IF_THEN_ELSE (mode,
9967 bypass_test,
9968 copy_rtx (operands[3]),
9969 copy_rtx (operands[0]))));
9970 if (second_test)
9971 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9972 gen_rtx_IF_THEN_ELSE (mode,
9973 second_test,
9974 copy_rtx (operands[2]),
9975 copy_rtx (operands[0]))));
9977 return 1; /* DONE */
9981 ix86_expand_fp_movcc (operands)
9982 rtx operands[];
9984 enum rtx_code code;
9985 rtx tmp;
9986 rtx compare_op, second_test, bypass_test;
9988 /* For SF/DFmode conditional moves based on comparisons
9989 in the same mode, we may want to use SSE min/max instructions. */
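/* Illustrative example (assumed): when doing SFmode math in SSE,
   "x < y ? x : y" matches the min case below and becomes a single
   minss instead of a compare followed by a conditional move.  */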
9990 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9991 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9992 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9993 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9994 && (!TARGET_IEEE_FP
9995 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9996 /* We may be called from the post-reload splitter. */
9997 && (!REG_P (operands[0])
9998 || SSE_REG_P (operands[0])
9999 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10001 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10002 code = GET_CODE (operands[1]);
10004 /* See if we have (cross) match between comparison operands and
10005 conditional move operands. */
10006 if (rtx_equal_p (operands[2], op1))
10008 rtx tmp = op0;
10009 op0 = op1;
10010 op1 = tmp;
10011 code = reverse_condition_maybe_unordered (code);
10013 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10015 /* Check for min operation. */
10016 if (code == LT || code == UNLE)
10018 if (code == UNLE)
10020 rtx tmp = op0;
10021 op0 = op1;
10022 op1 = tmp;
10024 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10025 if (memory_operand (op0, VOIDmode))
10026 op0 = force_reg (GET_MODE (operands[0]), op0);
10027 if (GET_MODE (operands[0]) == SFmode)
10028 emit_insn (gen_minsf3 (operands[0], op0, op1));
10029 else
10030 emit_insn (gen_mindf3 (operands[0], op0, op1));
10031 return 1;
10033 /* Check for max operation. */
10034 if (code == GT || code == UNGE)
10036 if (code == UNGE)
10038 rtx tmp = op0;
10039 op0 = op1;
10040 op1 = tmp;
10042 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10043 if (memory_operand (op0, VOIDmode))
10044 op0 = force_reg (GET_MODE (operands[0]), op0);
10045 if (GET_MODE (operands[0]) == SFmode)
10046 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10047 else
10048 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10049 return 1;
10052 /* Arrange for the condition to be an sse_comparison_operator. In case
10053 we are in non-IEEE mode, try to canonicalize the destination operand
10054 to be first in the comparison - this helps reload avoid extra
10055 moves. */
10056 if (!sse_comparison_operator (operands[1], VOIDmode)
10057 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10059 rtx tmp = ix86_compare_op0;
10060 ix86_compare_op0 = ix86_compare_op1;
10061 ix86_compare_op1 = tmp;
10062 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10063 VOIDmode, ix86_compare_op0,
10064 ix86_compare_op1);
10066 /* Similarly, try to make the result the first operand of the conditional
10067 move. We also don't support the NE comparison on SSE, so try to
10068 avoid it. */
10069 if ((rtx_equal_p (operands[0], operands[3])
10070 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10071 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10073 rtx tmp = operands[2];
10074 operands[2] = operands[3];
10075 operands[3] = tmp;
10076 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10077 (GET_CODE (operands[1])),
10078 VOIDmode, ix86_compare_op0,
10079 ix86_compare_op1);
10081 if (GET_MODE (operands[0]) == SFmode)
10082 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10083 operands[2], operands[3],
10084 ix86_compare_op0, ix86_compare_op1));
10085 else
10086 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10087 operands[2], operands[3],
10088 ix86_compare_op0, ix86_compare_op1));
10089 return 1;
10092 /* The floating point conditional move instructions don't directly
10093 support conditions resulting from a signed integer comparison. */
10095 code = GET_CODE (operands[1]);
10096 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10098 /* The floating point conditional move instructions don't directly
10099 support signed integer comparisons. */
10101 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10103 if (second_test != NULL || bypass_test != NULL)
10104 abort ();
10105 tmp = gen_reg_rtx (QImode);
10106 ix86_expand_setcc (code, tmp);
10107 code = NE;
10108 ix86_compare_op0 = tmp;
10109 ix86_compare_op1 = const0_rtx;
10110 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10112 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10114 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10115 emit_move_insn (tmp, operands[3]);
10116 operands[3] = tmp;
10118 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10120 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10121 emit_move_insn (tmp, operands[2]);
10122 operands[2] = tmp;
10125 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10126 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10127 compare_op,
10128 operands[2],
10129 operands[3])));
10130 if (bypass_test)
10131 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10132 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10133 bypass_test,
10134 operands[3],
10135 operands[0])));
10136 if (second_test)
10137 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10138 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10139 second_test,
10140 operands[2],
10141 operands[0])));
10143 return 1;
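/* For illustration, the SSE path above maps a conditional move such as

       d = a < b ? a : b          (SFmode values in SSE registers)

   onto a single minss (maxss for the GT/UNGE case), while comparisons the
   SSE patterns cannot express fall through to the x87 fcmov code below,
   which may first have to materialize the condition with setcc when the
   integer comparison is not directly supported by fcmov.  This is a rough
   sketch only; the exact code depends on operand matching and -mieee-fp.  */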
10146 /* Expand conditional increment or decrement using adc/sbb instructions.
10147 The default case using setcc followed by the conditional move can be
10148 done by generic code. */
10150 ix86_expand_int_addcc (operands)
10151 rtx operands[];
10153 enum rtx_code code = GET_CODE (operands[1]);
10154 rtx compare_op;
10155 rtx val = const0_rtx;
10156 bool fpcmp = false;
10157 enum machine_mode mode = GET_MODE (operands[0]);
10159 if (operands[3] != const1_rtx
10160 && operands[3] != constm1_rtx)
10161 return 0;
10162 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10163 ix86_compare_op1, &compare_op))
10164 return 0;
10165 code = GET_CODE (compare_op);
10167 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10168 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10170 fpcmp = true;
10171 code = ix86_fp_compare_code_to_integer (code);
10174 if (code != LTU)
10176 val = constm1_rtx;
10177 if (fpcmp)
10178 PUT_CODE (compare_op,
10179 reverse_condition_maybe_unordered
10180 (GET_CODE (compare_op)));
10181 else
10182 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10184 PUT_MODE (compare_op, mode);
10186 /* Construct either adc or sbb insn. */
10187 if ((code == LTU) == (operands[3] == constm1_rtx))
10189 switch (GET_MODE (operands[0]))
10191 case QImode:
10192 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10193 break;
10194 case HImode:
10195 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10196 break;
10197 case SImode:
10198 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10199 break;
10200 case DImode:
10201 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10202 break;
10203 default:
10204 abort ();
10207 else
10209 switch (GET_MODE (operands[0]))
10211 case QImode:
10212 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10213 break;
10214 case HImode:
10215 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10216 break;
10217 case SImode:
10218 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10219 break;
10220 case DImode:
10221 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10222 break;
10223 default:
10224 abort ();
10227 return 1; /* DONE */
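/* For illustration, with unsigned operands the expansion above lets

       if (a < b)
         x++;

   compile without a branch into roughly

       cmpl  %esi, %edi        # a - b, sets the carry flag when a < b
       adcl  $0, %eax          # x += carry

   and the decrement / constm1 variants use sbb the same way.  Register
   names here are arbitrary.  */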
10231 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10232 works for floating point parameters and nonoffsettable memories.
10233 For pushes, it returns just stack offsets; the values will be saved
10234 in the right order. Maximally three parts are generated. */
10236 static int
10237 ix86_split_to_parts (operand, parts, mode)
10238 rtx operand;
10239 rtx *parts;
10240 enum machine_mode mode;
10242 int size;
10244 if (!TARGET_64BIT)
10245 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10246 else
10247 size = (GET_MODE_SIZE (mode) + 4) / 8;
10249 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10250 abort ();
10251 if (size < 2 || size > 3)
10252 abort ();
10254 /* Optimize constant pool reference to immediates. This is used by fp
10255 moves, that force all constants to memory to allow combining. */
10256 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10258 rtx tmp = maybe_get_pool_constant (operand);
10259 if (tmp)
10260 operand = tmp;
10263 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10266 /* The only non-offsettable memories we handle are pushes. */
10266 if (! push_operand (operand, VOIDmode))
10267 abort ();
10269 operand = copy_rtx (operand);
10270 PUT_MODE (operand, Pmode);
10271 parts[0] = parts[1] = parts[2] = operand;
10273 else if (!TARGET_64BIT)
10275 if (mode == DImode)
10276 split_di (&operand, 1, &parts[0], &parts[1]);
10277 else
10279 if (REG_P (operand))
10281 if (!reload_completed)
10282 abort ();
10283 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10284 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10285 if (size == 3)
10286 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10288 else if (offsettable_memref_p (operand))
10290 operand = adjust_address (operand, SImode, 0);
10291 parts[0] = operand;
10292 parts[1] = adjust_address (operand, SImode, 4);
10293 if (size == 3)
10294 parts[2] = adjust_address (operand, SImode, 8);
10296 else if (GET_CODE (operand) == CONST_DOUBLE)
10298 REAL_VALUE_TYPE r;
10299 long l[4];
10301 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10302 switch (mode)
10304 case XFmode:
10305 case TFmode:
10306 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10307 parts[2] = gen_int_mode (l[2], SImode);
10308 break;
10309 case DFmode:
10310 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10311 break;
10312 default:
10313 abort ();
10315 parts[1] = gen_int_mode (l[1], SImode);
10316 parts[0] = gen_int_mode (l[0], SImode);
10318 else
10319 abort ();
10322 else
10324 if (mode == TImode)
10325 split_ti (&operand, 1, &parts[0], &parts[1]);
10326 if (mode == XFmode || mode == TFmode)
10328 if (REG_P (operand))
10330 if (!reload_completed)
10331 abort ();
10332 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10333 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10335 else if (offsettable_memref_p (operand))
10337 operand = adjust_address (operand, DImode, 0);
10338 parts[0] = operand;
10339 parts[1] = adjust_address (operand, SImode, 8);
10341 else if (GET_CODE (operand) == CONST_DOUBLE)
10343 REAL_VALUE_TYPE r;
10344 long l[3];
10346 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10347 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10348 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10349 if (HOST_BITS_PER_WIDE_INT >= 64)
10350 parts[0]
10351 = gen_int_mode
10352 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10353 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10354 DImode);
10355 else
10356 parts[0] = immed_double_const (l[0], l[1], DImode);
10357 parts[1] = gen_int_mode (l[2], SImode);
10359 else
10360 abort ();
10364 return size;
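/* Example of the splitting done above: on a 32-bit target the DFmode
   constant 1.0 (bit pattern 0x3ff0000000000000) comes back as the two
   SImode immediates parts[0] = 0x00000000 and parts[1] = 0x3ff00000,
   while a DFmode hard register becomes two consecutive SImode registers.
   XFmode/TFmode values yield a third part in the same way.  */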
10367 /* Emit insns to perform a move or push of DI, DF, and XF values.
10368 All required insns are emitted here. Operands 2-4 are set to the
10369 destination parts in the correct order; operands 5-7 are set to the
10370 corresponding source parts. */
10372 void
10373 ix86_split_long_move (operands)
10374 rtx operands[];
10376 rtx part[2][3];
10377 int nparts;
10378 int push = 0;
10379 int collisions = 0;
10380 enum machine_mode mode = GET_MODE (operands[0]);
10382 /* The DFmode expanders may ask us to move a double.
10383 For a 64bit target this is a single move. By hiding the fact
10384 here we simplify the i386.md splitters. */
10385 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10387 /* Optimize constant pool reference to immediates. This is used by
10388 fp moves, that force all constants to memory to allow combining. */
10390 if (GET_CODE (operands[1]) == MEM
10391 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10392 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10393 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10394 if (push_operand (operands[0], VOIDmode))
10396 operands[0] = copy_rtx (operands[0]);
10397 PUT_MODE (operands[0], Pmode);
10399 else
10400 operands[0] = gen_lowpart (DImode, operands[0]);
10401 operands[1] = gen_lowpart (DImode, operands[1]);
10402 emit_move_insn (operands[0], operands[1]);
10403 return;
10406 /* The only non-offsettable memory we handle is push. */
10407 if (push_operand (operands[0], VOIDmode))
10408 push = 1;
10409 else if (GET_CODE (operands[0]) == MEM
10410 && ! offsettable_memref_p (operands[0]))
10411 abort ();
10413 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10414 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10416 /* When emitting push, take care for source operands on the stack. */
10417 if (push && GET_CODE (operands[1]) == MEM
10418 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10420 if (nparts == 3)
10421 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10422 XEXP (part[1][2], 0));
10423 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10424 XEXP (part[1][1], 0));
10427 /* We need to do copy in the right order in case an address register
10428 of the source overlaps the destination. */
10429 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10431 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10432 collisions++;
10433 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10434 collisions++;
10435 if (nparts == 3
10436 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10437 collisions++;
10439 /* Collision in the middle part can be handled by reordering. */
10440 if (collisions == 1 && nparts == 3
10441 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10443 rtx tmp;
10444 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10445 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10448 /* If there are more collisions, we can't handle it by reordering.
10449 Do an lea to the last part and use only one colliding move. */
10450 else if (collisions > 1)
10452 collisions = 1;
10453 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10454 XEXP (part[1][0], 0)));
10455 part[1][0] = change_address (part[1][0],
10456 TARGET_64BIT ? DImode : SImode,
10457 part[0][nparts - 1]);
10458 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10459 if (nparts == 3)
10460 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10464 if (push)
10466 if (!TARGET_64BIT)
10468 if (nparts == 3)
10470 /* We use only the first 12 bytes of the TFmode value, but for pushing we
10471 are required to adjust the stack as if we were pushing a real 16-byte
10472 value. */
10473 if (mode == TFmode && !TARGET_64BIT)
10474 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10475 GEN_INT (-4)));
10476 emit_move_insn (part[0][2], part[1][2]);
10479 else
10481 /* In 64bit mode there is no 32bit push available. If this is a
10482 register, that is OK - we just use the larger counterpart. We also
10483 retype memory - this comes from an attempt to avoid a REX prefix
10484 when moving the second half of a TFmode value. */
10485 if (GET_MODE (part[1][1]) == SImode)
10487 if (GET_CODE (part[1][1]) == MEM)
10488 part[1][1] = adjust_address (part[1][1], DImode, 0);
10489 else if (REG_P (part[1][1]))
10490 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10491 else
10492 abort ();
10493 if (GET_MODE (part[1][0]) == SImode)
10494 part[1][0] = part[1][1];
10497 emit_move_insn (part[0][1], part[1][1]);
10498 emit_move_insn (part[0][0], part[1][0]);
10499 return;
10502 /* Choose correct order to not overwrite the source before it is copied. */
10503 if ((REG_P (part[0][0])
10504 && REG_P (part[1][1])
10505 && (REGNO (part[0][0]) == REGNO (part[1][1])
10506 || (nparts == 3
10507 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10508 || (collisions > 0
10509 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10511 if (nparts == 3)
10513 operands[2] = part[0][2];
10514 operands[3] = part[0][1];
10515 operands[4] = part[0][0];
10516 operands[5] = part[1][2];
10517 operands[6] = part[1][1];
10518 operands[7] = part[1][0];
10520 else
10522 operands[2] = part[0][1];
10523 operands[3] = part[0][0];
10524 operands[5] = part[1][1];
10525 operands[6] = part[1][0];
10528 else
10530 if (nparts == 3)
10532 operands[2] = part[0][0];
10533 operands[3] = part[0][1];
10534 operands[4] = part[0][2];
10535 operands[5] = part[1][0];
10536 operands[6] = part[1][1];
10537 operands[7] = part[1][2];
10539 else
10541 operands[2] = part[0][0];
10542 operands[3] = part[0][1];
10543 operands[5] = part[1][0];
10544 operands[6] = part[1][1];
10547 emit_move_insn (operands[2], operands[5]);
10548 emit_move_insn (operands[3], operands[6]);
10549 if (nparts == 3)
10550 emit_move_insn (operands[4], operands[7]);
10552 return;
10555 void
10556 ix86_split_ashldi (operands, scratch)
10557 rtx *operands, scratch;
10559 rtx low[2], high[2];
10560 int count;
10562 if (GET_CODE (operands[2]) == CONST_INT)
10564 split_di (operands, 2, low, high);
10565 count = INTVAL (operands[2]) & 63;
10567 if (count >= 32)
10569 emit_move_insn (high[0], low[1]);
10570 emit_move_insn (low[0], const0_rtx);
10572 if (count > 32)
10573 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10575 else
10577 if (!rtx_equal_p (operands[0], operands[1]))
10578 emit_move_insn (operands[0], operands[1]);
10579 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10580 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10583 else
10585 if (!rtx_equal_p (operands[0], operands[1]))
10586 emit_move_insn (operands[0], operands[1]);
10588 split_di (operands, 1, low, high);
10590 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10591 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10593 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10595 if (! no_new_pseudos)
10596 scratch = force_reg (SImode, const0_rtx);
10597 else
10598 emit_move_insn (scratch, const0_rtx);
10600 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10601 scratch));
10603 else
10604 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
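/* For illustration, with the result in edx:eax the splitter above turns

       x << 5    into    shldl $5, %eax, %edx
                         sall  $5, %eax

       x << 40   into    movl  %eax, %edx
                         xorl  %eax, %eax
                         sall  $8, %edx

   For a variable count the shld/sal pair is emitted unconditionally and
   the shift_adj patterns fix up the count >= 32 case, with cmov when it
   is available and a conditional jump otherwise.  */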
10608 void
10609 ix86_split_ashrdi (operands, scratch)
10610 rtx *operands, scratch;
10612 rtx low[2], high[2];
10613 int count;
10615 if (GET_CODE (operands[2]) == CONST_INT)
10617 split_di (operands, 2, low, high);
10618 count = INTVAL (operands[2]) & 63;
10620 if (count >= 32)
10622 emit_move_insn (low[0], high[1]);
10624 if (! reload_completed)
10625 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10626 else
10628 emit_move_insn (high[0], low[0]);
10629 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10632 if (count > 32)
10633 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10635 else
10637 if (!rtx_equal_p (operands[0], operands[1]))
10638 emit_move_insn (operands[0], operands[1]);
10639 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10640 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10643 else
10645 if (!rtx_equal_p (operands[0], operands[1]))
10646 emit_move_insn (operands[0], operands[1]);
10648 split_di (operands, 1, low, high);
10650 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10651 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10653 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10655 if (! no_new_pseudos)
10656 scratch = gen_reg_rtx (SImode);
10657 emit_move_insn (scratch, high[0]);
10658 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10659 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10660 scratch));
10662 else
10663 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10667 void
10668 ix86_split_lshrdi (operands, scratch)
10669 rtx *operands, scratch;
10671 rtx low[2], high[2];
10672 int count;
10674 if (GET_CODE (operands[2]) == CONST_INT)
10676 split_di (operands, 2, low, high);
10677 count = INTVAL (operands[2]) & 63;
10679 if (count >= 32)
10681 emit_move_insn (low[0], high[1]);
10682 emit_move_insn (high[0], const0_rtx);
10684 if (count > 32)
10685 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10687 else
10689 if (!rtx_equal_p (operands[0], operands[1]))
10690 emit_move_insn (operands[0], operands[1]);
10691 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10692 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10695 else
10697 if (!rtx_equal_p (operands[0], operands[1]))
10698 emit_move_insn (operands[0], operands[1]);
10700 split_di (operands, 1, low, high);
10702 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10703 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10705 /* Heh. By reversing the arguments, we can reuse this pattern. */
10706 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10708 if (! no_new_pseudos)
10709 scratch = force_reg (SImode, const0_rtx);
10710 else
10711 emit_move_insn (scratch, const0_rtx);
10713 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10714 scratch));
10716 else
10717 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10721 /* Helper function for the string operations below. Test VARIABLE for
10722 alignment: AND it with VALUE and jump to the returned label when the result is zero. */
10723 static rtx
10724 ix86_expand_aligntest (variable, value)
10725 rtx variable;
10726 int value;
10728 rtx label = gen_label_rtx ();
10729 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10730 if (GET_MODE (variable) == DImode)
10731 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10732 else
10733 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10734 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10735 1, label);
10736 return label;
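/* For illustration, ix86_expand_aligntest (destreg, 1) typically expands to
   something like

       testl  $1, %edi
       je     .Lskip

   and the caller then emits the one-byte fixup before placing the returned
   label (".Lskip" here).  Register and label names are arbitrary.  */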
10739 /* Decrease COUNTREG by VALUE. */
10740 static void
10741 ix86_adjust_counter (countreg, value)
10742 rtx countreg;
10743 HOST_WIDE_INT value;
10745 if (GET_MODE (countreg) == DImode)
10746 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10747 else
10748 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10751 /* Zero extend possibly SImode EXP to Pmode register. */
10753 ix86_zero_extend_to_Pmode (exp)
10754 rtx exp;
10756 rtx r;
10757 if (GET_MODE (exp) == VOIDmode)
10758 return force_reg (Pmode, exp);
10759 if (GET_MODE (exp) == Pmode)
10760 return copy_to_mode_reg (Pmode, exp);
10761 r = gen_reg_rtx (Pmode);
10762 emit_insn (gen_zero_extendsidi2 (r, exp));
10763 return r;
10766 /* Expand string move (memcpy) operation. Use i386 string operations when
10767 profitable. expand_clrstr contains similar code. */
10769 ix86_expand_movstr (dst, src, count_exp, align_exp)
10770 rtx dst, src, count_exp, align_exp;
10772 rtx srcreg, destreg, countreg;
10773 enum machine_mode counter_mode;
10774 HOST_WIDE_INT align = 0;
10775 unsigned HOST_WIDE_INT count = 0;
10776 rtx insns;
10779 if (GET_CODE (align_exp) == CONST_INT)
10780 align = INTVAL (align_exp);
10782 /* This simple hack avoids all inlining code and simplifies code below. */
10783 if (!TARGET_ALIGN_STRINGOPS)
10784 align = 64;
10786 if (GET_CODE (count_exp) == CONST_INT)
10788 count = INTVAL (count_exp);
10789 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10790 return 0;
10793 /* Figure out proper mode for counter. For 32bits it is always SImode,
10794 for 64bits use SImode when possible, otherwise DImode.
10795 Set count to number of bytes copied when known at compile time. */
10796 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10797 || x86_64_zero_extended_value (count_exp))
10798 counter_mode = SImode;
10799 else
10800 counter_mode = DImode;
10802 start_sequence ();
10804 if (counter_mode != SImode && counter_mode != DImode)
10805 abort ();
10807 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10808 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10810 emit_insn (gen_cld ());
10812 /* When optimizing for size emit simple rep ; movsb instruction for
10813 counts not divisible by 4. */
10815 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10817 countreg = ix86_zero_extend_to_Pmode (count_exp);
10818 if (TARGET_64BIT)
10819 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10820 destreg, srcreg, countreg));
10821 else
10822 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10823 destreg, srcreg, countreg));
10826 /* For constant aligned (or small unaligned) copies use rep movsl
10827 followed by code copying the rest. For PentiumPro ensure 8 byte
10828 alignment to allow rep movsl acceleration. */
10830 else if (count != 0
10831 && (align >= 8
10832 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10833 || optimize_size || count < (unsigned int) 64))
10835 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10836 if (count & ~(size - 1))
10838 countreg = copy_to_mode_reg (counter_mode,
10839 GEN_INT ((count >> (size == 4 ? 2 : 3))
10840 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10841 countreg = ix86_zero_extend_to_Pmode (countreg);
10842 if (size == 4)
10844 if (TARGET_64BIT)
10845 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10846 destreg, srcreg, countreg));
10847 else
10848 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10849 destreg, srcreg, countreg));
10851 else
10852 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10853 destreg, srcreg, countreg));
10855 if (size == 8 && (count & 0x04))
10856 emit_insn (gen_strmovsi (destreg, srcreg));
10857 if (count & 0x02)
10858 emit_insn (gen_strmovhi (destreg, srcreg));
10859 if (count & 0x01)
10860 emit_insn (gen_strmovqi (destreg, srcreg));
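/* For illustration, on a 32-bit target a 15-byte copy with known 4-byte
   alignment is expanded by the branch above into roughly

       movl  $3, %ecx
       rep ; movsl
       movsw
       movsb

   i.e. three longword moves followed by the two- and one-byte tails.  */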
10862 /* The generic code based on the glibc implementation:
10863 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10864 allowing accelerated copying there)
10865 - copy the data using rep movsl
10866 - copy the rest. */
10867 else
10869 rtx countreg2;
10870 rtx label = NULL;
10871 int desired_alignment = (TARGET_PENTIUMPRO
10872 && (count == 0 || count >= (unsigned int) 260)
10873 ? 8 : UNITS_PER_WORD);
10875 /* In case we don't know anything about the alignment, default to
10876 the library version, since it is usually equally fast and results in
10877 shorter code.
10879 Also use the library call when we know that the count is large and call
10880 overhead will not be important. */
10881 if (!TARGET_INLINE_ALL_STRINGOPS
10882 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10884 end_sequence ();
10885 return 0;
10888 if (TARGET_SINGLE_STRINGOP)
10889 emit_insn (gen_cld ());
10891 countreg2 = gen_reg_rtx (Pmode);
10892 countreg = copy_to_mode_reg (counter_mode, count_exp);
10894 /* We don't use loops to align destination and to copy parts smaller
10895 than 4 bytes, because gcc is able to optimize such code better (in
10896 the case the destination or the count really is aligned, gcc is often
10897 able to predict the branches) and also it is friendlier to the
10898 hardware branch prediction.
10900 Using loops would be beneficial for the generic case, because we could
10901 handle small counts with them, and many CPUs (such as Athlon)
10902 have large REP prefix setup costs.
10904 The current approach is therefore quite costly there. Maybe we can
10905 revisit this decision later or add some customizability to this code. */
10907 if (count == 0 && align < desired_alignment)
10909 label = gen_label_rtx ();
10910 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10911 LEU, 0, counter_mode, 1, label);
10913 if (align <= 1)
10915 rtx label = ix86_expand_aligntest (destreg, 1);
10916 emit_insn (gen_strmovqi (destreg, srcreg));
10917 ix86_adjust_counter (countreg, 1);
10918 emit_label (label);
10919 LABEL_NUSES (label) = 1;
10921 if (align <= 2)
10923 rtx label = ix86_expand_aligntest (destreg, 2);
10924 emit_insn (gen_strmovhi (destreg, srcreg));
10925 ix86_adjust_counter (countreg, 2);
10926 emit_label (label);
10927 LABEL_NUSES (label) = 1;
10929 if (align <= 4 && desired_alignment > 4)
10931 rtx label = ix86_expand_aligntest (destreg, 4);
10932 emit_insn (gen_strmovsi (destreg, srcreg));
10933 ix86_adjust_counter (countreg, 4);
10934 emit_label (label);
10935 LABEL_NUSES (label) = 1;
10938 if (label && desired_alignment > 4 && !TARGET_64BIT)
10940 emit_label (label);
10941 LABEL_NUSES (label) = 1;
10942 label = NULL_RTX;
10944 if (!TARGET_SINGLE_STRINGOP)
10945 emit_insn (gen_cld ());
10946 if (TARGET_64BIT)
10948 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10949 GEN_INT (3)));
10950 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10951 destreg, srcreg, countreg2));
10953 else
10955 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10956 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10957 destreg, srcreg, countreg2));
10960 if (label)
10962 emit_label (label);
10963 LABEL_NUSES (label) = 1;
10965 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10966 emit_insn (gen_strmovsi (destreg, srcreg));
10967 if ((align <= 4 || count == 0) && TARGET_64BIT)
10969 rtx label = ix86_expand_aligntest (countreg, 4);
10970 emit_insn (gen_strmovsi (destreg, srcreg));
10971 emit_label (label);
10972 LABEL_NUSES (label) = 1;
10974 if (align > 2 && count != 0 && (count & 2))
10975 emit_insn (gen_strmovhi (destreg, srcreg));
10976 if (align <= 2 || count == 0)
10978 rtx label = ix86_expand_aligntest (countreg, 2);
10979 emit_insn (gen_strmovhi (destreg, srcreg));
10980 emit_label (label);
10981 LABEL_NUSES (label) = 1;
10983 if (align > 1 && count != 0 && (count & 1))
10984 emit_insn (gen_strmovqi (destreg, srcreg));
10985 if (align <= 1 || count == 0)
10987 rtx label = ix86_expand_aligntest (countreg, 1);
10988 emit_insn (gen_strmovqi (destreg, srcreg));
10989 emit_label (label);
10990 LABEL_NUSES (label) = 1;
10994 insns = get_insns ();
10995 end_sequence ();
10997 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10998 emit_insn (insns);
10999 return 1;
11002 /* Expand string clear operation (bzero). Use i386 string operations when
11003 profitable. expand_movstr contains similar code. */
11005 ix86_expand_clrstr (src, count_exp, align_exp)
11006 rtx src, count_exp, align_exp;
11008 rtx destreg, zeroreg, countreg;
11009 enum machine_mode counter_mode;
11010 HOST_WIDE_INT align = 0;
11011 unsigned HOST_WIDE_INT count = 0;
11013 if (GET_CODE (align_exp) == CONST_INT)
11014 align = INTVAL (align_exp);
11016 /* This simple hack avoids all inlining code and simplifies code below. */
11017 if (!TARGET_ALIGN_STRINGOPS)
11018 align = 32;
11020 if (GET_CODE (count_exp) == CONST_INT)
11022 count = INTVAL (count_exp);
11023 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11024 return 0;
11026 /* Figure out proper mode for counter. For 32bits it is always SImode,
11027 for 64bits use SImode when possible, otherwise DImode.
11028 Set count to number of bytes copied when known at compile time. */
11029 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11030 || x86_64_zero_extended_value (count_exp))
11031 counter_mode = SImode;
11032 else
11033 counter_mode = DImode;
11035 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11037 emit_insn (gen_cld ());
11039 /* When optimizing for size emit a simple rep ; stosb instruction for
11040 counts not divisible by 4. */
11042 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11044 countreg = ix86_zero_extend_to_Pmode (count_exp);
11045 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11046 if (TARGET_64BIT)
11047 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11048 destreg, countreg));
11049 else
11050 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11051 destreg, countreg));
11053 else if (count != 0
11054 && (align >= 8
11055 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11056 || optimize_size || count < (unsigned int) 64))
11058 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11059 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11060 if (count & ~(size - 1))
11062 countreg = copy_to_mode_reg (counter_mode,
11063 GEN_INT ((count >> (size == 4 ? 2 : 3))
11064 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11065 countreg = ix86_zero_extend_to_Pmode (countreg);
11066 if (size == 4)
11068 if (TARGET_64BIT)
11069 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11070 destreg, countreg));
11071 else
11072 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11073 destreg, countreg));
11075 else
11076 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11077 destreg, countreg));
11079 if (size == 8 && (count & 0x04))
11080 emit_insn (gen_strsetsi (destreg,
11081 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11082 if (count & 0x02)
11083 emit_insn (gen_strsethi (destreg,
11084 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11085 if (count & 0x01)
11086 emit_insn (gen_strsetqi (destreg,
11087 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11089 else
11091 rtx countreg2;
11092 rtx label = NULL;
11093 /* Compute desired alignment of the string operation. */
11094 int desired_alignment = (TARGET_PENTIUMPRO
11095 && (count == 0 || count >= (unsigned int) 260)
11096 ? 8 : UNITS_PER_WORD);
11098 /* In case we don't know anything about the alignment, default to
11099 the library version, since it is usually equally fast and results in
11100 shorter code.
11102 Also emit call when we know that the count is large and call overhead
11103 will not be important. */
11104 if (!TARGET_INLINE_ALL_STRINGOPS
11105 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11106 return 0;
11108 if (TARGET_SINGLE_STRINGOP)
11109 emit_insn (gen_cld ());
11111 countreg2 = gen_reg_rtx (Pmode);
11112 countreg = copy_to_mode_reg (counter_mode, count_exp);
11113 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11115 if (count == 0 && align < desired_alignment)
11117 label = gen_label_rtx ();
11118 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11119 LEU, 0, counter_mode, 1, label);
11121 if (align <= 1)
11123 rtx label = ix86_expand_aligntest (destreg, 1);
11124 emit_insn (gen_strsetqi (destreg,
11125 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11126 ix86_adjust_counter (countreg, 1);
11127 emit_label (label);
11128 LABEL_NUSES (label) = 1;
11130 if (align <= 2)
11132 rtx label = ix86_expand_aligntest (destreg, 2);
11133 emit_insn (gen_strsethi (destreg,
11134 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11135 ix86_adjust_counter (countreg, 2);
11136 emit_label (label);
11137 LABEL_NUSES (label) = 1;
11139 if (align <= 4 && desired_alignment > 4)
11141 rtx label = ix86_expand_aligntest (destreg, 4);
11142 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11143 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11144 : zeroreg)));
11145 ix86_adjust_counter (countreg, 4);
11146 emit_label (label);
11147 LABEL_NUSES (label) = 1;
11150 if (label && desired_alignment > 4 && !TARGET_64BIT)
11152 emit_label (label);
11153 LABEL_NUSES (label) = 1;
11154 label = NULL_RTX;
11157 if (!TARGET_SINGLE_STRINGOP)
11158 emit_insn (gen_cld ());
11159 if (TARGET_64BIT)
11161 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11162 GEN_INT (3)));
11163 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11164 destreg, countreg2));
11166 else
11168 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11169 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11170 destreg, countreg2));
11172 if (label)
11174 emit_label (label);
11175 LABEL_NUSES (label) = 1;
11178 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11179 emit_insn (gen_strsetsi (destreg,
11180 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11181 if (TARGET_64BIT && (align <= 4 || count == 0))
11183 rtx label = ix86_expand_aligntest (countreg, 4);
11184 emit_insn (gen_strsetsi (destreg,
11185 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11186 emit_label (label);
11187 LABEL_NUSES (label) = 1;
11189 if (align > 2 && count != 0 && (count & 2))
11190 emit_insn (gen_strsethi (destreg,
11191 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11192 if (align <= 2 || count == 0)
11194 rtx label = ix86_expand_aligntest (countreg, 2);
11195 emit_insn (gen_strsethi (destreg,
11196 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11197 emit_label (label);
11198 LABEL_NUSES (label) = 1;
11200 if (align > 1 && count != 0 && (count & 1))
11201 emit_insn (gen_strsetqi (destreg,
11202 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11203 if (align <= 1 || count == 0)
11205 rtx label = ix86_expand_aligntest (countreg, 1);
11206 emit_insn (gen_strsetqi (destreg,
11207 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11208 emit_label (label);
11209 LABEL_NUSES (label) = 1;
11212 return 1;
11214 /* Expand strlen. */
11216 ix86_expand_strlen (out, src, eoschar, align)
11217 rtx out, src, eoschar, align;
11219 rtx addr, scratch1, scratch2, scratch3, scratch4;
11221 /* The generic case of the strlen expander is long. Avoid expanding it
11222 unless TARGET_INLINE_ALL_STRINGOPS. */
11224 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11225 && !TARGET_INLINE_ALL_STRINGOPS
11226 && !optimize_size
11227 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11228 return 0;
11230 addr = force_reg (Pmode, XEXP (src, 0));
11231 scratch1 = gen_reg_rtx (Pmode);
11233 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11234 && !optimize_size)
11236 /* Well it seems that some optimizer does not combine a call like
11237 foo(strlen(bar), strlen(bar));
11238 when the move and the subtraction are done here. It does calculate
11239 the length just once when these instructions are done inside of
11240 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11241 often used and I use one fewer register for the lifetime of
11242 output_strlen_unroll() this is better. */
11244 emit_move_insn (out, addr);
11246 ix86_expand_strlensi_unroll_1 (out, align);
11248 /* strlensi_unroll_1 returns the address of the zero at the end of
11249 the string, like memchr(), so compute the length by subtracting
11250 the start address. */
11251 if (TARGET_64BIT)
11252 emit_insn (gen_subdi3 (out, out, addr));
11253 else
11254 emit_insn (gen_subsi3 (out, out, addr));
11256 else
11258 scratch2 = gen_reg_rtx (Pmode);
11259 scratch3 = gen_reg_rtx (Pmode);
11260 scratch4 = force_reg (Pmode, constm1_rtx);
11262 emit_move_insn (scratch3, addr);
11263 eoschar = force_reg (QImode, eoschar);
11265 emit_insn (gen_cld ());
11266 if (TARGET_64BIT)
11268 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11269 align, scratch4, scratch3));
11270 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11271 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11273 else
11275 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11276 align, scratch4, scratch3));
11277 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11278 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11281 return 1;
11284 /* Expand the appropriate insns for doing strlen if not just doing
11285 repnz; scasb
11287 out = result, initialized with the start address
11288 align_rtx = alignment of the address.
11289 scratch = scratch register, initialized with the start address when
11290 not aligned, otherwise undefined
11292 This is just the body. It needs the initializations mentioned above and
11293 some address computation at the end. These things are done in i386.md. */
11295 static void
11296 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11297 rtx out, align_rtx;
11299 int align;
11300 rtx tmp;
11301 rtx align_2_label = NULL_RTX;
11302 rtx align_3_label = NULL_RTX;
11303 rtx align_4_label = gen_label_rtx ();
11304 rtx end_0_label = gen_label_rtx ();
11305 rtx mem;
11306 rtx tmpreg = gen_reg_rtx (SImode);
11307 rtx scratch = gen_reg_rtx (SImode);
11308 rtx cmp;
11310 align = 0;
11311 if (GET_CODE (align_rtx) == CONST_INT)
11312 align = INTVAL (align_rtx);
11314 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11316 /* Is there a known alignment and is it less than 4? */
11317 if (align < 4)
11319 rtx scratch1 = gen_reg_rtx (Pmode);
11320 emit_move_insn (scratch1, out);
11321 /* Is there a known alignment and is it not 2? */
11322 if (align != 2)
11324 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11325 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11327 /* Leave just the 3 lower bits. */
11328 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11329 NULL_RTX, 0, OPTAB_WIDEN);
11331 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11332 Pmode, 1, align_4_label);
11333 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11334 Pmode, 1, align_2_label);
11335 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11336 Pmode, 1, align_3_label);
11338 else
11340 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11341 check whether it is aligned to 4 bytes. */
11343 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11344 NULL_RTX, 0, OPTAB_WIDEN);
11346 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11347 Pmode, 1, align_4_label);
11350 mem = gen_rtx_MEM (QImode, out);
11352 /* Now compare the bytes. */
11354 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11355 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11356 QImode, 1, end_0_label);
11358 /* Increment the address. */
11359 if (TARGET_64BIT)
11360 emit_insn (gen_adddi3 (out, out, const1_rtx));
11361 else
11362 emit_insn (gen_addsi3 (out, out, const1_rtx));
11364 /* Not needed with an alignment of 2 */
11365 if (align != 2)
11367 emit_label (align_2_label);
11369 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11370 end_0_label);
11372 if (TARGET_64BIT)
11373 emit_insn (gen_adddi3 (out, out, const1_rtx));
11374 else
11375 emit_insn (gen_addsi3 (out, out, const1_rtx));
11377 emit_label (align_3_label);
11380 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11381 end_0_label);
11383 if (TARGET_64BIT)
11384 emit_insn (gen_adddi3 (out, out, const1_rtx));
11385 else
11386 emit_insn (gen_addsi3 (out, out, const1_rtx));
11389 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11390 align this loop; that only makes the program larger and does not
11391 speed it up. */
11392 emit_label (align_4_label);
11394 mem = gen_rtx_MEM (SImode, out);
11395 emit_move_insn (scratch, mem);
11396 if (TARGET_64BIT)
11397 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11398 else
11399 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11401 /* This formula yields a nonzero result iff one of the bytes is zero.
11402 This saves three branches inside the loop and many cycles. */
11404 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11405 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11406 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11407 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11408 gen_int_mode (0x80808080, SImode)));
11409 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11410 align_4_label);
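/* Worked example of the formula above, for scratch = 0x11003344 (byte 2
   is zero):

       scratch - 0x01010101   = 0x0fff3243
       ~scratch               = 0xeeffccbb
       AND of the two         = 0x0eff0003
       ... & 0x80808080       = 0x00800000   nonzero: a zero byte exists

   The surviving 0x80 marker sits in the position of the zero byte, which
   the code below inspects to find its exact position.  */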
11412 if (TARGET_CMOVE)
11414 rtx reg = gen_reg_rtx (SImode);
11415 rtx reg2 = gen_reg_rtx (Pmode);
11416 emit_move_insn (reg, tmpreg);
11417 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11419 /* If zero is not in the first two bytes, move two bytes forward. */
11420 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11421 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11422 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11423 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11424 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11425 reg,
11426 tmpreg)));
11427 /* Emit lea manually to avoid clobbering of flags. */
11428 emit_insn (gen_rtx_SET (SImode, reg2,
11429 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11431 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11432 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11433 emit_insn (gen_rtx_SET (VOIDmode, out,
11434 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11435 reg2,
11436 out)));
11439 else
11441 rtx end_2_label = gen_label_rtx ();
11442 /* Is zero in the first two bytes? */
11444 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11445 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11446 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11447 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11448 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11449 pc_rtx);
11450 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11451 JUMP_LABEL (tmp) = end_2_label;
11453 /* Not in the first two. Move two bytes forward. */
11454 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11455 if (TARGET_64BIT)
11456 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11457 else
11458 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11460 emit_label (end_2_label);
11464 /* Avoid branch in fixing the byte. */
11465 tmpreg = gen_lowpart (QImode, tmpreg);
11466 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11467 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11468 if (TARGET_64BIT)
11469 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11470 else
11471 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11473 emit_label (end_0_label);
11476 void
11477 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11478 rtx retval, fnaddr, callarg1, callarg2, pop;
11479 int sibcall;
11481 rtx use = NULL, call;
11483 if (pop == const0_rtx)
11484 pop = NULL;
11485 if (TARGET_64BIT && pop)
11486 abort ();
11488 #if TARGET_MACHO
11489 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11490 fnaddr = machopic_indirect_call_target (fnaddr);
11491 #else
11492 /* Static functions and indirect calls don't need the pic register. */
11493 if (! TARGET_64BIT && flag_pic
11494 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11495 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11496 use_reg (&use, pic_offset_table_rtx);
11498 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11500 rtx al = gen_rtx_REG (QImode, 0);
11501 emit_move_insn (al, callarg2);
11502 use_reg (&use, al);
11504 #endif /* TARGET_MACHO */
11506 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11508 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11509 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11511 if (sibcall && TARGET_64BIT
11512 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11514 rtx addr;
11515 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11516 fnaddr = gen_rtx_REG (Pmode, 40);
11517 emit_move_insn (fnaddr, addr);
11518 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11521 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11522 if (retval)
11523 call = gen_rtx_SET (VOIDmode, retval, call);
11524 if (pop)
11526 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11527 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11528 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11531 call = emit_call_insn (call);
11532 if (use)
11533 CALL_INSN_FUNCTION_USAGE (call) = use;
11537 /* Clear stack slot assignments remembered from previous functions.
11538 This is called from INIT_EXPANDERS once before RTL is emitted for each
11539 function. */
11541 static struct machine_function *
11542 ix86_init_machine_status ()
11544 return ggc_alloc_cleared (sizeof (struct machine_function));
11547 /* Return a MEM corresponding to a stack slot with mode MODE.
11548 Allocate a new slot if necessary.
11550 The RTL for a function can have several slots available: N is
11551 which slot to use. */
11554 assign_386_stack_local (mode, n)
11555 enum machine_mode mode;
11556 int n;
11558 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11559 abort ();
11561 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11562 ix86_stack_locals[(int) mode][n]
11563 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11565 return ix86_stack_locals[(int) mode][n];
11568 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11570 static GTY(()) rtx ix86_tls_symbol;
11572 ix86_tls_get_addr ()
11575 if (!ix86_tls_symbol)
11577 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11578 (TARGET_GNU_TLS && !TARGET_64BIT)
11579 ? "___tls_get_addr"
11580 : "__tls_get_addr");
11583 return ix86_tls_symbol;
11586 /* Calculate the length of the memory address in the instruction
11587 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11589 static int
11590 memory_address_length (addr)
11591 rtx addr;
11593 struct ix86_address parts;
11594 rtx base, index, disp;
11595 int len;
11597 if (GET_CODE (addr) == PRE_DEC
11598 || GET_CODE (addr) == POST_INC
11599 || GET_CODE (addr) == PRE_MODIFY
11600 || GET_CODE (addr) == POST_MODIFY)
11601 return 0;
11603 if (! ix86_decompose_address (addr, &parts))
11604 abort ();
11606 base = parts.base;
11607 index = parts.index;
11608 disp = parts.disp;
11609 len = 0;
11611 /* Register Indirect. */
11612 if (base && !index && !disp)
11614 /* Special cases: ebp and esp need the two-byte modrm form. */
11615 if (addr == stack_pointer_rtx
11616 || addr == arg_pointer_rtx
11617 || addr == frame_pointer_rtx
11618 || addr == hard_frame_pointer_rtx)
11619 len = 1;
11622 /* Direct Addressing. */
11623 else if (disp && !base && !index)
11624 len = 4;
11626 else
11628 /* Find the length of the displacement constant. */
11629 if (disp)
11631 if (GET_CODE (disp) == CONST_INT
11632 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11633 len = 1;
11634 else
11635 len = 4;
11638 /* An index requires the two-byte modrm form. */
11639 if (index)
11640 len += 1;
11643 return len;
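/* A few examples of the extra bytes counted above (the modrm byte itself,
   the opcode and any prefixes are accounted for elsewhere):

       (%eax)              -> 0
       (%esp) or (%ebp)    -> 1   (SIB byte resp. zero disp8)
       8(%eax)             -> 1   (disp8)
       0x1234(%eax)        -> 4   (disp32)
       4(%eax,%ebx,2)      -> 2   (disp8 + SIB)
       absolute address    -> 4   (disp32)  */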
11646 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
11647 is set, expect that the insn has an 8bit immediate alternative. */
11649 ix86_attr_length_immediate_default (insn, shortform)
11650 rtx insn;
11651 int shortform;
11653 int len = 0;
11654 int i;
11655 extract_insn_cached (insn);
11656 for (i = recog_data.n_operands - 1; i >= 0; --i)
11657 if (CONSTANT_P (recog_data.operand[i]))
11659 if (len)
11660 abort ();
11661 if (shortform
11662 && GET_CODE (recog_data.operand[i]) == CONST_INT
11663 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11664 len = 1;
11665 else
11667 switch (get_attr_mode (insn))
11669 case MODE_QI:
11670 len+=1;
11671 break;
11672 case MODE_HI:
11673 len+=2;
11674 break;
11675 case MODE_SI:
11676 len+=4;
11677 break;
11678 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11679 case MODE_DI:
11680 len+=4;
11681 break;
11682 default:
11683 fatal_insn ("unknown insn mode", insn);
11687 return len;
11689 /* Compute default value for "length_address" attribute. */
11691 ix86_attr_length_address_default (insn)
11692 rtx insn;
11694 int i;
11695 extract_insn_cached (insn);
11696 for (i = recog_data.n_operands - 1; i >= 0; --i)
11697 if (GET_CODE (recog_data.operand[i]) == MEM)
11699 return memory_address_length (XEXP (recog_data.operand[i], 0));
11700 break;
11702 return 0;
11705 /* Return the maximum number of instructions a cpu can issue. */
11707 static int
11708 ix86_issue_rate ()
11710 switch (ix86_cpu)
11712 case PROCESSOR_PENTIUM:
11713 case PROCESSOR_K6:
11714 return 2;
11716 case PROCESSOR_PENTIUMPRO:
11717 case PROCESSOR_PENTIUM4:
11718 case PROCESSOR_ATHLON:
11719 case PROCESSOR_K8:
11720 return 3;
11722 default:
11723 return 1;
11727 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
11728 by DEP_INSN and nothing else set by DEP_INSN. */
11730 static int
11731 ix86_flags_dependant (insn, dep_insn, insn_type)
11732 rtx insn, dep_insn;
11733 enum attr_type insn_type;
11735 rtx set, set2;
11737 /* Simplify the test for uninteresting insns. */
11738 if (insn_type != TYPE_SETCC
11739 && insn_type != TYPE_ICMOV
11740 && insn_type != TYPE_FCMOV
11741 && insn_type != TYPE_IBR)
11742 return 0;
11744 if ((set = single_set (dep_insn)) != 0)
11746 set = SET_DEST (set);
11747 set2 = NULL_RTX;
11749 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11750 && XVECLEN (PATTERN (dep_insn), 0) == 2
11751 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11752 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11754 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11755 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11757 else
11758 return 0;
11760 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11761 return 0;
11763 /* This test is true if the dependent insn reads the flags but
11764 not any other potentially set register. */
11765 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11766 return 0;
11768 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11769 return 0;
11771 return 1;
11774 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11775 address with operands set by DEP_INSN. */
11777 static int
11778 ix86_agi_dependant (insn, dep_insn, insn_type)
11779 rtx insn, dep_insn;
11780 enum attr_type insn_type;
11782 rtx addr;
11784 if (insn_type == TYPE_LEA
11785 && TARGET_PENTIUM)
11787 addr = PATTERN (insn);
11788 if (GET_CODE (addr) == SET)
11790 else if (GET_CODE (addr) == PARALLEL
11791 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11792 addr = XVECEXP (addr, 0, 0);
11793 else
11794 abort ();
11795 addr = SET_SRC (addr);
11797 else
11799 int i;
11800 extract_insn_cached (insn);
11801 for (i = recog_data.n_operands - 1; i >= 0; --i)
11802 if (GET_CODE (recog_data.operand[i]) == MEM)
11804 addr = XEXP (recog_data.operand[i], 0);
11805 goto found;
11807 return 0;
11808 found:;
11811 return modified_in_p (addr, dep_insn);
11814 static int
11815 ix86_adjust_cost (insn, link, dep_insn, cost)
11816 rtx insn, link, dep_insn;
11817 int cost;
11819 enum attr_type insn_type, dep_insn_type;
11820 enum attr_memory memory, dep_memory;
11821 rtx set, set2;
11822 int dep_insn_code_number;
11824 /* Anti and output dependencies have zero cost on all CPUs. */
11825 if (REG_NOTE_KIND (link) != 0)
11826 return 0;
11828 dep_insn_code_number = recog_memoized (dep_insn);
11830 /* If we can't recognize the insns, we can't really do anything. */
11831 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11832 return cost;
11834 insn_type = get_attr_type (insn);
11835 dep_insn_type = get_attr_type (dep_insn);
11837 switch (ix86_cpu)
11839 case PROCESSOR_PENTIUM:
11840 /* Address Generation Interlock adds a cycle of latency. */
11841 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11842 cost += 1;
11844 /* ??? Compares pair with jump/setcc. */
11845 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11846 cost = 0;
11848 /* Floating point stores require value to be ready one cycle earlier. */
11849 if (insn_type == TYPE_FMOV
11850 && get_attr_memory (insn) == MEMORY_STORE
11851 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11852 cost += 1;
11853 break;
11855 case PROCESSOR_PENTIUMPRO:
11856 memory = get_attr_memory (insn);
11857 dep_memory = get_attr_memory (dep_insn);
11859 /* Since we can't represent delayed latencies of load+operation,
11860 increase the cost here for non-imov insns. */
11861 if (dep_insn_type != TYPE_IMOV
11862 && dep_insn_type != TYPE_FMOV
11863 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11864 cost += 1;
11866 /* INT->FP conversion is expensive. */
11867 if (get_attr_fp_int_src (dep_insn))
11868 cost += 5;
11870 /* There is one cycle extra latency between an FP op and a store. */
11871 if (insn_type == TYPE_FMOV
11872 && (set = single_set (dep_insn)) != NULL_RTX
11873 && (set2 = single_set (insn)) != NULL_RTX
11874 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11875 && GET_CODE (SET_DEST (set2)) == MEM)
11876 cost += 1;
11878 /* Show ability of reorder buffer to hide latency of load by executing
11879 in parallel with previous instruction in case
11880 previous instruction is not needed to compute the address. */
11881 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11882 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11884 /* Claim moves to take one cycle, as the core can issue one load
11885 at a time and the next load can start a cycle later. */
11886 if (dep_insn_type == TYPE_IMOV
11887 || dep_insn_type == TYPE_FMOV)
11888 cost = 1;
11889 else if (cost > 1)
11890 cost--;
11892 break;
11894 case PROCESSOR_K6:
11895 memory = get_attr_memory (insn);
11896 dep_memory = get_attr_memory (dep_insn);
11897 /* The esp dependency is resolved before the instruction is really
11898 finished. */
11899 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11900 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11901 return 1;
11903 /* Since we can't represent delayed latencies of load+operation,
11904 increase the cost here for non-imov insns. */
11905 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11906 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11908 /* INT->FP conversion is expensive. */
11909 if (get_attr_fp_int_src (dep_insn))
11910 cost += 5;
11912 /* Show ability of reorder buffer to hide latency of load by executing
11913 in parallel with previous instruction in case
11914 previous instruction is not needed to compute the address. */
11915 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11916 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11918 /* Claim moves to take one cycle, as the core can issue one load
11919 at a time and the next load can start a cycle later. */
11920 if (dep_insn_type == TYPE_IMOV
11921 || dep_insn_type == TYPE_FMOV)
11922 cost = 1;
11923 else if (cost > 2)
11924 cost -= 2;
11925 else
11926 cost = 1;
11928 break;
11930 case PROCESSOR_ATHLON:
11931 case PROCESSOR_K8:
11932 memory = get_attr_memory (insn);
11933 dep_memory = get_attr_memory (dep_insn);
11935 /* Show ability of reorder buffer to hide latency of load by executing
11936 in parallel with previous instruction in case
11937 previous instruction is not needed to compute the address. */
11938 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11939 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11941 /* Claim moves to take one cycle, as the core can issue one load
11942 at a time and the next load can start a cycle later. */
11943 if (dep_insn_type == TYPE_IMOV
11944 || dep_insn_type == TYPE_FMOV)
11945 cost = 0;
11946 else if (cost >= 3)
11947 cost -= 3;
11948 else
11949 cost = 0;
11952 default:
11953 break;
11956 return cost;
11959 static union
11961 struct ppro_sched_data
11963 rtx decode[3];
11964 int issued_this_cycle;
11965 } ppro;
11966 } ix86_sched_data;
11968 static enum attr_ppro_uops
11969 ix86_safe_ppro_uops (insn)
11970 rtx insn;
11972 if (recog_memoized (insn) >= 0)
11973 return get_attr_ppro_uops (insn);
11974 else
11975 return PPRO_UOPS_MANY;
11978 static void
11979 ix86_dump_ppro_packet (dump)
11980 FILE *dump;
11982 if (ix86_sched_data.ppro.decode[0])
11984 fprintf (dump, "PPRO packet: %d",
11985 INSN_UID (ix86_sched_data.ppro.decode[0]));
11986 if (ix86_sched_data.ppro.decode[1])
11987 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11988 if (ix86_sched_data.ppro.decode[2])
11989 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11990 fputc ('\n', dump);
11994 /* We're beginning a new block. Initialize data structures as necessary. */
11996 static void
11997 ix86_sched_init (dump, sched_verbose, veclen)
11998 FILE *dump ATTRIBUTE_UNUSED;
11999 int sched_verbose ATTRIBUTE_UNUSED;
12000 int veclen ATTRIBUTE_UNUSED;
12002 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12005 /* Shift INSN to SLOT, and shift everything else down. */
12007 static void
12008 ix86_reorder_insn (insnp, slot)
12009 rtx *insnp, *slot;
12011 if (insnp != slot)
12013 rtx insn = *insnp;
12015 insnp[0] = insnp[1];
12016 while (++insnp != slot);
12017 *insnp = insn;
12021 static void
12022 ix86_sched_reorder_ppro (ready, e_ready)
12023 rtx *ready;
12024 rtx *e_ready;
12026 rtx decode[3];
12027 enum attr_ppro_uops cur_uops;
12028 int issued_this_cycle;
12029 rtx *insnp;
12030 int i;
12032 /* At this point .ppro.decode contains the state of the three
12033 decoders from last "cycle". That is, those insns that were
12034 actually independent. But here we're scheduling for the
12035 decoder, and we may find things that are decodable in the
12036 same cycle. */
12038 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12039 issued_this_cycle = 0;
12041 insnp = e_ready;
12042 cur_uops = ix86_safe_ppro_uops (*insnp);
12044 /* If the decoders are empty and we have a complex insn at the
12045 head of the priority queue, let it issue without complaint. */
12046 if (decode[0] == NULL)
12048 if (cur_uops == PPRO_UOPS_MANY)
12050 decode[0] = *insnp;
12051 goto ppro_done;
12054 /* Otherwise, search for a 2-4 uop insn to issue. */
12055 while (cur_uops != PPRO_UOPS_FEW)
12057 if (insnp == ready)
12058 break;
12059 cur_uops = ix86_safe_ppro_uops (*--insnp);
12062 /* If so, move it to the head of the line. */
12063 if (cur_uops == PPRO_UOPS_FEW)
12064 ix86_reorder_insn (insnp, e_ready);
12066 /* Issue the head of the queue. */
12067 issued_this_cycle = 1;
12068 decode[0] = *e_ready--;
12071 /* Look for simple insns to fill in the other two slots. */
12072 for (i = 1; i < 3; ++i)
12073 if (decode[i] == NULL)
12075 if (ready > e_ready)
12076 goto ppro_done;
12078 insnp = e_ready;
12079 cur_uops = ix86_safe_ppro_uops (*insnp);
12080 while (cur_uops != PPRO_UOPS_ONE)
12082 if (insnp == ready)
12083 break;
12084 cur_uops = ix86_safe_ppro_uops (*--insnp);
12087 /* Found one. Move it to the head of the queue and issue it. */
12088 if (cur_uops == PPRO_UOPS_ONE)
12090 ix86_reorder_insn (insnp, e_ready);
12091 decode[i] = *e_ready--;
12092 issued_this_cycle++;
12093 continue;
12096 /* ??? Didn't find one. Ideally, here we would do a lazy split
12097 of 2-uop insns, issue one and queue the other. */
12100 ppro_done:
12101 if (issued_this_cycle == 0)
12102 issued_this_cycle = 1;
12103 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12106 /* We are about to begin issuing insns for this clock cycle.
12107 Override the default sort algorithm to better slot instructions. */
12108 static int
12109 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12110 FILE *dump ATTRIBUTE_UNUSED;
12111 int sched_verbose ATTRIBUTE_UNUSED;
12112 rtx *ready;
12113 int *n_readyp;
12114 int clock_var ATTRIBUTE_UNUSED;
12116 int n_ready = *n_readyp;
12117 rtx *e_ready = ready + n_ready - 1;
12119 /* Make sure to go ahead and initialize key items in
12120 ix86_sched_data if we are not going to bother trying to
12121 reorder the ready queue. */
12122 if (n_ready < 2)
12124 ix86_sched_data.ppro.issued_this_cycle = 1;
12125 goto out;
12128 switch (ix86_cpu)
12130 default:
12131 break;
12133 case PROCESSOR_PENTIUMPRO:
12134 ix86_sched_reorder_ppro (ready, e_ready);
12135 break;
12138 out:
12139 return ix86_issue_rate ();
12142 /* We are about to issue INSN. Return the number of insns left on the
12143 ready queue that can be issued this cycle. */
12145 static int
12146 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12147 FILE *dump;
12148 int sched_verbose;
12149 rtx insn;
12150 int can_issue_more;
12152 int i;
12153 switch (ix86_cpu)
12155 default:
12156 return can_issue_more - 1;
12158 case PROCESSOR_PENTIUMPRO:
12160 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12162 if (uops == PPRO_UOPS_MANY)
12164 if (sched_verbose)
12165 ix86_dump_ppro_packet (dump);
12166 ix86_sched_data.ppro.decode[0] = insn;
12167 ix86_sched_data.ppro.decode[1] = NULL;
12168 ix86_sched_data.ppro.decode[2] = NULL;
12169 if (sched_verbose)
12170 ix86_dump_ppro_packet (dump);
12171 ix86_sched_data.ppro.decode[0] = NULL;
12173 else if (uops == PPRO_UOPS_FEW)
12175 if (sched_verbose)
12176 ix86_dump_ppro_packet (dump);
12177 ix86_sched_data.ppro.decode[0] = insn;
12178 ix86_sched_data.ppro.decode[1] = NULL;
12179 ix86_sched_data.ppro.decode[2] = NULL;
12181 else
12183 for (i = 0; i < 3; ++i)
12184 if (ix86_sched_data.ppro.decode[i] == NULL)
12186 ix86_sched_data.ppro.decode[i] = insn;
12187 break;
12189 if (i == 3)
12190 abort ();
12191 if (i == 2)
12193 if (sched_verbose)
12194 ix86_dump_ppro_packet (dump);
12195 ix86_sched_data.ppro.decode[0] = NULL;
12196 ix86_sched_data.ppro.decode[1] = NULL;
12197 ix86_sched_data.ppro.decode[2] = NULL;
12201 return --ix86_sched_data.ppro.issued_this_cycle;
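/* Return nonzero if the active pipeline description is written for the
   DFA scheduler interface.  Only the Pentium and Athlon/K8 descriptions
   use it; the remaining processors keep using the hooks above.  */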
12205 static int
12206 ia32_use_dfa_pipeline_interface ()
12208 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12209 return 1;
12210 return 0;
12213 /* How many alternative schedules to try. This should be as wide as the
12214 scheduling freedom in the DFA, but no wider. Making this value too
12215 large results in extra work for the scheduler. */
12217 static int
12218 ia32_multipass_dfa_lookahead ()
12220 if (ix86_cpu == PROCESSOR_PENTIUM)
12221 return 2;
12222 else
12223 return 0;
12227 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12228 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12229 appropriate. */
12231 void
12232 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12233 rtx insns;
12234 rtx dstref, srcref, dstreg, srcreg;
12236 rtx insn;
12238 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12239 if (INSN_P (insn))
12240 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12241 dstreg, srcreg);
12244 /* Subroutine of above to actually do the updating by recursively walking
12245 the rtx. */
12247 static void
12248 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12249 rtx x;
12250 rtx dstref, srcref, dstreg, srcreg;
12252 enum rtx_code code = GET_CODE (x);
12253 const char *format_ptr = GET_RTX_FORMAT (code);
12254 int i, j;
12256 if (code == MEM && XEXP (x, 0) == dstreg)
12257 MEM_COPY_ATTRIBUTES (x, dstref);
12258 else if (code == MEM && XEXP (x, 0) == srcreg)
12259 MEM_COPY_ATTRIBUTES (x, srcref);
12261 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12263 if (*format_ptr == 'e')
12264 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12265 dstreg, srcreg);
12266 else if (*format_ptr == 'E')
12267 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12268 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12269 dstreg, srcreg);
12273 /* Compute the alignment given to a constant that is being placed in memory.
12274 EXP is the constant and ALIGN is the alignment that the object would
12275 ordinarily have.
12276 The value of this function is used instead of that alignment to align
12277 the object. */
12280 ix86_constant_alignment (exp, align)
12281 tree exp;
12282 int align;
12284 if (TREE_CODE (exp) == REAL_CST)
12286 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12287 return 64;
12288 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12289 return 128;
12291 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12292 && align < 256)
12293 return 256;
12295 return align;
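/* Thus a double (DFmode) constant gets at least 64-bit alignment, 128-bit
   modes get at least 128-bit alignment, and long string literals get
   256-bit alignment, whatever the type alone would have requested.  */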
12298 /* Compute the alignment for a static variable.
12299 TYPE is the data type, and ALIGN is the alignment that
12300 the object would ordinarily have. The value of this function is used
12301 instead of that alignment to align the object. */
12304 ix86_data_alignment (type, align)
12305 tree type;
12306 int align;
12308 if (AGGREGATE_TYPE_P (type)
12309 && TYPE_SIZE (type)
12310 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12311 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12312 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12313 return 256;
12315 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12316 to a 16-byte boundary. */
12317 if (TARGET_64BIT)
12319 if (AGGREGATE_TYPE_P (type)
12320 && TYPE_SIZE (type)
12321 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12322 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12323 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12324 return 128;
12327 if (TREE_CODE (type) == ARRAY_TYPE)
12329 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12330 return 64;
12331 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12332 return 128;
12334 else if (TREE_CODE (type) == COMPLEX_TYPE)
12337 if (TYPE_MODE (type) == DCmode && align < 64)
12338 return 64;
12339 if (TYPE_MODE (type) == XCmode && align < 128)
12340 return 128;
12342 else if ((TREE_CODE (type) == RECORD_TYPE
12343 || TREE_CODE (type) == UNION_TYPE
12344 || TREE_CODE (type) == QUAL_UNION_TYPE)
12345 && TYPE_FIELDS (type))
12347 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12348 return 64;
12349 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12350 return 128;
12352 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12353 || TREE_CODE (type) == INTEGER_TYPE)
12355 if (TYPE_MODE (type) == DFmode && align < 64)
12356 return 64;
12357 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12358 return 128;
12361 return align;
12364 /* Compute the alignment for a local variable.
12365 TYPE is the data type, and ALIGN is the alignment that
12366 the object would ordinarily have. The value of this macro is used
12367 instead of that alignment to align the object. */
12370 ix86_local_alignment (type, align)
12371 tree type;
12372 int align;
12374 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12375 to a 16-byte boundary. */
12376 if (TARGET_64BIT)
12378 if (AGGREGATE_TYPE_P (type)
12379 && TYPE_SIZE (type)
12380 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12381 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12382 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12383 return 128;
12385 if (TREE_CODE (type) == ARRAY_TYPE)
12387 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12388 return 64;
12389 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12390 return 128;
12392 else if (TREE_CODE (type) == COMPLEX_TYPE)
12394 if (TYPE_MODE (type) == DCmode && align < 64)
12395 return 64;
12396 if (TYPE_MODE (type) == XCmode && align < 128)
12397 return 128;
12399 else if ((TREE_CODE (type) == RECORD_TYPE
12400 || TREE_CODE (type) == UNION_TYPE
12401 || TREE_CODE (type) == QUAL_UNION_TYPE)
12402 && TYPE_FIELDS (type))
12404 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12405 return 64;
12406 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12407 return 128;
12409 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12410 || TREE_CODE (type) == INTEGER_TYPE)
12413 if (TYPE_MODE (type) == DFmode && align < 64)
12414 return 64;
12415 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12416 return 128;
12418 return align;
12421 /* Emit RTL insns to initialize the variable parts of a trampoline.
12422 FNADDR is an RTX for the address of the function's pure code.
12423 CXT is an RTX for the static chain value for the function. */
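/* The bytes emitted below encode, on ia32,
	movl  $CXT, %ecx		0xb9 <imm32>
	jmp   FNADDR			0xe9 <rel32>
   and, on x86-64 (a sketch; the exact layout follows the offsets below),
	movl/movabsq  $FNADDR, %r11	0x41 0xbb <imm32> / 0x49 0xbb <imm64>
	movabsq       $CXT, %r10	0x49 0xba <imm64>
	jmp   *%r11			0x49 0xff 0xe3  */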
12424 void
12425 x86_initialize_trampoline (tramp, fnaddr, cxt)
12426 rtx tramp, fnaddr, cxt;
12428 if (!TARGET_64BIT)
12430 /* Compute offset from the end of the jmp to the target function. */
12431 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12432 plus_constant (tramp, 10),
12433 NULL_RTX, 1, OPTAB_DIRECT);
12434 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12435 gen_int_mode (0xb9, QImode));
12436 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12437 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12438 gen_int_mode (0xe9, QImode));
12439 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12441 else
12443 int offset = 0;
12444 /* Try to load address using shorter movl instead of movabs.
12445 We may want to support movq for kernel mode, but the kernel does not use
12446 trampolines at the moment. */
12447 if (x86_64_zero_extended_value (fnaddr))
12449 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12450 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12451 gen_int_mode (0xbb41, HImode));
12452 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12453 gen_lowpart (SImode, fnaddr));
12454 offset += 6;
12456 else
12458 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12459 gen_int_mode (0xbb49, HImode));
12460 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12461 fnaddr);
12462 offset += 10;
12464 /* Load static chain using movabs to r10. */
12465 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12466 gen_int_mode (0xba49, HImode));
12467 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12468 cxt);
12469 offset += 10;
12470 /* Jump to r11.  */
12471 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12472 gen_int_mode (0xff49, HImode));
12473 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12474 gen_int_mode (0xe3, QImode));
12475 offset += 3;
12476 if (offset > TRAMPOLINE_SIZE)
12477 abort ();
12480 #ifdef TRANSFER_FROM_TRAMPOLINE
12481 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12482 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12483 #endif
12486 #define def_builtin(MASK, NAME, TYPE, CODE) \
12487 do { \
12488 if ((MASK) & target_flags) \
12489 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12490 NULL, NULL_TREE); \
12491 } while (0)
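/* For example, the call
     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
		  IX86_BUILTIN_EMMS);
   further down registers the builtin only when MASK_MMX (-mmmx) is set
   in target_flags.  */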
12493 struct builtin_description
12495 const unsigned int mask;
12496 const enum insn_code icode;
12497 const char *const name;
12498 const enum ix86_builtins code;
12499 const enum rtx_code comparison;
12500 const unsigned int flag;
12503 /* Used for builtins that are enabled both by -msse and -msse2. */
12504 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12506 static const struct builtin_description bdesc_comi[] =
12508 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12509 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12510 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12511 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12512 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12513 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12514 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12515 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12516 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12517 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12518 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12519 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12521 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12522 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12523 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12524 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12525 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12526 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12527 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12528 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12529 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12530 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12531 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12534 static const struct builtin_description bdesc_2arg[] =
12536 /* SSE */
12537 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12538 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12539 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12542 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12543 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12544 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12546 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12547 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12548 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12549 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12550 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12551 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12552 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12553 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12554 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12555 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12556 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12557 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12558 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12559 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12560 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12561 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12562 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12563 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12564 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12565 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12567 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12568 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12569 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12570 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12572 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12573 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12574 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12575 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12577 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12578 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12579 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12580 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12581 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12583 /* MMX */
12584 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12585 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12586 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12587 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12588 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12589 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12591 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12592 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12593 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12594 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12595 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12596 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12597 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12598 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12600 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12601 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12602 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12604 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12605 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12606 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12607 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12609 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12610 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12612 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12613 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12614 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12615 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12616 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12617 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12619 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12620 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12621 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12622 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12624 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12625 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12627 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12628 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12629 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12631 /* Special. */
12632 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12633 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12634 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12636 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12637 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12639 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12640 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12641 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12642 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12643 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12644 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12646 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12647 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12648 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12649 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12650 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12651 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12653 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12654 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12655 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12656 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12658 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12659 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12661 /* SSE2 */
12662 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12668 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12672 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12673 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12674 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12675 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12676 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12677 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12678 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12679 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12680 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12681 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12682 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12683 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12684 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12685 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12686 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12687 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12688 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12689 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12690 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12692 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12704 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12706 /* SSE2 MMX */
12707 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12713 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12716 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12717 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12718 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12719 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12720 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12721 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12722 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12723 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12746 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12755 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12759 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12763 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12764 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12769 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12776 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12787 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12792 static const struct builtin_description bdesc_1arg[] =
12794 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12795 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12797 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12798 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12799 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12801 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12802 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12803 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12804 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12834 void
12835 ix86_init_builtins ()
12837 if (TARGET_MMX)
12838 ix86_init_mmx_sse_builtins ();
12841 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12842 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12843 builtins. */
12844 static void
12845 ix86_init_mmx_sse_builtins ()
12847 const struct builtin_description * d;
12848 size_t i;
12850 tree pchar_type_node = build_pointer_type (char_type_node);
12851 tree pcchar_type_node = build_pointer_type (
12852 build_type_variant (char_type_node, 1, 0));
12853 tree pfloat_type_node = build_pointer_type (float_type_node);
12854 tree pcfloat_type_node = build_pointer_type (
12855 build_type_variant (float_type_node, 1, 0));
12856 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12857 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12858 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12860 /* Comparisons. */
12861 tree int_ftype_v4sf_v4sf
12862 = build_function_type_list (integer_type_node,
12863 V4SF_type_node, V4SF_type_node, NULL_TREE);
12864 tree v4si_ftype_v4sf_v4sf
12865 = build_function_type_list (V4SI_type_node,
12866 V4SF_type_node, V4SF_type_node, NULL_TREE);
12867 /* MMX/SSE/integer conversions. */
12868 tree int_ftype_v4sf
12869 = build_function_type_list (integer_type_node,
12870 V4SF_type_node, NULL_TREE);
12871 tree int_ftype_v8qi
12872 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12873 tree v4sf_ftype_v4sf_int
12874 = build_function_type_list (V4SF_type_node,
12875 V4SF_type_node, integer_type_node, NULL_TREE);
12876 tree v4sf_ftype_v4sf_v2si
12877 = build_function_type_list (V4SF_type_node,
12878 V4SF_type_node, V2SI_type_node, NULL_TREE);
12879 tree int_ftype_v4hi_int
12880 = build_function_type_list (integer_type_node,
12881 V4HI_type_node, integer_type_node, NULL_TREE);
12882 tree v4hi_ftype_v4hi_int_int
12883 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12884 integer_type_node, integer_type_node,
12885 NULL_TREE);
12886 /* Miscellaneous. */
12887 tree v8qi_ftype_v4hi_v4hi
12888 = build_function_type_list (V8QI_type_node,
12889 V4HI_type_node, V4HI_type_node, NULL_TREE);
12890 tree v4hi_ftype_v2si_v2si
12891 = build_function_type_list (V4HI_type_node,
12892 V2SI_type_node, V2SI_type_node, NULL_TREE);
12893 tree v4sf_ftype_v4sf_v4sf_int
12894 = build_function_type_list (V4SF_type_node,
12895 V4SF_type_node, V4SF_type_node,
12896 integer_type_node, NULL_TREE);
12897 tree v2si_ftype_v4hi_v4hi
12898 = build_function_type_list (V2SI_type_node,
12899 V4HI_type_node, V4HI_type_node, NULL_TREE);
12900 tree v4hi_ftype_v4hi_int
12901 = build_function_type_list (V4HI_type_node,
12902 V4HI_type_node, integer_type_node, NULL_TREE);
12903 tree v4hi_ftype_v4hi_di
12904 = build_function_type_list (V4HI_type_node,
12905 V4HI_type_node, long_long_unsigned_type_node,
12906 NULL_TREE);
12907 tree v2si_ftype_v2si_di
12908 = build_function_type_list (V2SI_type_node,
12909 V2SI_type_node, long_long_unsigned_type_node,
12910 NULL_TREE);
12911 tree void_ftype_void
12912 = build_function_type (void_type_node, void_list_node);
12913 tree void_ftype_unsigned
12914 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12915 tree unsigned_ftype_void
12916 = build_function_type (unsigned_type_node, void_list_node);
12917 tree di_ftype_void
12918 = build_function_type (long_long_unsigned_type_node, void_list_node);
12919 tree v4sf_ftype_void
12920 = build_function_type (V4SF_type_node, void_list_node);
12921 tree v2si_ftype_v4sf
12922 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12923 /* Loads/stores. */
12924 tree void_ftype_v8qi_v8qi_pchar
12925 = build_function_type_list (void_type_node,
12926 V8QI_type_node, V8QI_type_node,
12927 pchar_type_node, NULL_TREE);
12928 tree v4sf_ftype_pcfloat
12929 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12930 /* @@@ the type is bogus */
12931 tree v4sf_ftype_v4sf_pv2si
12932 = build_function_type_list (V4SF_type_node,
12933 V4SF_type_node, pv2si_type_node, NULL_TREE);
12934 tree void_ftype_pv2si_v4sf
12935 = build_function_type_list (void_type_node,
12936 pv2si_type_node, V4SF_type_node, NULL_TREE);
12937 tree void_ftype_pfloat_v4sf
12938 = build_function_type_list (void_type_node,
12939 pfloat_type_node, V4SF_type_node, NULL_TREE);
12940 tree void_ftype_pdi_di
12941 = build_function_type_list (void_type_node,
12942 pdi_type_node, long_long_unsigned_type_node,
12943 NULL_TREE);
12944 tree void_ftype_pv2di_v2di
12945 = build_function_type_list (void_type_node,
12946 pv2di_type_node, V2DI_type_node, NULL_TREE);
12947 /* Normal vector unops. */
12948 tree v4sf_ftype_v4sf
12949 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12951 /* Normal vector binops. */
12952 tree v4sf_ftype_v4sf_v4sf
12953 = build_function_type_list (V4SF_type_node,
12954 V4SF_type_node, V4SF_type_node, NULL_TREE);
12955 tree v8qi_ftype_v8qi_v8qi
12956 = build_function_type_list (V8QI_type_node,
12957 V8QI_type_node, V8QI_type_node, NULL_TREE);
12958 tree v4hi_ftype_v4hi_v4hi
12959 = build_function_type_list (V4HI_type_node,
12960 V4HI_type_node, V4HI_type_node, NULL_TREE);
12961 tree v2si_ftype_v2si_v2si
12962 = build_function_type_list (V2SI_type_node,
12963 V2SI_type_node, V2SI_type_node, NULL_TREE);
12964 tree di_ftype_di_di
12965 = build_function_type_list (long_long_unsigned_type_node,
12966 long_long_unsigned_type_node,
12967 long_long_unsigned_type_node, NULL_TREE);
12969 tree v2si_ftype_v2sf
12970 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12971 tree v2sf_ftype_v2si
12972 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12973 tree v2si_ftype_v2si
12974 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12975 tree v2sf_ftype_v2sf
12976 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12977 tree v2sf_ftype_v2sf_v2sf
12978 = build_function_type_list (V2SF_type_node,
12979 V2SF_type_node, V2SF_type_node, NULL_TREE);
12980 tree v2si_ftype_v2sf_v2sf
12981 = build_function_type_list (V2SI_type_node,
12982 V2SF_type_node, V2SF_type_node, NULL_TREE);
12983 tree pint_type_node = build_pointer_type (integer_type_node);
12984 tree pcint_type_node = build_pointer_type (
12985 build_type_variant (integer_type_node, 1, 0));
12986 tree pdouble_type_node = build_pointer_type (double_type_node);
12987 tree pcdouble_type_node = build_pointer_type (
12988 build_type_variant (double_type_node, 1, 0));
12989 tree int_ftype_v2df_v2df
12990 = build_function_type_list (integer_type_node,
12991 V2DF_type_node, V2DF_type_node, NULL_TREE);
12993 tree ti_ftype_void
12994 = build_function_type (intTI_type_node, void_list_node);
12995 tree v2di_ftype_void
12996 = build_function_type (V2DI_type_node, void_list_node);
12997 tree ti_ftype_ti_ti
12998 = build_function_type_list (intTI_type_node,
12999 intTI_type_node, intTI_type_node, NULL_TREE);
13000 tree void_ftype_pcvoid
13001 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13002 tree v2di_ftype_di
13003 = build_function_type_list (V2DI_type_node,
13004 long_long_unsigned_type_node, NULL_TREE);
13005 tree di_ftype_v2di
13006 = build_function_type_list (long_long_unsigned_type_node,
13007 V2DI_type_node, NULL_TREE);
13008 tree v4sf_ftype_v4si
13009 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13010 tree v4si_ftype_v4sf
13011 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13012 tree v2df_ftype_v4si
13013 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13014 tree v4si_ftype_v2df
13015 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13016 tree v2si_ftype_v2df
13017 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13018 tree v4sf_ftype_v2df
13019 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13020 tree v2df_ftype_v2si
13021 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13022 tree v2df_ftype_v4sf
13023 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13024 tree int_ftype_v2df
13025 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13026 tree v2df_ftype_v2df_int
13027 = build_function_type_list (V2DF_type_node,
13028 V2DF_type_node, integer_type_node, NULL_TREE);
13029 tree v4sf_ftype_v4sf_v2df
13030 = build_function_type_list (V4SF_type_node,
13031 V4SF_type_node, V2DF_type_node, NULL_TREE);
13032 tree v2df_ftype_v2df_v4sf
13033 = build_function_type_list (V2DF_type_node,
13034 V2DF_type_node, V4SF_type_node, NULL_TREE);
13035 tree v2df_ftype_v2df_v2df_int
13036 = build_function_type_list (V2DF_type_node,
13037 V2DF_type_node, V2DF_type_node,
13038 integer_type_node,
13039 NULL_TREE);
13040 tree v2df_ftype_v2df_pv2si
13041 = build_function_type_list (V2DF_type_node,
13042 V2DF_type_node, pv2si_type_node, NULL_TREE);
13043 tree void_ftype_pv2si_v2df
13044 = build_function_type_list (void_type_node,
13045 pv2si_type_node, V2DF_type_node, NULL_TREE);
13046 tree void_ftype_pdouble_v2df
13047 = build_function_type_list (void_type_node,
13048 pdouble_type_node, V2DF_type_node, NULL_TREE);
13049 tree void_ftype_pint_int
13050 = build_function_type_list (void_type_node,
13051 pint_type_node, integer_type_node, NULL_TREE);
13052 tree void_ftype_v16qi_v16qi_pchar
13053 = build_function_type_list (void_type_node,
13054 V16QI_type_node, V16QI_type_node,
13055 pchar_type_node, NULL_TREE);
13056 tree v2df_ftype_pcdouble
13057 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13058 tree v2df_ftype_v2df_v2df
13059 = build_function_type_list (V2DF_type_node,
13060 V2DF_type_node, V2DF_type_node, NULL_TREE);
13061 tree v16qi_ftype_v16qi_v16qi
13062 = build_function_type_list (V16QI_type_node,
13063 V16QI_type_node, V16QI_type_node, NULL_TREE);
13064 tree v8hi_ftype_v8hi_v8hi
13065 = build_function_type_list (V8HI_type_node,
13066 V8HI_type_node, V8HI_type_node, NULL_TREE);
13067 tree v4si_ftype_v4si_v4si
13068 = build_function_type_list (V4SI_type_node,
13069 V4SI_type_node, V4SI_type_node, NULL_TREE);
13070 tree v2di_ftype_v2di_v2di
13071 = build_function_type_list (V2DI_type_node,
13072 V2DI_type_node, V2DI_type_node, NULL_TREE);
13073 tree v2di_ftype_v2df_v2df
13074 = build_function_type_list (V2DI_type_node,
13075 V2DF_type_node, V2DF_type_node, NULL_TREE);
13076 tree v2df_ftype_v2df
13077 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13078 tree v2df_ftype_double
13079 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13080 tree v2df_ftype_double_double
13081 = build_function_type_list (V2DF_type_node,
13082 double_type_node, double_type_node, NULL_TREE);
13083 tree int_ftype_v8hi_int
13084 = build_function_type_list (integer_type_node,
13085 V8HI_type_node, integer_type_node, NULL_TREE);
13086 tree v8hi_ftype_v8hi_int_int
13087 = build_function_type_list (V8HI_type_node,
13088 V8HI_type_node, integer_type_node,
13089 integer_type_node, NULL_TREE);
13090 tree v2di_ftype_v2di_int
13091 = build_function_type_list (V2DI_type_node,
13092 V2DI_type_node, integer_type_node, NULL_TREE);
13093 tree v4si_ftype_v4si_int
13094 = build_function_type_list (V4SI_type_node,
13095 V4SI_type_node, integer_type_node, NULL_TREE);
13096 tree v8hi_ftype_v8hi_int
13097 = build_function_type_list (V8HI_type_node,
13098 V8HI_type_node, integer_type_node, NULL_TREE);
13099 tree v8hi_ftype_v8hi_v2di
13100 = build_function_type_list (V8HI_type_node,
13101 V8HI_type_node, V2DI_type_node, NULL_TREE);
13102 tree v4si_ftype_v4si_v2di
13103 = build_function_type_list (V4SI_type_node,
13104 V4SI_type_node, V2DI_type_node, NULL_TREE);
13105 tree v4si_ftype_v8hi_v8hi
13106 = build_function_type_list (V4SI_type_node,
13107 V8HI_type_node, V8HI_type_node, NULL_TREE);
13108 tree di_ftype_v8qi_v8qi
13109 = build_function_type_list (long_long_unsigned_type_node,
13110 V8QI_type_node, V8QI_type_node, NULL_TREE);
13111 tree v2di_ftype_v16qi_v16qi
13112 = build_function_type_list (V2DI_type_node,
13113 V16QI_type_node, V16QI_type_node, NULL_TREE);
13114 tree int_ftype_v16qi
13115 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13116 tree v16qi_ftype_pcchar
13117 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13118 tree void_ftype_pchar_v16qi
13119 = build_function_type_list (void_type_node,
13120 pchar_type_node, V16QI_type_node, NULL_TREE);
13121 tree v4si_ftype_pcint
13122 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13123 tree void_ftype_pcint_v4si
13124 = build_function_type_list (void_type_node,
13125 pcint_type_node, V4SI_type_node, NULL_TREE);
13126 tree v2di_ftype_v2di
13127 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13129 /* Add all builtins that are more or less simple operations on two
13130 operands. */
13131 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13133 /* Use one of the operands; the target can have a different mode for
13134 mask-generating compares. */
13135 enum machine_mode mode;
13136 tree type;
13138 if (d->name == 0)
13139 continue;
13140 mode = insn_data[d->icode].operand[1].mode;
13142 switch (mode)
13144 case V16QImode:
13145 type = v16qi_ftype_v16qi_v16qi;
13146 break;
13147 case V8HImode:
13148 type = v8hi_ftype_v8hi_v8hi;
13149 break;
13150 case V4SImode:
13151 type = v4si_ftype_v4si_v4si;
13152 break;
13153 case V2DImode:
13154 type = v2di_ftype_v2di_v2di;
13155 break;
13156 case V2DFmode:
13157 type = v2df_ftype_v2df_v2df;
13158 break;
13159 case TImode:
13160 type = ti_ftype_ti_ti;
13161 break;
13162 case V4SFmode:
13163 type = v4sf_ftype_v4sf_v4sf;
13164 break;
13165 case V8QImode:
13166 type = v8qi_ftype_v8qi_v8qi;
13167 break;
13168 case V4HImode:
13169 type = v4hi_ftype_v4hi_v4hi;
13170 break;
13171 case V2SImode:
13172 type = v2si_ftype_v2si_v2si;
13173 break;
13174 case DImode:
13175 type = di_ftype_di_di;
13176 break;
13178 default:
13179 abort ();
13182 /* Override for comparisons. */
13183 if (d->icode == CODE_FOR_maskcmpv4sf3
13184 || d->icode == CODE_FOR_maskncmpv4sf3
13185 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13186 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13187 type = v4si_ftype_v4sf_v4sf;
13189 if (d->icode == CODE_FOR_maskcmpv2df3
13190 || d->icode == CODE_FOR_maskncmpv2df3
13191 || d->icode == CODE_FOR_vmmaskcmpv2df3
13192 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13193 type = v2di_ftype_v2df_v2df;
13195 def_builtin (d->mask, d->name, type, d->code);
13198 /* Add the remaining MMX insns with somewhat more complicated types. */
13199 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13200 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13201 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13202 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13203 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13205 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13206 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13207 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13209 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13210 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13212 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13213 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13215 /* comi/ucomi insns. */
13216 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13217 if (d->mask == MASK_SSE2)
13218 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13219 else
13220 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13222 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13223 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13224 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13226 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13227 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13228 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13229 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13230 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13231 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13232 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13233 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13235 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13236 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13238 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13240 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13241 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13242 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13243 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13244 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13245 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13247 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13248 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13249 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13250 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13252 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13253 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13254 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13255 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13257 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13259 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13261 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13262 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13263 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13264 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13265 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13266 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13268 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13270 /* Original 3DNow! */
13271 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13272 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13273 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13289 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13290 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13292 /* 3DNow! extension as used in the Athlon CPU. */
13293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13295 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13296 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13297 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13298 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13300 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13302 /* SSE2 */
13303 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13304 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13306 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13307 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13308 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13311 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13312 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13318 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13325 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13330 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13336 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13338 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13341 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13343 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13352 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13357 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13361 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13362 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13364 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13366 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13368 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13376 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13377 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13380 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13383 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13384 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13388 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13391 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13393 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13394 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13398 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13399 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13401 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13404 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13406 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
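/* Illustrative sketch (not part of this file): each def_builtin call above
   makes the named __builtin_ia32_* function visible to C code whenever the
   matching -m option is in effect.  With -msse2 a user could write, e.g.:

	typedef int __v4si __attribute__ ((mode (V4SI)));

	__v4si
	shift_left_words (__v4si x)
	{
	  return __builtin_ia32_pslldi128 (x, 2);
	}

   and ix86_expand_builtin below maps the call onto the corresponding pslld
   instruction pattern.  The vector typedef spelling is only an assumption
   here; the intrinsic headers provide the supported definitions.  */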
13409 /* Errors in the source file can cause expand_expr to return const0_rtx
13410 where we expect a vector. To avoid crashing, use one of the vector
13411 clear instructions. */
13412 static rtx
13413 safe_vector_operand (x, mode)
13414 rtx x;
13415 enum machine_mode mode;
13417 if (x != const0_rtx)
13418 return x;
13419 x = gen_reg_rtx (mode);
13421 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13422 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13423 : gen_rtx_SUBREG (DImode, x, 0)));
13424 else
13425 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13426 : gen_rtx_SUBREG (V4SFmode, x, 0),
13427 CONST0_RTX (V4SFmode)));
13428 return x;
13431 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13433 static rtx
13434 ix86_expand_binop_builtin (icode, arglist, target)
13435 enum insn_code icode;
13436 tree arglist;
13437 rtx target;
13439 rtx pat;
13440 tree arg0 = TREE_VALUE (arglist);
13441 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13442 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13443 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13444 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13445 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13446 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13448 if (VECTOR_MODE_P (mode0))
13449 op0 = safe_vector_operand (op0, mode0);
13450 if (VECTOR_MODE_P (mode1))
13451 op1 = safe_vector_operand (op1, mode1);
13453 if (! target
13454 || GET_MODE (target) != tmode
13455 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13456 target = gen_reg_rtx (tmode);
13458 /* In case the insn wants input operands in modes different from
13459 the result, abort. */
13460 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13461 abort ();
13463 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13464 op0 = copy_to_mode_reg (mode0, op0);
13465 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13466 op1 = copy_to_mode_reg (mode1, op1);
13468 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13469 yet at most one of them may be a memory operand. This is normally
13470 enforced by expanders, but we didn't bother to create one here. */
13471 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13472 op0 = copy_to_mode_reg (mode0, op0);
13474 pat = GEN_FCN (icode) (target, op0, op1);
13475 if (! pat)
13476 return 0;
13477 emit_insn (pat);
13478 return target;
13481 /* Subroutine of ix86_expand_builtin to take care of stores. */
13483 static rtx
13484 ix86_expand_store_builtin (icode, arglist)
13485 enum insn_code icode;
13486 tree arglist;
13488 rtx pat;
13489 tree arg0 = TREE_VALUE (arglist);
13490 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13491 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13492 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13493 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13494 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13496 if (VECTOR_MODE_P (mode1))
13497 op1 = safe_vector_operand (op1, mode1);
13499 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13501 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13502 op1 = copy_to_mode_reg (mode1, op1);
13504 pat = GEN_FCN (icode) (op0, op1);
13505 if (pat)
13506 emit_insn (pat);
13507 return 0;
13510 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13512 static rtx
13513 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13514 enum insn_code icode;
13515 tree arglist;
13516 rtx target;
13517 int do_load;
13519 rtx pat;
13520 tree arg0 = TREE_VALUE (arglist);
13521 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13522 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13523 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13525 if (! target
13526 || GET_MODE (target) != tmode
13527 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13528 target = gen_reg_rtx (tmode);
13529 if (do_load)
13530 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13531 else
13533 if (VECTOR_MODE_P (mode0))
13534 op0 = safe_vector_operand (op0, mode0);
13536 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13537 op0 = copy_to_mode_reg (mode0, op0);
13540 pat = GEN_FCN (icode) (target, op0);
13541 if (! pat)
13542 return 0;
13543 emit_insn (pat);
13544 return target;
13547 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13548 sqrtss, rsqrtss, rcpss. */
13550 static rtx
13551 ix86_expand_unop1_builtin (icode, arglist, target)
13552 enum insn_code icode;
13553 tree arglist;
13554 rtx target;
13556 rtx pat;
13557 tree arg0 = TREE_VALUE (arglist);
13558 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13559 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13560 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13562 if (! target
13563 || GET_MODE (target) != tmode
13564 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13565 target = gen_reg_rtx (tmode);
13567 if (VECTOR_MODE_P (mode0))
13568 op0 = safe_vector_operand (op0, mode0);
13570 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13571 op0 = copy_to_mode_reg (mode0, op0);
13573 op1 = op0;
13574 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13575 op1 = copy_to_mode_reg (mode0, op1);
13577 pat = GEN_FCN (icode) (target, op0, op1);
13578 if (! pat)
13579 return 0;
13580 emit_insn (pat);
13581 return target;
13584 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13586 static rtx
13587 ix86_expand_sse_compare (d, arglist, target)
13588 const struct builtin_description *d;
13589 tree arglist;
13590 rtx target;
13592 rtx pat;
13593 tree arg0 = TREE_VALUE (arglist);
13594 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13595 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13596 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13597 rtx op2;
13598 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13599 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13600 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13601 enum rtx_code comparison = d->comparison;
13603 if (VECTOR_MODE_P (mode0))
13604 op0 = safe_vector_operand (op0, mode0);
13605 if (VECTOR_MODE_P (mode1))
13606 op1 = safe_vector_operand (op1, mode1);
13608 /* Swap operands if we have a comparison that isn't available in
13609 hardware. */
13610 if (d->flag)
13612 rtx tmp = gen_reg_rtx (mode1);
13613 emit_move_insn (tmp, op1);
13614 op1 = op0;
13615 op0 = tmp;
13618 if (! target
13619 || GET_MODE (target) != tmode
13620 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13621 target = gen_reg_rtx (tmode);
13623 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13624 op0 = copy_to_mode_reg (mode0, op0);
13625 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13626 op1 = copy_to_mode_reg (mode1, op1);
13628 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13629 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13630 if (! pat)
13631 return 0;
13632 emit_insn (pat);
13633 return target;
13636 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13638 static rtx
13639 ix86_expand_sse_comi (d, arglist, target)
13640 const struct builtin_description *d;
13641 tree arglist;
13642 rtx target;
13644 rtx pat;
13645 tree arg0 = TREE_VALUE (arglist);
13646 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13647 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13648 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13649 rtx op2;
13650 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13651 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13652 enum rtx_code comparison = d->comparison;
13654 if (VECTOR_MODE_P (mode0))
13655 op0 = safe_vector_operand (op0, mode0);
13656 if (VECTOR_MODE_P (mode1))
13657 op1 = safe_vector_operand (op1, mode1);
13659 /* Swap operands if we have a comparison that isn't available in
13660 hardware. */
13661 if (d->flag)
13663 rtx tmp = op1;
13664 op1 = op0;
13665 op0 = tmp;
13668 target = gen_reg_rtx (SImode);
13669 emit_move_insn (target, const0_rtx);
13670 target = gen_rtx_SUBREG (QImode, target, 0);
13672 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13673 op0 = copy_to_mode_reg (mode0, op0);
13674 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13675 op1 = copy_to_mode_reg (mode1, op1);
13677 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13678 pat = GEN_FCN (d->icode) (op0, op1);
13679 if (! pat)
13680 return 0;
13681 emit_insn (pat);
13682 emit_insn (gen_rtx_SET (VOIDmode,
13683 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13684 gen_rtx_fmt_ee (comparison, QImode,
13685 SET_DEST (pat),
13686 const0_rtx)));
13688 return SUBREG_REG (target);
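/* For illustration, a comi builtin such as __builtin_ia32_comieq expands
   through the code above into roughly

	comiss	%xmm1, %xmm0
	set<cc>	%al

   i.e. the comparison pattern followed by a setcc (whose condition is taken
   from d->comparison, possibly after the operand swap) written into the low
   byte of a zero-initialized SImode pseudo; the enclosing SImode register is
   what gets returned.  */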
13691 /* Expand an expression EXP that calls a built-in function,
13692 with result going to TARGET if that's convenient
13693 (and in mode MODE if that's convenient).
13694 SUBTARGET may be used as the target for computing one of EXP's operands.
13695 IGNORE is nonzero if the value is to be ignored. */
13697 rtx
13698 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13699 tree exp;
13700 rtx target;
13701 rtx subtarget ATTRIBUTE_UNUSED;
13702 enum machine_mode mode ATTRIBUTE_UNUSED;
13703 int ignore ATTRIBUTE_UNUSED;
13705 const struct builtin_description *d;
13706 size_t i;
13707 enum insn_code icode;
13708 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13709 tree arglist = TREE_OPERAND (exp, 1);
13710 tree arg0, arg1, arg2;
13711 rtx op0, op1, op2, pat;
13712 enum machine_mode tmode, mode0, mode1, mode2;
13713 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13715 switch (fcode)
13717 case IX86_BUILTIN_EMMS:
13718 emit_insn (gen_emms ());
13719 return 0;
13721 case IX86_BUILTIN_SFENCE:
13722 emit_insn (gen_sfence ());
13723 return 0;
13725 case IX86_BUILTIN_PEXTRW:
13726 case IX86_BUILTIN_PEXTRW128:
13727 icode = (fcode == IX86_BUILTIN_PEXTRW
13728 ? CODE_FOR_mmx_pextrw
13729 : CODE_FOR_sse2_pextrw);
13730 arg0 = TREE_VALUE (arglist);
13731 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13732 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13733 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13734 tmode = insn_data[icode].operand[0].mode;
13735 mode0 = insn_data[icode].operand[1].mode;
13736 mode1 = insn_data[icode].operand[2].mode;
13738 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13739 op0 = copy_to_mode_reg (mode0, op0);
13740 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13742 /* @@@ better error message */
13743 error ("selector must be an immediate");
13744 return gen_reg_rtx (tmode);
13746 if (target == 0
13747 || GET_MODE (target) != tmode
13748 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13749 target = gen_reg_rtx (tmode);
13750 pat = GEN_FCN (icode) (target, op0, op1);
13751 if (! pat)
13752 return 0;
13753 emit_insn (pat);
13754 return target;
13756 case IX86_BUILTIN_PINSRW:
13757 case IX86_BUILTIN_PINSRW128:
13758 icode = (fcode == IX86_BUILTIN_PINSRW
13759 ? CODE_FOR_mmx_pinsrw
13760 : CODE_FOR_sse2_pinsrw);
13761 arg0 = TREE_VALUE (arglist);
13762 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13763 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13764 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13765 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13766 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13767 tmode = insn_data[icode].operand[0].mode;
13768 mode0 = insn_data[icode].operand[1].mode;
13769 mode1 = insn_data[icode].operand[2].mode;
13770 mode2 = insn_data[icode].operand[3].mode;
13772 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13773 op0 = copy_to_mode_reg (mode0, op0);
13774 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13775 op1 = copy_to_mode_reg (mode1, op1);
13776 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13778 /* @@@ better error message */
13779 error ("selector must be an immediate");
13780 return const0_rtx;
13782 if (target == 0
13783 || GET_MODE (target) != tmode
13784 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13785 target = gen_reg_rtx (tmode);
13786 pat = GEN_FCN (icode) (target, op0, op1, op2);
13787 if (! pat)
13788 return 0;
13789 emit_insn (pat);
13790 return target;
13792 case IX86_BUILTIN_MASKMOVQ:
13793 case IX86_BUILTIN_MASKMOVDQU:
13794 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13795 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13796 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13797 : CODE_FOR_sse2_maskmovdqu));
13798 /* Note the arg order is different from the operand order. */
13799 arg1 = TREE_VALUE (arglist);
13800 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13801 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13802 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13803 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13804 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13805 mode0 = insn_data[icode].operand[0].mode;
13806 mode1 = insn_data[icode].operand[1].mode;
13807 mode2 = insn_data[icode].operand[2].mode;
13809 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13810 op0 = copy_to_mode_reg (mode0, op0);
13811 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13812 op1 = copy_to_mode_reg (mode1, op1);
13813 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13814 op2 = copy_to_mode_reg (mode2, op2);
13815 pat = GEN_FCN (icode) (op0, op1, op2);
13816 if (! pat)
13817 return 0;
13818 emit_insn (pat);
13819 return 0;
13821 case IX86_BUILTIN_SQRTSS:
13822 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13823 case IX86_BUILTIN_RSQRTSS:
13824 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13825 case IX86_BUILTIN_RCPSS:
13826 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13828 case IX86_BUILTIN_LOADAPS:
13829 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13831 case IX86_BUILTIN_LOADUPS:
13832 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13834 case IX86_BUILTIN_STOREAPS:
13835 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13837 case IX86_BUILTIN_STOREUPS:
13838 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13840 case IX86_BUILTIN_LOADSS:
13841 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13843 case IX86_BUILTIN_STORESS:
13844 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13846 case IX86_BUILTIN_LOADHPS:
13847 case IX86_BUILTIN_LOADLPS:
13848 case IX86_BUILTIN_LOADHPD:
13849 case IX86_BUILTIN_LOADLPD:
13850 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13851 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13852 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13853 : CODE_FOR_sse2_movlpd);
13854 arg0 = TREE_VALUE (arglist);
13855 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13856 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13857 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13858 tmode = insn_data[icode].operand[0].mode;
13859 mode0 = insn_data[icode].operand[1].mode;
13860 mode1 = insn_data[icode].operand[2].mode;
13862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13863 op0 = copy_to_mode_reg (mode0, op0);
13864 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13865 if (target == 0
13866 || GET_MODE (target) != tmode
13867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13868 target = gen_reg_rtx (tmode);
13869 pat = GEN_FCN (icode) (target, op0, op1);
13870 if (! pat)
13871 return 0;
13872 emit_insn (pat);
13873 return target;
13875 case IX86_BUILTIN_STOREHPS:
13876 case IX86_BUILTIN_STORELPS:
13877 case IX86_BUILTIN_STOREHPD:
13878 case IX86_BUILTIN_STORELPD:
13879 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13880 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13881 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13882 : CODE_FOR_sse2_movlpd);
13883 arg0 = TREE_VALUE (arglist);
13884 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13885 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13886 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13887 mode0 = insn_data[icode].operand[1].mode;
13888 mode1 = insn_data[icode].operand[2].mode;
13890 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13891 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13892 op1 = copy_to_mode_reg (mode1, op1);
13894 pat = GEN_FCN (icode) (op0, op0, op1);
13895 if (! pat)
13896 return 0;
13897 emit_insn (pat);
13898 return 0;
13900 case IX86_BUILTIN_MOVNTPS:
13901 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13902 case IX86_BUILTIN_MOVNTQ:
13903 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13905 case IX86_BUILTIN_LDMXCSR:
13906 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13907 target = assign_386_stack_local (SImode, 0);
13908 emit_move_insn (target, op0);
13909 emit_insn (gen_ldmxcsr (target));
13910 return 0;
13912 case IX86_BUILTIN_STMXCSR:
13913 target = assign_386_stack_local (SImode, 0);
13914 emit_insn (gen_stmxcsr (target));
13915 return copy_to_mode_reg (SImode, target);
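/* Sketch of how the two cases above are used from C (assumes -msse); the
   detour through a stack slot is what gives ldmxcsr/stmxcsr their memory
   operand:

	unsigned int mxcsr = __builtin_ia32_stmxcsr ();
	mxcsr |= 0x8000;
	__builtin_ia32_ldmxcsr (mxcsr);

   Bit 15 of MXCSR is flush-to-zero; the constant is only illustrative.  */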
13917 case IX86_BUILTIN_SHUFPS:
13918 case IX86_BUILTIN_SHUFPD:
13919 icode = (fcode == IX86_BUILTIN_SHUFPS
13920 ? CODE_FOR_sse_shufps
13921 : CODE_FOR_sse2_shufpd);
13922 arg0 = TREE_VALUE (arglist);
13923 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13924 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13925 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13926 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13927 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13928 tmode = insn_data[icode].operand[0].mode;
13929 mode0 = insn_data[icode].operand[1].mode;
13930 mode1 = insn_data[icode].operand[2].mode;
13931 mode2 = insn_data[icode].operand[3].mode;
13933 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13934 op0 = copy_to_mode_reg (mode0, op0);
13935 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13936 op1 = copy_to_mode_reg (mode1, op1);
13937 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13939 /* @@@ better error message */
13940 error ("mask must be an immediate");
13941 return gen_reg_rtx (tmode);
13943 if (target == 0
13944 || GET_MODE (target) != tmode
13945 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13946 target = gen_reg_rtx (tmode);
13947 pat = GEN_FCN (icode) (target, op0, op1, op2);
13948 if (! pat)
13949 return 0;
13950 emit_insn (pat);
13951 return target;
13953 case IX86_BUILTIN_PSHUFW:
13954 case IX86_BUILTIN_PSHUFD:
13955 case IX86_BUILTIN_PSHUFHW:
13956 case IX86_BUILTIN_PSHUFLW:
13957 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13958 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13959 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13960 : CODE_FOR_mmx_pshufw);
13961 arg0 = TREE_VALUE (arglist);
13962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13963 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13964 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13965 tmode = insn_data[icode].operand[0].mode;
13966 mode1 = insn_data[icode].operand[1].mode;
13967 mode2 = insn_data[icode].operand[2].mode;
13969 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13970 op0 = copy_to_mode_reg (mode1, op0);
13971 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13973 /* @@@ better error message */
13974 error ("mask must be an immediate");
13975 return const0_rtx;
13977 if (target == 0
13978 || GET_MODE (target) != tmode
13979 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13980 target = gen_reg_rtx (tmode);
13981 pat = GEN_FCN (icode) (target, op0, op1);
13982 if (! pat)
13983 return 0;
13984 emit_insn (pat);
13985 return target;
13987 case IX86_BUILTIN_PSLLDQI128:
13988 case IX86_BUILTIN_PSRLDQI128:
13989 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13990 : CODE_FOR_sse2_lshrti3);
13991 arg0 = TREE_VALUE (arglist);
13992 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13993 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13994 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13995 tmode = insn_data[icode].operand[0].mode;
13996 mode1 = insn_data[icode].operand[1].mode;
13997 mode2 = insn_data[icode].operand[2].mode;
13999 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14001 op0 = copy_to_reg (op0);
14002 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14004 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14006 error ("shift must be an immediate");
14007 return const0_rtx;
14009 target = gen_reg_rtx (V2DImode);
14010 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14011 if (! pat)
14012 return 0;
14013 emit_insn (pat);
14014 return target;
14016 case IX86_BUILTIN_FEMMS:
14017 emit_insn (gen_femms ());
14018 return NULL_RTX;
14020 case IX86_BUILTIN_PAVGUSB:
14021 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14023 case IX86_BUILTIN_PF2ID:
14024 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14026 case IX86_BUILTIN_PFACC:
14027 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14029 case IX86_BUILTIN_PFADD:
14030 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14032 case IX86_BUILTIN_PFCMPEQ:
14033 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14035 case IX86_BUILTIN_PFCMPGE:
14036 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14038 case IX86_BUILTIN_PFCMPGT:
14039 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14041 case IX86_BUILTIN_PFMAX:
14042 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14044 case IX86_BUILTIN_PFMIN:
14045 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14047 case IX86_BUILTIN_PFMUL:
14048 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14050 case IX86_BUILTIN_PFRCP:
14051 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14053 case IX86_BUILTIN_PFRCPIT1:
14054 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14056 case IX86_BUILTIN_PFRCPIT2:
14057 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14059 case IX86_BUILTIN_PFRSQIT1:
14060 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14062 case IX86_BUILTIN_PFRSQRT:
14063 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14065 case IX86_BUILTIN_PFSUB:
14066 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14068 case IX86_BUILTIN_PFSUBR:
14069 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14071 case IX86_BUILTIN_PI2FD:
14072 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14074 case IX86_BUILTIN_PMULHRW:
14075 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14077 case IX86_BUILTIN_PF2IW:
14078 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14080 case IX86_BUILTIN_PFNACC:
14081 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14083 case IX86_BUILTIN_PFPNACC:
14084 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14086 case IX86_BUILTIN_PI2FW:
14087 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14089 case IX86_BUILTIN_PSWAPDSI:
14090 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14092 case IX86_BUILTIN_PSWAPDSF:
14093 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14095 case IX86_BUILTIN_SSE_ZERO:
14096 target = gen_reg_rtx (V4SFmode);
14097 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14098 return target;
14100 case IX86_BUILTIN_MMX_ZERO:
14101 target = gen_reg_rtx (DImode);
14102 emit_insn (gen_mmx_clrdi (target));
14103 return target;
14105 case IX86_BUILTIN_CLRTI:
14106 target = gen_reg_rtx (V2DImode);
14107 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14108 return target;
14111 case IX86_BUILTIN_SQRTSD:
14112 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14113 case IX86_BUILTIN_LOADAPD:
14114 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14115 case IX86_BUILTIN_LOADUPD:
14116 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14118 case IX86_BUILTIN_STOREAPD:
14119 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14120 case IX86_BUILTIN_STOREUPD:
14121 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14123 case IX86_BUILTIN_LOADSD:
14124 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14126 case IX86_BUILTIN_STORESD:
14127 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14129 case IX86_BUILTIN_SETPD1:
14130 target = assign_386_stack_local (DFmode, 0);
14131 arg0 = TREE_VALUE (arglist);
14132 emit_move_insn (adjust_address (target, DFmode, 0),
14133 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14134 op0 = gen_reg_rtx (V2DFmode);
14135 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14136 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14137 return op0;
14139 case IX86_BUILTIN_SETPD:
14140 target = assign_386_stack_local (V2DFmode, 0);
14141 arg0 = TREE_VALUE (arglist);
14142 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14143 emit_move_insn (adjust_address (target, DFmode, 0),
14144 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14145 emit_move_insn (adjust_address (target, DFmode, 8),
14146 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14147 op0 = gen_reg_rtx (V2DFmode);
14148 emit_insn (gen_sse2_movapd (op0, target));
14149 return op0;
14151 case IX86_BUILTIN_LOADRPD:
14152 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14153 gen_reg_rtx (V2DFmode), 1);
14154 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14155 return target;
14157 case IX86_BUILTIN_LOADPD1:
14158 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14159 gen_reg_rtx (V2DFmode), 1);
14160 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14161 return target;
14163 case IX86_BUILTIN_STOREPD1:
14164 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14165 case IX86_BUILTIN_STORERPD:
14166 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14168 case IX86_BUILTIN_CLRPD:
14169 target = gen_reg_rtx (V2DFmode);
14170 emit_insn (gen_sse_clrv2df (target));
14171 return target;
14173 case IX86_BUILTIN_MFENCE:
14174 emit_insn (gen_sse2_mfence ());
14175 return 0;
14176 case IX86_BUILTIN_LFENCE:
14177 emit_insn (gen_sse2_lfence ());
14178 return 0;
14180 case IX86_BUILTIN_CLFLUSH:
14181 arg0 = TREE_VALUE (arglist);
14182 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14183 icode = CODE_FOR_sse2_clflush;
14184 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14185 op0 = copy_to_mode_reg (Pmode, op0);
14187 emit_insn (gen_sse2_clflush (op0));
14188 return 0;
14190 case IX86_BUILTIN_MOVNTPD:
14191 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14192 case IX86_BUILTIN_MOVNTDQ:
14193 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14194 case IX86_BUILTIN_MOVNTI:
14195 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14197 case IX86_BUILTIN_LOADDQA:
14198 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14199 case IX86_BUILTIN_LOADDQU:
14200 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14201 case IX86_BUILTIN_LOADD:
14202 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14204 case IX86_BUILTIN_STOREDQA:
14205 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14206 case IX86_BUILTIN_STOREDQU:
14207 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14208 case IX86_BUILTIN_STORED:
14209 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14211 default:
14212 break;
14215 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14216 if (d->code == fcode)
14218 /* Compares are treated specially. */
14219 if (d->icode == CODE_FOR_maskcmpv4sf3
14220 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14221 || d->icode == CODE_FOR_maskncmpv4sf3
14222 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14223 || d->icode == CODE_FOR_maskcmpv2df3
14224 || d->icode == CODE_FOR_vmmaskcmpv2df3
14225 || d->icode == CODE_FOR_maskncmpv2df3
14226 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14227 return ix86_expand_sse_compare (d, arglist, target);
14229 return ix86_expand_binop_builtin (d->icode, arglist, target);
14232 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14233 if (d->code == fcode)
14234 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14236 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14237 if (d->code == fcode)
14238 return ix86_expand_sse_comi (d, arglist, target);
14240 /* @@@ Should really do something sensible here. */
14241 return 0;
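/* To illustrate the dispatch above: a builtin with no special-case handling,
   say the one behind _mm_add_ps, is not listed in the switch; its fcode is
   found in bdesc_2arg (defined earlier in this file) and it is expanded by
   ix86_expand_binop_builtin using the insn code recorded in that table,
   while the mask-compare icodes are routed to ix86_expand_sse_compare.  */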
14244 /* Store OPERAND to memory after reload is completed. This means
14245 that we can't easily use assign_stack_local. */
14246 rtx
14247 ix86_force_to_memory (mode, operand)
14248 enum machine_mode mode;
14249 rtx operand;
14251 rtx result;
14252 if (!reload_completed)
14253 abort ();
14254 if (TARGET_64BIT && TARGET_RED_ZONE)
14256 result = gen_rtx_MEM (mode,
14257 gen_rtx_PLUS (Pmode,
14258 stack_pointer_rtx,
14259 GEN_INT (-RED_ZONE_SIZE)));
14260 emit_move_insn (result, operand);
14262 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14264 switch (mode)
14266 case HImode:
14267 case SImode:
14268 operand = gen_lowpart (DImode, operand);
14269 /* FALLTHRU */
14270 case DImode:
14271 emit_insn (
14272 gen_rtx_SET (VOIDmode,
14273 gen_rtx_MEM (DImode,
14274 gen_rtx_PRE_DEC (DImode,
14275 stack_pointer_rtx)),
14276 operand));
14277 break;
14278 default:
14279 abort ();
14281 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14283 else
14285 switch (mode)
14287 case DImode:
14289 rtx operands[2];
14290 split_di (&operand, 1, operands, operands + 1);
14291 emit_insn (
14292 gen_rtx_SET (VOIDmode,
14293 gen_rtx_MEM (SImode,
14294 gen_rtx_PRE_DEC (Pmode,
14295 stack_pointer_rtx)),
14296 operands[1]));
14297 emit_insn (
14298 gen_rtx_SET (VOIDmode,
14299 gen_rtx_MEM (SImode,
14300 gen_rtx_PRE_DEC (Pmode,
14301 stack_pointer_rtx)),
14302 operands[0]));
14304 break;
14305 case HImode:
14306 /* It is better to store HImodes as SImodes. */
14307 if (!TARGET_PARTIAL_REG_STALL)
14308 operand = gen_lowpart (SImode, operand);
14309 /* FALLTHRU */
14310 case SImode:
14311 emit_insn (
14312 gen_rtx_SET (VOIDmode,
14313 gen_rtx_MEM (GET_MODE (operand),
14314 gen_rtx_PRE_DEC (SImode,
14315 stack_pointer_rtx)),
14316 operand));
14317 break;
14318 default:
14319 abort ();
14321 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14323 return result;
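/* For example, on x86-64 with the red zone available, forcing a DImode value
   to memory above emits a plain store such as

	movq	%rax, -128(%rsp)

   (-RED_ZONE_SIZE; the x86-64 ABI red zone is 128 bytes), while without the
   red zone, or on 32-bit, the value is pushed and ix86_free_from_memory
   below releases the slot again.  */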
14326 /* Free the operand from memory. */
14327 void
14328 ix86_free_from_memory (mode)
14329 enum machine_mode mode;
14331 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14333 int size;
14335 if (mode == DImode || TARGET_64BIT)
14336 size = 8;
14337 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14338 size = 2;
14339 else
14340 size = 4;
14341 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14342 to a pop or add instruction if registers are available. */
14343 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14344 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14345 GEN_INT (size))));
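/* The peephole2 pass may later turn the LEA emitted above into, e.g.,

	addl	$4, %esp	or	popl	%ecx

   when the flags are dead or a scratch register is free; LEA is emitted
   here because, unlike add, it does not clobber the flags.  */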
14349 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14350 QImode must go into class Q_REGS.
14351 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
14352 movdf to do mem-to-mem moves through integer regs. */
14353 enum reg_class
14354 ix86_preferred_reload_class (x, class)
14355 rtx x;
14356 enum reg_class class;
14358 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14359 return NO_REGS;
14360 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14362 /* SSE can't load any constant directly yet. */
14363 if (SSE_CLASS_P (class))
14364 return NO_REGS;
14365 /* Floats can load 0 and 1. */
14366 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14368 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14369 if (MAYBE_SSE_CLASS_P (class))
14370 return (reg_class_subset_p (class, GENERAL_REGS)
14371 ? GENERAL_REGS : FLOAT_REGS);
14372 else
14373 return class;
14375 /* General regs can load everything. */
14376 if (reg_class_subset_p (class, GENERAL_REGS))
14377 return GENERAL_REGS;
14378 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14379 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14380 return NO_REGS;
14382 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14383 return NO_REGS;
14384 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14385 return Q_REGS;
14386 return class;
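/* Example of the effect: reloading an arbitrary CONST_DOUBLE such as 2.5
   into SSE_REGS (or into FLOAT_REGS) yields NO_REGS, so the constant is
   loaded from the constant pool instead; only 0.0 and 1.0 may stay headed
   for the FP registers, courtesy of fldz/fld1.  */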
14389 /* If we are copying between general and FP registers, we need a memory
14390 location. The same is true for SSE and MMX registers.
14392 The macro can't work reliably when one of the CLASSES is a class containing
14393 registers from multiple units (SSE, MMX, integer). We avoid this by never
14394 combining those units in a single alternative in the machine description.
14395 Ensure that this constraint holds to avoid unexpected surprises.
14397 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14398 enforce these sanity checks. */
14399 int
14400 ix86_secondary_memory_needed (class1, class2, mode, strict)
14401 enum reg_class class1, class2;
14402 enum machine_mode mode;
14403 int strict;
14405 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14406 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14407 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14408 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14409 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14410 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14412 if (strict)
14413 abort ();
14414 else
14415 return 1;
14417 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14418 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14419 && (mode) != SImode)
14420 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14421 && (mode) != SImode));
14423 /* Return the cost of moving data from a register in class CLASS1 to
14424 one in class CLASS2.
14426 It is not required that the cost always equal 2 when FROM is the same as TO;
14427 on some machines it is expensive to move between registers if they are not
14428 general registers. */
14429 int
14430 ix86_register_move_cost (mode, class1, class2)
14431 enum machine_mode mode;
14432 enum reg_class class1, class2;
14434 /* In case we require secondary memory, compute the cost of the store
14435 followed by the load. To avoid bad register allocation choices, this
14436 needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14438 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14440 int cost = 1;
14442 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14443 MEMORY_MOVE_COST (mode, class1, 1));
14444 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14445 MEMORY_MOVE_COST (mode, class2, 1));
14447 /* When copying from a general purpose register we may emit multiple
14448 stores followed by a single load, causing a memory size mismatch stall.
14449 Count this with an arbitrarily high cost of 20. */
14450 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14451 cost += 20;
14453 /* In the case of FP/MMX moves, the registers actually overlap, and we
14454 have to switch modes in order to treat them differently. */
14455 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14456 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14457 cost += 20;
14459 return cost;
14462 /* Moves between SSE/MMX and integer unit are expensive. */
14463 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14464 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14465 return ix86_cost->mmxsse_to_integer;
14466 if (MAYBE_FLOAT_CLASS_P (class1))
14467 return ix86_cost->fp_move;
14468 if (MAYBE_SSE_CLASS_P (class1))
14469 return ix86_cost->sse_move;
14470 if (MAYBE_MMX_CLASS_P (class1))
14471 return ix86_cost->mmx_move;
14472 return 2;
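/* Worked example (32-bit): copying a DFmode value from GENERAL_REGS to
   FLOAT_REGS needs secondary memory, so the result is
   1 + max (integer store, integer load) + max (fp store, fp load) for
   DFmode, plus 20 because the source occupies two general registers but
   the destination only one FP register (the stores-then-load stall case).  */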
14475 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14476 int
14477 ix86_hard_regno_mode_ok (regno, mode)
14478 int regno;
14479 enum machine_mode mode;
14481 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
14482 if (CC_REGNO_P (regno))
14483 return GET_MODE_CLASS (mode) == MODE_CC;
14484 if (GET_MODE_CLASS (mode) == MODE_CC
14485 || GET_MODE_CLASS (mode) == MODE_RANDOM
14486 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14487 return 0;
14488 if (FP_REGNO_P (regno))
14489 return VALID_FP_MODE_P (mode);
14490 if (SSE_REGNO_P (regno))
14491 return VALID_SSE_REG_MODE (mode);
14492 if (MMX_REGNO_P (regno))
14493 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14494 /* We handle both integer and floating point values in the general purpose
14495 registers. In the future we should be able to handle vector modes as well. */
14496 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14497 return 0;
14498 /* Take care of QImode values - they can live in non-QI regs, but then
14499 they do cause partial register stalls. */
14500 if (regno < 4 || mode != QImode || TARGET_64BIT)
14501 return 1;
14502 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
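/* For example, on 32-bit targets a QImode pseudo may end up in %esi or %edi
   (regno >= 4, no byte subregisters) only when partial register stalls are
   not being avoided, or once reload is in progress or completed and there
   is no better choice.  */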
14505 /* Return the cost of moving data of mode M between a
14506 register and memory. A value of 2 is the default; this cost is
14507 relative to those in `REGISTER_MOVE_COST'.
14509 If moving between registers and memory is more expensive than
14510 between two registers, you should define this macro to express the
14511 relative cost.
14513 Also model the increased cost of moving QImode registers in
14514 non-Q_REGS classes.  */
14516 int
14517 ix86_memory_move_cost (mode, class, in)
14518 enum machine_mode mode;
14519 enum reg_class class;
14520 int in;
14522 if (FLOAT_CLASS_P (class))
14524 int index;
14525 switch (mode)
14527 case SFmode:
14528 index = 0;
14529 break;
14530 case DFmode:
14531 index = 1;
14532 break;
14533 case XFmode:
14534 case TFmode:
14535 index = 2;
14536 break;
14537 default:
14538 return 100;
14540 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14542 if (SSE_CLASS_P (class))
14544 int index;
14545 switch (GET_MODE_SIZE (mode))
14547 case 4:
14548 index = 0;
14549 break;
14550 case 8:
14551 index = 1;
14552 break;
14553 case 16:
14554 index = 2;
14555 break;
14556 default:
14557 return 100;
14559 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14561 if (MMX_CLASS_P (class))
14563 int index;
14564 switch (GET_MODE_SIZE (mode))
14566 case 4:
14567 index = 0;
14568 break;
14569 case 8:
14570 index = 1;
14571 break;
14572 default:
14573 return 100;
14575 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14577 switch (GET_MODE_SIZE (mode))
14579 case 1:
14580 if (in)
14581 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14582 : ix86_cost->movzbl_load);
14583 else
14584 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14585 : ix86_cost->int_store[0] + 4);
14586 break;
14587 case 2:
14588 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14589 default:
14590 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14591 if (mode == TFmode)
14592 mode = XFmode;
14593 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14594 * ((int) GET_MODE_SIZE (mode)
14595 + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
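/* Worked example for the default case above: a DImode integer access on a
   32-bit target has GET_MODE_SIZE 8 and UNITS_PER_WORD 4, so the returned
   cost is int_load[2] (or int_store[2]) * (8 + 4 - 1) / 4, i.e. the
   per-word move cost scaled by roughly the two 32-bit moves required.  */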
14599 /* Compute a (partial) cost for rtx X. Return true if the complete
14600 cost has been computed, and false if subexpressions should be
14601 scanned. In either case, *TOTAL contains the cost result. */
14603 static bool
14604 ix86_rtx_costs (x, code, outer_code, total)
14605 rtx x;
14606 int code, outer_code;
14607 int *total;
14609 enum machine_mode mode = GET_MODE (x);
14611 switch (code)
14613 case CONST_INT:
14614 case CONST:
14615 case LABEL_REF:
14616 case SYMBOL_REF:
14617 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14618 *total = 3;
14619 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14620 *total = 2;
14621 else if (flag_pic && SYMBOLIC_CONST (x))
14622 *total = 1;
14623 else
14624 *total = 0;
14625 return true;
14627 case CONST_DOUBLE:
14628 if (mode == VOIDmode)
14629 *total = 0;
14630 else
14631 switch (standard_80387_constant_p (x))
14633 case 1: /* 0.0 */
14634 *total = 1;
14635 break;
14636 case 2: /* 1.0 */
14637 *total = 2;
14638 break;
14639 default:
14640 /* Start with (MEM (SYMBOL_REF)), since that's where
14641 it'll probably end up. Add a penalty for size. */
14642 *total = (COSTS_N_INSNS (1)
14643 + (flag_pic != 0)
14644 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14645 break;
14647 return true;
14649 case ZERO_EXTEND:
14650 /* Zero extension is often completely free on x86_64, so make
14651 it as cheap as possible. */
14652 if (TARGET_64BIT && mode == DImode
14653 && GET_MODE (XEXP (x, 0)) == SImode)
14654 *total = 1;
14655 else if (TARGET_ZERO_EXTEND_WITH_AND)
14656 *total = COSTS_N_INSNS (ix86_cost->add);
14657 else
14658 *total = COSTS_N_INSNS (ix86_cost->movzx);
14659 return false;
14661 case SIGN_EXTEND:
14662 *total = COSTS_N_INSNS (ix86_cost->movsx);
14663 return false;
14665 case ASHIFT:
14666 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14667 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14669 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14670 if (value == 1)
14672 *total = COSTS_N_INSNS (ix86_cost->add);
14673 return false;
14675 if ((value == 2 || value == 3)
14676 && !TARGET_DECOMPOSE_LEA
14677 && ix86_cost->lea <= ix86_cost->shift_const)
14679 *total = COSTS_N_INSNS (ix86_cost->lea);
14680 return false;
14683 /* FALLTHRU */
14685 case ROTATE:
14686 case ASHIFTRT:
14687 case LSHIFTRT:
14688 case ROTATERT:
14689 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14691 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14693 if (INTVAL (XEXP (x, 1)) > 32)
14694 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
14695 else
14696 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
14698 else
14700 if (GET_CODE (XEXP (x, 1)) == AND)
14701 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
14702 else
14703 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
14706 else
14708 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14709 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14710 else
14711 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14713 return false;
14715 case MULT:
14716 if (FLOAT_MODE_P (mode))
14717 *total = COSTS_N_INSNS (ix86_cost->fmul);
14718 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14720 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14721 int nbits;
14723 for (nbits = 0; value != 0; value >>= 1)
14724 nbits++;
14726 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14727 + nbits * ix86_cost->mult_bit);
14729 else
14731 /* This is arbitrary */
14732 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14733 + 7 * ix86_cost->mult_bit);
14735 return false;
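/* Example of the constant-multiply costing above: for x * 10 the constant
   is 1010 in binary, the loop shifts until the value reaches zero and so
   counts nbits = 4 (the position of the highest set bit plus one), giving
   mult_init[MODE_INDEX (mode)] + 4 * mult_bit.  */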
14737 case DIV:
14738 case UDIV:
14739 case MOD:
14740 case UMOD:
14741 if (FLOAT_MODE_P (mode))
14742 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14743 else
14744 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14745 return false;
14747 case PLUS:
14748 if (FLOAT_MODE_P (mode))
14749 *total = COSTS_N_INSNS (ix86_cost->fadd);
14750 else if (!TARGET_DECOMPOSE_LEA
14751 && GET_MODE_CLASS (mode) == MODE_INT
14752 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14754 if (GET_CODE (XEXP (x, 0)) == PLUS
14755 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14756 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14757 && CONSTANT_P (XEXP (x, 1)))
14759 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14760 if (val == 2 || val == 4 || val == 8)
14762 *total = COSTS_N_INSNS (ix86_cost->lea);
14763 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14764 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14765 outer_code);
14766 *total += rtx_cost (XEXP (x, 1), outer_code);
14767 return true;
14770 else if (GET_CODE (XEXP (x, 0)) == MULT
14771 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14773 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14774 if (val == 2 || val == 4 || val == 8)
14776 *total = COSTS_N_INSNS (ix86_cost->lea);
14777 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14778 *total += rtx_cost (XEXP (x, 1), outer_code);
14779 return true;
14782 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14784 *total = COSTS_N_INSNS (ix86_cost->lea);
14785 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14786 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14787 *total += rtx_cost (XEXP (x, 1), outer_code);
14788 return true;
14791 /* FALLTHRU */
14793 case MINUS:
14794 if (FLOAT_MODE_P (mode))
14796 *total = COSTS_N_INSNS (ix86_cost->fadd);
14797 return false;
14799 /* FALLTHRU */
14801 case AND:
14802 case IOR:
14803 case XOR:
14804 if (!TARGET_64BIT && mode == DImode)
14806 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14807 + (rtx_cost (XEXP (x, 0), outer_code)
14808 << (GET_MODE (XEXP (x, 0)) != DImode))
14809 + (rtx_cost (XEXP (x, 1), outer_code)
14810 << (GET_MODE (XEXP (x, 1)) != DImode)));
14811 return true;
14813 /* FALLTHRU */
14815 case NEG:
14816 if (FLOAT_MODE_P (mode))
14818 *total = COSTS_N_INSNS (ix86_cost->fchs);
14819 return false;
14821 /* FALLTHRU */
14823 case NOT:
14824 if (!TARGET_64BIT && mode == DImode)
14825 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14826 else
14827 *total = COSTS_N_INSNS (ix86_cost->add);
14828 return false;
14830 case FLOAT_EXTEND:
14831 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14832 *total = 0;
14833 return false;
14835 case ABS:
14836 if (FLOAT_MODE_P (mode))
14837 *total = COSTS_N_INSNS (ix86_cost->fabs);
14838 return false;
14840 case SQRT:
14841 if (FLOAT_MODE_P (mode))
14842 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14843 return false;
14845 default:
14846 return false;
14850 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14851 static void
14852 ix86_svr3_asm_out_constructor (symbol, priority)
14853 rtx symbol;
14854 int priority ATTRIBUTE_UNUSED;
14856 init_section ();
14857 fputs ("\tpushl $", asm_out_file);
14858 assemble_name (asm_out_file, XSTR (symbol, 0));
14859 fputc ('\n', asm_out_file);
14861 #endif
14863 #if TARGET_MACHO
14865 static int current_machopic_label_num;
14867 /* Given a symbol name and its associated stub, write out the
14868 definition of the stub. */
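/* As a hedged illustration (symbol names assumed), for a symbol "_foo" with
   stub "L_foo$stub" the non-PIC (!MACHOPIC_PURE) path below emits roughly:

   L_foo$stub:
   .indirect_symbol _foo
   jmp *L1$lz
   <binder label>:
   pushl $L1$lz
   jmp dyld_stub_binding_helper
   L1$lz:
   .indirect_symbol _foo
   .long <binder label>

   where "L1$lz" is the generated lazy-pointer label and <binder label> stands
   for whatever GEN_BINDER_NAME_FOR_STUB produces.  */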
14870 void
14871 machopic_output_stub (file, symb, stub)
14872 FILE *file;
14873 const char *symb, *stub;
14875 unsigned int length;
14876 char *binder_name, *symbol_name, lazy_ptr_name[32];
14877 int label = ++current_machopic_label_num;
14879 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14880 symb = (*targetm.strip_name_encoding) (symb);
14882 length = strlen (stub);
14883 binder_name = alloca (length + 32);
14884 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14886 length = strlen (symb);
14887 symbol_name = alloca (length + 32);
14888 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14890 sprintf (lazy_ptr_name, "L%d$lz", label);
14892 if (MACHOPIC_PURE)
14893 machopic_picsymbol_stub_section ();
14894 else
14895 machopic_symbol_stub_section ();
14897 fprintf (file, "%s:\n", stub);
14898 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14900 if (MACHOPIC_PURE)
14902 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14903 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14904 fprintf (file, "\tjmp %%edx\n");
14906 else
14907 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14909 fprintf (file, "%s:\n", binder_name);
14911 if (MACHOPIC_PURE)
14913 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14914 fprintf (file, "\tpushl %%eax\n");
14916 else
14917 fprintf (file, "\tpushl $%s\n", lazy_ptr_name);
14919 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14921 machopic_lazy_symbol_ptr_section ();
14922 fprintf (file, "%s:\n", lazy_ptr_name);
14923 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14924 fprintf (file, "\t.long %s\n", binder_name);
14926 #endif /* TARGET_MACHO */
14928 /* Order the registers for the register allocator. */
14930 void
14931 x86_order_regs_for_local_alloc ()
14933 int pos = 0;
14934 int i;
14936 /* First allocate the local general purpose registers. */
14937 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14938 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14939 reg_alloc_order [pos++] = i;
14941 /* Global general purpose registers. */
14942 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14943 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14944 reg_alloc_order [pos++] = i;
14946 /* x87 registers come first in case we are doing FP math
14947 using them. */
14948 if (!TARGET_SSE_MATH)
14949 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14950 reg_alloc_order [pos++] = i;
14952 /* SSE registers. */
14953 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14954 reg_alloc_order [pos++] = i;
14955 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14956 reg_alloc_order [pos++] = i;
14958 /* x87 registers. */
14959 if (TARGET_SSE_MATH)
14960 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14961 reg_alloc_order [pos++] = i;
14963 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14964 reg_alloc_order [pos++] = i;
14966 /* Initialize the rest of the array, as we do not allocate some registers
14967 at all. */
14968 while (pos < FIRST_PSEUDO_REGISTER)
14969 reg_alloc_order [pos++] = 0;
14972 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14973 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14974 #endif
14976 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14977 struct attribute_spec.handler. */
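/* A hedged usage sketch (not from this file): these attributes are applied to
   struct or union types, e.g.

   struct __attribute__ ((ms_struct)) s { int a : 3; int b : 29; };

   and the handler warns and drops the attribute when it is applied to
   anything else or combined with its opposite on the same type.  */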
14978 static tree
14979 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14980 tree *node;
14981 tree name;
14982 tree args ATTRIBUTE_UNUSED;
14983 int flags ATTRIBUTE_UNUSED;
14984 bool *no_add_attrs;
14986 tree *type = NULL;
14987 if (DECL_P (*node))
14989 if (TREE_CODE (*node) == TYPE_DECL)
14990 type = &TREE_TYPE (*node);
14992 else
14993 type = node;
14995 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14996 || TREE_CODE (*type) == UNION_TYPE)))
14998 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14999 *no_add_attrs = true;
15002 else if ((is_attribute_p ("ms_struct", name)
15003 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15004 || ((is_attribute_p ("gcc_struct", name)
15005 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15007 warning ("`%s' incompatible attribute ignored",
15008 IDENTIFIER_POINTER (name));
15009 *no_add_attrs = true;
15012 return NULL_TREE;
15015 static bool
15016 ix86_ms_bitfield_layout_p (record_type)
15017 tree record_type;
15019 return (TARGET_USE_MS_BITFIELD_LAYOUT
15020 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15021 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15024 /* Returns an expression indicating where the this parameter is
15025 located on entry to the FUNCTION. */
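/* Roughly speaking (an editorial gloss on the code below): on 64-bit targets
   this is the first integer argument register, or the second one when the
   return value is an aggregate passed via a hidden pointer; on ia32 with
   regparm it is %eax for prototyped non-variadic functions; otherwise it is
   the first stack argument, i.e. 4(%esp) on entry, or 8(%esp) when a hidden
   aggregate-return pointer comes first.  */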
15027 static rtx
15028 x86_this_parameter (function)
15029 tree function;
15031 tree type = TREE_TYPE (function);
15033 if (TARGET_64BIT)
15035 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15036 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15039 if (ix86_fntype_regparm (type) > 0)
15041 tree parm;
15043 parm = TYPE_ARG_TYPES (type);
15044 /* Figure out whether or not the function has a variable number of
15045 arguments. */
15046 for (; parm; parm = TREE_CHAIN (parm))
15047 if (TREE_VALUE (parm) == void_type_node)
15048 break;
15049 /* If not (the argument list is void-terminated, so the function is not variadic), the this parameter is passed in %eax. */
15050 if (parm)
15051 return gen_rtx_REG (SImode, 0);
15054 if (aggregate_value_p (TREE_TYPE (type)))
15055 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15056 else
15057 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15060 /* Determine whether x86_output_mi_thunk can succeed. */
15062 static bool
15063 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15064 tree thunk ATTRIBUTE_UNUSED;
15065 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15066 HOST_WIDE_INT vcall_offset;
15067 tree function;
15069 /* 64-bit can handle anything. */
15070 if (TARGET_64BIT)
15071 return true;
15073 /* For 32-bit, everything's fine if we have one free register. */
15074 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15075 return true;
15077 /* Need a free register for vcall_offset. */
15078 if (vcall_offset)
15079 return false;
15081 /* Need a free register for GOT references. */
15082 if (flag_pic && !(*targetm.binds_local_p) (function))
15083 return false;
15085 /* Otherwise ok. */
15086 return true;
15089 /* Output the assembler code for a thunk function. THUNK_DECL is the
15090 declaration for the thunk function itself, FUNCTION is the decl for
15091 the target function. DELTA is an immediate constant offset to be
15092 added to THIS. If VCALL_OFFSET is nonzero, the word at
15093 *(*this + vcall_offset) should be added to THIS. */
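/* A hedged example of the output: for a 32-bit, non-PIC thunk with
   DELTA == -4, no VCALL_OFFSET, and `this' living on the stack, the code
   below produces approximately

   addl $-4, 4(%esp)
   jmp <target function>

   where <target function> stands for the callee's assembler name.  */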
15095 static void
15096 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15097 FILE *file ATTRIBUTE_UNUSED;
15098 tree thunk ATTRIBUTE_UNUSED;
15099 HOST_WIDE_INT delta;
15100 HOST_WIDE_INT vcall_offset;
15101 tree function;
15103 rtx xops[3];
15104 rtx this = x86_this_parameter (function);
15105 rtx this_reg, tmp;
15107 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15108 pull it in now and let DELTA benefit. */
15109 if (REG_P (this))
15110 this_reg = this;
15111 else if (vcall_offset)
15113 /* Put the this parameter into %eax. */
15114 xops[0] = this;
15115 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15116 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15118 else
15119 this_reg = NULL_RTX;
15121 /* Adjust the this parameter by a fixed constant. */
15122 if (delta)
15124 xops[0] = GEN_INT (delta);
15125 xops[1] = this_reg ? this_reg : this;
15126 if (TARGET_64BIT)
15128 if (!x86_64_general_operand (xops[0], DImode))
15130 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15131 xops[1] = tmp;
15132 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15133 xops[0] = tmp;
15134 xops[1] = this;
15136 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15138 else
15139 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15142 /* Adjust the this parameter by a value stored in the vtable. */
15143 if (vcall_offset)
15145 if (TARGET_64BIT)
15146 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15147 else
15148 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15150 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15151 xops[1] = tmp;
15152 if (TARGET_64BIT)
15153 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15154 else
15155 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15157 /* Adjust the this parameter. */
15158 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15159 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15161 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15162 xops[0] = GEN_INT (vcall_offset);
15163 xops[1] = tmp2;
15164 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15165 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15167 xops[1] = this_reg;
15168 if (TARGET_64BIT)
15169 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15170 else
15171 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15174 /* If necessary, drop THIS back to its stack slot. */
15175 if (this_reg && this_reg != this)
15177 xops[0] = this_reg;
15178 xops[1] = this;
15179 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15182 xops[0] = DECL_RTL (function);
15183 if (TARGET_64BIT)
15185 if (!flag_pic || (*targetm.binds_local_p) (function))
15186 output_asm_insn ("jmp\t%P0", xops);
15187 else
15189 tmp = XEXP (xops[0], 0);
15190 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15191 tmp = gen_rtx_CONST (Pmode, tmp);
15192 tmp = gen_rtx_MEM (QImode, tmp);
15193 xops[0] = tmp;
15194 output_asm_insn ("jmp\t%A0", xops);
15197 else
15199 if (!flag_pic || (*targetm.binds_local_p) (function))
15200 output_asm_insn ("jmp\t%P0", xops);
15201 else
15202 #if TARGET_MACHO
15203 if (TARGET_MACHO)
15205 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15206 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15207 tmp = gen_rtx_MEM (QImode, tmp);
15208 xops[0] = tmp;
15209 output_asm_insn ("jmp\t%0", xops);
15211 else
15212 #endif /* TARGET_MACHO */
15214 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15215 output_set_got (tmp);
15217 xops[1] = tmp;
15218 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15219 output_asm_insn ("jmp\t{*}%1", xops);
15225 x86_field_alignment (field, computed)
15226 tree field;
15227 int computed;
15229 enum machine_mode mode;
15230 tree type = TREE_TYPE (field);
15232 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15233 return computed;
15234 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15235 ? get_inner_array_type (type) : type);
15236 if (mode == DFmode || mode == DCmode
15237 || GET_MODE_CLASS (mode) == MODE_INT
15238 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15239 return MIN (32, computed);
15240 return computed;
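/* An editorial note on x86_field_alignment above: the practical effect is
   that, e.g., a plain double member of a struct is aligned to 32 bits rather
   than 64 on ia32 unless -malign-double (or 64-bit mode) is in effect.  */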
15243 /* Output assembler code to FILE to increment profiler label # LABELNO
15244 for profiling a function entry. */
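/* As a hedged illustration, the 32-bit non-PIC case below boils down to
   something like

   movl $LP0,%edx
   call mcount

   with the label prefix, the register and the callee actually coming from
   LPREFIX, PROFILE_COUNT_REGISTER and MCOUNT_NAME, and the movl dropped
   entirely when NO_PROFILE_COUNTERS is defined.  */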
15245 void
15246 x86_function_profiler (file, labelno)
15247 FILE *file;
15248 int labelno ATTRIBUTE_UNUSED;
15250 if (TARGET_64BIT)
15251 if (flag_pic)
15253 #ifndef NO_PROFILE_COUNTERS
15254 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
15255 #endif
15256 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15258 else
15260 #ifndef NO_PROFILE_COUNTERS
15261 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15262 #endif
15263 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15265 else if (flag_pic)
15267 #ifndef NO_PROFILE_COUNTERS
15268 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15269 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15270 #endif
15271 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15273 else
15275 #ifndef NO_PROFILE_COUNTERS
15276 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15277 PROFILE_COUNT_REGISTER);
15278 #endif
15279 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15283 /* Implement machine-specific optimizations.
15284 At the moment we implement a single transformation: AMD Athlon works faster
15285 when RET is not the destination of a conditional jump or directly preceded
15286 by another jump instruction. We avoid the penalty by inserting a NOP just
15287 before such RET instructions. */
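/* In other words (an editorial gloss): a sequence such as

   jne .L2
   ret

   becomes "jne .L2; nop; ret" on Athlon/K8 when the return block is
   considered hot, at the cost of a single byte.  */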
15288 void
15289 x86_machine_dependent_reorg (first)
15290 rtx first ATTRIBUTE_UNUSED;
15292 edge e;
15294 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15295 return;
15296 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15298 basic_block bb = e->src;
15299 rtx ret = bb->end;
15300 rtx prev;
15301 bool insert = false;
15303 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15304 continue;
15305 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15306 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15307 break;
15308 if (prev && GET_CODE (prev) == CODE_LABEL)
15310 edge e;
15311 for (e = bb->pred; e; e = e->pred_next)
15312 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15313 && !(e->flags & EDGE_FALLTHRU))
15314 insert = 1;
15316 if (!insert)
15318 prev = prev_active_insn (ret);
15319 if (prev && GET_CODE (prev) == JUMP_INSN
15320 && any_condjump_p (prev))
15321 insert = 1;
15322 /* Empty functions get a branch mispredict even when the jump destination
15323 is not visible to us. */
15324 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15325 insert = 1;
15327 if (insert)
15328 emit_insn_before (gen_nop (), ret);
15332 /* Return nonzero when a QImode register that must be represented via a REX
15333 prefix is used. */
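/* For example (an editorial note): in 64-bit code a byte access to %sil,
   %dil, %bpl or %spl, hard registers 4 through 7, can only be encoded with a
   REX prefix, which is what the regno >= 4 test below detects.  */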
15334 bool
15335 x86_extended_QIreg_mentioned_p (insn)
15336 rtx insn;
15338 int i;
15339 extract_insn_cached (insn);
15340 for (i = 0; i < recog_data.n_operands; i++)
15341 if (REG_P (recog_data.operand[i])
15342 && REGNO (recog_data.operand[i]) >= 4)
15343 return true;
15344 return false;
15347 /* Return nonzero when P points to a register encoded via a REX prefix.
15348 Called via for_each_rtx. */
15349 static int
15350 extended_reg_mentioned_1 (p, data)
15351 rtx *p;
15352 void *data ATTRIBUTE_UNUSED;
15354 unsigned int regno;
15355 if (!REG_P (*p))
15356 return 0;
15357 regno = REGNO (*p);
15358 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15361 /* Return true when INSN mentions a register that must be encoded using a REX
15362 prefix. */
15363 bool
15364 x86_extended_reg_mentioned_p (insn)
15365 rtx insn;
15367 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15370 #include "gt-i386.h"