/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                        \
  ((mode) == QImode ? 0                         \
   : (mode) == HImode ? 1                       \
   : (mode) == SImode ? 2                       \
   : (mode) == DImode ? 3                       \
   : 4)
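
/* For example, MODE_INDEX (SImode) evaluates to 2, so the SImode
   multiply and divide costs of the active processor are found at
   ix86_cost->mult_init[2] and ix86_cost->divide[2] (field names per
   the usual processor_costs layout in i386.h; quoted here only for
   illustration).  */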
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {    /* costs for tuning for size */
  2,            /* cost of an add instruction */
  3,            /* cost of a lea instruction */
  2,            /* variable shift costs */
  3,            /* constant shift costs */
  {3, 3, 3, 3, 5},      /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},      /* cost of a divide/mod */
  3,            /* cost of movsx */
  3,            /* cost of movzx */
  0,            /* "large" insn */
  2,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {2, 2, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 2, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 2},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {2, 2, 2},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  3,            /* cost of moving MMX register */
  {3, 3},       /* cost of loading MMX registers
                   in SImode and DImode */
  {3, 3},       /* cost of storing MMX registers
                   in SImode and DImode */
  3,            /* cost of moving SSE register */
  {3, 3, 3},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {3, 3, 3},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
  0,            /* size of prefetch block */
  0,            /* number of parallel prefetches */
  1,            /* Branch cost */
  2,            /* cost of FADD and FSUB insns.  */
  2,            /* cost of FMUL instruction.  */
  2,            /* cost of FDIV instruction.  */
  2,            /* cost of FABS instruction.  */
  2,            /* cost of FCHS instruction.  */
  2,            /* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  {6, 6, 6, 6, 6},      /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23}, /* cost of a divide/mod */
  3,            /* cost of movsx */
  2,            /* cost of movzx */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 8},       /* cost of loading MMX registers
                   in SImode and DImode */
  {4, 8},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},   /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 8, 16},   /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
  0,            /* size of prefetch block */
  0,            /* number of parallel prefetches */
  1,            /* Branch cost */
  23,           /* cost of FADD and FSUB insns.  */
  27,           /* cost of FMUL instruction.  */
  88,           /* cost of FDIV instruction.  */
  22,           /* cost of FABS instruction.  */
  24,           /* cost of FCHS instruction.  */
  122,          /* cost of FSQRT instruction.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  {12, 12, 12, 12, 12}, /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40}, /* cost of a divide/mod */
  3,            /* cost of movsx */
  2,            /* cost of movzx */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 8},       /* cost of loading MMX registers
                   in SImode and DImode */
  {4, 8},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},   /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 8, 16},   /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
  0,            /* size of prefetch block */
  0,            /* number of parallel prefetches */
  1,            /* Branch cost */
  8,            /* cost of FADD and FSUB insns.  */
  16,           /* cost of FMUL instruction.  */
  73,           /* cost of FDIV instruction.  */
  3,            /* cost of FABS instruction.  */
  3,            /* cost of FCHS instruction.  */
  83,           /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  4,            /* variable shift costs */
  1,            /* constant shift costs */
  {11, 11, 11, 11, 11}, /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25}, /* cost of a divide/mod */
  3,            /* cost of movsx */
  2,            /* cost of movzx */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  6,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  8,            /* cost of moving MMX register */
  {8, 8},       /* cost of loading MMX registers
                   in SImode and DImode */
  {8, 8},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 8, 16},   /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 8, 16},   /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
  0,            /* size of prefetch block */
  0,            /* number of parallel prefetches */
  2,            /* Branch cost */
  3,            /* cost of FADD and FSUB insns.  */
  3,            /* cost of FMUL instruction.  */
  39,           /* cost of FDIV instruction.  */
  1,            /* cost of FABS instruction.  */
  1,            /* cost of FCHS instruction.  */
  70,           /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentiumpro_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  {4, 4, 4, 4, 4},      /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17}, /* cost of a divide/mod */
  1,            /* cost of movsx */
  1,            /* cost of movzx */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 4, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 2, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  3,            /* MMX or SSE register to integer */
  32,           /* size of prefetch block */
  6,            /* number of parallel prefetches */
  2,            /* Branch cost */
  3,            /* cost of FADD and FSUB insns.  */
  5,            /* cost of FMUL instruction.  */
  56,           /* cost of FDIV instruction.  */
  2,            /* cost of FABS instruction.  */
  2,            /* cost of FCHS instruction.  */
  56,           /* cost of FSQRT instruction.  */
};
static const
struct processor_costs k6_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  {3, 3, 3, 3, 3},      /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18}, /* cost of a divide/mod */
  2,            /* cost of movsx */
  2,            /* cost of movzx */
  8,            /* "large" insn */
  4,            /* MOVE_RATIO */
  3,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {2, 2, 8},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  6,            /* MMX or SSE register to integer */
  32,           /* size of prefetch block */
  1,            /* number of parallel prefetches */
  1,            /* Branch cost */
  2,            /* cost of FADD and FSUB insns.  */
  2,            /* cost of FMUL instruction.  */
  56,           /* cost of FDIV instruction.  */
  2,            /* cost of FABS instruction.  */
  2,            /* cost of FCHS instruction.  */
  56,           /* cost of FSQRT instruction.  */
};
static const
struct processor_costs athlon_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  {5, 5, 5, 5, 5},      /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1,            /* cost of movsx */
  1,            /* cost of movzx */
  8,            /* "large" insn */
  9,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {3, 4, 3},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {3, 4, 3},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {4, 4, 12},   /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {4, 4},       /* cost of loading MMX registers
                   in SImode and DImode */
  {4, 4},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 4, 6},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 4, 5},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  5,            /* MMX or SSE register to integer */
  64,           /* size of prefetch block */
  6,            /* number of parallel prefetches */
  2,            /* Branch cost */
  4,            /* cost of FADD and FSUB insns.  */
  4,            /* cost of FMUL instruction.  */
  24,           /* cost of FDIV instruction.  */
  2,            /* cost of FABS instruction.  */
  2,            /* cost of FCHS instruction.  */
  35,           /* cost of FSQRT instruction.  */
};
static const
struct processor_costs k8_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  {3, 4, 3, 4, 5},      /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1,            /* cost of movsx */
  1,            /* cost of movzx */
  8,            /* "large" insn */
  9,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {3, 4, 3},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {3, 4, 3},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {4, 4, 12},   /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {3, 3},       /* cost of loading MMX registers
                   in SImode and DImode */
  {4, 4},       /* cost of storing MMX registers
                   in SImode and DImode */
  2,            /* cost of moving SSE register */
  {4, 3, 6},    /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {4, 4, 5},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  5,            /* MMX or SSE register to integer */
  64,           /* size of prefetch block */
  6,            /* number of parallel prefetches */
  2,            /* Branch cost */
  4,            /* cost of FADD and FSUB insns.  */
  4,            /* cost of FMUL instruction.  */
  19,           /* cost of FDIV instruction.  */
  2,            /* cost of FABS instruction.  */
  2,            /* cost of FCHS instruction.  */
  35,           /* cost of FSQRT instruction.  */
};
static const
struct processor_costs pentium4_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  4,            /* variable shift costs */
  4,            /* constant shift costs */
  {15, 15, 15, 15, 15}, /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56}, /* cost of a divide/mod */
  1,            /* cost of movsx */
  1,            /* cost of movzx */
  16,           /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6},    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
  2,            /* cost of moving MMX register */
  {2, 2},       /* cost of loading MMX registers
                   in SImode and DImode */
  {2, 2},       /* cost of storing MMX registers
                   in SImode and DImode */
  12,           /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8},    /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  10,           /* MMX or SSE register to integer */
  64,           /* size of prefetch block */
  6,            /* number of parallel prefetches */
  2,            /* Branch cost */
  5,            /* cost of FADD and FSUB insns.  */
  7,            /* cost of FMUL instruction.  */
  43,           /* cost of FDIV instruction.  */
  2,            /* cost of FABS instruction.  */
  2,            /* cost of FCHS instruction.  */
  43,           /* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where types and dependencies are resolved on SSE
   register parts rather than whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper part
   undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* The Athlon optimizes the partial-register FP special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
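
/* A minimal sketch of how these bitmasks are consumed (illustrative;
   the real test macros live in i386.h): each flag is anded with a
   one-hot mask for the processor currently being tuned for, e.g.

     #define TUNEMASK (1 << ix86_tune)

     if (x86_use_leave & TUNEMASK)
       ... emit "leave" instead of explicit mov/pop in the epilogue ...

   TUNEMASK is used exactly this way by override_options below; the
   #define shown here is an assumption quoted for illustration.  */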
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8  for %eip    (no gcc equivalent)
        9  for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
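/* For illustration: with the 64-bit ABI values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, this works out to
   6*8 + 8*16 = 176 bytes -- the size of the x86-64 va_arg register
   save area.  (The concrete constants come from i386.h, not this
   file.)  */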
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

struct machine_function GTY(())
{
  struct stack_local_entry *stack_locals;
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                        <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]           \
                         )
   [va_arg registers]   (
                         > to_allocate  <- FRAME_POINTER
   [frame]              (
                         )
   [padding2]           /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};
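
/* Reading the diagram above: to_allocate spans padding1, the va_arg
   register save area, the local frame and padding2, while the three
   *_offset fields record how far FRAME_POINTER, HARD_FRAME_POINTER and
   the stack pointer sit below ARG_POINTER.  (This summary restates the
   diagram; the authoritative computation is ix86_compute_frame_layout.)  */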
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;           /* for -mtune=<xxx> */
const char *ix86_arch_string;           /* for -march=<xxx> */
const char *ix86_fpmath_string;         /* for -mfpmath=<xxx> */

/* Number of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
                                        int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
                                                           rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
                                         HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
                                             HOST_WIDE_INT, tree));
bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static int ix86_address_cost PARAMS ((rtx));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));
static rtx ix86_delegitimize_address PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
                                         tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
                                            tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
                                              enum rtx_code *,
                                              enum rtx_code *,
                                              enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
                                           rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool contains_128bit_aligned_vector_p PARAMS ((tree));
static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif
/* Register class used for passing the given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses an SFmode or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI class to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
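
/* An illustrative example of this classification (not from the original
   sources): a struct { double d; int i; } passed by value classifies as
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS } -- the first eightbyte
   holds the double and travels in an SSE register via a DFmode move, the
   second holds the int and uses a cheap SImode integer move.  */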
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
                                      enum x86_64_reg_class [MAX_CLASSES],
                                      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
                                     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
                                        const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
                                                    enum x86_64_reg_class));

/* Table of constants used by fldpi, fldln2, etc...  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants PARAMS ((void));
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost
struct gcc_target targetm = TARGET_INITIALIZER;

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;       /* Processor costs */
      const int target_enable;                  /* Target flags to enable.  */
      const int target_disable;                 /* Target flags to disable.  */
      const int align_loop;                     /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
        {
          PTA_SSE = 1,
          PTA_SSE2 = 2,
          PTA_MMX = 4,
          PTA_PREFETCH_SSE = 8,
          PTA_3DNOW = 16,
          PTA_3DNOW_A = 64,
          PTA_64BIT = 128
        } flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
                                       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
                                         | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
                                      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
                           | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
        flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
        flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
        flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
        sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
        ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
        ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
        ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
        ix86_cmodel = CM_LARGE;
      else
        error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
        ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
        ix86_asm_dialect = ASM_ATT;
      else
        error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
           ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
        ix86_arch = processor_alias_table[i].processor;
        /* Default cpu tuning to the architecture.  */
        ix86_tune = ix86_arch;
        if (processor_alias_table[i].flags & PTA_MMX
            && !(target_flags_explicit & MASK_MMX))
          target_flags |= MASK_MMX;
        if (processor_alias_table[i].flags & PTA_3DNOW
            && !(target_flags_explicit & MASK_3DNOW))
          target_flags |= MASK_3DNOW;
        if (processor_alias_table[i].flags & PTA_3DNOW_A
            && !(target_flags_explicit & MASK_3DNOW_A))
          target_flags |= MASK_3DNOW_A;
        if (processor_alias_table[i].flags & PTA_SSE
            && !(target_flags_explicit & MASK_SSE))
          target_flags |= MASK_SSE;
        if (processor_alias_table[i].flags & PTA_SSE2
            && !(target_flags_explicit & MASK_SSE2))
          target_flags |= MASK_SSE2;
        if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
          x86_prefetch_sse = true;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          error ("CPU you selected does not support x86-64 instruction set");
        break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
        ix86_tune = processor_alias_table[i].processor;
        if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
          error ("CPU you selected does not support x86-64 instruction set");
        break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
        error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
        ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
        {
          i = atoi (ix86_align_loops_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_loops = 1 << i;
        }
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
        {
          i = atoi (ix86_align_jumps_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_jumps = 1 << i;
        }
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
        {
          i = atoi (ix86_align_funcs_string);
          if (i < 0 || i > MAX_CODE_ALIGN)
            error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
          else
            align_functions = 1 << i;
        }
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
                                   ? TARGET_64BIT ? 128 : 32
                                   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
        error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
               TARGET_64BIT ? 4 : 2);
      else
        ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
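
  /* For example, -mpreferred-stack-boundary=4 gives
     (1 << 4) * 8 = 128 bits, i.e. the 16-byte stack alignment that the
     SSE __m128 type wants (BITS_PER_UNIT is 8 on this target).  */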
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
        error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
        ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
        ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
        ix86_tls_dialect = TLS_DIALECT_SUN;
      else
        error ("bad value (%s) for -mtls-dialect= switch",
               ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
        error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
        error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
        ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
        {
          if (!TARGET_SSE)
            {
              warning ("SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else
            ix86_fpmath = FPMATH_SSE;
        }
      else if (! strcmp (ix86_fpmath_string, "387,sse")
               || ! strcmp (ix86_fpmath_string, "sse,387"))
        {
          if (!TARGET_SSE)
            {
              warning ("SSE instruction set disabled, using 387 arithmetics");
              ix86_fpmath = FPMATH_387;
            }
          else if (!TARGET_80387)
            {
              warning ("387 instruction set disabled, using SSE arithmetics");
              ix86_fpmath = FPMATH_SSE;
            }
          else
            ix86_fpmath = FPMATH_SSE | FPMATH_387;
        }
      else
        error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
         extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
        target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
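
/* A note on the label-prefix computation just above (illustrative):
   on a typical ELF target the generated label looks like ".LX0" (or
   "*.LX0", depending on how the target macro spells local labels), so
   truncating at the 'X' leaves a prefix such as ".L" in
   internal_label_prefix.  The exact string is target-dependent.  */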
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line option
     specifying them, we will set the defaults in override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}
1478 /* Table of valid machine attributes. */
1479 const struct attribute_spec ix86_attribute_table[] =
1481 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1482 /* Stdcall attribute says callee is responsible for popping arguments
1483 if they are not variable. */
1484 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1485 /* Fastcall attribute says callee is responsible for popping arguments
1486 if they are not variable. */
1487 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1488 /* Cdecl attribute says the callee is a normal C declaration */
1489 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Regparm attribute specifies how many integer arguments are to be
1491 passed in registers. */
1492 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1493 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1494 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1495 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1496 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1497 #endif
1498 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1499 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1500 { NULL, 0, 0, false, false, false, NULL }
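/* For illustration only, the attributes in the table above are used from
   C source roughly like so:

     int __attribute__ ((stdcall)) f1 (int a, int b);
     int __attribute__ ((fastcall)) f2 (int a, int b);
     int __attribute__ ((regparm (3))) f3 (int a, int b, int c);

   f1 pops its own 8 bytes of arguments with `ret 8', f2 receives A and B
   in ECX and EDX, and f3 receives its arguments in EAX, EDX and ECX.  */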
1503 /* Decide whether we can make a sibling call to a function. DECL is the
1504 declaration of the function being targeted by the call and EXP is the
1505 CALL_EXPR representing the call. */
1507 static bool
1508 ix86_function_ok_for_sibcall (decl, exp)
1509 tree decl;
1510 tree exp;
1512 /* If we are generating position-independent code, we cannot sibcall
1513 optimize any indirect call, or a direct call to a global function,
1514 as the PLT requires %ebx be live. */
1515 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1516 return false;
1518 /* If we are returning floats on the 80387 register stack, we cannot
1519 make a sibcall from a function that doesn't return a float to a
1520 function that does or, conversely, from a function that does return
1521 a float to a function that doesn't; the necessary stack adjustment
1522 would not be executed. */
1523 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1524 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1525 return false;
1527 /* If this call is indirect, we'll need to be able to use a call-clobbered
1528 register for the address of the target function. Make sure that all
1529 such registers are not used for passing parameters. */
1530 if (!decl && !TARGET_64BIT)
1532 int regparm = ix86_regparm;
1533 tree attr, type;
1535 /* We're looking at the CALL_EXPR; we need the type of the function. */
1536 type = TREE_OPERAND (exp, 0); /* pointer expression */
1537 type = TREE_TYPE (type); /* pointer type */
1538 type = TREE_TYPE (type); /* function type */
1540 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1541 if (attr)
1542 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1544 if (regparm >= 3)
1546 /* ??? Need to count the actual number of registers to be used,
1547 not the possible number of registers. Fix later. */
1548 return false;
1552 /* Otherwise okay. That also includes certain types of indirect calls. */
1553 return true;
1556 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1557 arguments as in struct attribute_spec.handler. */
1558 static tree
1559 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1560 tree *node;
1561 tree name;
1562 tree args ATTRIBUTE_UNUSED;
1563 int flags ATTRIBUTE_UNUSED;
1564 bool *no_add_attrs;
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
1575 else
1577 if (is_attribute_p ("fastcall", name))
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1581 error ("fastcall and stdcall attributes are not compatible");
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1585 error ("fastcall and regparm attributes are not compatible");
1588 else if (is_attribute_p ("stdcall", name))
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1592 error ("fastcall and stdcall attributes are not compatible");
1597 if (TARGET_64BIT)
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1603 return NULL_TREE;
1606 /* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
1608 static tree
1609 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1610 tree *node;
1611 tree name;
1612 tree args;
1613 int flags ATTRIBUTE_UNUSED;
1614 bool *no_add_attrs;
1616 if (TREE_CODE (*node) != FUNCTION_TYPE
1617 && TREE_CODE (*node) != METHOD_TYPE
1618 && TREE_CODE (*node) != FIELD_DECL
1619 && TREE_CODE (*node) != TYPE_DECL)
1621 warning ("`%s' attribute only applies to functions",
1622 IDENTIFIER_POINTER (name));
1623 *no_add_attrs = true;
1625 else
1627 tree cst;
1629 cst = TREE_VALUE (args);
1630 if (TREE_CODE (cst) != INTEGER_CST)
1632 warning ("`%s' attribute requires an integer constant argument",
1633 IDENTIFIER_POINTER (name));
1634 *no_add_attrs = true;
1636 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1638 warning ("argument to `%s' attribute larger than %d",
1639 IDENTIFIER_POINTER (name), REGPARM_MAX);
1640 *no_add_attrs = true;
1643 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1645 error ("fastcall and regparm attributes are not compatible");
1649 return NULL_TREE;
1652 /* Return 0 if the attributes for two types are incompatible, 1 if they
1653 are compatible, and 2 if they are nearly compatible (which causes a
1654 warning to be generated). */
1656 static int
1657 ix86_comp_type_attributes (type1, type2)
1658 tree type1;
1659 tree type2;
1661 /* Check for mismatch of non-default calling convention. */
1662 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1664 if (TREE_CODE (type1) != FUNCTION_TYPE)
1665 return 1;
1667 /* Check for mismatched fastcall types */
1668 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1669 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1670 return 0;
1672 /* Check for mismatched return types (cdecl vs stdcall). */
1673 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1674 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1675 return 0;
1676 return 1;
1679 /* Return the regparm value for a function with the indicated TYPE. */
1681 static int
1682 ix86_fntype_regparm (type)
1683 tree type;
1685 tree attr;
1687 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1688 if (attr)
1689 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1690 else
1691 return ix86_regparm;
1694 /* Value is the number of bytes of arguments automatically
1695 popped when returning from a subroutine call.
1696 FUNDECL is the declaration node of the function (as a tree),
1697 FUNTYPE is the data type of the function (as a tree),
1698 or for a library call it is an identifier node for the subroutine name.
1699 SIZE is the number of bytes of arguments passed on the stack.
1701 On the 80386, the RTD insn may be used to pop them if the number
1702 of args is fixed, but if the number is variable then the caller
1703 must pop them all. RTD can't be used for library calls now
1704 because the library is compiled with the Unix compiler.
1705 Use of RTD is a selectable option, since it is incompatible with
1706 standard Unix calling sequences. If the option is not selected,
1707 the caller must always pop the args.
1709 The attribute stdcall is equivalent to RTD on a per module basis. */
1712 ix86_return_pops_args (fundecl, funtype, size)
1713 tree fundecl;
1714 tree funtype;
1715 int size;
1717 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1719 /* Cdecl functions override -mrtd, and never pop the stack. */
1720 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1722 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1723 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1724 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1725 rtd = 1;
1727 if (rtd
1728 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1729 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1730 == void_type_node)))
1731 return size;
1734 /* Lose any fake structure return argument if it is passed on the stack. */
1735 if (aggregate_value_p (TREE_TYPE (funtype))
1736 && !TARGET_64BIT)
1738 int nregs = ix86_fntype_regparm (funtype);
1740 if (!nregs)
1741 return GET_MODE_SIZE (Pmode);
1744 return 0;
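/* As a concrete example of the above: a stdcall function with a fixed
   argument list of two ints returns with `ret 8', popping its 8 bytes of
   stack arguments itself, while the equivalent cdecl function returns
   with a plain `ret' and leaves the cleanup to the caller.  */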
1747 /* Argument support functions. */
1749 /* Return true when register may be used to pass function parameters. */
1750 bool
1751 ix86_function_arg_regno_p (regno)
1752 int regno;
1754 int i;
1755 if (!TARGET_64BIT)
1756 return (regno < REGPARM_MAX
1757 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1758 if (SSE_REGNO_P (regno) && TARGET_SSE)
1759 return true;
1760 /* RAX is used as hidden argument to va_arg functions. */
1761 if (!regno)
1762 return true;
1763 for (i = 0; i < REGPARM_MAX; i++)
1764 if (regno == x86_64_int_parameter_registers[i])
1765 return true;
1766 return false;
1769 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1770 for a call to a function whose data type is FNTYPE.
1771 For a library call, FNTYPE is 0. */
1773 void
1774 init_cumulative_args (cum, fntype, libname)
1775 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1776 tree fntype; /* tree ptr for function decl */
1777 rtx libname; /* SYMBOL_REF of library name or 0 */
1779 static CUMULATIVE_ARGS zero_cum;
1780 tree param, next_param;
1782 if (TARGET_DEBUG_ARG)
1784 fprintf (stderr, "\ninit_cumulative_args (");
1785 if (fntype)
1786 fprintf (stderr, "fntype code = %s, ret code = %s",
1787 tree_code_name[(int) TREE_CODE (fntype)],
1788 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1789 else
1790 fprintf (stderr, "no fntype");
1792 if (libname)
1793 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1796 *cum = zero_cum;
1798 /* Set up the number of registers to use for passing arguments. */
1799 cum->nregs = ix86_regparm;
1800 cum->sse_nregs = SSE_REGPARM_MAX;
1801 if (fntype && !TARGET_64BIT)
1803 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1805 if (attr)
1806 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1808 cum->maybe_vaarg = false;
1810 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1811 if (fntype && !TARGET_64BIT)
1813 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1815 cum->nregs = 2;
1816 cum->fastcall = 1;
1821 /* Determine if this function has variable arguments. This is
1822 indicated by the last argument being 'void_type_node' if there
1823 are no variable arguments. If there are variable arguments, then
1824 we won't pass anything in registers. */
1826 if (cum->nregs)
1828 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1829 param != 0; param = next_param)
1831 next_param = TREE_CHAIN (param);
1832 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1834 if (!TARGET_64BIT)
1836 cum->nregs = 0;
1837 cum->fastcall = 0;
1839 cum->maybe_vaarg = true;
1843 if ((!fntype && !libname)
1844 || (fntype && !TYPE_ARG_TYPES (fntype)))
1845 cum->maybe_vaarg = 1;
1847 if (TARGET_DEBUG_ARG)
1848 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1850 return;
1853 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
1854 goal of this code is to classify each 8-byte chunk of the incoming argument
1855 by register class and assign registers accordingly. */
1857 /* Return the union class of CLASS1 and CLASS2.
1858 See the x86-64 PS ABI for details. */
1860 static enum x86_64_reg_class
1861 merge_classes (class1, class2)
1862 enum x86_64_reg_class class1, class2;
1864 /* Rule #1: If both classes are equal, this is the resulting class. */
1865 if (class1 == class2)
1866 return class1;
1868 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1869 the other class. */
1870 if (class1 == X86_64_NO_CLASS)
1871 return class2;
1872 if (class2 == X86_64_NO_CLASS)
1873 return class1;
1875 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1876 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1877 return X86_64_MEMORY_CLASS;
1879 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1880 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1881 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1882 return X86_64_INTEGERSI_CLASS;
1883 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1884 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1885 return X86_64_INTEGER_CLASS;
1887 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1888 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1889 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1890 return X86_64_MEMORY_CLASS;
1892 /* Rule #6: Otherwise class SSE is used. */
1893 return X86_64_SSE_CLASS;
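/* For example, given union { int i; float f; }, the int classifies as
   INTEGERSI and the float as SSESF for the same 8-byte word; rule #4
   merges these to INTEGERSI, so the union is passed in an integer
   register.  */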
1896 /* Classify the argument of type TYPE and mode MODE.
1897 CLASSES will be filled by the register class used to pass each word
1898 of the operand. The number of words is returned. In case the parameter
1899 should be passed in memory, 0 is returned. As a special case for zero
1900 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1902 BIT_OFFSET is used internally for handling records and specifies the
1903 offset of the field in bits, modulo 256 to avoid overflow cases.
1905 See the x86-64 PS ABI for details.
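/* As an illustration of these rules, struct { int i; double d; } spans
   two 8-byte words: the first classifies as INTEGERSI (from the int) and
   the second as SSEDF (from the double), so the structure is passed in
   one integer register and one SSE register.  */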
1908 static int
1909 classify_argument (mode, type, classes, bit_offset)
1910 enum machine_mode mode;
1911 tree type;
1912 enum x86_64_reg_class classes[MAX_CLASSES];
1913 int bit_offset;
1915 int bytes =
1916 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1917 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1919 /* Variable sized entities are always passed/returned in memory. */
1920 if (bytes < 0)
1921 return 0;
1923 if (type && AGGREGATE_TYPE_P (type))
1925 int i;
1926 tree field;
1927 enum x86_64_reg_class subclasses[MAX_CLASSES];
1929 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1930 if (bytes > 16)
1931 return 0;
1933 for (i = 0; i < words; i++)
1934 classes[i] = X86_64_NO_CLASS;
1936 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1937 signal the memory class, so handle this as a special case. */
1938 if (!words)
1940 classes[0] = X86_64_NO_CLASS;
1941 return 1;
1944 /* Classify each field of record and merge classes. */
1945 if (TREE_CODE (type) == RECORD_TYPE)
1947 /* For classes, first merge in the fields of the base classes. */
1948 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1950 tree bases = TYPE_BINFO_BASETYPES (type);
1951 int n_bases = TREE_VEC_LENGTH (bases);
1952 int i;
1954 for (i = 0; i < n_bases; ++i)
1956 tree binfo = TREE_VEC_ELT (bases, i);
1957 int num;
1958 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1959 tree type = BINFO_TYPE (binfo);
1961 num = classify_argument (TYPE_MODE (type),
1962 type, subclasses,
1963 (offset + bit_offset) % 256);
1964 if (!num)
1965 return 0;
1966 for (i = 0; i < num; i++)
1968 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1969 classes[i + pos] =
1970 merge_classes (subclasses[i], classes[i + pos]);
1974 /* And now merge the fields of the structure. */
1975 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1977 if (TREE_CODE (field) == FIELD_DECL)
1979 int num;
1981 /* Bitfields are always classified as integer. Handle them
1982 early, since later code would consider them to be
1983 misaligned integers. */
1984 if (DECL_BIT_FIELD (field))
1986 for (i = int_bit_position (field) / 8 / 8;
1987 i < (int_bit_position (field)
1988 + tree_low_cst (DECL_SIZE (field), 0)
1989 + 63) / 8 / 8; i++)
1990 classes[i] =
1991 merge_classes (X86_64_INTEGER_CLASS,
1992 classes[i]);
1994 else
1996 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1997 TREE_TYPE (field), subclasses,
1998 (int_bit_position (field)
1999 + bit_offset) % 256);
2000 if (!num)
2001 return 0;
2002 for (i = 0; i < num; i++)
2004 int pos =
2005 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2006 classes[i + pos] =
2007 merge_classes (subclasses[i], classes[i + pos]);
2013 /* Arrays are handled as small records. */
2014 else if (TREE_CODE (type) == ARRAY_TYPE)
2016 int num;
2017 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2018 TREE_TYPE (type), subclasses, bit_offset);
2019 if (!num)
2020 return 0;
2022 /* The partial classes are now full classes. */
2023 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2024 subclasses[0] = X86_64_SSE_CLASS;
2025 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2026 subclasses[0] = X86_64_INTEGER_CLASS;
2028 for (i = 0; i < words; i++)
2029 classes[i] = subclasses[i % num];
2031 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2032 else if (TREE_CODE (type) == UNION_TYPE
2033 || TREE_CODE (type) == QUAL_UNION_TYPE)
2035 /* For classes, first merge in the fields of the base classes. */
2036 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2038 tree bases = TYPE_BINFO_BASETYPES (type);
2039 int n_bases = TREE_VEC_LENGTH (bases);
2040 int i;
2042 for (i = 0; i < n_bases; ++i)
2044 tree binfo = TREE_VEC_ELT (bases, i);
2045 int num;
2046 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2047 tree type = BINFO_TYPE (binfo);
2049 num = classify_argument (TYPE_MODE (type),
2050 type, subclasses,
2051 (offset + (bit_offset % 64)) % 256);
2052 if (!num)
2053 return 0;
2054 for (i = 0; i < num; i++)
2056 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2057 classes[i + pos] =
2058 merge_classes (subclasses[i], classes[i + pos]);
2062 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2064 if (TREE_CODE (field) == FIELD_DECL)
2066 int num;
2067 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2068 TREE_TYPE (field), subclasses,
2069 bit_offset);
2070 if (!num)
2071 return 0;
2072 for (i = 0; i < num; i++)
2073 classes[i] = merge_classes (subclasses[i], classes[i]);
2077 else
2078 abort ();
2080 /* Final merger cleanup. */
2081 for (i = 0; i < words; i++)
2083 /* If one class is MEMORY, everything should be passed in
2084 memory. */
2085 if (classes[i] == X86_64_MEMORY_CLASS)
2086 return 0;
2088 /* The X86_64_SSEUP_CLASS should always be preceded by
2089 X86_64_SSE_CLASS. */
2090 if (classes[i] == X86_64_SSEUP_CLASS
2091 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2092 classes[i] = X86_64_SSE_CLASS;
2094 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2095 if (classes[i] == X86_64_X87UP_CLASS
2096 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2097 classes[i] = X86_64_SSE_CLASS;
2099 return words;
2102 /* Compute the alignment needed. We align all types to natural boundaries with
2103 the exception of XFmode, which is aligned to 64 bits. */
2104 if (mode != VOIDmode && mode != BLKmode)
2106 int mode_alignment = GET_MODE_BITSIZE (mode);
2108 if (mode == XFmode)
2109 mode_alignment = 128;
2110 else if (mode == XCmode)
2111 mode_alignment = 256;
2112 /* Misaligned fields are always returned in memory. */
2113 if (bit_offset % mode_alignment)
2114 return 0;
2117 /* Classification of atomic types. */
2118 switch (mode)
2120 case DImode:
2121 case SImode:
2122 case HImode:
2123 case QImode:
2124 case CSImode:
2125 case CHImode:
2126 case CQImode:
2127 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2128 classes[0] = X86_64_INTEGERSI_CLASS;
2129 else
2130 classes[0] = X86_64_INTEGER_CLASS;
2131 return 1;
2132 case CDImode:
2133 case TImode:
2134 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2135 return 2;
2136 case CTImode:
2137 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2138 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2139 return 4;
2140 case SFmode:
2141 if (!(bit_offset % 64))
2142 classes[0] = X86_64_SSESF_CLASS;
2143 else
2144 classes[0] = X86_64_SSE_CLASS;
2145 return 1;
2146 case DFmode:
2147 classes[0] = X86_64_SSEDF_CLASS;
2148 return 1;
2149 case TFmode:
2150 classes[0] = X86_64_X87_CLASS;
2151 classes[1] = X86_64_X87UP_CLASS;
2152 return 2;
2153 case TCmode:
2154 classes[0] = X86_64_X87_CLASS;
2155 classes[1] = X86_64_X87UP_CLASS;
2156 classes[2] = X86_64_X87_CLASS;
2157 classes[3] = X86_64_X87UP_CLASS;
2158 return 4;
2159 case DCmode:
2160 classes[0] = X86_64_SSEDF_CLASS;
2161 classes[1] = X86_64_SSEDF_CLASS;
2162 return 2;
2163 case SCmode:
2164 classes[0] = X86_64_SSE_CLASS;
2165 return 1;
2166 case V4SFmode:
2167 case V4SImode:
2168 case V16QImode:
2169 case V8HImode:
2170 case V2DFmode:
2171 case V2DImode:
2172 classes[0] = X86_64_SSE_CLASS;
2173 classes[1] = X86_64_SSEUP_CLASS;
2174 return 2;
2175 case V2SFmode:
2176 case V2SImode:
2177 case V4HImode:
2178 case V8QImode:
2179 return 0;
2180 case BLKmode:
2181 case VOIDmode:
2182 return 0;
2183 default:
2184 abort ();
2188 /* Examine the argument and return the number of registers required in each
2189 class. Return 0 iff the parameter should be passed in memory. */
2190 static int
2191 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2192 enum machine_mode mode;
2193 tree type;
2194 int *int_nregs, *sse_nregs;
2195 int in_return;
2197 enum x86_64_reg_class class[MAX_CLASSES];
2198 int n = classify_argument (mode, type, class, 0);
2200 *int_nregs = 0;
2201 *sse_nregs = 0;
2202 if (!n)
2203 return 0;
2204 for (n--; n >= 0; n--)
2205 switch (class[n])
2207 case X86_64_INTEGER_CLASS:
2208 case X86_64_INTEGERSI_CLASS:
2209 (*int_nregs)++;
2210 break;
2211 case X86_64_SSE_CLASS:
2212 case X86_64_SSESF_CLASS:
2213 case X86_64_SSEDF_CLASS:
2214 (*sse_nregs)++;
2215 break;
2216 case X86_64_NO_CLASS:
2217 case X86_64_SSEUP_CLASS:
2218 break;
2219 case X86_64_X87_CLASS:
2220 case X86_64_X87UP_CLASS:
2221 if (!in_return)
2222 return 0;
2223 break;
2224 case X86_64_MEMORY_CLASS:
2225 abort ();
2227 return 1;
2229 /* Construct a container for the argument as used by the GCC interface. See
2230 FUNCTION_ARG for the detailed description. */
2231 static rtx
2232 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2233 enum machine_mode mode;
2234 tree type;
2235 int in_return;
2236 int nintregs, nsseregs;
2237 const int * intreg;
2238 int sse_regno;
2240 enum machine_mode tmpmode;
2241 int bytes =
2242 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2243 enum x86_64_reg_class class[MAX_CLASSES];
2244 int n;
2245 int i;
2246 int nexps = 0;
2247 int needed_sseregs, needed_intregs;
2248 rtx exp[MAX_CLASSES];
2249 rtx ret;
2251 n = classify_argument (mode, type, class, 0);
2252 if (TARGET_DEBUG_ARG)
2254 if (!n)
2255 fprintf (stderr, "Memory class\n");
2256 else
2258 fprintf (stderr, "Classes:");
2259 for (i = 0; i < n; i++)
2261 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2263 fprintf (stderr, "\n");
2266 if (!n)
2267 return NULL;
2268 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2269 return NULL;
2270 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2271 return NULL;
2273 /* First construct simple cases. Avoid SCmode, since we want to use a
2274 single register to pass this type. */
2275 if (n == 1 && mode != SCmode)
2276 switch (class[0])
2278 case X86_64_INTEGER_CLASS:
2279 case X86_64_INTEGERSI_CLASS:
2280 return gen_rtx_REG (mode, intreg[0]);
2281 case X86_64_SSE_CLASS:
2282 case X86_64_SSESF_CLASS:
2283 case X86_64_SSEDF_CLASS:
2284 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2285 case X86_64_X87_CLASS:
2286 return gen_rtx_REG (mode, FIRST_STACK_REG);
2287 case X86_64_NO_CLASS:
2288 /* Zero sized array, struct or class. */
2289 return NULL;
2290 default:
2291 abort ();
2293 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2294 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2295 if (n == 2
2296 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2297 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2298 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2299 && class[1] == X86_64_INTEGER_CLASS
2300 && (mode == CDImode || mode == TImode)
2301 && intreg[0] + 1 == intreg[1])
2302 return gen_rtx_REG (mode, intreg[0]);
2303 if (n == 4
2304 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2305 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2306 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2308 /* Otherwise figure out the entries of the PARALLEL. */
2309 for (i = 0; i < n; i++)
2311 switch (class[i])
2313 case X86_64_NO_CLASS:
2314 break;
2315 case X86_64_INTEGER_CLASS:
2316 case X86_64_INTEGERSI_CLASS:
2317 /* Merge TImodes on aligned occasions here too. */
2318 if (i * 8 + 8 > bytes)
2319 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2320 else if (class[i] == X86_64_INTEGERSI_CLASS)
2321 tmpmode = SImode;
2322 else
2323 tmpmode = DImode;
2324 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2325 if (tmpmode == BLKmode)
2326 tmpmode = DImode;
2327 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2328 gen_rtx_REG (tmpmode, *intreg),
2329 GEN_INT (i*8));
2330 intreg++;
2331 break;
2332 case X86_64_SSESF_CLASS:
2333 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2334 gen_rtx_REG (SFmode,
2335 SSE_REGNO (sse_regno)),
2336 GEN_INT (i*8));
2337 sse_regno++;
2338 break;
2339 case X86_64_SSEDF_CLASS:
2340 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2341 gen_rtx_REG (DFmode,
2342 SSE_REGNO (sse_regno)),
2343 GEN_INT (i*8));
2344 sse_regno++;
2345 break;
2346 case X86_64_SSE_CLASS:
2347 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2348 tmpmode = TImode;
2349 else
2350 tmpmode = DImode;
2351 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2352 gen_rtx_REG (tmpmode,
2353 SSE_REGNO (sse_regno)),
2354 GEN_INT (i*8));
2355 if (tmpmode == TImode)
2356 i++;
2357 sse_regno++;
2358 break;
2359 default:
2360 abort ();
2363 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2364 for (i = 0; i < nexps; i++)
2365 XVECEXP (ret, 0, i) = exp [i];
2366 return ret;
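/* Continuing the struct { int i; double d; } example and assuming the
   first free registers, the container built above is roughly

     (parallel [(expr_list (reg:SI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the int travels in DI and the double in XMM0, at byte offsets 0
   and 8 of the structure.  */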
2369 /* Update the data in CUM to advance over an argument
2370 of mode MODE and data type TYPE.
2371 (TYPE is null for libcalls where that information may not be available.) */
2373 void
2374 function_arg_advance (cum, mode, type, named)
2375 CUMULATIVE_ARGS *cum; /* current arg information */
2376 enum machine_mode mode; /* current arg mode */
2377 tree type; /* type of the argument or 0 if lib support */
2378 int named; /* whether or not the argument was named */
2380 int bytes =
2381 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2382 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2384 if (TARGET_DEBUG_ARG)
2385 fprintf (stderr,
2386 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2387 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2388 if (TARGET_64BIT)
2390 int int_nregs, sse_nregs;
2391 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2392 cum->words += words;
2393 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2395 cum->nregs -= int_nregs;
2396 cum->sse_nregs -= sse_nregs;
2397 cum->regno += int_nregs;
2398 cum->sse_regno += sse_nregs;
2400 else
2401 cum->words += words;
2403 else
2405 if (TARGET_SSE && mode == TImode)
2407 cum->sse_words += words;
2408 cum->sse_nregs -= 1;
2409 cum->sse_regno += 1;
2410 if (cum->sse_nregs <= 0)
2412 cum->sse_nregs = 0;
2413 cum->sse_regno = 0;
2416 else
2418 cum->words += words;
2419 cum->nregs -= words;
2420 cum->regno += words;
2422 if (cum->nregs <= 0)
2424 cum->nregs = 0;
2425 cum->regno = 0;
2429 return;
2432 /* Define where to put the arguments to a function.
2433 Value is zero to push the argument on the stack,
2434 or a hard register in which to store the argument.
2436 MODE is the argument's machine mode.
2437 TYPE is the data type of the argument (as a tree).
2438 This is null for libcalls where that information may
2439 not be available.
2440 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2441 the preceding args and about the function being called.
2442 NAMED is nonzero if this argument is a named parameter
2443 (otherwise it is an extra parameter matching an ellipsis). */
2446 function_arg (cum, mode, type, named)
2447 CUMULATIVE_ARGS *cum; /* current arg information */
2448 enum machine_mode mode; /* current arg mode */
2449 tree type; /* type of the argument or 0 if lib support */
2450 int named; /* != 0 for normal args, == 0 for ... args */
2452 rtx ret = NULL_RTX;
2453 int bytes =
2454 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2455 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2457 /* Handle a hidden AL argument containing the number of registers for varargs
2458 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2459 any AL settings. */
2460 if (mode == VOIDmode)
2462 if (TARGET_64BIT)
2463 return GEN_INT (cum->maybe_vaarg
2464 ? (cum->sse_nregs < 0
2465 ? SSE_REGPARM_MAX
2466 : cum->sse_regno)
2467 : -1);
2468 else
2469 return constm1_rtx;
2471 if (TARGET_64BIT)
2472 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2473 &x86_64_int_parameter_registers [cum->regno],
2474 cum->sse_regno);
2475 else
2476 switch (mode)
2478 /* For now, pass fp/complex values on the stack. */
2479 default:
2480 break;
2482 case BLKmode:
2483 case DImode:
2484 case SImode:
2485 case HImode:
2486 case QImode:
2487 if (words <= cum->nregs)
2489 int regno = cum->regno;
2491 /* Fastcall allocates the first two DWORD (SImode) or
2492 smaller arguments to ECX and EDX. */
2493 if (cum->fastcall)
2495 if (mode == BLKmode || mode == DImode)
2496 break;
2498 /* ECX, not EAX, is the first allocated register. */
2499 if (regno == 0)
2500 regno = 2;
2502 ret = gen_rtx_REG (mode, regno);
2504 break;
2505 case TImode:
2506 if (cum->sse_nregs)
2507 ret = gen_rtx_REG (mode, cum->sse_regno);
2508 break;
2511 if (TARGET_DEBUG_ARG)
2513 fprintf (stderr,
2514 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2515 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2517 if (ret)
2518 print_simple_rtl (stderr, ret);
2519 else
2520 fprintf (stderr, ", stack");
2522 fprintf (stderr, " )\n");
2525 return ret;
2528 /* A C expression that indicates when an argument must be passed by
2529 reference. If nonzero for an argument, a copy of that argument is
2530 made in memory and a pointer to the argument is passed instead of
2531 the argument itself. The pointer is passed in whatever way is
2532 appropriate for passing a pointer to that type. */
2535 function_arg_pass_by_reference (cum, mode, type, named)
2536 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2537 enum machine_mode mode ATTRIBUTE_UNUSED;
2538 tree type;
2539 int named ATTRIBUTE_UNUSED;
2541 if (!TARGET_64BIT)
2542 return 0;
2544 if (type && int_size_in_bytes (type) == -1)
2546 if (TARGET_DEBUG_ARG)
2547 fprintf (stderr, "function_arg_pass_by_reference\n");
2548 return 1;
2551 return 0;
2554 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2555 passing ABI. */
2556 static bool
2557 contains_128bit_aligned_vector_p (type)
2558 tree type;
2560 enum machine_mode mode = TYPE_MODE (type);
2561 if (SSE_REG_MODE_P (mode)
2562 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2563 return true;
2564 if (TYPE_ALIGN (type) < 128)
2565 return false;
2567 if (AGGREGATE_TYPE_P (type))
2569 /* Walk the aggregates recursively. */
2570 if (TREE_CODE (type) == RECORD_TYPE
2571 || TREE_CODE (type) == UNION_TYPE
2572 || TREE_CODE (type) == QUAL_UNION_TYPE)
2574 tree field;
2576 if (TYPE_BINFO (type) != NULL
2577 && TYPE_BINFO_BASETYPES (type) != NULL)
2579 tree bases = TYPE_BINFO_BASETYPES (type);
2580 int n_bases = TREE_VEC_LENGTH (bases);
2581 int i;
2583 for (i = 0; i < n_bases; ++i)
2585 tree binfo = TREE_VEC_ELT (bases, i);
2586 tree type = BINFO_TYPE (binfo);
2588 if (contains_128bit_aligned_vector_p (type))
2589 return true;
2592 /* And now merge the fields of the structure. */
2593 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2595 if (TREE_CODE (field) == FIELD_DECL
2596 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2597 return true;
2600 /* Just for use if some language passes arrays by value. */
2601 else if (TREE_CODE (type) == ARRAY_TYPE)
2603 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2604 return true;
2606 else
2607 abort ();
2609 return false;
2612 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2613 and type. */
2616 ix86_function_arg_boundary (mode, type)
2617 enum machine_mode mode;
2618 tree type;
2620 int align;
2621 if (type)
2622 align = TYPE_ALIGN (type);
2623 else
2624 align = GET_MODE_ALIGNMENT (mode);
2625 if (align < PARM_BOUNDARY)
2626 align = PARM_BOUNDARY;
2627 if (!TARGET_64BIT)
2629 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
2630 make an exception for SSE modes since these require 128bit
2631 alignment.
2633 The handling here differs from field_alignment. ICC aligns MMX
2634 arguments to 4 byte boundaries, while structure fields are aligned
2635 to 8 byte boundaries. */
2636 if (!type)
2638 if (!SSE_REG_MODE_P (mode))
2639 align = PARM_BOUNDARY;
2641 else
2643 if (!contains_128bit_aligned_vector_p (type))
2644 align = PARM_BOUNDARY;
2646 if (align != PARM_BOUNDARY && !TARGET_SSE)
2647 abort();
2649 if (align > 128)
2650 align = 128;
2651 return align;
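/* Hence a __m128 argument is aligned to a 128-bit boundary on the stack,
   while a plain double stays at the 32-bit PARM_BOUNDARY despite its
   natural 64-bit alignment.  */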
2654 /* Return true if N is a possible register number for a function value. */
2655 bool
2656 ix86_function_value_regno_p (regno)
2657 int regno;
2659 if (!TARGET_64BIT)
2661 return ((regno) == 0
2662 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2663 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2665 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2666 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2667 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2670 /* Define how to find the value returned by a function.
2671 VALTYPE is the data type of the value (as a tree).
2672 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2673 otherwise, FUNC is 0. */
2675 ix86_function_value (valtype)
2676 tree valtype;
2678 if (TARGET_64BIT)
2680 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2681 REGPARM_MAX, SSE_REGPARM_MAX,
2682 x86_64_int_return_registers, 0);
2683 /* For zero sized structures, construct_container returns NULL, but we need
2684 to keep the rest of the compiler happy by returning a meaningful value. */
2685 if (!ret)
2686 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2687 return ret;
2689 else
2690 return gen_rtx_REG (TYPE_MODE (valtype),
2691 ix86_value_regno (TYPE_MODE (valtype)));
2694 /* Return nonzero iff TYPE is returned in memory. */
2696 ix86_return_in_memory (type)
2697 tree type;
2699 int needed_intregs, needed_sseregs;
2700 if (TARGET_64BIT)
2702 return !examine_argument (TYPE_MODE (type), type, 1,
2703 &needed_intregs, &needed_sseregs);
2705 else
2707 if (TYPE_MODE (type) == BLKmode)
2708 return 1;
2709 else if (MS_AGGREGATE_RETURN
2710 && AGGREGATE_TYPE_P (type)
2711 && int_size_in_bytes(type) <= 8)
2712 return 0;
2713 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2714 && int_size_in_bytes (type) == 8)
2715 || (int_size_in_bytes (type) > 12
2716 && TYPE_MODE (type) != TImode
2717 && TYPE_MODE (type) != TFmode
2718 && !VECTOR_MODE_P (TYPE_MODE (type))))
2719 return 1;
2720 return 0;
2724 /* Define how to find the value returned by a library function
2725 assuming the value has mode MODE. */
2727 ix86_libcall_value (mode)
2728 enum machine_mode mode;
2730 if (TARGET_64BIT)
2732 switch (mode)
2734 case SFmode:
2735 case SCmode:
2736 case DFmode:
2737 case DCmode:
2738 return gen_rtx_REG (mode, FIRST_SSE_REG);
2739 case TFmode:
2740 case TCmode:
2741 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2742 default:
2743 return gen_rtx_REG (mode, 0);
2746 else
2747 return gen_rtx_REG (mode, ix86_value_regno (mode));
2750 /* Given a mode, return the register to use for a return value. */
2752 static int
2753 ix86_value_regno (mode)
2754 enum machine_mode mode;
2756 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2757 return FIRST_FLOAT_REG;
2758 if (mode == TImode || VECTOR_MODE_P (mode))
2759 return FIRST_SSE_REG;
2760 return 0;
2763 /* Create the va_list data type. */
2765 tree
2766 ix86_build_va_list ()
2768 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2770 /* For i386 we use a plain pointer to the argument area. */
2771 if (!TARGET_64BIT)
2772 return build_pointer_type (char_type_node);
2774 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2775 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2777 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2778 unsigned_type_node);
2779 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2780 unsigned_type_node);
2781 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2782 ptr_type_node);
2783 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2784 ptr_type_node);
2786 DECL_FIELD_CONTEXT (f_gpr) = record;
2787 DECL_FIELD_CONTEXT (f_fpr) = record;
2788 DECL_FIELD_CONTEXT (f_ovf) = record;
2789 DECL_FIELD_CONTEXT (f_sav) = record;
2791 TREE_CHAIN (record) = type_decl;
2792 TYPE_NAME (record) = type_decl;
2793 TYPE_FIELDS (record) = f_gpr;
2794 TREE_CHAIN (f_gpr) = f_fpr;
2795 TREE_CHAIN (f_fpr) = f_ovf;
2796 TREE_CHAIN (f_ovf) = f_sav;
2798 layout_type (record);
2800 /* The correct type is an array type of one element. */
2801 return build_array_type (record, build_index_type (size_zero_node));
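/* In C terms the record built above corresponds to

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

   with va_list being __va_list_tag[1], matching the x86-64 psABI.  */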
2804 /* Perform any actions needed for a function that is receiving a
2805 variable number of arguments.
2807 CUM is as above.
2809 MODE and TYPE are the mode and type of the current parameter.
2811 PRETEND_SIZE is a variable that should be set to the amount of stack
2812 that must be pushed by the prologue to pretend that our caller pushed
2813 it.
2815 Normally, this macro will push all remaining incoming registers on the
2816 stack and set PRETEND_SIZE to the length of the registers pushed. */
2818 void
2819 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2820 CUMULATIVE_ARGS *cum;
2821 enum machine_mode mode;
2822 tree type;
2823 int *pretend_size ATTRIBUTE_UNUSED;
2824 int no_rtl;
2827 CUMULATIVE_ARGS next_cum;
2828 rtx save_area = NULL_RTX, mem;
2829 rtx label;
2830 rtx label_ref;
2831 rtx tmp_reg;
2832 rtx nsse_reg;
2833 int set;
2834 tree fntype;
2835 int stdarg_p;
2836 int i;
2838 if (!TARGET_64BIT)
2839 return;
2841 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2842 ix86_save_varrargs_registers = 1;
2844 fntype = TREE_TYPE (current_function_decl);
2845 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2846 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2847 != void_type_node));
2849 /* For varargs, we do not want to skip the dummy va_dcl argument.
2850 For stdargs, we do want to skip the last named argument. */
2851 next_cum = *cum;
2852 if (stdarg_p)
2853 function_arg_advance (&next_cum, mode, type, 1);
2855 if (!no_rtl)
2856 save_area = frame_pointer_rtx;
2858 set = get_varargs_alias_set ();
2860 for (i = next_cum.regno; i < ix86_regparm; i++)
2862 mem = gen_rtx_MEM (Pmode,
2863 plus_constant (save_area, i * UNITS_PER_WORD));
2864 set_mem_alias_set (mem, set);
2865 emit_move_insn (mem, gen_rtx_REG (Pmode,
2866 x86_64_int_parameter_registers[i]));
2869 if (next_cum.sse_nregs)
2871 /* Now emit code to save SSE registers. The AX parameter contains the number
2872 of SSE parameter registers used to call this function. We use the
2873 sse_prologue_save insn template, which produces a computed jump across
2874 the SSE saves. We need some preparation work to get this working. */
2876 label = gen_label_rtx ();
2877 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2879 /* Compute address to jump to :
2880 label - 5*eax + nnamed_sse_arguments*5 */
2881 tmp_reg = gen_reg_rtx (Pmode);
2882 nsse_reg = gen_reg_rtx (Pmode);
2883 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2884 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2885 gen_rtx_MULT (Pmode, nsse_reg,
2886 GEN_INT (4))));
2887 if (next_cum.sse_regno)
2888 emit_move_insn
2889 (nsse_reg,
2890 gen_rtx_CONST (DImode,
2891 gen_rtx_PLUS (DImode,
2892 label_ref,
2893 GEN_INT (next_cum.sse_regno * 4))));
2894 else
2895 emit_move_insn (nsse_reg, label_ref);
2896 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2898 /* Compute the address of the memory block we save into. We always use a
2899 pointer pointing 127 bytes after the first byte to store - this is needed
2900 to keep the instruction size limited to 4 bytes. */
2901 tmp_reg = gen_reg_rtx (Pmode);
2902 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2903 plus_constant (save_area,
2904 8 * REGPARM_MAX + 127)));
2905 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2906 set_mem_alias_set (mem, set);
2907 set_mem_align (mem, BITS_PER_WORD);
2909 /* And finally do the dirty job! */
2910 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2911 GEN_INT (next_cum.sse_regno), label));
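/* The register save area built above thus holds the six integer argument
   registers in its first REGPARM_MAX * 8 = 48 bytes, followed by one
   16-byte slot per SSE register; the gp_offset and fp_offset fields set
   up by va_start below index into exactly this layout.  */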
2916 /* Implement va_start. */
2918 void
2919 ix86_va_start (valist, nextarg)
2920 tree valist;
2921 rtx nextarg;
2923 HOST_WIDE_INT words, n_gpr, n_fpr;
2924 tree f_gpr, f_fpr, f_ovf, f_sav;
2925 tree gpr, fpr, ovf, sav, t;
2927 /* Only the 64-bit target needs something special. */
2928 if (!TARGET_64BIT)
2930 std_expand_builtin_va_start (valist, nextarg);
2931 return;
2934 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2935 f_fpr = TREE_CHAIN (f_gpr);
2936 f_ovf = TREE_CHAIN (f_fpr);
2937 f_sav = TREE_CHAIN (f_ovf);
2939 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2940 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2941 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2942 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2943 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2945 /* Count number of gp and fp argument registers used. */
2946 words = current_function_args_info.words;
2947 n_gpr = current_function_args_info.regno;
2948 n_fpr = current_function_args_info.sse_regno;
2950 if (TARGET_DEBUG_ARG)
2951 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2952 (int) words, (int) n_gpr, (int) n_fpr);
2954 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2955 build_int_2 (n_gpr * 8, 0));
2956 TREE_SIDE_EFFECTS (t) = 1;
2957 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2959 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2960 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2961 TREE_SIDE_EFFECTS (t) = 1;
2962 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2964 /* Find the overflow area. */
2965 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2966 if (words != 0)
2967 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2968 build_int_2 (words * UNITS_PER_WORD, 0));
2969 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2970 TREE_SIDE_EFFECTS (t) = 1;
2971 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2973 /* Find the register save area.
2974 The prologue of the function saves it right above the stack frame. */
2975 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2976 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2977 TREE_SIDE_EFFECTS (t) = 1;
2978 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
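/* For instance, in a function declared as f (int a, double b, ...), one
   integer and one SSE register are consumed by the named arguments, so
   va_start leaves gp_offset = 8 and fp_offset = REGPARM_MAX * 8 + 16 = 64.  */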
2981 /* Implement va_arg. */
2983 ix86_va_arg (valist, type)
2984 tree valist, type;
2986 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2987 tree f_gpr, f_fpr, f_ovf, f_sav;
2988 tree gpr, fpr, ovf, sav, t;
2989 int size, rsize;
2990 rtx lab_false, lab_over = NULL_RTX;
2991 rtx addr_rtx, r;
2992 rtx container;
2993 int indirect_p = 0;
2995 /* Only the 64-bit target needs something special. */
2996 if (!TARGET_64BIT)
2998 return std_expand_builtin_va_arg (valist, type);
3001 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3002 f_fpr = TREE_CHAIN (f_gpr);
3003 f_ovf = TREE_CHAIN (f_fpr);
3004 f_sav = TREE_CHAIN (f_ovf);
3006 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3007 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3008 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3009 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3010 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3012 size = int_size_in_bytes (type);
3013 if (size == -1)
3015 /* Passed by reference. */
3016 indirect_p = 1;
3017 type = build_pointer_type (type);
3018 size = int_size_in_bytes (type);
3020 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3022 container = construct_container (TYPE_MODE (type), type, 0,
3023 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3025 /* Pull the value out of the saved registers ... */
3028 addr_rtx = gen_reg_rtx (Pmode);
3030 if (container)
3032 rtx int_addr_rtx, sse_addr_rtx;
3033 int needed_intregs, needed_sseregs;
3034 int need_temp;
3036 lab_over = gen_label_rtx ();
3037 lab_false = gen_label_rtx ();
3039 examine_argument (TYPE_MODE (type), type, 0,
3040 &needed_intregs, &needed_sseregs);
3043 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3044 || TYPE_ALIGN (type) > 128);
3046 /* In case we are passing a structure, verify that it is a consecutive block
3047 on the register save area. If not, we need to do moves. */
3048 if (!need_temp && !REG_P (container))
3050 /* Verify that all registers are strictly consecutive. */
3051 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3053 int i;
3055 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3057 rtx slot = XVECEXP (container, 0, i);
3058 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3059 || INTVAL (XEXP (slot, 1)) != i * 16)
3060 need_temp = 1;
3063 else
3065 int i;
3067 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3069 rtx slot = XVECEXP (container, 0, i);
3070 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3071 || INTVAL (XEXP (slot, 1)) != i * 8)
3072 need_temp = 1;
3076 if (!need_temp)
3078 int_addr_rtx = addr_rtx;
3079 sse_addr_rtx = addr_rtx;
3081 else
3083 int_addr_rtx = gen_reg_rtx (Pmode);
3084 sse_addr_rtx = gen_reg_rtx (Pmode);
3086 /* First ensure that we fit completely in registers. */
3087 if (needed_intregs)
3089 emit_cmp_and_jump_insns (expand_expr
3090 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3091 GEN_INT ((REGPARM_MAX - needed_intregs +
3092 1) * 8), GE, const1_rtx, SImode,
3093 1, lab_false);
3095 if (needed_sseregs)
3097 emit_cmp_and_jump_insns (expand_expr
3098 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3099 GEN_INT ((SSE_REGPARM_MAX -
3100 needed_sseregs + 1) * 16 +
3101 REGPARM_MAX * 8), GE, const1_rtx,
3102 SImode, 1, lab_false);
3105 /* Compute index to start of area used for integer regs. */
3106 if (needed_intregs)
3108 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3109 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3110 if (r != int_addr_rtx)
3111 emit_move_insn (int_addr_rtx, r);
3113 if (needed_sseregs)
3115 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3116 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3117 if (r != sse_addr_rtx)
3118 emit_move_insn (sse_addr_rtx, r);
3120 if (need_temp)
3122 int i;
3123 rtx mem;
3125 /* Never use the memory itself, as it has the alias set. */
3126 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3127 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3128 set_mem_alias_set (mem, get_varargs_alias_set ());
3129 set_mem_align (mem, BITS_PER_UNIT);
3131 for (i = 0; i < XVECLEN (container, 0); i++)
3133 rtx slot = XVECEXP (container, 0, i);
3134 rtx reg = XEXP (slot, 0);
3135 enum machine_mode mode = GET_MODE (reg);
3136 rtx src_addr;
3137 rtx src_mem;
3138 int src_offset;
3139 rtx dest_mem;
3141 if (SSE_REGNO_P (REGNO (reg)))
3143 src_addr = sse_addr_rtx;
3144 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3146 else
3148 src_addr = int_addr_rtx;
3149 src_offset = REGNO (reg) * 8;
3151 src_mem = gen_rtx_MEM (mode, src_addr);
3152 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3153 src_mem = adjust_address (src_mem, mode, src_offset);
3154 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3155 emit_move_insn (dest_mem, src_mem);
3159 if (needed_intregs)
3162 t = build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3163 build_int_2 (needed_intregs * 8, 0));
3164 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3165 TREE_SIDE_EFFECTS (t) = 1;
3166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3168 if (needed_sseregs)
3171 t = build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3172 build_int_2 (needed_sseregs * 16, 0));
3173 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3174 TREE_SIDE_EFFECTS (t) = 1;
3175 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3178 emit_jump_insn (gen_jump (lab_over));
3179 emit_barrier ();
3180 emit_label (lab_false);
3183 /* ... otherwise out of the overflow area. */
3185 /* Care for on-stack alignment if needed. */
3186 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3187 t = ovf;
3188 else
3190 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3191 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3192 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3194 t = save_expr (t);
3196 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3197 if (r != addr_rtx)
3198 emit_move_insn (addr_rtx, r);
3201 t = build (PLUS_EXPR, TREE_TYPE (t), t,
3202 build_int_2 (rsize * UNITS_PER_WORD, 0));
3203 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3204 TREE_SIDE_EFFECTS (t) = 1;
3205 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3207 if (container)
3208 emit_label (lab_over);
3210 if (indirect_p)
3212 r = gen_rtx_MEM (Pmode, addr_rtx);
3213 set_mem_alias_set (r, get_varargs_alias_set ());
3214 emit_move_insn (addr_rtx, r);
3217 return addr_rtx;
3220 /* Return nonzero if OP is either an i387 or SSE fp register. */
3222 any_fp_register_operand (op, mode)
3223 rtx op;
3224 enum machine_mode mode ATTRIBUTE_UNUSED;
3226 return ANY_FP_REG_P (op);
3229 /* Return nonzero if OP is an i387 fp register. */
3231 fp_register_operand (op, mode)
3232 rtx op;
3233 enum machine_mode mode ATTRIBUTE_UNUSED;
3235 return FP_REG_P (op);
3238 /* Return nonzero if OP is a non-fp register_operand. */
3240 register_and_not_any_fp_reg_operand (op, mode)
3241 rtx op;
3242 enum machine_mode mode;
3244 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3247 /* Return nonzero if OP is a register operand other than an
3248 i387 fp register. */
3250 register_and_not_fp_reg_operand (op, mode)
3251 rtx op;
3252 enum machine_mode mode;
3254 return register_operand (op, mode) && !FP_REG_P (op);
3257 /* Return nonzero if OP is a general operand representable on x86_64. */
3260 x86_64_general_operand (op, mode)
3261 rtx op;
3262 enum machine_mode mode;
3264 if (!TARGET_64BIT)
3265 return general_operand (op, mode);
3266 if (nonimmediate_operand (op, mode))
3267 return 1;
3268 return x86_64_sign_extended_value (op);
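/* For example, (const_int -1) and (const_int 0x7fffffff) are accepted
   here, while (const_int 0x80000000) does not sign-extend from 32 bits
   and is only covered by the zero extended variants below.  */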
3271 /* Return nonzero if OP is a general operand representable on x86_64
3272 as either a sign extended or zero extended constant. */
3275 x86_64_szext_general_operand (op, mode)
3276 rtx op;
3277 enum machine_mode mode;
3279 if (!TARGET_64BIT)
3280 return general_operand (op, mode);
3281 if (nonimmediate_operand (op, mode))
3282 return 1;
3283 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3286 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3289 x86_64_nonmemory_operand (op, mode)
3290 rtx op;
3291 enum machine_mode mode;
3293 if (!TARGET_64BIT)
3294 return nonmemory_operand (op, mode);
3295 if (register_operand (op, mode))
3296 return 1;
3297 return x86_64_sign_extended_value (op);
3300 /* Return nonzero if OP is a nonmemory operand accepted by the movabs patterns. */
3303 x86_64_movabs_operand (op, mode)
3304 rtx op;
3305 enum machine_mode mode;
3307 if (!TARGET_64BIT || !flag_pic)
3308 return nonmemory_operand (op, mode);
3309 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3310 return 1;
3311 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3312 return 1;
3313 return 0;
3316 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3319 x86_64_szext_nonmemory_operand (op, mode)
3320 rtx op;
3321 enum machine_mode mode;
3323 if (!TARGET_64BIT)
3324 return nonmemory_operand (op, mode);
3325 if (register_operand (op, mode))
3326 return 1;
3327 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3330 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3333 x86_64_immediate_operand (op, mode)
3334 rtx op;
3335 enum machine_mode mode;
3337 if (!TARGET_64BIT)
3338 return immediate_operand (op, mode);
3339 return x86_64_sign_extended_value (op);
3342 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3345 x86_64_zext_immediate_operand (op, mode)
3346 rtx op;
3347 enum machine_mode mode ATTRIBUTE_UNUSED;
3349 return x86_64_zero_extended_value (op);
3352 /* Return nonzero if OP is (const_int 1), else return zero. */
3355 const_int_1_operand (op, mode)
3356 rtx op;
3357 enum machine_mode mode ATTRIBUTE_UNUSED;
3359 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3362 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3363 for shift & compare patterns, as shifting by 0 does not change flags),
3364 else return zero. */
3367 const_int_1_31_operand (op, mode)
3368 rtx op;
3369 enum machine_mode mode ATTRIBUTE_UNUSED;
3371 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3374 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3375 reference and a constant. */
3378 symbolic_operand (op, mode)
3379 register rtx op;
3380 enum machine_mode mode ATTRIBUTE_UNUSED;
3382 switch (GET_CODE (op))
3384 case SYMBOL_REF:
3385 case LABEL_REF:
3386 return 1;
3388 case CONST:
3389 op = XEXP (op, 0);
3390 if (GET_CODE (op) == SYMBOL_REF
3391 || GET_CODE (op) == LABEL_REF
3392 || (GET_CODE (op) == UNSPEC
3393 && (XINT (op, 1) == UNSPEC_GOT
3394 || XINT (op, 1) == UNSPEC_GOTOFF
3395 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3396 return 1;
3397 if (GET_CODE (op) != PLUS
3398 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3399 return 0;
3401 op = XEXP (op, 0);
3402 if (GET_CODE (op) == SYMBOL_REF
3403 || GET_CODE (op) == LABEL_REF)
3404 return 1;
3405 /* Only @GOTOFF gets offsets. */
3406 if (GET_CODE (op) != UNSPEC
3407 || XINT (op, 1) != UNSPEC_GOTOFF)
3408 return 0;
3410 op = XVECEXP (op, 0, 0);
3411 if (GET_CODE (op) == SYMBOL_REF
3412 || GET_CODE (op) == LABEL_REF)
3413 return 1;
3414 return 0;
3416 default:
3417 return 0;
3421 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3424 pic_symbolic_operand (op, mode)
3425 register rtx op;
3426 enum machine_mode mode ATTRIBUTE_UNUSED;
3428 if (GET_CODE (op) != CONST)
3429 return 0;
3430 op = XEXP (op, 0);
3431 if (TARGET_64BIT)
3433 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3434 return 1;
3436 else
3438 if (GET_CODE (op) == UNSPEC)
3439 return 1;
3440 if (GET_CODE (op) != PLUS
3441 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3442 return 0;
3443 op = XEXP (op, 0);
3444 if (GET_CODE (op) == UNSPEC)
3445 return 1;
3447 return 0;
3450 /* Return true if OP is a symbolic operand that resolves locally. */
3452 static int
3453 local_symbolic_operand (op, mode)
3454 rtx op;
3455 enum machine_mode mode ATTRIBUTE_UNUSED;
3457 if (GET_CODE (op) == CONST
3458 && GET_CODE (XEXP (op, 0)) == PLUS
3459 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3460 op = XEXP (XEXP (op, 0), 0);
3462 if (GET_CODE (op) == LABEL_REF)
3463 return 1;
3465 if (GET_CODE (op) != SYMBOL_REF)
3466 return 0;
3468 /* These we've been told are local by varasm and encode_section_info
3469 respectively. */
3470 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3471 return 1;
3473 /* There is, however, a not insubstantial body of code in the rest of
3474 the compiler that assumes it can just stick the results of
3475 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3476 /* ??? This is a hack. Should update the body of the compiler to
3477 always create a DECL and invoke targetm.encode_section_info. */
3478 if (strncmp (XSTR (op, 0), internal_label_prefix,
3479 internal_label_prefix_len) == 0)
3480 return 1;
3482 return 0;
3485 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3488 tls_symbolic_operand (op, mode)
3489 register rtx op;
3490 enum machine_mode mode ATTRIBUTE_UNUSED;
3492 const char *symbol_str;
3494 if (GET_CODE (op) != SYMBOL_REF)
3495 return 0;
3496 symbol_str = XSTR (op, 0);
3498 if (symbol_str[0] != '%')
3499 return 0;
3500 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3503 static int
3504 tls_symbolic_operand_1 (op, kind)
3505 rtx op;
3506 enum tls_model kind;
3508 const char *symbol_str;
3510 if (GET_CODE (op) != SYMBOL_REF)
3511 return 0;
3512 symbol_str = XSTR (op, 0);
3514 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3518 global_dynamic_symbolic_operand (op, mode)
3519 register rtx op;
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3522 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3526 local_dynamic_symbolic_operand (op, mode)
3527 register rtx op;
3528 enum machine_mode mode ATTRIBUTE_UNUSED;
3530 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3534 initial_exec_symbolic_operand (op, mode)
3535 register rtx op;
3536 enum machine_mode mode ATTRIBUTE_UNUSED;
3538 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3542 local_exec_symbolic_operand (op, mode)
3543 register rtx op;
3544 enum machine_mode mode ATTRIBUTE_UNUSED;
3546 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3549 /* Test for a valid operand for a call instruction. Don't allow the
3550 arg pointer register or virtual regs since they may decay into
3551 reg + const, which the patterns can't handle. */
3554 call_insn_operand (op, mode)
3555 rtx op;
3556 enum machine_mode mode ATTRIBUTE_UNUSED;
3558 /* Disallow indirect through a virtual register. This leads to
3559 compiler aborts when trying to eliminate them. */
3560 if (GET_CODE (op) == REG
3561 && (op == arg_pointer_rtx
3562 || op == frame_pointer_rtx
3563 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3564 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3565 return 0;
3567 /* Disallow `call 1234'. Due to varying assembler lameness this
3568 gets either rejected or translated to `call .+1234'. */
3569 if (GET_CODE (op) == CONST_INT)
3570 return 0;
3572 /* Explicitly allow SYMBOL_REF even if pic. */
3573 if (GET_CODE (op) == SYMBOL_REF)
3574 return 1;
3576 /* Otherwise we can allow any general_operand in the address. */
3577 return general_operand (op, Pmode);
3580 /* Test for a valid operand for a call instruction. Don't allow the
3581 arg pointer register or virtual regs since they may decay into
3582 reg + const, which the patterns can't handle. */
3585 sibcall_insn_operand (op, mode)
3586 rtx op;
3587 enum machine_mode mode ATTRIBUTE_UNUSED;
3589 /* Disallow indirect through a virtual register. This leads to
3590 compiler aborts when trying to eliminate them. */
3591 if (GET_CODE (op) == REG
3592 && (op == arg_pointer_rtx
3593 || op == frame_pointer_rtx
3594 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3595 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3596 return 0;
3598 /* Explicitly allow SYMBOL_REF even if pic. */
3599 if (GET_CODE (op) == SYMBOL_REF)
3600 return 1;
3602 /* Otherwise we can only allow register operands. */
3603 return register_operand (op, Pmode);
3607 constant_call_address_operand (op, mode)
3608 rtx op;
3609 enum machine_mode mode ATTRIBUTE_UNUSED;
3611 if (GET_CODE (op) == CONST
3612 && GET_CODE (XEXP (op, 0)) == PLUS
3613 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3614 op = XEXP (XEXP (op, 0), 0);
3615 return GET_CODE (op) == SYMBOL_REF;
3618 /* Match exactly zero and one. */
3621 const0_operand (op, mode)
3622 register rtx op;
3623 enum machine_mode mode;
3625 return op == CONST0_RTX (mode);
3629 const1_operand (op, mode)
3630 register rtx op;
3631 enum machine_mode mode ATTRIBUTE_UNUSED;
3633 return op == const1_rtx;
3636 /* Match 2, 4, or 8. Used for leal multiplicands. */
3639 const248_operand (op, mode)
3640 register rtx op;
3641 enum machine_mode mode ATTRIBUTE_UNUSED;
3643 return (GET_CODE (op) == CONST_INT
3644 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
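/* Illustrative example: const248_operand accepts the scale operand of an
   address such as (plus (mult (reg) (const_int 4)) (reg)), matching the
   base + index*{2,4,8} forms the SIB byte can encode; any other multiplier
   has to be synthesized by other means.  */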
3647 /* True if this is a constant appropriate for an increment or decrement. */
3650 incdec_operand (op, mode)
3651 register rtx op;
3652 enum machine_mode mode ATTRIBUTE_UNUSED;
3654 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3655 flags register, since the carry flag is not set. */
3656 if (TARGET_PENTIUM4 && !optimize_size)
3657 return 0;
3658 return op == const1_rtx || op == constm1_rtx;
3661 /* Return nonzero if OP is acceptable as an operand of the DImode shift
3662 expander. */
3665 shiftdi_operand (op, mode)
3666 rtx op;
3667 enum machine_mode mode ATTRIBUTE_UNUSED;
3669 if (TARGET_64BIT)
3670 return nonimmediate_operand (op, mode);
3671 else
3672 return register_operand (op, mode);
3675 /* Return false if this is the stack pointer, or any other fake
3676 register eliminable to the stack pointer. Otherwise, this is
3677 a register operand.
3679 This is used to prevent esp from being used as an index reg,
3680 which would only happen in pathological cases. */
3683 reg_no_sp_operand (op, mode)
3684 register rtx op;
3685 enum machine_mode mode;
3687 rtx t = op;
3688 if (GET_CODE (t) == SUBREG)
3689 t = SUBREG_REG (t);
3690 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3691 return 0;
3693 return register_operand (op, mode);
3697 mmx_reg_operand (op, mode)
3698 register rtx op;
3699 enum machine_mode mode ATTRIBUTE_UNUSED;
3701 return MMX_REG_P (op);
3704 /* Return false if this is any eliminable register. Otherwise
3705 general_operand. */
3708 general_no_elim_operand (op, mode)
3709 register rtx op;
3710 enum machine_mode mode;
3712 rtx t = op;
3713 if (GET_CODE (t) == SUBREG)
3714 t = SUBREG_REG (t);
3715 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3716 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3717 || t == virtual_stack_dynamic_rtx)
3718 return 0;
3719 if (REG_P (t)
3720 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3721 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3722 return 0;
3724 return general_operand (op, mode);
3727 /* Return false if this is any eliminable register. Otherwise
3728 register_operand or const_int. */
3731 nonmemory_no_elim_operand (op, mode)
3732 register rtx op;
3733 enum machine_mode mode;
3735 rtx t = op;
3736 if (GET_CODE (t) == SUBREG)
3737 t = SUBREG_REG (t);
3738 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3739 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3740 || t == virtual_stack_dynamic_rtx)
3741 return 0;
3743 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3746 /* Return false if this is any eliminable register or stack register,
3747 otherwise work like register_operand. */
3750 index_register_operand (op, mode)
3751 register rtx op;
3752 enum machine_mode mode;
3754 rtx t = op;
3755 if (GET_CODE (t) == SUBREG)
3756 t = SUBREG_REG (t);
3757 if (!REG_P (t))
3758 return 0;
3759 if (t == arg_pointer_rtx
3760 || t == frame_pointer_rtx
3761 || t == virtual_incoming_args_rtx
3762 || t == virtual_stack_vars_rtx
3763 || t == virtual_stack_dynamic_rtx
3764 || REGNO (t) == STACK_POINTER_REGNUM)
3765 return 0;
3767 return general_operand (op, mode);
3770 /* Return true if op is a Q_REGS class register. */
3773 q_regs_operand (op, mode)
3774 register rtx op;
3775 enum machine_mode mode;
3777 if (mode != VOIDmode && GET_MODE (op) != mode)
3778 return 0;
3779 if (GET_CODE (op) == SUBREG)
3780 op = SUBREG_REG (op);
3781 return ANY_QI_REG_P (op);
3784 /* Return true if op is a flags register. */
3787 flags_reg_operand (op, mode)
3788 register rtx op;
3789 enum machine_mode mode;
3791 if (mode != VOIDmode && GET_MODE (op) != mode)
3792 return 0;
3793 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3796 /* Return true if op is a NON_Q_REGS class register. */
3799 non_q_regs_operand (op, mode)
3800 register rtx op;
3801 enum machine_mode mode;
3803 if (mode != VOIDmode && GET_MODE (op) != mode)
3804 return 0;
3805 if (GET_CODE (op) == SUBREG)
3806 op = SUBREG_REG (op);
3807 return NON_QI_REG_P (op);
3811 zero_extended_scalar_load_operand (op, mode)
3812 rtx op;
3813 enum machine_mode mode ATTRIBUTE_UNUSED;
3815 unsigned n_elts;
3816 if (GET_CODE (op) != MEM)
3817 return 0;
3818 op = maybe_get_pool_constant (op);
3819 if (!op)
3820 return 0;
3821 if (GET_CODE (op) != CONST_VECTOR)
3822 return 0;
3823 n_elts =
3824 (GET_MODE_SIZE (GET_MODE (op)) /
3825 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3826 for (n_elts--; n_elts > 0; n_elts--)
3828 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3829 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3830 return 0;
3832 return 1;
3835 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3837 vector_move_operand (op, mode)
3838 rtx op;
3839 enum machine_mode mode;
3841 if (nonimmediate_operand (op, mode))
3842 return 1;
3843 if (GET_MODE (op) != mode && mode != VOIDmode)
3844 return 0;
3845 return (op == CONST0_RTX (GET_MODE (op)));
3848 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3849 insns. */
3851 sse_comparison_operator (op, mode)
3852 rtx op;
3853 enum machine_mode mode ATTRIBUTE_UNUSED;
3855 enum rtx_code code = GET_CODE (op);
3856 switch (code)
3858 /* Operations supported directly. */
3859 case EQ:
3860 case LT:
3861 case LE:
3862 case UNORDERED:
3863 case NE:
3864 case UNGE:
3865 case UNGT:
3866 case ORDERED:
3867 return 1;
3868 /* These are equivalent to ones above in non-IEEE comparisons. */
3869 case UNEQ:
3870 case UNLT:
3871 case UNLE:
3872 case LTGT:
3873 case GE:
3874 case GT:
3875 return !TARGET_IEEE_FP;
3876 default:
3877 return 0;
3880 /* Return 1 if OP is a valid comparison operator in valid mode. */
3882 ix86_comparison_operator (op, mode)
3883 register rtx op;
3884 enum machine_mode mode;
3886 enum machine_mode inmode;
3887 enum rtx_code code = GET_CODE (op);
3888 if (mode != VOIDmode && GET_MODE (op) != mode)
3889 return 0;
3890 if (GET_RTX_CLASS (code) != '<')
3891 return 0;
3892 inmode = GET_MODE (XEXP (op, 0));
3894 if (inmode == CCFPmode || inmode == CCFPUmode)
3896 enum rtx_code second_code, bypass_code;
3897 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3898 return (bypass_code == NIL && second_code == NIL);
3900 switch (code)
3902 case EQ: case NE:
3903 return 1;
3904 case LT: case GE:
3905 if (inmode == CCmode || inmode == CCGCmode
3906 || inmode == CCGOCmode || inmode == CCNOmode)
3907 return 1;
3908 return 0;
3909 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3910 if (inmode == CCmode)
3911 return 1;
3912 return 0;
3913 case GT: case LE:
3914 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3915 return 1;
3916 return 0;
3917 default:
3918 return 0;
3922 /* Return 1 if OP is a valid comparison operator testing whether the
3923 carry flag is set. */
3925 ix86_carry_flag_operator (op, mode)
3926 register rtx op;
3927 enum machine_mode mode;
3929 enum machine_mode inmode;
3930 enum rtx_code code = GET_CODE (op);
3932 if (mode != VOIDmode && GET_MODE (op) != mode)
3933 return 0;
3934 if (GET_RTX_CLASS (code) != '<')
3935 return 0;
3936 inmode = GET_MODE (XEXP (op, 0));
3937 if (GET_CODE (XEXP (op, 0)) != REG
3938 || REGNO (XEXP (op, 0)) != 17
3939 || XEXP (op, 1) != const0_rtx)
3940 return 0;
3942 if (inmode == CCFPmode || inmode == CCFPUmode)
3944 enum rtx_code second_code, bypass_code;
3946 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3947 if (bypass_code != NIL || second_code != NIL)
3948 return 0;
3949 code = ix86_fp_compare_code_to_integer (code);
3951 else if (inmode != CCmode)
3952 return 0;
3953 return code == LTU;
3956 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3959 fcmov_comparison_operator (op, mode)
3960 register rtx op;
3961 enum machine_mode mode;
3963 enum machine_mode inmode;
3964 enum rtx_code code = GET_CODE (op);
3966 if (mode != VOIDmode && GET_MODE (op) != mode)
3967 return 0;
3968 if (GET_RTX_CLASS (code) != '<')
3969 return 0;
3970 inmode = GET_MODE (XEXP (op, 0));
3971 if (inmode == CCFPmode || inmode == CCFPUmode)
3973 enum rtx_code second_code, bypass_code;
3975 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3976 if (bypass_code != NIL || second_code != NIL)
3977 return 0;
3978 code = ix86_fp_compare_code_to_integer (code);
3980 /* The i387 supports only a limited set of condition codes. */
3981 switch (code)
3983 case LTU: case GTU: case LEU: case GEU:
3984 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3985 return 1;
3986 return 0;
3987 case ORDERED: case UNORDERED:
3988 case EQ: case NE:
3989 return 1;
3990 default:
3991 return 0;
3995 /* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
3998 promotable_binary_operator (op, mode)
3999 register rtx op;
4000 enum machine_mode mode ATTRIBUTE_UNUSED;
4002 switch (GET_CODE (op))
4004 case MULT:
4005 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4006 but the 386 and 486 do HImode multiplies faster. */
4007 return ix86_tune > PROCESSOR_I486;
4008 case PLUS:
4009 case AND:
4010 case IOR:
4011 case XOR:
4012 case ASHIFT:
4013 return 1;
4014 default:
4015 return 0;
4019 /* Nearly general operand, but accept any const_double, since we wish
4020 to be able to drop them into memory rather than have them get pulled
4021 into registers. */
4024 cmp_fp_expander_operand (op, mode)
4025 register rtx op;
4026 enum machine_mode mode;
4028 if (mode != VOIDmode && mode != GET_MODE (op))
4029 return 0;
4030 if (GET_CODE (op) == CONST_DOUBLE)
4031 return 1;
4032 return general_operand (op, mode);
4035 /* Match an SI or HImode register for a zero_extract. */
4038 ext_register_operand (op, mode)
4039 register rtx op;
4040 enum machine_mode mode ATTRIBUTE_UNUSED;
4042 int regno;
4043 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4044 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4045 return 0;
4047 if (!register_operand (op, VOIDmode))
4048 return 0;
4050 /* Be careful to accept only registers having upper parts. */
4051 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4052 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4055 /* Return 1 if this is a valid binary floating-point operation.
4056 OP is the expression matched, and MODE is its mode. */
4059 binary_fp_operator (op, mode)
4060 register rtx op;
4061 enum machine_mode mode;
4063 if (mode != VOIDmode && mode != GET_MODE (op))
4064 return 0;
4066 switch (GET_CODE (op))
4068 case PLUS:
4069 case MINUS:
4070 case MULT:
4071 case DIV:
4072 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4074 default:
4075 return 0;
4080 mult_operator (op, mode)
4081 register rtx op;
4082 enum machine_mode mode ATTRIBUTE_UNUSED;
4084 return GET_CODE (op) == MULT;
4088 div_operator (op, mode)
4089 register rtx op;
4090 enum machine_mode mode ATTRIBUTE_UNUSED;
4092 return GET_CODE (op) == DIV;
4096 arith_or_logical_operator (op, mode)
4097 rtx op;
4098 enum machine_mode mode;
4100 return ((mode == VOIDmode || GET_MODE (op) == mode)
4101 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4102 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4105 /* Returns 1 if OP is a memory operand with a displacement. */
4108 memory_displacement_operand (op, mode)
4109 register rtx op;
4110 enum machine_mode mode;
4112 struct ix86_address parts;
4114 if (! memory_operand (op, mode))
4115 return 0;
4117 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4118 abort ();
4120 return parts.disp != NULL_RTX;
4123 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4124 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4126 ??? It seems likely that this will only work because cmpsi is an
4127 expander, and no actual insns use this. */
4130 cmpsi_operand (op, mode)
4131 rtx op;
4132 enum machine_mode mode;
4134 if (nonimmediate_operand (op, mode))
4135 return 1;
4137 if (GET_CODE (op) == AND
4138 && GET_MODE (op) == SImode
4139 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4140 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4141 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4142 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4143 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4144 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4145 return 1;
4147 return 0;
4150 /* Returns 1 if OP is a memory operand that cannot be represented by the
4151 modRM array. */
4154 long_memory_operand (op, mode)
4155 register rtx op;
4156 enum machine_mode mode;
4158 if (! memory_operand (op, mode))
4159 return 0;
4161 return memory_address_length (op) != 0;
4164 /* Return nonzero if the rtx is known to be aligned. */
4167 aligned_operand (op, mode)
4168 rtx op;
4169 enum machine_mode mode;
4171 struct ix86_address parts;
4173 if (!general_operand (op, mode))
4174 return 0;
4176 /* Registers and immediate operands are always "aligned". */
4177 if (GET_CODE (op) != MEM)
4178 return 1;
4180 /* Don't even try to do any aligned optimizations with volatiles. */
4181 if (MEM_VOLATILE_P (op))
4182 return 0;
4184 op = XEXP (op, 0);
4186 /* Pushes and pops are only valid on the stack pointer. */
4187 if (GET_CODE (op) == PRE_DEC
4188 || GET_CODE (op) == POST_INC)
4189 return 1;
4191 /* Decode the address. */
4192 if (! ix86_decompose_address (op, &parts))
4193 abort ();
4195 if (parts.base && GET_CODE (parts.base) == SUBREG)
4196 parts.base = SUBREG_REG (parts.base);
4197 if (parts.index && GET_CODE (parts.index) == SUBREG)
4198 parts.index = SUBREG_REG (parts.index);
4200 /* Look for some component that isn't known to be aligned. */
4201 if (parts.index)
4203 if (parts.scale < 4
4204 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4205 return 0;
4207 if (parts.base)
4209 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4210 return 0;
4212 if (parts.disp)
4214 if (GET_CODE (parts.disp) != CONST_INT
4215 || (INTVAL (parts.disp) & 3) != 0)
4216 return 0;
4219 /* Didn't find one -- this must be an aligned address. */
4220 return 1;
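/* Worked example (illustrative): (mem:SI (plus (reg) (const_int 6))) is
   rejected, since the displacement is not a multiple of 4, while
   (mem:SI (plus (reg) (const_int 8))) is accepted provided the base
   register is known to be at least 32-bit aligned.  */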
4223 /* Initialize the table of extra 80387 mathematical constants. */
4225 static void
4226 init_ext_80387_constants ()
4228 static const char * cst[5] =
4230 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4231 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4232 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4233 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4234 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4236 int i;
4238 for (i = 0; i < 5; i++)
4240 real_from_string (&ext_80387_constants_table[i], cst[i]);
4241 /* Ensure each constant is rounded to XFmode precision. */
4242 real_convert (&ext_80387_constants_table[i], XFmode,
4243 &ext_80387_constants_table[i]);
4246 ext_80387_constants_init = 1;
4249 /* Return a nonzero code if the constant is something that can be loaded
4250 with a special instruction, 0 if not, or -1 if X is not an FP CONST_DOUBLE. */
4253 standard_80387_constant_p (x)
4254 rtx x;
4256 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4257 return -1;
4259 if (x == CONST0_RTX (GET_MODE (x)))
4260 return 1;
4261 if (x == CONST1_RTX (GET_MODE (x)))
4262 return 2;
4264 /* For XFmode constants, try to find a special 80387 instruction on
4265 those CPUs that benefit from them. */
4266 if (GET_MODE (x) == XFmode
4267 && x86_ext_80387_constants & TUNEMASK)
4269 REAL_VALUE_TYPE r;
4270 int i;
4272 if (! ext_80387_constants_init)
4273 init_ext_80387_constants ();
4275 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4276 for (i = 0; i < 5; i++)
4277 if (real_identical (&r, &ext_80387_constants_table[i]))
4278 return i + 3;
4281 return 0;
4284 /* Return the opcode of the special instruction to be used to load
4285 the constant X. */
4287 const char *
4288 standard_80387_constant_opcode (x)
4289 rtx x;
4291 switch (standard_80387_constant_p (x))
4293 case 1:
4294 return "fldz";
4295 case 2:
4296 return "fld1";
4297 case 3:
4298 return "fldlg2";
4299 case 4:
4300 return "fldln2";
4301 case 5:
4302 return "fldl2e";
4303 case 6:
4304 return "fldl2t";
4305 case 7:
4306 return "fldpi";
4308 abort ();
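/* Illustrative mapping: standard_80387_constant_p returns 1 for +0.0 and
   2 for 1.0 in any FP mode, so standard_80387_constant_opcode on a DFmode
   1.0 yields "fld1"; the XFmode value of pi is table entry 4, hence index
   7 and "fldpi", on CPUs whose tuning enables x86_ext_80387_constants.  */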
4311 /* Return the CONST_DOUBLE representing the 80387 constant that is
4312 loaded by the specified special instruction. The argument IDX
4313 matches the return value from standard_80387_constant_p. */
4316 standard_80387_constant_rtx (idx)
4317 int idx;
4319 int i;
4321 if (! ext_80387_constants_init)
4322 init_ext_80387_constants ();
4324 switch (idx)
4326 case 3:
4327 case 4:
4328 case 5:
4329 case 6:
4330 case 7:
4331 i = idx - 3;
4332 break;
4334 default:
4335 abort ();
4338 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4341 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4344 standard_sse_constant_p (x)
4345 rtx x;
4347 if (x == const0_rtx)
4348 return 1;
4349 return (x == CONST0_RTX (GET_MODE (x)));
4352 /* Returns 1 if OP contains a symbol reference. */
4355 symbolic_reference_mentioned_p (op)
4356 rtx op;
4358 register const char *fmt;
4359 register int i;
4361 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4362 return 1;
4364 fmt = GET_RTX_FORMAT (GET_CODE (op));
4365 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4367 if (fmt[i] == 'E')
4369 register int j;
4371 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4372 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4373 return 1;
4376 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4377 return 1;
4380 return 0;
4383 /* Return 1 if it is appropriate to emit `ret' instructions in the
4384 body of a function. Do this only if the epilogue is simple, needing a
4385 couple of insns. Prior to reloading, we can't tell how many registers
4386 must be saved, so return 0 then. Return 0 if there is no frame
4387 marker to de-allocate.
4389 If NON_SAVING_SETJMP is defined and true, then it is not possible
4390 for the epilogue to be simple, so return 0. This is a special case
4391 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4392 until final, but jump_optimize may need to know sooner if a
4393 `return' is OK. */
4396 ix86_can_use_return_insn_p ()
4398 struct ix86_frame frame;
4400 #ifdef NON_SAVING_SETJMP
4401 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4402 return 0;
4403 #endif
4405 if (! reload_completed || frame_pointer_needed)
4406 return 0;
4408 /* Don't allow more than 32k bytes of pop, since that's all we can do
4409 with one instruction. */
4410 if (current_function_pops_args
4411 && current_function_args_size >= 32768)
4412 return 0;
4414 ix86_compute_frame_layout (&frame);
4415 return frame.to_allocate == 0 && frame.nregs == 0;
4418 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4420 x86_64_sign_extended_value (value)
4421 rtx value;
4423 switch (GET_CODE (value))
4425 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4426 to be at least 32, and thus all acceptable constants are
4427 represented as CONST_INT. */
4428 case CONST_INT:
4429 if (HOST_BITS_PER_WIDE_INT == 32)
4430 return 1;
4431 else
4433 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4434 return trunc_int_for_mode (val, SImode) == val;
4436 break;
4438 /* For certain code models, the symbolic references are known to fit;
4439 in the CM_SMALL_PIC model we know they fit if they are local to the
4440 shared library. Don't count TLS SYMBOL_REFs here, since they should
4441 fit only inside an UNSPEC, handled below. */
4442 case SYMBOL_REF:
4443 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4445 /* For certain code models, the code is near as well. */
4446 case LABEL_REF:
4447 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4448 || ix86_cmodel == CM_KERNEL);
4450 /* We also may accept the offsetted memory references in certain special
4451 cases. */
4452 case CONST:
4453 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4454 switch (XINT (XEXP (value, 0), 1))
4456 case UNSPEC_GOTPCREL:
4457 case UNSPEC_DTPOFF:
4458 case UNSPEC_GOTNTPOFF:
4459 case UNSPEC_NTPOFF:
4460 return 1;
4461 default:
4462 break;
4464 if (GET_CODE (XEXP (value, 0)) == PLUS)
4466 rtx op1 = XEXP (XEXP (value, 0), 0);
4467 rtx op2 = XEXP (XEXP (value, 0), 1);
4468 HOST_WIDE_INT offset;
4470 if (ix86_cmodel == CM_LARGE)
4471 return 0;
4472 if (GET_CODE (op2) != CONST_INT)
4473 return 0;
4474 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4475 switch (GET_CODE (op1))
4477 case SYMBOL_REF:
4478 /* For CM_SMALL assume that the latest object is 16MB before the
4479 end of the 31-bit boundary. We may also accept pretty
4480 large negative constants, knowing that all objects are
4481 in the positive half of the address space. */
4482 if (ix86_cmodel == CM_SMALL
4483 && offset < 16*1024*1024
4484 && trunc_int_for_mode (offset, SImode) == offset)
4485 return 1;
4486 /* For CM_KERNEL we know that all objects reside in the
4487 negative half of the 32-bit address space. We may not
4488 accept negative offsets, since they may be just outside
4489 the range, but we may accept pretty large positive ones. */
4490 if (ix86_cmodel == CM_KERNEL
4491 && offset > 0
4492 && trunc_int_for_mode (offset, SImode) == offset)
4493 return 1;
4494 break;
4495 case LABEL_REF:
4496 /* These conditions are similar to SYMBOL_REF ones, just the
4497 constraints for code models differ. */
4498 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4499 && offset < 16*1024*1024
4500 && trunc_int_for_mode (offset, SImode) == offset)
4501 return 1;
4502 if (ix86_cmodel == CM_KERNEL
4503 && offset > 0
4504 && trunc_int_for_mode (offset, SImode) == offset)
4505 return 1;
4506 break;
4507 case UNSPEC:
4508 switch (XINT (op1, 1))
4510 case UNSPEC_DTPOFF:
4511 case UNSPEC_NTPOFF:
4512 if (offset > 0
4513 && trunc_int_for_mode (offset, SImode) == offset)
4514 return 1;
4516 break;
4517 default:
4518 return 0;
4521 return 0;
4522 default:
4523 return 0;
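/* Worked example (illustrative, assuming a 64-bit HOST_WIDE_INT):
   (const_int 0x7fffffff) and (const_int -0x80000000) are accepted, since
   they survive the DImode -> SImode round trip, while (const_int
   0x80000000) is rejected: sign-extending its low 32 bits yields
   0xffffffff80000000, which differs from the original value.  */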
4527 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4529 x86_64_zero_extended_value (value)
4530 rtx value;
4532 switch (GET_CODE (value))
4534 case CONST_DOUBLE:
4535 if (HOST_BITS_PER_WIDE_INT == 32)
4536 return (GET_MODE (value) == VOIDmode
4537 && !CONST_DOUBLE_HIGH (value));
4538 else
4539 return 0;
4540 case CONST_INT:
4541 if (HOST_BITS_PER_WIDE_INT == 32)
4542 return INTVAL (value) >= 0;
4543 else
4544 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4545 break;
4547 /* For certain code models, the symbolic references are known to fit. */
4548 case SYMBOL_REF:
4549 return ix86_cmodel == CM_SMALL;
4551 /* For certain code models, the code is near as well. */
4552 case LABEL_REF:
4553 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4555 /* We also may accept the offsetted memory references in certain special
4556 cases. */
4557 case CONST:
4558 if (GET_CODE (XEXP (value, 0)) == PLUS)
4560 rtx op1 = XEXP (XEXP (value, 0), 0);
4561 rtx op2 = XEXP (XEXP (value, 0), 1);
4563 if (ix86_cmodel == CM_LARGE)
4564 return 0;
4565 switch (GET_CODE (op1))
4567 case SYMBOL_REF:
4568 return 0;
4569 /* For small code model we may accept pretty large positive
4570 offsets, since one bit is available for free. Negative
4571 offsets are limited by the size of NULL pointer area
4572 specified by the ABI. */
4573 if (ix86_cmodel == CM_SMALL
4574 && GET_CODE (op2) == CONST_INT
4575 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4576 && (trunc_int_for_mode (INTVAL (op2), SImode)
4577 == INTVAL (op2)))
4578 return 1;
4579 /* ??? For the kernel, we may accept adjustment of
4580 -0x10000000, since we know that it will just convert
4581 negative address space to positive, but perhaps this
4582 is not worthwhile. */
4583 break;
4584 case LABEL_REF:
4585 /* These conditions are similar to SYMBOL_REF ones, just the
4586 constraints for code models differ. */
4587 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4588 && GET_CODE (op2) == CONST_INT
4589 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4590 && (trunc_int_for_mode (INTVAL (op2), SImode)
4591 == INTVAL (op2)))
4592 return 1;
4593 break;
4594 default:
4595 return 0;
4598 return 0;
4599 default:
4600 return 0;
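/* Worked example (illustrative, assuming a 64-bit HOST_WIDE_INT):
   (const_int 0xffffffff) is accepted here, since its high 32 bits are
   clear, but rejected by x86_64_sign_extended_value above; conversely,
   (const_int -1) is accepted there and rejected here.  */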
4604 /* Value should be nonzero if functions must have frame pointers.
4605 Zero means the frame pointer need not be set up (and parms may
4606 be accessed via the stack pointer) in functions that seem suitable. */
4609 ix86_frame_pointer_required ()
4611 /* If we accessed previous frames, then the generated code expects
4612 to be able to access the saved ebp value in our frame. */
4613 if (cfun->machine->accesses_prev_frame)
4614 return 1;
4616 /* Several x86 os'es need a frame pointer for other reasons,
4617 usually pertaining to setjmp. */
4618 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4619 return 1;
4621 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4622 the frame pointer by default. Turn it back on now if we've not
4623 got a leaf function. */
4624 if (TARGET_OMIT_LEAF_FRAME_POINTER
4625 && (!current_function_is_leaf))
4626 return 1;
4628 if (current_function_profile)
4629 return 1;
4631 return 0;
4634 /* Record that the current function accesses previous call frames. */
4636 void
4637 ix86_setup_frame_addresses ()
4639 cfun->machine->accesses_prev_frame = 1;
4642 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4643 # define USE_HIDDEN_LINKONCE 1
4644 #else
4645 # define USE_HIDDEN_LINKONCE 0
4646 #endif
4648 static int pic_labels_used;
4650 /* Fills in the label name that should be used for a pc thunk for
4651 the given register. */
4653 static void
4654 get_pc_thunk_name (name, regno)
4655 char name[32];
4656 unsigned int regno;
4658 if (USE_HIDDEN_LINKONCE)
4659 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4660 else
4661 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
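/* For example, with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx"; otherwise an internal label is built from the
   prefix "LPR" and the register number.  */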
4665 /* This function generates, for -fpic, the pc thunks that load the chosen
4666 register with the return address of the caller and then return. */
4668 void
4669 ix86_asm_file_end (file)
4670 FILE *file;
4672 rtx xops[2];
4673 int regno;
4675 for (regno = 0; regno < 8; ++regno)
4677 char name[32];
4679 if (! ((pic_labels_used >> regno) & 1))
4680 continue;
4682 get_pc_thunk_name (name, regno);
4684 if (USE_HIDDEN_LINKONCE)
4686 tree decl;
4688 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4689 error_mark_node);
4690 TREE_PUBLIC (decl) = 1;
4691 TREE_STATIC (decl) = 1;
4692 DECL_ONE_ONLY (decl) = 1;
4694 (*targetm.asm_out.unique_section) (decl, 0);
4695 named_section (decl, NULL, 0);
4697 (*targetm.asm_out.globalize_label) (file, name);
4698 fputs ("\t.hidden\t", file);
4699 assemble_name (file, name);
4700 fputc ('\n', file);
4701 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4703 else
4705 text_section ();
4706 ASM_OUTPUT_LABEL (file, name);
4709 xops[0] = gen_rtx_REG (SImode, regno);
4710 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4711 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4712 output_asm_insn ("ret", xops);
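/* The thunk emitted above for %ebx therefore looks roughly like this
   (AT&T syntax):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret
*/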
4716 /* Emit code for the SET_GOT patterns. */
4718 const char *
4719 output_set_got (dest)
4720 rtx dest;
4722 rtx xops[3];
4724 xops[0] = dest;
4725 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4727 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4729 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4731 if (!flag_pic)
4732 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4733 else
4734 output_asm_insn ("call\t%a2", xops);
4736 #if TARGET_MACHO
4737 /* Output the "canonical" label name ("Lxx$pb") here too. This
4738 is what will be referred to by the Mach-O PIC subsystem. */
4739 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4740 #endif
4741 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4742 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4744 if (flag_pic)
4745 output_asm_insn ("pop{l}\t%0", xops);
4747 else
4749 char name[32];
4750 get_pc_thunk_name (name, REGNO (dest));
4751 pic_labels_used |= 1 << REGNO (dest);
4753 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4754 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4755 output_asm_insn ("call\t%X2", xops);
4758 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4759 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4760 else if (!TARGET_MACHO)
4761 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4763 return "";
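/* Illustrative output for -fpic without TARGET_DEEP_BRANCH_PREDICTION,
   assuming GOT_SYMBOL_NAME is "_GLOBAL_OFFSET_TABLE_":

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction, a call to the matching pc thunk is emitted
   instead, which keeps the processor's return-address stack balanced.  */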
4766 /* Generate a "push" pattern for input ARG. */
4768 static rtx
4769 gen_push (arg)
4770 rtx arg;
4772 return gen_rtx_SET (VOIDmode,
4773 gen_rtx_MEM (Pmode,
4774 gen_rtx_PRE_DEC (Pmode,
4775 stack_pointer_rtx)),
4776 arg);
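/* Illustrative: in 32-bit mode this builds the RTL
   (set (mem:SI (pre_dec:SI (reg:SI sp))) arg),
   i.e. a plain push; in 64-bit mode Pmode is DImode, so a DImode push
   is generated instead.  */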
4779 /* Return >= 0 if there is an unused call-clobbered register available
4780 for the entire function. */
4782 static unsigned int
4783 ix86_select_alt_pic_regnum ()
4785 if (current_function_is_leaf && !current_function_profile)
4787 int i;
4788 for (i = 2; i >= 0; --i)
4789 if (!regs_ever_live[i])
4790 return i;
4793 return INVALID_REGNUM;
4796 /* Return 1 if we need to save REGNO. */
4797 static int
4798 ix86_save_reg (regno, maybe_eh_return)
4799 unsigned int regno;
4800 int maybe_eh_return;
4802 if (pic_offset_table_rtx
4803 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4804 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4805 || current_function_profile
4806 || current_function_calls_eh_return))
4808 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4809 return 0;
4810 return 1;
4813 if (current_function_calls_eh_return && maybe_eh_return)
4815 unsigned i;
4816 for (i = 0; ; i++)
4818 unsigned test = EH_RETURN_DATA_REGNO (i);
4819 if (test == INVALID_REGNUM)
4820 break;
4821 if (test == regno)
4822 return 1;
4826 return (regs_ever_live[regno]
4827 && !call_used_regs[regno]
4828 && !fixed_regs[regno]
4829 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4832 /* Return number of registers to be saved on the stack. */
4834 static int
4835 ix86_nsaved_regs ()
4837 int nregs = 0;
4838 int regno;
4840 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4841 if (ix86_save_reg (regno, true))
4842 nregs++;
4843 return nregs;
4846 /* Return the offset between two registers, one to be eliminated, and the other
4847 its replacement, at the start of a routine. */
4849 HOST_WIDE_INT
4850 ix86_initial_elimination_offset (from, to)
4851 int from;
4852 int to;
4854 struct ix86_frame frame;
4855 ix86_compute_frame_layout (&frame);
4857 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4858 return frame.hard_frame_pointer_offset;
4859 else if (from == FRAME_POINTER_REGNUM
4860 && to == HARD_FRAME_POINTER_REGNUM)
4861 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4862 else
4864 if (to != STACK_POINTER_REGNUM)
4865 abort ();
4866 else if (from == ARG_POINTER_REGNUM)
4867 return frame.stack_pointer_offset;
4868 else if (from != FRAME_POINTER_REGNUM)
4869 abort ();
4870 else
4871 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4875 /* Fill the ix86_frame structure describing the frame of the current function. */
4877 static void
4878 ix86_compute_frame_layout (frame)
4879 struct ix86_frame *frame;
4881 HOST_WIDE_INT total_size;
4882 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4883 int offset;
4884 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4885 HOST_WIDE_INT size = get_frame_size ();
4887 frame->nregs = ix86_nsaved_regs ();
4888 total_size = size;
4890 /* Skip return address and saved base pointer. */
4891 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4893 frame->hard_frame_pointer_offset = offset;
4895 /* Do some sanity checking of stack_alignment_needed and
4896 preferred_alignment, since the i386 port is the only one using these
4897 features, and they may break easily. */
4899 if (size && !stack_alignment_needed)
4900 abort ();
4901 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4902 abort ();
4903 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4904 abort ();
4905 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4906 abort ();
4908 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4909 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4911 /* Register save area */
4912 offset += frame->nregs * UNITS_PER_WORD;
4914 /* Va-arg area */
4915 if (ix86_save_varrargs_registers)
4917 offset += X86_64_VARARGS_SIZE;
4918 frame->va_arg_size = X86_64_VARARGS_SIZE;
4920 else
4921 frame->va_arg_size = 0;
4923 /* Align start of frame for local function. */
4924 frame->padding1 = ((offset + stack_alignment_needed - 1)
4925 & -stack_alignment_needed) - offset;
4927 offset += frame->padding1;
4929 /* Frame pointer points here. */
4930 frame->frame_pointer_offset = offset;
4932 offset += size;
4934 /* Add outgoing arguments area. Can be skipped if we eliminated
4935 all the function calls as dead code. */
4936 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4938 offset += current_function_outgoing_args_size;
4939 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4941 else
4942 frame->outgoing_arguments_size = 0;
4944 /* Align stack boundary. Only needed if we're calling another function
4945 or using alloca. */
4946 if (!current_function_is_leaf || current_function_calls_alloca)
4947 frame->padding2 = ((offset + preferred_alignment - 1)
4948 & -preferred_alignment) - offset;
4949 else
4950 frame->padding2 = 0;
4952 offset += frame->padding2;
4954 /* We've reached end of stack frame. */
4955 frame->stack_pointer_offset = offset;
4957 /* Size prologue needs to allocate. */
4958 frame->to_allocate =
4959 (size + frame->padding1 + frame->padding2
4960 + frame->outgoing_arguments_size + frame->va_arg_size);
4962 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4963 && current_function_is_leaf)
4965 frame->red_zone_size = frame->to_allocate;
4966 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4967 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4969 else
4970 frame->red_zone_size = 0;
4971 frame->to_allocate -= frame->red_zone_size;
4972 frame->stack_pointer_offset -= frame->red_zone_size;
4973 #if 0
4974 fprintf (stderr, "nregs: %i\n", frame->nregs);
4975 fprintf (stderr, "size: %i\n", size);
4976 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4977 fprintf (stderr, "padding1: %i\n", frame->padding1);
4978 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4979 fprintf (stderr, "padding2: %i\n", frame->padding2);
4980 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4981 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4982 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4983 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4984 frame->hard_frame_pointer_offset);
4985 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4986 #endif
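/* Worked example (illustrative): 32-bit mode, frame pointer needed, two
   saved registers, 20 bytes of locals, no va-arg or outgoing-args area,
   and a 16-byte preferred boundary in a non-leaf function. Offset starts
   at 8 (return address plus saved ebp), the register save area brings it
   to 16 with padding1 = 0, so frame_pointer_offset = 16; the locals bring
   it to 36 and padding2 = 12 rounds stack_pointer_offset up to 48, giving
   to_allocate = 20 + 0 + 12 = 32.  */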
4989 /* Emit code to save registers in the prologue. */
4991 static void
4992 ix86_emit_save_regs ()
4994 register int regno;
4995 rtx insn;
4997 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4998 if (ix86_save_reg (regno, true))
5000 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5001 RTX_FRAME_RELATED_P (insn) = 1;
5005 /* Emit code to save registers using MOV insns. The first register
5006 is stored at POINTER + OFFSET. */
5007 static void
5008 ix86_emit_save_regs_using_mov (pointer, offset)
5009 rtx pointer;
5010 HOST_WIDE_INT offset;
5012 int regno;
5013 rtx insn;
5015 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5016 if (ix86_save_reg (regno, true))
5018 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5019 Pmode, offset),
5020 gen_rtx_REG (Pmode, regno));
5021 RTX_FRAME_RELATED_P (insn) = 1;
5022 offset += UNITS_PER_WORD;
5026 /* Expand the prologue into a bunch of separate insns. */
5028 void
5029 ix86_expand_prologue ()
5031 rtx insn;
5032 bool pic_reg_used;
5033 struct ix86_frame frame;
5034 int use_mov = 0;
5035 HOST_WIDE_INT allocate;
5037 ix86_compute_frame_layout (&frame);
5038 if (!optimize_size)
5040 int count = frame.nregs;
5042 /* The fast prologue uses moves instead of pushes to save registers. This
5043 is significantly longer, but also executes faster, as modern hardware
5044 can execute the moves in parallel, but can't do that for push/pop.
5046 Be careful about choosing which prologue to emit: when the function takes
5047 many instructions to execute, we may use the slow version, as well as
5048 when the function is known to be outside a hot spot (this is known with
5049 feedback only). Weight the size of the function by the number of registers
5050 to save, as it is cheap to use one or two push instructions but very
5051 slow to use many of them. */
5052 if (count)
5053 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5054 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5055 || (flag_branch_probabilities
5056 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5057 use_fast_prologue_epilogue = 0;
5058 else
5059 use_fast_prologue_epilogue = !expensive_function_p (count);
5060 if (TARGET_PROLOGUE_USING_MOVE)
5061 use_mov = use_fast_prologue_epilogue;
5064 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5065 slower on all targets. Also sdb doesn't like it. */
5067 if (frame_pointer_needed)
5069 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5070 RTX_FRAME_RELATED_P (insn) = 1;
5072 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5073 RTX_FRAME_RELATED_P (insn) = 1;
5076 allocate = frame.to_allocate;
5077 /* In case we are dealing with only a single register and an empty frame,
5078 a push is equivalent to the mov+add sequence. */
5079 if (allocate == 0 && frame.nregs <= 1)
5080 use_mov = 0;
5082 if (!use_mov)
5083 ix86_emit_save_regs ();
5084 else
5085 allocate += frame.nregs * UNITS_PER_WORD;
5087 if (allocate == 0)
5089 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5091 insn = emit_insn (gen_pro_epilogue_adjust_stack
5092 (stack_pointer_rtx, stack_pointer_rtx,
5093 GEN_INT (-allocate)));
5094 RTX_FRAME_RELATED_P (insn) = 1;
5096 else
5098 /* ??? Is this only valid for Win32? */
5100 rtx arg0, sym;
5102 if (TARGET_64BIT)
5103 abort ();
5105 arg0 = gen_rtx_REG (SImode, 0);
5106 emit_move_insn (arg0, GEN_INT (allocate));
5108 sym = gen_rtx_MEM (FUNCTION_MODE,
5109 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5110 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5112 CALL_INSN_FUNCTION_USAGE (insn)
5113 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5114 CALL_INSN_FUNCTION_USAGE (insn));
5116 /* Don't allow the scheduling pass to move insns across the __alloca
5117 call. */
5118 emit_insn (gen_blockage (const0_rtx));
5120 if (use_mov)
5122 if (!frame_pointer_needed || !frame.to_allocate)
5123 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5124 else
5125 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5126 -frame.nregs * UNITS_PER_WORD);
5129 #ifdef SUBTARGET_PROLOGUE
5130 SUBTARGET_PROLOGUE;
5131 #endif
5133 pic_reg_used = false;
5134 if (pic_offset_table_rtx
5135 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5136 || current_function_profile))
5138 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5140 if (alt_pic_reg_used != INVALID_REGNUM)
5141 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5143 pic_reg_used = true;
5146 if (pic_reg_used)
5148 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5150 /* Even with accurate pre-reload life analysis, we can wind up
5151 deleting all references to the pic register after reload.
5152 Consider if cross-jumping unifies two sides of a branch
5153 controlled by a comparison vs the only read from a global.
5154 In which case, allow the set_got to be deleted, though we're
5155 too late to do anything about the ebx save in the prologue. */
5156 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5159 /* Prevent function calls from being scheduled before the call to mcount.
5160 In the pic_reg_used case, make sure that the got load isn't deleted. */
5161 if (current_function_profile)
5162 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5165 /* Emit code to restore saved registers using MOV insns. The first register
5166 is restored from POINTER + OFFSET. */
5167 static void
5168 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5169 rtx pointer;
5170 int offset;
5171 int maybe_eh_return;
5173 int regno;
5175 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5176 if (ix86_save_reg (regno, maybe_eh_return))
5178 emit_move_insn (gen_rtx_REG (Pmode, regno),
5179 adjust_address (gen_rtx_MEM (Pmode, pointer),
5180 Pmode, offset));
5181 offset += UNITS_PER_WORD;
5185 /* Restore function stack, frame, and registers. */
5187 void
5188 ix86_expand_epilogue (style)
5189 int style;
5191 int regno;
5192 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5193 struct ix86_frame frame;
5194 HOST_WIDE_INT offset;
5196 ix86_compute_frame_layout (&frame);
5198 /* Calculate start of saved registers relative to ebp. Special care
5199 must be taken for the normal return case of a function using
5200 eh_return: the eax and edx registers are marked as saved, but not
5201 restored along this path. */
5202 offset = frame.nregs;
5203 if (current_function_calls_eh_return && style != 2)
5204 offset -= 2;
5205 offset *= -UNITS_PER_WORD;
5207 /* If we're only restoring one register and sp is not valid, then
5208 use a move instruction to restore the register, since it's
5209 less work than reloading sp and popping the register.
5211 The default code results in a stack adjustment using an add/lea instruction,
5212 while this code results in a LEAVE instruction (or discrete equivalent),
5213 so it is profitable in some other cases as well, especially when there
5214 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5215 and there is exactly one register to pop. This heuristic may need some
5216 tuning in the future. */
5217 if ((!sp_valid && frame.nregs <= 1)
5218 || (TARGET_EPILOGUE_USING_MOVE
5219 && use_fast_prologue_epilogue
5220 && (frame.nregs > 1 || frame.to_allocate))
5221 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5222 || (frame_pointer_needed && TARGET_USE_LEAVE
5223 && use_fast_prologue_epilogue && frame.nregs == 1)
5224 || current_function_calls_eh_return)
5226 /* Restore registers. We can use ebp or esp to address the memory
5227 locations. If both are available, default to ebp, since offsets
5228 are known to be small. The only exception is esp pointing directly
5229 to the end of the block of saved registers, where we may simplify
5230 the addressing mode. */
5232 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5233 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5234 frame.to_allocate, style == 2);
5235 else
5236 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5237 offset, style == 2);
5239 /* eh_return epilogues need %ecx added to the stack pointer. */
5240 if (style == 2)
5242 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5244 if (frame_pointer_needed)
5246 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5247 tmp = plus_constant (tmp, UNITS_PER_WORD);
5248 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5250 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5251 emit_move_insn (hard_frame_pointer_rtx, tmp);
5253 emit_insn (gen_pro_epilogue_adjust_stack
5254 (stack_pointer_rtx, sa, const0_rtx));
5256 else
5258 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5259 tmp = plus_constant (tmp, (frame.to_allocate
5260 + frame.nregs * UNITS_PER_WORD));
5261 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5264 else if (!frame_pointer_needed)
5265 emit_insn (gen_pro_epilogue_adjust_stack
5266 (stack_pointer_rtx, stack_pointer_rtx,
5267 GEN_INT (frame.to_allocate
5268 + frame.nregs * UNITS_PER_WORD)));
5269 /* If not an i386, mov & pop is faster than "leave". */
5270 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5271 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5272 else
5274 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5275 hard_frame_pointer_rtx,
5276 const0_rtx));
5277 if (TARGET_64BIT)
5278 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5279 else
5280 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5283 else
5285 /* First step is to deallocate the stack frame so that we can
5286 pop the registers. */
5287 if (!sp_valid)
5289 if (!frame_pointer_needed)
5290 abort ();
5291 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5292 hard_frame_pointer_rtx,
5293 GEN_INT (offset)));
5295 else if (frame.to_allocate)
5296 emit_insn (gen_pro_epilogue_adjust_stack
5297 (stack_pointer_rtx, stack_pointer_rtx,
5298 GEN_INT (frame.to_allocate)));
5300 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5301 if (ix86_save_reg (regno, false))
5303 if (TARGET_64BIT)
5304 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5305 else
5306 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5308 if (frame_pointer_needed)
5310 /* Leave results in shorter dependency chains on CPUs that are
5311 able to grok it fast. */
5312 if (TARGET_USE_LEAVE)
5313 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5314 else if (TARGET_64BIT)
5315 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5316 else
5317 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5321 /* Sibcall epilogues don't want a return instruction. */
5322 if (style == 0)
5323 return;
5325 if (current_function_pops_args && current_function_args_size)
5327 rtx popc = GEN_INT (current_function_pops_args);
5329 /* The i386 can only pop 64K bytes with one ret. If asked to pop more,
5330 pop the return address, do an explicit add, and jump indirectly to
5331 the caller. */
5333 if (current_function_pops_args >= 65536)
5335 rtx ecx = gen_rtx_REG (SImode, 2);
5337 /* There is no "pascal" calling convention in the 64-bit ABI. */
5338 if (TARGET_64BIT)
5339 abort ();
5341 emit_insn (gen_popsi1 (ecx));
5342 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5343 emit_jump_insn (gen_return_indirect_internal (ecx));
5345 else
5346 emit_jump_insn (gen_return_pop_internal (popc));
5348 else
5349 emit_jump_insn (gen_return_internal ());
5352 /* Reset from the function's potential modifications. */
5354 static void
5355 ix86_output_function_epilogue (file, size)
5356 FILE *file ATTRIBUTE_UNUSED;
5357 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5359 if (pic_offset_table_rtx)
5360 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5363 /* Extract the parts of an RTL expression that is a valid memory address
5364 for an instruction. Return 0 if the structure of the address is
5365 grossly off. Return -1 if the address contains ASHIFT, so it is not
5366 strictly valid, but is still used for computing the length of the lea instruction. */
5369 static int
5370 ix86_decompose_address (addr, out)
5371 register rtx addr;
5372 struct ix86_address *out;
5374 rtx base = NULL_RTX;
5375 rtx index = NULL_RTX;
5376 rtx disp = NULL_RTX;
5377 HOST_WIDE_INT scale = 1;
5378 rtx scale_rtx = NULL_RTX;
5379 int retval = 1;
5381 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5382 base = addr;
5383 else if (GET_CODE (addr) == PLUS)
5385 rtx op0 = XEXP (addr, 0);
5386 rtx op1 = XEXP (addr, 1);
5387 enum rtx_code code0 = GET_CODE (op0);
5388 enum rtx_code code1 = GET_CODE (op1);
5390 if (code0 == REG || code0 == SUBREG)
5392 if (code1 == REG || code1 == SUBREG)
5393 index = op0, base = op1; /* index + base */
5394 else
5395 base = op0, disp = op1; /* base + displacement */
5397 else if (code0 == MULT)
5399 index = XEXP (op0, 0);
5400 scale_rtx = XEXP (op0, 1);
5401 if (code1 == REG || code1 == SUBREG)
5402 base = op1; /* index*scale + base */
5403 else
5404 disp = op1; /* index*scale + disp */
5406 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5408 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5409 scale_rtx = XEXP (XEXP (op0, 0), 1);
5410 base = XEXP (op0, 1);
5411 disp = op1;
5413 else if (code0 == PLUS)
5415 index = XEXP (op0, 0); /* index + base + disp */
5416 base = XEXP (op0, 1);
5417 disp = op1;
5419 else
5420 return 0;
5422 else if (GET_CODE (addr) == MULT)
5424 index = XEXP (addr, 0); /* index*scale */
5425 scale_rtx = XEXP (addr, 1);
5427 else if (GET_CODE (addr) == ASHIFT)
5429 rtx tmp;
5431 /* We're called for lea too, which implements ashift on occasion. */
5432 index = XEXP (addr, 0);
5433 tmp = XEXP (addr, 1);
5434 if (GET_CODE (tmp) != CONST_INT)
5435 return 0;
5436 scale = INTVAL (tmp);
5437 if ((unsigned HOST_WIDE_INT) scale > 3)
5438 return 0;
5439 scale = 1 << scale;
5440 retval = -1;
5442 else
5443 disp = addr; /* displacement */
5445 /* Extract the integral value of scale. */
5446 if (scale_rtx)
5448 if (GET_CODE (scale_rtx) != CONST_INT)
5449 return 0;
5450 scale = INTVAL (scale_rtx);
5453 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5454 if (base && index && scale == 1
5455 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5456 || index == stack_pointer_rtx))
5458 rtx tmp = base;
5459 base = index;
5460 index = tmp;
5463 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5464 if ((base == hard_frame_pointer_rtx
5465 || base == frame_pointer_rtx
5466 || base == arg_pointer_rtx) && !disp)
5467 disp = const0_rtx;
5469 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5470 Avoid this by transforming to [%esi+0]. */
5471 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5472 && base && !index && !disp
5473 && REG_P (base)
5474 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5475 disp = const0_rtx;
5477 /* Special case: encode reg+reg instead of reg*2. */
5478 if (!base && index && scale && scale == 2)
5479 base = index, scale = 1;
5481 /* Special case: scaling cannot be encoded without base or displacement. */
5482 if (!base && !disp && index && scale != 1)
5483 disp = const0_rtx;
5485 out->base = base;
5486 out->index = index;
5487 out->disp = disp;
5488 out->scale = scale;
5490 return retval;
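/* Worked example (illustrative): for the address
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12)),
   i.e. 12(%B,%A,4), this fills in base = B, index = A, scale = 4,
   disp = 12 and returns 1; an ASHIFT form such as
   (ashift (reg A) (const_int 2)) yields index = A, scale = 4 and a
   return value of -1.  */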
5493 /* Return the cost of the memory address x.
5494 For i386, it is better to use a complex address than to let gcc copy
5495 the address into a reg and make a new pseudo. But not if the address
5496 requires two regs - that would mean more pseudos with longer
5497 lifetimes. */
5498 static int
5499 ix86_address_cost (x)
5500 rtx x;
5502 struct ix86_address parts;
5503 int cost = 1;
5505 if (!ix86_decompose_address (x, &parts))
5506 abort ();
5508 if (parts.base && GET_CODE (parts.base) == SUBREG)
5509 parts.base = SUBREG_REG (parts.base);
5510 if (parts.index && GET_CODE (parts.index) == SUBREG)
5511 parts.index = SUBREG_REG (parts.index);
5513 /* More complex memory references are better. */
5514 if (parts.disp && parts.disp != const0_rtx)
5515 cost--;
5517 /* Attempt to minimize number of registers in the address. */
5518 if ((parts.base
5519 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5520 || (parts.index
5521 && (!REG_P (parts.index)
5522 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5523 cost++;
5525 if (parts.base
5526 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5527 && parts.index
5528 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5529 && parts.base != parts.index)
5530 cost++;
5532 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5533 since its predecode logic can't detect the length of instructions
5534 and decoding degenerates to the vector decoder. Increase the cost of such
5535 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5536 to split such addresses or even to refuse them entirely.
5538 The following addressing modes are affected:
5539 [base+scale*index]
5540 [scale*index+disp]
5541 [base+index]
5543 The first and last cases may be avoidable by explicitly coding the zero
5544 into the memory address, but I don't have an AMD-K6 machine handy to
5545 check this theory. */
5547 if (TARGET_K6
5548 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5549 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5550 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5551 cost += 10;
5553 return cost;
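/* Illustrative costs (hard registers, ignoring the K6 penalty): a plain
   (reg) costs 1; (plus (reg) (const_int 4)) costs 0, since the
   displacement makes the reference "more complex"; an address using two
   distinct pseudo registers as base and index costs 3.  */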
5556 /* If X is a machine specific address (i.e. a symbol or label being
5557 referenced as a displacement from the GOT implemented using an
5558 UNSPEC), then return the base term. Otherwise return X. */
5561 ix86_find_base_term (x)
5562 rtx x;
5564 rtx term;
5566 if (TARGET_64BIT)
5568 if (GET_CODE (x) != CONST)
5569 return x;
5570 term = XEXP (x, 0);
5571 if (GET_CODE (term) == PLUS
5572 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5573 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5574 term = XEXP (term, 0);
5575 if (GET_CODE (term) != UNSPEC
5576 || XINT (term, 1) != UNSPEC_GOTPCREL)
5577 return x;
5579 term = XVECEXP (term, 0, 0);
5581 if (GET_CODE (term) != SYMBOL_REF
5582 && GET_CODE (term) != LABEL_REF)
5583 return x;
5585 return term;
5588 term = ix86_delegitimize_address (x);
5590 if (GET_CODE (term) != SYMBOL_REF
5591 && GET_CODE (term) != LABEL_REF)
5592 return x;
5594 return term;
5597 /* Determine if a given RTX is a valid constant. We already know this
5598 satisfies CONSTANT_P. */
5600 bool
5601 legitimate_constant_p (x)
5602 rtx x;
5604 rtx inner;
5606 switch (GET_CODE (x))
5608 case SYMBOL_REF:
5609 /* TLS symbols are not constant. */
5610 if (tls_symbolic_operand (x, Pmode))
5611 return false;
5612 break;
5614 case CONST:
5615 inner = XEXP (x, 0);
5617 /* Offsets of TLS symbols are never valid.
5618 Discourage CSE from creating them. */
5619 if (GET_CODE (inner) == PLUS
5620 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5621 return false;
5623 /* Only some unspecs are valid as "constants". */
5624 if (GET_CODE (inner) == UNSPEC)
5625 switch (XINT (inner, 1))
5627 case UNSPEC_TPOFF:
5628 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5629 default:
5630 return false;
5632 break;
5634 default:
5635 break;
5638 /* Otherwise we handle everything else in the move patterns. */
5639 return true;
5642 /* Determine if it's legal to put X into the constant pool. This
5643 is not possible for the address of thread-local symbols, which
5644 is checked above. */
5646 static bool
5647 ix86_cannot_force_const_mem (x)
5648 rtx x;
5650 return !legitimate_constant_p (x);
5653 /* Determine if a given RTX is a valid constant address. */
5655 bool
5656 constant_address_p (x)
5657 rtx x;
5659 switch (GET_CODE (x))
5661 case LABEL_REF:
5662 case CONST_INT:
5663 return true;
5665 case CONST_DOUBLE:
5666 return TARGET_64BIT;
5668 case CONST:
5669 /* For Mach-O, really believe the CONST. */
5670 if (TARGET_MACHO)
5671 return true;
5672 /* Otherwise fall through. */
5673 case SYMBOL_REF:
5674 return !flag_pic && legitimate_constant_p (x);
5676 default:
5677 return false;
5681 /* Nonzero if the constant value X is a legitimate general operand
5682 when generating PIC code. It is given that flag_pic is on and
5683 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5685 bool
5686 legitimate_pic_operand_p (x)
5687 rtx x;
5689 rtx inner;
5691 switch (GET_CODE (x))
5693 case CONST:
5694 inner = XEXP (x, 0);
5696 /* Only some unspecs are valid as "constants". */
5697 if (GET_CODE (inner) == UNSPEC)
5698 switch (XINT (inner, 1))
5700 case UNSPEC_TPOFF:
5701 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5702 default:
5703 return false;
5705 /* FALLTHRU */
5707 case SYMBOL_REF:
5708 case LABEL_REF:
5709 return legitimate_pic_address_disp_p (x);
5711 default:
5712 return true;
5716 /* Determine if a given CONST RTX is a valid memory displacement
5717 in PIC mode. */
5719 int
5720 legitimate_pic_address_disp_p (disp)
5721 register rtx disp;
5723 bool saw_plus;
5725 /* In 64-bit mode we can allow direct addresses of symbols and labels
5726 when they are not dynamic symbols. */
5727 if (TARGET_64BIT)
5729 /* TLS references should always be enclosed in UNSPEC. */
5730 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5731 return 0;
5732 if (GET_CODE (disp) == SYMBOL_REF
5733 && ix86_cmodel == CM_SMALL_PIC
5734 && (CONSTANT_POOL_ADDRESS_P (disp)
5735 || SYMBOL_REF_FLAG (disp)))
5736 return 1;
5737 if (GET_CODE (disp) == LABEL_REF)
5738 return 1;
5739 if (GET_CODE (disp) == CONST
5740 && GET_CODE (XEXP (disp, 0)) == PLUS
5741 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5742 && ix86_cmodel == CM_SMALL_PIC
5743 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5744 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5745 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5746 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5747 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5748 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5749 return 1;
5751 if (GET_CODE (disp) != CONST)
5752 return 0;
5753 disp = XEXP (disp, 0);
5755 if (TARGET_64BIT)
5757 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5758 distance of GOT table references. We should not need these anyway. */
5759 if (GET_CODE (disp) != UNSPEC
5760 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5761 return 0;
5763 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5764 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5765 return 0;
5766 return 1;
5769 saw_plus = false;
5770 if (GET_CODE (disp) == PLUS)
5772 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5773 return 0;
5774 disp = XEXP (disp, 0);
5775 saw_plus = true;
5778 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5779 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5781 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5782 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5783 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5785 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5786 if (strstr (sym_name, "$pb") != 0)
5787 return 1;
5791 if (GET_CODE (disp) != UNSPEC)
5792 return 0;
5794 switch (XINT (disp, 1))
5796 case UNSPEC_GOT:
5797 if (saw_plus)
5798 return false;
5799 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5800 case UNSPEC_GOTOFF:
5801 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5802 case UNSPEC_GOTTPOFF:
5803 case UNSPEC_GOTNTPOFF:
5804 case UNSPEC_INDNTPOFF:
5805 if (saw_plus)
5806 return false;
5807 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5808 case UNSPEC_NTPOFF:
5809 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5810 case UNSPEC_DTPOFF:
5811 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5814 return 0;
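/* For example, a displacement of the form
       (const (unspec [FOO] UNSPEC_GOTOFF))
   is accepted above when FOO is a local symbol; this matches the
   addresses built by legitimize_pic_address below.  */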
5817 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5818 memory address for an instruction. The MODE argument is the machine mode
5819 for the MEM expression that wants to use this address.
5821 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5822 convert common non-canonical forms to canonical form so that they will
5823 be recognized. */
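/* For example, (plus (mult (reg) (const_int 4)) (reg)) is a canonical
   scaled-index address; LEGITIMIZE_ADDRESS (legitimize_address below)
   rewrites shift-based forms such as
   (plus (ashift (reg) (const_int 2)) (reg)) into that multiply form.  */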
5825 int
5826 legitimate_address_p (mode, addr, strict)
5827 enum machine_mode mode;
5828 register rtx addr;
5829 int strict;
5831 struct ix86_address parts;
5832 rtx base, index, disp;
5833 HOST_WIDE_INT scale;
5834 const char *reason = NULL;
5835 rtx reason_rtx = NULL_RTX;
5837 if (TARGET_DEBUG_ADDR)
5839 fprintf (stderr,
5840 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5841 GET_MODE_NAME (mode), strict);
5842 debug_rtx (addr);
5845 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5847 if (TARGET_DEBUG_ADDR)
5848 fprintf (stderr, "Success.\n");
5849 return TRUE;
5852 if (ix86_decompose_address (addr, &parts) <= 0)
5854 reason = "decomposition failed";
5855 goto report_error;
5858 base = parts.base;
5859 index = parts.index;
5860 disp = parts.disp;
5861 scale = parts.scale;
5863 /* Validate base register.
5865 Don't allow SUBREGs here; they can lead to spill failures when the base
5866 is one word out of a two-word structure, which is represented internally
5867 as a DImode int. */
5869 if (base)
5871 rtx reg;
5872 reason_rtx = base;
5874 if (GET_CODE (base) == SUBREG)
5875 reg = SUBREG_REG (base);
5876 else
5877 reg = base;
5879 if (GET_CODE (reg) != REG)
5881 reason = "base is not a register";
5882 goto report_error;
5885 if (GET_MODE (base) != Pmode)
5887 reason = "base is not in Pmode";
5888 goto report_error;
5891 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5892 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5894 reason = "base is not valid";
5895 goto report_error;
5899 /* Validate index register.
5901 Don't allow SUBREGs here; they can lead to spill failures when the index
5902 is one word out of a two-word structure, which is represented internally
5903 as a DImode int. */
5905 if (index)
5907 rtx reg;
5908 reason_rtx = index;
5910 if (GET_CODE (index) == SUBREG)
5911 reg = SUBREG_REG (index);
5912 else
5913 reg = index;
5915 if (GET_CODE (reg) != REG)
5917 reason = "index is not a register";
5918 goto report_error;
5921 if (GET_MODE (index) != Pmode)
5923 reason = "index is not in Pmode";
5924 goto report_error;
5927 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5928 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5930 reason = "index is not valid";
5931 goto report_error;
5935 /* Validate scale factor. */
5936 if (scale != 1)
5938 reason_rtx = GEN_INT (scale);
5939 if (!index)
5941 reason = "scale without index";
5942 goto report_error;
5945 if (scale != 2 && scale != 4 && scale != 8)
5947 reason = "scale is not a valid multiplier";
5948 goto report_error;
5952 /* Validate displacement. */
5953 if (disp)
5955 reason_rtx = disp;
5957 if (GET_CODE (disp) == CONST
5958 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5959 switch (XINT (XEXP (disp, 0), 1))
5961 case UNSPEC_GOT:
5962 case UNSPEC_GOTOFF:
5963 case UNSPEC_GOTPCREL:
5964 if (!flag_pic)
5965 abort ();
5966 goto is_legitimate_pic;
5968 case UNSPEC_GOTTPOFF:
5969 case UNSPEC_GOTNTPOFF:
5970 case UNSPEC_INDNTPOFF:
5971 case UNSPEC_NTPOFF:
5972 case UNSPEC_DTPOFF:
5973 break;
5975 default:
5976 reason = "invalid address unspec";
5977 goto report_error;
5980 else if (flag_pic && (SYMBOLIC_CONST (disp)
5981 #if TARGET_MACHO
5982 && !machopic_operand_p (disp)
5983 #endif
5986 is_legitimate_pic:
5987 if (TARGET_64BIT && (index || base))
5989 /* foo@dtpoff(%rX) is ok. */
5990 if (GET_CODE (disp) != CONST
5991 || GET_CODE (XEXP (disp, 0)) != PLUS
5992 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5993 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5994 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5995 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5997 reason = "non-constant pic memory reference";
5998 goto report_error;
6001 else if (! legitimate_pic_address_disp_p (disp))
6003 reason = "displacement is an invalid pic construct";
6004 goto report_error;
6007 /* This code used to verify that a symbolic pic displacement
6008 includes the pic_offset_table_rtx register.
6010 While this is a good idea, unfortunately these constructs may
6011 be created by the "adds using lea" optimization for incorrect
6012 code like:
6014 int a;
6015 int foo(int i)
6016 {
6017 return *(&a+i);
6018 }
6020 This code is nonsensical, but it results in addressing the
6021 GOT table with pic_offset_table_rtx as the base. We can't
6022 just refuse it easily, since it gets matched by the
6023 "addsi3" pattern, which later gets split to an lea when the
6024 output register differs from the input. While this
6025 could be handled by a separate addsi pattern for this case
6026 that never results in an lea, disabling this test seems to be
6027 the easier and correct fix for the crash. */
6029 else if (!CONSTANT_ADDRESS_P (disp))
6031 reason = "displacement is not constant";
6032 goto report_error;
6034 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6036 reason = "displacement is out of range";
6037 goto report_error;
6039 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
6041 reason = "displacement is a const_double";
6042 goto report_error;
6046 /* Everything looks valid. */
6047 if (TARGET_DEBUG_ADDR)
6048 fprintf (stderr, "Success.\n");
6049 return TRUE;
6051 report_error:
6052 if (TARGET_DEBUG_ADDR)
6054 fprintf (stderr, "Error: %s\n", reason);
6055 debug_rtx (reason_rtx);
6057 return FALSE;
6060 /* Return a unique alias set for the GOT. */
6062 static HOST_WIDE_INT
6063 ix86_GOT_alias_set ()
6065 static HOST_WIDE_INT set = -1;
6066 if (set == -1)
6067 set = new_alias_set ();
6068 return set;
6071 /* Return a legitimate reference for ORIG (an address) using the
6072 register REG. If REG is 0, a new pseudo is generated.
6074 There are two types of references that must be handled:
6076 1. Global data references must load the address from the GOT, via
6077 the PIC reg. An insn is emitted to do this load, and the reg is
6078 returned.
6080 2. Static data references, constant pool addresses, and code labels
6081 compute the address as an offset from the GOT, whose base is in
6082 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
6083 differentiate them from global data objects. The returned
6084 address is the PIC reg + an unspec constant.
6086 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6087 reg also appears in the address. */
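/* A sketch of the 32-bit RTL produced below: a local symbol FOO becomes
   the address
       (plus pic_offset_table_rtx (const (unspec [FOO] UNSPEC_GOTOFF)))
   while a global symbol becomes the load
       (mem (plus pic_offset_table_rtx (const (unspec [FOO] UNSPEC_GOT))))
   whose result is left in REG.  */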
6089 rtx
6090 legitimize_pic_address (orig, reg)
6091 rtx orig;
6092 rtx reg;
6094 rtx addr = orig;
6095 rtx new = orig;
6096 rtx base;
6098 #if TARGET_MACHO
6099 if (reg == 0)
6100 reg = gen_reg_rtx (Pmode);
6101 /* Use the generic Mach-O PIC machinery. */
6102 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6103 #endif
6105 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6106 new = addr;
6107 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6109 /* This symbol may be referenced via a displacement from the PIC
6110 base address (@GOTOFF). */
6112 if (reload_in_progress)
6113 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6114 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6115 new = gen_rtx_CONST (Pmode, new);
6116 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6118 if (reg != 0)
6120 emit_move_insn (reg, new);
6121 new = reg;
6124 else if (GET_CODE (addr) == SYMBOL_REF)
6126 if (TARGET_64BIT)
6128 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6129 new = gen_rtx_CONST (Pmode, new);
6130 new = gen_rtx_MEM (Pmode, new);
6131 RTX_UNCHANGING_P (new) = 1;
6132 set_mem_alias_set (new, ix86_GOT_alias_set ());
6134 if (reg == 0)
6135 reg = gen_reg_rtx (Pmode);
6136 /* Use gen_movsi directly; otherwise the address is loaded
6137 into a register for CSE. We don't want to CSE these addresses;
6138 instead we CSE the addresses loaded from the GOT table, so skip this. */
6139 emit_insn (gen_movsi (reg, new));
6140 new = reg;
6142 else
6144 /* This symbol must be referenced via a load from the
6145 Global Offset Table (@GOT). */
6147 if (reload_in_progress)
6148 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6149 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6150 new = gen_rtx_CONST (Pmode, new);
6151 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6152 new = gen_rtx_MEM (Pmode, new);
6153 RTX_UNCHANGING_P (new) = 1;
6154 set_mem_alias_set (new, ix86_GOT_alias_set ());
6156 if (reg == 0)
6157 reg = gen_reg_rtx (Pmode);
6158 emit_move_insn (reg, new);
6159 new = reg;
6162 else
6164 if (GET_CODE (addr) == CONST)
6166 addr = XEXP (addr, 0);
6168 /* We must match the stuff we have generated before. Assume the only
6169 unspecs that can get here are ours. Not that we could do
6170 anything with them anyway... */
6171 if (GET_CODE (addr) == UNSPEC
6172 || (GET_CODE (addr) == PLUS
6173 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6174 return orig;
6175 if (GET_CODE (addr) != PLUS)
6176 abort ();
6178 if (GET_CODE (addr) == PLUS)
6180 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6182 /* Check first to see if this is a constant offset from a @GOTOFF
6183 symbol reference. */
6184 if (local_symbolic_operand (op0, Pmode)
6185 && GET_CODE (op1) == CONST_INT)
6187 if (!TARGET_64BIT)
6189 if (reload_in_progress)
6190 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6191 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6192 UNSPEC_GOTOFF);
6193 new = gen_rtx_PLUS (Pmode, new, op1);
6194 new = gen_rtx_CONST (Pmode, new);
6195 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6197 if (reg != 0)
6199 emit_move_insn (reg, new);
6200 new = reg;
6203 else
6205 if (INTVAL (op1) < -16*1024*1024
6206 || INTVAL (op1) >= 16*1024*1024)
6207 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6210 else
6212 base = legitimize_pic_address (XEXP (addr, 0), reg);
6213 new = legitimize_pic_address (XEXP (addr, 1),
6214 base == reg ? NULL_RTX : reg);
6216 if (GET_CODE (new) == CONST_INT)
6217 new = plus_constant (base, INTVAL (new));
6218 else
6220 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6222 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6223 new = XEXP (new, 1);
6225 new = gen_rtx_PLUS (Pmode, base, new);
6230 return new;
6233 static void
6234 ix86_encode_section_info (decl, first)
6235 tree decl;
6236 int first ATTRIBUTE_UNUSED;
6238 bool local_p = (*targetm.binds_local_p) (decl);
6239 rtx rtl, symbol;
6241 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6242 if (GET_CODE (rtl) != MEM)
6243 return;
6244 symbol = XEXP (rtl, 0);
6245 if (GET_CODE (symbol) != SYMBOL_REF)
6246 return;
6248 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6249 symbol so that we may access it directly in the GOT. */
6251 if (flag_pic)
6252 SYMBOL_REF_FLAG (symbol) = local_p;
6254 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6255 "local dynamic", "initial exec" or "local exec" TLS models
6256 respectively. */
6258 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6260 const char *symbol_str;
6261 char *newstr;
6262 size_t len;
6263 enum tls_model kind = decl_tls_model (decl);
6265 if (TARGET_64BIT && ! flag_pic)
6267 /* x86-64 doesn't allow non-pic code for shared libraries,
6268 so don't generate GD/LD TLS models for non-pic code. */
6269 switch (kind)
6271 case TLS_MODEL_GLOBAL_DYNAMIC:
6272 kind = TLS_MODEL_INITIAL_EXEC; break;
6273 case TLS_MODEL_LOCAL_DYNAMIC:
6274 kind = TLS_MODEL_LOCAL_EXEC; break;
6275 default:
6276 break;
6280 symbol_str = XSTR (symbol, 0);
6282 if (symbol_str[0] == '%')
6284 if (symbol_str[1] == tls_model_chars[kind])
6285 return;
6286 symbol_str += 2;
6288 len = strlen (symbol_str) + 1;
6289 newstr = alloca (len + 2);
6291 newstr[0] = '%';
6292 newstr[1] = tls_model_chars[kind];
6293 memcpy (newstr + 2, symbol_str, len);
6295 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
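/* For example, assuming tls_model_chars maps the models to 'G', 'L',
   'i' and 'l' as described above, an initial-exec variable "foo" is
   renamed "%ifoo" here; ix86_strip_name_encoding below strips the
   two-character prefix again for output.  */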
6299 /* Undo the above when printing symbol names. */
6301 static const char *
6302 ix86_strip_name_encoding (str)
6303 const char *str;
6305 if (str[0] == '%')
6306 str += 2;
6307 if (str [0] == '*')
6308 str += 1;
6309 return str;
6312 /* Load the thread pointer into a register. */
6314 static rtx
6315 get_thread_pointer ()
6317 rtx tp;
6319 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6320 tp = gen_rtx_MEM (Pmode, tp);
6321 RTX_UNCHANGING_P (tp) = 1;
6322 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6323 tp = force_reg (Pmode, tp);
6325 return tp;
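/* The UNSPEC_TP memory reference built above is accepted as a
   legitimate address and printed by print_operand_address below as a
   direct thread-pointer access: "%gs:0" on 32-bit targets and "%fs:0"
   in 64-bit mode.  */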
6328 /* Try machine-dependent ways of modifying an illegitimate address
6329 to be legitimate. If we find one, return the new, valid address.
6330 This macro is used in only one place: `memory_address' in explow.c.
6332 OLDX is the address as it was before break_out_memory_refs was called.
6333 In some cases it is useful to look at this to decide what needs to be done.
6335 MODE and WIN are passed so that this macro can use
6336 GO_IF_LEGITIMATE_ADDRESS.
6338 It is always safe for this macro to do nothing. It exists to recognize
6339 opportunities to optimize the output.
6341 For the 80386, we handle X+REG by loading X into a register R and
6342 using R+REG. R will go in a general reg and indexing will be used.
6343 However, if REG is a broken-out memory address or multiplication,
6344 nothing needs to be done because REG can certainly go in a general reg.
6346 When -fpic is used, special handling is needed for symbolic references.
6347 See comments by legitimize_pic_address in i386.c for details. */
6349 rtx
6350 legitimize_address (x, oldx, mode)
6351 register rtx x;
6352 register rtx oldx ATTRIBUTE_UNUSED;
6353 enum machine_mode mode;
6355 int changed = 0;
6356 unsigned log;
6358 if (TARGET_DEBUG_ADDR)
6360 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6361 GET_MODE_NAME (mode));
6362 debug_rtx (x);
6365 log = tls_symbolic_operand (x, mode);
6366 if (log)
6368 rtx dest, base, off, pic;
6369 int type;
6371 switch (log)
6373 case TLS_MODEL_GLOBAL_DYNAMIC:
6374 dest = gen_reg_rtx (Pmode);
6375 if (TARGET_64BIT)
6377 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6379 start_sequence ();
6380 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6381 insns = get_insns ();
6382 end_sequence ();
6384 emit_libcall_block (insns, dest, rax, x);
6386 else
6387 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6388 break;
6390 case TLS_MODEL_LOCAL_DYNAMIC:
6391 base = gen_reg_rtx (Pmode);
6392 if (TARGET_64BIT)
6394 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6396 start_sequence ();
6397 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6398 insns = get_insns ();
6399 end_sequence ();
6401 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6402 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6403 emit_libcall_block (insns, base, rax, note);
6405 else
6406 emit_insn (gen_tls_local_dynamic_base_32 (base));
6408 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6409 off = gen_rtx_CONST (Pmode, off);
6411 return gen_rtx_PLUS (Pmode, base, off);
6413 case TLS_MODEL_INITIAL_EXEC:
6414 if (TARGET_64BIT)
6416 pic = NULL;
6417 type = UNSPEC_GOTNTPOFF;
6419 else if (flag_pic)
6421 if (reload_in_progress)
6422 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6423 pic = pic_offset_table_rtx;
6424 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6426 else if (!TARGET_GNU_TLS)
6428 pic = gen_reg_rtx (Pmode);
6429 emit_insn (gen_set_got (pic));
6430 type = UNSPEC_GOTTPOFF;
6432 else
6434 pic = NULL;
6435 type = UNSPEC_INDNTPOFF;
6438 base = get_thread_pointer ();
6440 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6441 off = gen_rtx_CONST (Pmode, off);
6442 if (pic)
6443 off = gen_rtx_PLUS (Pmode, pic, off);
6444 off = gen_rtx_MEM (Pmode, off);
6445 RTX_UNCHANGING_P (off) = 1;
6446 set_mem_alias_set (off, ix86_GOT_alias_set ());
6447 dest = gen_reg_rtx (Pmode);
6449 if (TARGET_64BIT || TARGET_GNU_TLS)
6451 emit_move_insn (dest, off);
6452 return gen_rtx_PLUS (Pmode, base, dest);
6454 else
6455 emit_insn (gen_subsi3 (dest, base, off));
6456 break;
6458 case TLS_MODEL_LOCAL_EXEC:
6459 base = get_thread_pointer ();
6461 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6462 (TARGET_64BIT || TARGET_GNU_TLS)
6463 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6464 off = gen_rtx_CONST (Pmode, off);
6466 if (TARGET_64BIT || TARGET_GNU_TLS)
6467 return gen_rtx_PLUS (Pmode, base, off);
6468 else
6470 dest = gen_reg_rtx (Pmode);
6471 emit_insn (gen_subsi3 (dest, base, off));
6473 break;
6475 default:
6476 abort ();
6479 return dest;
6482 if (flag_pic && SYMBOLIC_CONST (x))
6483 return legitimize_pic_address (x, 0);
6485 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6486 if (GET_CODE (x) == ASHIFT
6487 && GET_CODE (XEXP (x, 1)) == CONST_INT
6488 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6490 changed = 1;
6491 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6492 GEN_INT (1 << log));
6495 if (GET_CODE (x) == PLUS)
6497 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6499 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6500 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6501 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6503 changed = 1;
6504 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6505 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6506 GEN_INT (1 << log));
6509 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6510 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6511 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6513 changed = 1;
6514 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6515 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6516 GEN_INT (1 << log));
6519 /* Put multiply first if it isn't already. */
6520 if (GET_CODE (XEXP (x, 1)) == MULT)
6522 rtx tmp = XEXP (x, 0);
6523 XEXP (x, 0) = XEXP (x, 1);
6524 XEXP (x, 1) = tmp;
6525 changed = 1;
6528 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6529 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6530 created by virtual register instantiation, register elimination, and
6531 similar optimizations. */
6532 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6534 changed = 1;
6535 x = gen_rtx_PLUS (Pmode,
6536 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6537 XEXP (XEXP (x, 1), 0)),
6538 XEXP (XEXP (x, 1), 1));
6541 /* Canonicalize
6542 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6543 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6544 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6545 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6546 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6547 && CONSTANT_P (XEXP (x, 1)))
6549 rtx constant;
6550 rtx other = NULL_RTX;
6552 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6554 constant = XEXP (x, 1);
6555 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6557 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6559 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6560 other = XEXP (x, 1);
6562 else
6563 constant = 0;
6565 if (constant)
6567 changed = 1;
6568 x = gen_rtx_PLUS (Pmode,
6569 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6570 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6571 plus_constant (other, INTVAL (constant)));
6575 if (changed && legitimate_address_p (mode, x, FALSE))
6576 return x;
6578 if (GET_CODE (XEXP (x, 0)) == MULT)
6580 changed = 1;
6581 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6584 if (GET_CODE (XEXP (x, 1)) == MULT)
6586 changed = 1;
6587 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6590 if (changed
6591 && GET_CODE (XEXP (x, 1)) == REG
6592 && GET_CODE (XEXP (x, 0)) == REG)
6593 return x;
6595 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6597 changed = 1;
6598 x = legitimize_pic_address (x, 0);
6601 if (changed && legitimate_address_p (mode, x, FALSE))
6602 return x;
6604 if (GET_CODE (XEXP (x, 0)) == REG)
6606 register rtx temp = gen_reg_rtx (Pmode);
6607 register rtx val = force_operand (XEXP (x, 1), temp);
6608 if (val != temp)
6609 emit_move_insn (temp, val);
6611 XEXP (x, 1) = temp;
6612 return x;
6615 else if (GET_CODE (XEXP (x, 1)) == REG)
6617 register rtx temp = gen_reg_rtx (Pmode);
6618 register rtx val = force_operand (XEXP (x, 0), temp);
6619 if (val != temp)
6620 emit_move_insn (temp, val);
6622 XEXP (x, 0) = temp;
6623 return x;
6627 return x;
6630 /* Print an integer constant expression in assembler syntax. Addition
6631 and subtraction are the only arithmetic that may appear in these
6632 expressions. FILE is the stdio stream to write to, X is the rtx, and
6633 CODE is the operand print code from the output string. */
6635 static void
6636 output_pic_addr_const (file, x, code)
6637 FILE *file;
6638 rtx x;
6639 int code;
6641 char buf[256];
6643 switch (GET_CODE (x))
6645 case PC:
6646 if (flag_pic)
6647 putc ('.', file);
6648 else
6649 abort ();
6650 break;
6652 case SYMBOL_REF:
6653 assemble_name (file, XSTR (x, 0));
6654 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6655 fputs ("@PLT", file);
6656 break;
6658 case LABEL_REF:
6659 x = XEXP (x, 0);
6660 /* FALLTHRU */
6661 case CODE_LABEL:
6662 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6663 assemble_name (asm_out_file, buf);
6664 break;
6666 case CONST_INT:
6667 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6668 break;
6670 case CONST:
6671 /* This used to output parentheses around the expression,
6672 but that does not work on the 386 (either ATT or BSD assembler). */
6673 output_pic_addr_const (file, XEXP (x, 0), code);
6674 break;
6676 case CONST_DOUBLE:
6677 if (GET_MODE (x) == VOIDmode)
6679 /* We can use %d if the number is <32 bits and positive. */
6680 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6681 fprintf (file, "0x%lx%08lx",
6682 (unsigned long) CONST_DOUBLE_HIGH (x),
6683 (unsigned long) CONST_DOUBLE_LOW (x));
6684 else
6685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6687 else
6688 /* We can't handle floating point constants;
6689 PRINT_OPERAND must handle them. */
6690 output_operand_lossage ("floating constant misused");
6691 break;
6693 case PLUS:
6694 /* Some assemblers need integer constants to appear first. */
6695 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6697 output_pic_addr_const (file, XEXP (x, 0), code);
6698 putc ('+', file);
6699 output_pic_addr_const (file, XEXP (x, 1), code);
6701 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6703 output_pic_addr_const (file, XEXP (x, 1), code);
6704 putc ('+', file);
6705 output_pic_addr_const (file, XEXP (x, 0), code);
6707 else
6708 abort ();
6709 break;
6711 case MINUS:
6712 if (!TARGET_MACHO)
6713 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6714 output_pic_addr_const (file, XEXP (x, 0), code);
6715 putc ('-', file);
6716 output_pic_addr_const (file, XEXP (x, 1), code);
6717 if (!TARGET_MACHO)
6718 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6719 break;
6721 case UNSPEC:
6722 if (XVECLEN (x, 0) != 1)
6723 abort ();
6724 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6725 switch (XINT (x, 1))
6727 case UNSPEC_GOT:
6728 fputs ("@GOT", file);
6729 break;
6730 case UNSPEC_GOTOFF:
6731 fputs ("@GOTOFF", file);
6732 break;
6733 case UNSPEC_GOTPCREL:
6734 fputs ("@GOTPCREL(%rip)", file);
6735 break;
6736 case UNSPEC_GOTTPOFF:
6737 /* FIXME: This might be @TPOFF in Sun ld too. */
6738 fputs ("@GOTTPOFF", file);
6739 break;
6740 case UNSPEC_TPOFF:
6741 fputs ("@TPOFF", file);
6742 break;
6743 case UNSPEC_NTPOFF:
6744 if (TARGET_64BIT)
6745 fputs ("@TPOFF", file);
6746 else
6747 fputs ("@NTPOFF", file);
6748 break;
6749 case UNSPEC_DTPOFF:
6750 fputs ("@DTPOFF", file);
6751 break;
6752 case UNSPEC_GOTNTPOFF:
6753 if (TARGET_64BIT)
6754 fputs ("@GOTTPOFF(%rip)", file);
6755 else
6756 fputs ("@GOTNTPOFF", file);
6757 break;
6758 case UNSPEC_INDNTPOFF:
6759 fputs ("@INDNTPOFF", file);
6760 break;
6761 default:
6762 output_operand_lossage ("invalid UNSPEC as operand");
6763 break;
6765 break;
6767 default:
6768 output_operand_lossage ("invalid expression as operand");
6772 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6773 We need to handle our special PIC relocations. */
6775 void
6776 i386_dwarf_output_addr_const (file, x)
6777 FILE *file;
6778 rtx x;
6780 #ifdef ASM_QUAD
6781 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6782 #else
6783 if (TARGET_64BIT)
6784 abort ();
6785 fprintf (file, "%s", ASM_LONG);
6786 #endif
6787 if (flag_pic)
6788 output_pic_addr_const (file, x, '\0');
6789 else
6790 output_addr_const (file, x);
6791 fputc ('\n', file);
6794 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6795 We need to emit DTP-relative relocations. */
6797 void
6798 i386_output_dwarf_dtprel (file, size, x)
6799 FILE *file;
6800 int size;
6801 rtx x;
6803 fputs (ASM_LONG, file);
6804 output_addr_const (file, x);
6805 fputs ("@DTPOFF", file);
6806 switch (size)
6808 case 4:
6809 break;
6810 case 8:
6811 fputs (", 0", file);
6812 break;
6813 default:
6814 abort ();
6818 /* In the name of slightly smaller debug output, and to cater to
6819 general assembler lossage, recognize PIC+GOTOFF and turn it back
6820 into a direct symbol reference. */
6822 static rtx
6823 ix86_delegitimize_address (orig_x)
6824 rtx orig_x;
6826 rtx x = orig_x, y;
6828 if (GET_CODE (x) == MEM)
6829 x = XEXP (x, 0);
6831 if (TARGET_64BIT)
6833 if (GET_CODE (x) != CONST
6834 || GET_CODE (XEXP (x, 0)) != UNSPEC
6835 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6836 || GET_CODE (orig_x) != MEM)
6837 return orig_x;
6838 return XVECEXP (XEXP (x, 0), 0, 0);
6841 if (GET_CODE (x) != PLUS
6842 || GET_CODE (XEXP (x, 1)) != CONST)
6843 return orig_x;
6845 if (GET_CODE (XEXP (x, 0)) == REG
6846 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6847 /* %ebx + GOT/GOTOFF */
6848 y = NULL;
6849 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6851 /* %ebx + %reg * scale + GOT/GOTOFF */
6852 y = XEXP (x, 0);
6853 if (GET_CODE (XEXP (y, 0)) == REG
6854 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6855 y = XEXP (y, 1);
6856 else if (GET_CODE (XEXP (y, 1)) == REG
6857 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6858 y = XEXP (y, 0);
6859 else
6860 return orig_x;
6861 if (GET_CODE (y) != REG
6862 && GET_CODE (y) != MULT
6863 && GET_CODE (y) != ASHIFT)
6864 return orig_x;
6866 else
6867 return orig_x;
6869 x = XEXP (XEXP (x, 1), 0);
6870 if (GET_CODE (x) == UNSPEC
6871 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6872 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6874 if (y)
6875 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6876 return XVECEXP (x, 0, 0);
6879 if (GET_CODE (x) == PLUS
6880 && GET_CODE (XEXP (x, 0)) == UNSPEC
6881 && GET_CODE (XEXP (x, 1)) == CONST_INT
6882 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6883 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6884 && GET_CODE (orig_x) != MEM)))
6886 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6887 if (y)
6888 return gen_rtx_PLUS (Pmode, y, x);
6889 return x;
6892 return orig_x;
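/* For example, this turns the PIC address
       (plus pic_offset_table_rtx (const (unspec [FOO] UNSPEC_GOTOFF)))
   back into the bare symbol_ref FOO for debug output.  */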
6895 static void
6896 put_condition_code (code, mode, reverse, fp, file)
6897 enum rtx_code code;
6898 enum machine_mode mode;
6899 int reverse, fp;
6900 FILE *file;
6902 const char *suffix;
6904 if (mode == CCFPmode || mode == CCFPUmode)
6906 enum rtx_code second_code, bypass_code;
6907 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6908 if (bypass_code != NIL || second_code != NIL)
6909 abort ();
6910 code = ix86_fp_compare_code_to_integer (code);
6911 mode = CCmode;
6913 if (reverse)
6914 code = reverse_condition (code);
6916 switch (code)
6918 case EQ:
6919 suffix = "e";
6920 break;
6921 case NE:
6922 suffix = "ne";
6923 break;
6924 case GT:
6925 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6926 abort ();
6927 suffix = "g";
6928 break;
6929 case GTU:
6930 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6931 Those same assemblers have the same but opposite lossage on cmov. */
6932 if (mode != CCmode)
6933 abort ();
6934 suffix = fp ? "nbe" : "a";
6935 break;
6936 case LT:
6937 if (mode == CCNOmode || mode == CCGOCmode)
6938 suffix = "s";
6939 else if (mode == CCmode || mode == CCGCmode)
6940 suffix = "l";
6941 else
6942 abort ();
6943 break;
6944 case LTU:
6945 if (mode != CCmode)
6946 abort ();
6947 suffix = "b";
6948 break;
6949 case GE:
6950 if (mode == CCNOmode || mode == CCGOCmode)
6951 suffix = "ns";
6952 else if (mode == CCmode || mode == CCGCmode)
6953 suffix = "ge";
6954 else
6955 abort ();
6956 break;
6957 case GEU:
6958 /* ??? As above. */
6959 if (mode != CCmode)
6960 abort ();
6961 suffix = fp ? "nb" : "ae";
6962 break;
6963 case LE:
6964 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6965 abort ();
6966 suffix = "le";
6967 break;
6968 case LEU:
6969 if (mode != CCmode)
6970 abort ();
6971 suffix = "be";
6972 break;
6973 case UNORDERED:
6974 suffix = fp ? "u" : "p";
6975 break;
6976 case ORDERED:
6977 suffix = fp ? "nu" : "np";
6978 break;
6979 default:
6980 abort ();
6982 fputs (suffix, file);
6985 void
6986 print_reg (x, code, file)
6987 rtx x;
6988 int code;
6989 FILE *file;
6991 if (REGNO (x) == ARG_POINTER_REGNUM
6992 || REGNO (x) == FRAME_POINTER_REGNUM
6993 || REGNO (x) == FLAGS_REG
6994 || REGNO (x) == FPSR_REG)
6995 abort ();
6997 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6998 putc ('%', file);
7000 if (code == 'w' || MMX_REG_P (x))
7001 code = 2;
7002 else if (code == 'b')
7003 code = 1;
7004 else if (code == 'k')
7005 code = 4;
7006 else if (code == 'q')
7007 code = 8;
7008 else if (code == 'y')
7009 code = 3;
7010 else if (code == 'h')
7011 code = 0;
7012 else
7013 code = GET_MODE_SIZE (GET_MODE (x));
7015 /* Irritatingly, the AMD extended registers use a different naming
7016 convention from the normal registers. */
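/* For example, extended register 8 prints as "r8b", "r8w", "r8d" or
   "r8" for the byte, word, doubleword and quadword operand sizes
   computed above, while the legacy registers come from the qi/hi
   register-name tables used below.  */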
7017 if (REX_INT_REG_P (x))
7019 if (!TARGET_64BIT)
7020 abort ();
7021 switch (code)
7023 case 0:
7024 error ("extended registers have no high halves");
7025 break;
7026 case 1:
7027 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7028 break;
7029 case 2:
7030 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7031 break;
7032 case 4:
7033 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7034 break;
7035 case 8:
7036 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7037 break;
7038 default:
7039 error ("unsupported operand size for extended register");
7040 break;
7042 return;
7044 switch (code)
7046 case 3:
7047 if (STACK_TOP_P (x))
7049 fputs ("st(0)", file);
7050 break;
7052 /* FALLTHRU */
7053 case 8:
7054 case 4:
7055 case 12:
7056 if (! ANY_FP_REG_P (x))
7057 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7058 /* FALLTHRU */
7059 case 16:
7060 case 2:
7061 fputs (hi_reg_name[REGNO (x)], file);
7062 break;
7063 case 1:
7064 fputs (qi_reg_name[REGNO (x)], file);
7065 break;
7066 case 0:
7067 fputs (qi_high_reg_name[REGNO (x)], file);
7068 break;
7069 default:
7070 abort ();
7074 /* Locate some local-dynamic symbol still in use by this function
7075 so that we can print its name in some tls_local_dynamic_base
7076 pattern. */
7078 static const char *
7079 get_some_local_dynamic_name ()
7081 rtx insn;
7083 if (cfun->machine->some_ld_name)
7084 return cfun->machine->some_ld_name;
7086 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7087 if (INSN_P (insn)
7088 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7089 return cfun->machine->some_ld_name;
7091 abort ();
7094 static int
7095 get_some_local_dynamic_name_1 (px, data)
7096 rtx *px;
7097 void *data ATTRIBUTE_UNUSED;
7099 rtx x = *px;
7101 if (GET_CODE (x) == SYMBOL_REF
7102 && local_dynamic_symbolic_operand (x, Pmode))
7104 cfun->machine->some_ld_name = XSTR (x, 0);
7105 return 1;
7108 return 0;
7111 /* Meaning of CODE:
7112 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7113 C -- print opcode suffix for set/cmov insn.
7114 c -- like C, but print reversed condition
7115 F,f -- likewise, but for floating-point.
7116 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7117 nothing
7118 R -- print the prefix for register names.
7119 z -- print the opcode suffix for the size of the current operand.
7120 * -- print a star (in certain assembler syntax)
7121 A -- print an absolute memory reference.
7122 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7123 s -- print a shift double count, followed by the assembler's argument
7124 delimiter.
7125 b -- print the QImode name of the register for the indicated operand.
7126 %b0 would print %al if operands[0] is reg 0.
7127 w -- likewise, print the HImode name of the register.
7128 k -- likewise, print the SImode name of the register.
7129 q -- likewise, print the DImode name of the register.
7130 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7131 y -- print "st(0)" instead of "st" as a register.
7132 D -- print condition for SSE cmp instruction.
7133 P -- if PIC, print an @PLT suffix.
7134 X -- don't print any sort of PIC '@' suffix for a symbol.
7135 & -- print some in-use local-dynamic symbol name.
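For example, with operands[0] in register 0, "%b0" prints %al, "%w0"
prints %ax and "%k0" prints %eax in the AT&T dialect.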
7138 void
7139 print_operand (file, x, code)
7140 FILE *file;
7141 rtx x;
7142 int code;
7144 if (code)
7146 switch (code)
7148 case '*':
7149 if (ASSEMBLER_DIALECT == ASM_ATT)
7150 putc ('*', file);
7151 return;
7153 case '&':
7154 assemble_name (file, get_some_local_dynamic_name ());
7155 return;
7157 case 'A':
7158 if (ASSEMBLER_DIALECT == ASM_ATT)
7159 putc ('*', file);
7160 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7162 /* Intel syntax. For absolute addresses, registers should not
7163 be surrounded by braces. */
7164 if (GET_CODE (x) != REG)
7166 putc ('[', file);
7167 PRINT_OPERAND (file, x, 0);
7168 putc (']', file);
7169 return;
7172 else
7173 abort ();
7175 PRINT_OPERAND (file, x, 0);
7176 return;
7179 case 'L':
7180 if (ASSEMBLER_DIALECT == ASM_ATT)
7181 putc ('l', file);
7182 return;
7184 case 'W':
7185 if (ASSEMBLER_DIALECT == ASM_ATT)
7186 putc ('w', file);
7187 return;
7189 case 'B':
7190 if (ASSEMBLER_DIALECT == ASM_ATT)
7191 putc ('b', file);
7192 return;
7194 case 'Q':
7195 if (ASSEMBLER_DIALECT == ASM_ATT)
7196 putc ('l', file);
7197 return;
7199 case 'S':
7200 if (ASSEMBLER_DIALECT == ASM_ATT)
7201 putc ('s', file);
7202 return;
7204 case 'T':
7205 if (ASSEMBLER_DIALECT == ASM_ATT)
7206 putc ('t', file);
7207 return;
7209 case 'z':
7210 /* 387 opcodes don't get size suffixes if the operands are
7211 registers. */
7212 if (STACK_REG_P (x))
7213 return;
7215 /* Likewise if using Intel opcodes. */
7216 if (ASSEMBLER_DIALECT == ASM_INTEL)
7217 return;
7219 /* This is the size of op from size of operand. */
7220 switch (GET_MODE_SIZE (GET_MODE (x)))
7222 case 2:
7223 #ifdef HAVE_GAS_FILDS_FISTS
7224 putc ('s', file);
7225 #endif
7226 return;
7228 case 4:
7229 if (GET_MODE (x) == SFmode)
7231 putc ('s', file);
7232 return;
7234 else
7235 putc ('l', file);
7236 return;
7238 case 12:
7239 case 16:
7240 putc ('t', file);
7241 return;
7243 case 8:
7244 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7246 #ifdef GAS_MNEMONICS
7247 putc ('q', file);
7248 #else
7249 putc ('l', file);
7250 putc ('l', file);
7251 #endif
7253 else
7254 putc ('l', file);
7255 return;
7257 default:
7258 abort ();
7261 case 'b':
7262 case 'w':
7263 case 'k':
7264 case 'q':
7265 case 'h':
7266 case 'y':
7267 case 'X':
7268 case 'P':
7269 break;
7271 case 's':
7272 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7274 PRINT_OPERAND (file, x, 0);
7275 putc (',', file);
7277 return;
7279 case 'D':
7280 /* A little bit of brain damage here. The SSE compare instructions
7281 use completely different names for the comparisons than the
7282 fp conditional moves do. */
7283 switch (GET_CODE (x))
7285 case EQ:
7286 case UNEQ:
7287 fputs ("eq", file);
7288 break;
7289 case LT:
7290 case UNLT:
7291 fputs ("lt", file);
7292 break;
7293 case LE:
7294 case UNLE:
7295 fputs ("le", file);
7296 break;
7297 case UNORDERED:
7298 fputs ("unord", file);
7299 break;
7300 case NE:
7301 case LTGT:
7302 fputs ("neq", file);
7303 break;
7304 case UNGE:
7305 case GE:
7306 fputs ("nlt", file);
7307 break;
7308 case UNGT:
7309 case GT:
7310 fputs ("nle", file);
7311 break;
7312 case ORDERED:
7313 fputs ("ord", file);
7314 break;
7315 default:
7316 abort ();
7317 break;
7319 return;
7320 case 'O':
7321 #ifdef CMOV_SUN_AS_SYNTAX
7322 if (ASSEMBLER_DIALECT == ASM_ATT)
7324 switch (GET_MODE (x))
7326 case HImode: putc ('w', file); break;
7327 case SImode:
7328 case SFmode: putc ('l', file); break;
7329 case DImode:
7330 case DFmode: putc ('q', file); break;
7331 default: abort ();
7333 putc ('.', file);
7335 #endif
7336 return;
7337 case 'C':
7338 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7339 return;
7340 case 'F':
7341 #ifdef CMOV_SUN_AS_SYNTAX
7342 if (ASSEMBLER_DIALECT == ASM_ATT)
7343 putc ('.', file);
7344 #endif
7345 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7346 return;
7348 /* Like above, but reverse condition */
7349 case 'c':
7350 /* Check to see if argument to %c is really a constant
7351 and not a condition code which needs to be reversed. */
7352 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7354 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7355 return;
7357 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7358 return;
7359 case 'f':
7360 #ifdef CMOV_SUN_AS_SYNTAX
7361 if (ASSEMBLER_DIALECT == ASM_ATT)
7362 putc ('.', file);
7363 #endif
7364 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7365 return;
7366 case '+':
7368 rtx x;
7370 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7371 return;
7373 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7374 if (x)
7376 int pred_val = INTVAL (XEXP (x, 0));
7378 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7379 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7381 int taken = pred_val > REG_BR_PROB_BASE / 2;
7382 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7384 /* Emit hints only when the default branch prediction
7385 heuristics would fail. */
7386 if (taken != cputaken)
7388 /* We use the 3e (DS) prefix for taken branches and
7389 the 2e (CS) prefix for not-taken branches. */
7390 if (taken)
7391 fputs ("ds ; ", file);
7392 else
7393 fputs ("cs ; ", file);
7397 return;
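/* For example, a forward conditional branch that the profile predicts
   taken -- which the CPU's static forward-not-taken rule would miss --
   is emitted with a "ds ; " prefix ahead of the jump mnemonic.  */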
7399 default:
7400 output_operand_lossage ("invalid operand code `%c'", code);
7404 if (GET_CODE (x) == REG)
7406 PRINT_REG (x, code, file);
7409 else if (GET_CODE (x) == MEM)
7411 /* No `byte ptr' prefix for call instructions. */
7412 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7414 const char * size;
7415 switch (GET_MODE_SIZE (GET_MODE (x)))
7417 case 1: size = "BYTE"; break;
7418 case 2: size = "WORD"; break;
7419 case 4: size = "DWORD"; break;
7420 case 8: size = "QWORD"; break;
7421 case 12: size = "XWORD"; break;
7422 case 16: size = "XMMWORD"; break;
7423 default:
7424 abort ();
7427 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7428 if (code == 'b')
7429 size = "BYTE";
7430 else if (code == 'w')
7431 size = "WORD";
7432 else if (code == 'k')
7433 size = "DWORD";
7435 fputs (size, file);
7436 fputs (" PTR ", file);
7439 x = XEXP (x, 0);
7440 if (flag_pic && CONSTANT_ADDRESS_P (x))
7441 output_pic_addr_const (file, x, code);
7442 /* Avoid (%rip) for call operands. */
7443 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7444 && GET_CODE (x) != CONST_INT)
7445 output_addr_const (file, x);
7446 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7447 output_operand_lossage ("invalid constraints for operand");
7448 else
7449 output_address (x);
7452 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7454 REAL_VALUE_TYPE r;
7455 long l;
7457 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7458 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7460 if (ASSEMBLER_DIALECT == ASM_ATT)
7461 putc ('$', file);
7462 fprintf (file, "0x%lx", l);
7465 /* These float cases don't actually occur as immediate operands. */
7466 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7468 char dstr[30];
7470 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7471 fprintf (file, "%s", dstr);
7474 else if (GET_CODE (x) == CONST_DOUBLE
7475 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7477 char dstr[30];
7479 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7480 fprintf (file, "%s", dstr);
7483 else
7485 if (code != 'P')
7487 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7489 if (ASSEMBLER_DIALECT == ASM_ATT)
7490 putc ('$', file);
7492 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7493 || GET_CODE (x) == LABEL_REF)
7495 if (ASSEMBLER_DIALECT == ASM_ATT)
7496 putc ('$', file);
7497 else
7498 fputs ("OFFSET FLAT:", file);
7501 if (GET_CODE (x) == CONST_INT)
7502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7503 else if (flag_pic)
7504 output_pic_addr_const (file, x, code);
7505 else
7506 output_addr_const (file, x);
7510 /* Print a memory operand whose address is ADDR. */
7512 void
7513 print_operand_address (file, addr)
7514 FILE *file;
7515 register rtx addr;
7517 struct ix86_address parts;
7518 rtx base, index, disp;
7519 int scale;
7521 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7523 if (ASSEMBLER_DIALECT == ASM_INTEL)
7524 fputs ("DWORD PTR ", file);
7525 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7526 putc ('%', file);
7527 if (TARGET_64BIT)
7528 fputs ("fs:0", file);
7529 else
7530 fputs ("gs:0", file);
7531 return;
7534 if (! ix86_decompose_address (addr, &parts))
7535 abort ();
7537 base = parts.base;
7538 index = parts.index;
7539 disp = parts.disp;
7540 scale = parts.scale;
7542 if (!base && !index)
7544 /* A displacement-only address requires special attention. */
7546 if (GET_CODE (disp) == CONST_INT)
7548 if (ASSEMBLER_DIALECT == ASM_INTEL)
7550 if (USER_LABEL_PREFIX[0] == 0)
7551 putc ('%', file);
7552 fputs ("ds:", file);
7554 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7556 else if (flag_pic)
7557 output_pic_addr_const (file, addr, 0);
7558 else
7559 output_addr_const (file, addr);
7561 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
7562 if (TARGET_64BIT
7563 && ((GET_CODE (addr) == SYMBOL_REF
7564 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7565 || GET_CODE (addr) == LABEL_REF
7566 || (GET_CODE (addr) == CONST
7567 && GET_CODE (XEXP (addr, 0)) == PLUS
7568 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7569 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7570 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7571 fputs ("(%rip)", file);
7573 else
7575 if (ASSEMBLER_DIALECT == ASM_ATT)
7577 if (disp)
7579 if (flag_pic)
7580 output_pic_addr_const (file, disp, 0);
7581 else if (GET_CODE (disp) == LABEL_REF)
7582 output_asm_label (disp);
7583 else
7584 output_addr_const (file, disp);
7587 putc ('(', file);
7588 if (base)
7589 PRINT_REG (base, 0, file);
7590 if (index)
7592 putc (',', file);
7593 PRINT_REG (index, 0, file);
7594 if (scale != 1)
7595 fprintf (file, ",%d", scale);
7597 putc (')', file);
7599 else
7601 rtx offset = NULL_RTX;
7603 if (disp)
7605 /* Pull out the offset of a symbol; print any symbol itself. */
7606 if (GET_CODE (disp) == CONST
7607 && GET_CODE (XEXP (disp, 0)) == PLUS
7608 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7610 offset = XEXP (XEXP (disp, 0), 1);
7611 disp = gen_rtx_CONST (VOIDmode,
7612 XEXP (XEXP (disp, 0), 0));
7615 if (flag_pic)
7616 output_pic_addr_const (file, disp, 0);
7617 else if (GET_CODE (disp) == LABEL_REF)
7618 output_asm_label (disp);
7619 else if (GET_CODE (disp) == CONST_INT)
7620 offset = disp;
7621 else
7622 output_addr_const (file, disp);
7625 putc ('[', file);
7626 if (base)
7628 PRINT_REG (base, 0, file);
7629 if (offset)
7631 if (INTVAL (offset) >= 0)
7632 putc ('+', file);
7633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7636 else if (offset)
7637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7638 else
7639 putc ('0', file);
7641 if (index)
7643 putc ('+', file);
7644 PRINT_REG (index, 0, file);
7645 if (scale != 1)
7646 fprintf (file, "*%d", scale);
7648 putc (']', file);
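/* For example, an address with base %ebx, index %ecx, scale 2 and
   displacement 4 prints as "4(%ebx,%ecx,2)" in AT&T syntax and as
   "[ebx+4+ecx*2]" in Intel syntax.  */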
7653 bool
7654 output_addr_const_extra (file, x)
7655 FILE *file;
7656 rtx x;
7658 rtx op;
7660 if (GET_CODE (x) != UNSPEC)
7661 return false;
7663 op = XVECEXP (x, 0, 0);
7664 switch (XINT (x, 1))
7666 case UNSPEC_GOTTPOFF:
7667 output_addr_const (file, op);
7668 /* FIXME: This might be @TPOFF in Sun ld. */
7669 fputs ("@GOTTPOFF", file);
7670 break;
7671 case UNSPEC_TPOFF:
7672 output_addr_const (file, op);
7673 fputs ("@TPOFF", file);
7674 break;
7675 case UNSPEC_NTPOFF:
7676 output_addr_const (file, op);
7677 if (TARGET_64BIT)
7678 fputs ("@TPOFF", file);
7679 else
7680 fputs ("@NTPOFF", file);
7681 break;
7682 case UNSPEC_DTPOFF:
7683 output_addr_const (file, op);
7684 fputs ("@DTPOFF", file);
7685 break;
7686 case UNSPEC_GOTNTPOFF:
7687 output_addr_const (file, op);
7688 if (TARGET_64BIT)
7689 fputs ("@GOTTPOFF(%rip)", file);
7690 else
7691 fputs ("@GOTNTPOFF", file);
7692 break;
7693 case UNSPEC_INDNTPOFF:
7694 output_addr_const (file, op);
7695 fputs ("@INDNTPOFF", file);
7696 break;
7698 default:
7699 return false;
7702 return true;
7705 /* Split one or more DImode RTL references into pairs of SImode
7706 references. The RTL can be REG, offsettable MEM, integer constant, or
7707 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7708 split and "num" is its length. lo_half and hi_half are output arrays
7709 that parallel "operands". */
7711 void
7712 split_di (operands, num, lo_half, hi_half)
7713 rtx operands[];
7714 int num;
7715 rtx lo_half[], hi_half[];
7717 while (num--)
7719 rtx op = operands[num];
7721 /* simplify_subreg refuses to split volatile memory references,
7722 but we still have to handle them. */
7723 if (GET_CODE (op) == MEM)
7725 lo_half[num] = adjust_address (op, SImode, 0);
7726 hi_half[num] = adjust_address (op, SImode, 4);
7728 else
7730 lo_half[num] = simplify_gen_subreg (SImode, op,
7731 GET_MODE (op) == VOIDmode
7732 ? DImode : GET_MODE (op), 0);
7733 hi_half[num] = simplify_gen_subreg (SImode, op,
7734 GET_MODE (op) == VOIDmode
7735 ? DImode : GET_MODE (op), 4);
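/* Typical use from a machine-description splitter (a sketch; the array
   names are illustrative):
       rtx lo[2], hi[2];
       split_di (operands, 2, lo, hi);
   after which lo[i] and hi[i] hold the low and high SImode words of
   operands[i].  */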
7739 /* Split one or more TImode RTL references into pairs of DImode
7740 references. The RTL can be REG, offsettable MEM, integer constant, or
7741 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7742 split and "num" is its length. lo_half and hi_half are output arrays
7743 that parallel "operands". */
7745 void
7746 split_ti (operands, num, lo_half, hi_half)
7747 rtx operands[];
7748 int num;
7749 rtx lo_half[], hi_half[];
7751 while (num--)
7753 rtx op = operands[num];
7755 /* simplify_subreg refuses to split volatile memory references, but we
7756 still have to handle them. */
7757 if (GET_CODE (op) == MEM)
7759 lo_half[num] = adjust_address (op, DImode, 0);
7760 hi_half[num] = adjust_address (op, DImode, 8);
7762 else
7764 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7765 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7770 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7771 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7772 is the expression of the binary operation. The output may either be
7773 emitted here, or returned to the caller, like all output_* functions.
7775 There is no guarantee that the operands are the same mode, as they
7776 might be within FLOAT or FLOAT_EXTEND expressions. */
7778 #ifndef SYSV386_COMPAT
7779 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7780 wants to fix the assemblers because that causes incompatibility
7781 with gcc. No-one wants to fix gcc because that causes
7782 incompatibility with assemblers... You can use the option of
7783 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7784 #define SYSV386_COMPAT 1
7785 #endif
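/* Concretely: in the case below where operands[2] dies in a
   subtraction, SYSV386_COMPAT = 1 emits the "fsubp" spelling that
   AT&T-derived assemblers expect, while -DSYSV386_COMPAT=0 emits
   "fsubrp", naming what the hardware really does.  */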
7787 const char *
7788 output_387_binary_op (insn, operands)
7789 rtx insn;
7790 rtx *operands;
7792 static char buf[30];
7793 const char *p;
7794 const char *ssep;
7795 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7797 #ifdef ENABLE_CHECKING
7798 /* Even if we do not want to check the inputs, this documents the input
7799 constraints, which helps in understanding the following code. */
7800 if (STACK_REG_P (operands[0])
7801 && ((REG_P (operands[1])
7802 && REGNO (operands[0]) == REGNO (operands[1])
7803 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7804 || (REG_P (operands[2])
7805 && REGNO (operands[0]) == REGNO (operands[2])
7806 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7807 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7808 ; /* ok */
7809 else if (!is_sse)
7810 abort ();
7811 #endif
7813 switch (GET_CODE (operands[3]))
7815 case PLUS:
7816 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7817 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7818 p = "fiadd";
7819 else
7820 p = "fadd";
7821 ssep = "add";
7822 break;
7824 case MINUS:
7825 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7826 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7827 p = "fisub";
7828 else
7829 p = "fsub";
7830 ssep = "sub";
7831 break;
7833 case MULT:
7834 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7835 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7836 p = "fimul";
7837 else
7838 p = "fmul";
7839 ssep = "mul";
7840 break;
7842 case DIV:
7843 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7844 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7845 p = "fidiv";
7846 else
7847 p = "fdiv";
7848 ssep = "div";
7849 break;
7851 default:
7852 abort ();
7855 if (is_sse)
7857 strcpy (buf, ssep);
7858 if (GET_MODE (operands[0]) == SFmode)
7859 strcat (buf, "ss\t{%2, %0|%0, %2}");
7860 else
7861 strcat (buf, "sd\t{%2, %0|%0, %2}");
7862 return buf;
7864 strcpy (buf, p);
7866 switch (GET_CODE (operands[3]))
7868 case MULT:
7869 case PLUS:
7870 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7872 rtx temp = operands[2];
7873 operands[2] = operands[1];
7874 operands[1] = temp;
7877 /* We know operands[0] == operands[1]. */
7879 if (GET_CODE (operands[2]) == MEM)
7881 p = "%z2\t%2";
7882 break;
7885 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7887 if (STACK_TOP_P (operands[0]))
7888 /* How is it that we are storing to a dead operand[2]?
7889 Well, presumably operands[1] is dead too. We can't
7890 store the result to st(0) as st(0) gets popped on this
7891 instruction. Instead store to operands[2] (which I
7892 think has to be st(1)). st(1) will be popped later.
7893 gcc <= 2.8.1 didn't have this check and generated
7894 assembly code that the Unixware assembler rejected. */
7895 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7896 else
7897 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7898 break;
7901 if (STACK_TOP_P (operands[0]))
7902 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7903 else
7904 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7905 break;
7907 case MINUS:
7908 case DIV:
7909 if (GET_CODE (operands[1]) == MEM)
7911 p = "r%z1\t%1";
7912 break;
7915 if (GET_CODE (operands[2]) == MEM)
7917 p = "%z2\t%2";
7918 break;
7921 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7923 #if SYSV386_COMPAT
7924 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7925 derived assemblers, confusingly reverse the direction of
7926 the operation for fsub{r} and fdiv{r} when the
7927 destination register is not st(0). The Intel assembler
7928 doesn't have this brain damage. Read !SYSV386_COMPAT to
7929 figure out what the hardware really does. */
7930 if (STACK_TOP_P (operands[0]))
7931 p = "{p\t%0, %2|rp\t%2, %0}";
7932 else
7933 p = "{rp\t%2, %0|p\t%0, %2}";
7934 #else
7935 if (STACK_TOP_P (operands[0]))
7936 /* As above for fmul/fadd, we can't store to st(0). */
7937 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7938 else
7939 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7940 #endif
7941 break;
7944 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7946 #if SYSV386_COMPAT
7947 if (STACK_TOP_P (operands[0]))
7948 p = "{rp\t%0, %1|p\t%1, %0}";
7949 else
7950 p = "{p\t%1, %0|rp\t%0, %1}";
7951 #else
7952 if (STACK_TOP_P (operands[0]))
7953 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7954 else
7955 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7956 #endif
7957 break;
7960 if (STACK_TOP_P (operands[0]))
7962 if (STACK_TOP_P (operands[1]))
7963 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7964 else
7965 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7966 break;
7968 else if (STACK_TOP_P (operands[1]))
7970 #if SYSV386_COMPAT
7971 p = "{\t%1, %0|r\t%0, %1}";
7972 #else
7973 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7974 #endif
7976 else
7978 #if SYSV386_COMPAT
7979 p = "{r\t%2, %0|\t%0, %2}";
7980 #else
7981 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7982 #endif
7984 break;
7986 default:
7987 abort ();
7990 strcat (buf, p);
7991 return buf;
7994 /* Output code to initialize control word copies used by
7995 trunc?f?i patterns. NORMAL is set to the current control word, while
7996 ROUND_DOWN is set to a control word that truncates (rounds toward zero). */
7997 void
7998 emit_i387_cw_initialization (normal, round_down)
7999 rtx normal, round_down;
8001 rtx reg = gen_reg_rtx (HImode);
8003 emit_insn (gen_x86_fnstcw_1 (normal));
8004 emit_move_insn (reg, normal);
8005 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8006 && !TARGET_64BIT)
8007 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8008 else
8009 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8010 emit_move_insn (round_down, reg);
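/* The i387 control word's RC (rounding control) field occupies bits
   10-11; OR-ing in 0xc00 sets RC to 11b, i.e. round toward zero
   (truncation), which is what the trunc?f?i patterns require.  The
   movsi_insv_1 path stores 0x0c into the high byte (bits 8-15)
   instead, producing the same 0xc00 while, in effect, avoiding a
   16-bit prefixed or instruction.  */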
8013 /* Output code for INSN to convert a float to a signed int. OPERANDS
8014 are the insn operands. The output may be [HSD]Imode and the input
8015 operand may be [SDX]Fmode. */
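/* For illustration, a DImode conversion where st(0) stays live emits
   roughly:
       fld     %st(0)          ; copy, since fistpll always pops
       fldcw   <truncating control word>
       fistpll <dest>
       fldcw   <saved control word>
   (a sketch; the exact operands come from the insn).  */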
8017 const char *
8018 output_fix_trunc (insn, operands)
8019 rtx insn;
8020 rtx *operands;
8022 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8023 int dimode_p = GET_MODE (operands[0]) == DImode;
8025 /* Jump through a hoop or two for DImode, since the hardware has no
8026 non-popping instruction. We used to do this a different way, but
8027 that was somewhat fragile and broke with post-reload splitters. */
8028 if (dimode_p && !stack_top_dies)
8029 output_asm_insn ("fld\t%y1", operands);
8031 if (!STACK_TOP_P (operands[1]))
8032 abort ();
8034 if (GET_CODE (operands[0]) != MEM)
8035 abort ();
8037 output_asm_insn ("fldcw\t%3", operands);
8038 if (stack_top_dies || dimode_p)
8039 output_asm_insn ("fistp%z0\t%0", operands);
8040 else
8041 output_asm_insn ("fist%z0\t%0", operands);
8042 output_asm_insn ("fldcw\t%2", operands);
8044 return "";
8047 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8048 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8049 when fucom should be used. */
8051 const char *
8052 output_fp_compare (insn, operands, eflags_p, unordered_p)
8053 rtx insn;
8054 rtx *operands;
8055 int eflags_p, unordered_p;
8057 int stack_top_dies;
8058 rtx cmp_op0 = operands[0];
8059 rtx cmp_op1 = operands[1];
8060 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8062 if (eflags_p == 2)
8064 cmp_op0 = cmp_op1;
8065 cmp_op1 = operands[2];
8067 if (is_sse)
8069 if (GET_MODE (operands[0]) == SFmode)
8070 if (unordered_p)
8071 return "ucomiss\t{%1, %0|%0, %1}";
8072 else
8073 return "comiss\t{%1, %0|%0, %1}";
8074 else
8075 if (unordered_p)
8076 return "ucomisd\t{%1, %0|%0, %1}";
8077 else
8078 return "comisd\t{%1, %0|%0, %1}";
8081 if (! STACK_TOP_P (cmp_op0))
8082 abort ();
8084 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8086 if (STACK_REG_P (cmp_op1)
8087 && stack_top_dies
8088 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8089 && REGNO (cmp_op1) != FIRST_STACK_REG)
8091 /* If the top of the 387 stack dies, and the other operand is
8092 also a stack register that dies, then this must be an
8093 `fcompp' float compare. */
8095 if (eflags_p == 1)
8097 /* There is no double popping fcomi variant. Fortunately,
8098 eflags is immune from the fstp's cc clobbering. */
8099 if (unordered_p)
8100 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8101 else
8102 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8103 return "fstp\t%y0";
8105 else
8107 if (eflags_p == 2)
8109 if (unordered_p)
8110 return "fucompp\n\tfnstsw\t%0";
8111 else
8112 return "fcompp\n\tfnstsw\t%0";
8114 else
8116 if (unordered_p)
8117 return "fucompp";
8118 else
8119 return "fcompp";
8123 else
8125 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8127 static const char * const alt[24] =
8129 "fcom%z1\t%y1",
8130 "fcomp%z1\t%y1",
8131 "fucom%z1\t%y1",
8132 "fucomp%z1\t%y1",
8134 "ficom%z1\t%y1",
8135 "ficomp%z1\t%y1",
8136 NULL,
8137 NULL,
8139 "fcomi\t{%y1, %0|%0, %y1}",
8140 "fcomip\t{%y1, %0|%0, %y1}",
8141 "fucomi\t{%y1, %0|%0, %y1}",
8142 "fucomip\t{%y1, %0|%0, %y1}",
8144 NULL,
8145 NULL,
8146 NULL,
8147 NULL,
8149 "fcom%z2\t%y2\n\tfnstsw\t%0",
8150 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8151 "fucom%z2\t%y2\n\tfnstsw\t%0",
8152 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8154 "ficom%z2\t%y2\n\tfnstsw\t%0",
8155 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8156 NULL,
8157 NULL
8160 int mask;
8161 const char *ret;
8163 mask = eflags_p << 3;
8164 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8165 mask |= unordered_p << 1;
8166 mask |= stack_top_dies;
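/* For example, an fcomi-style compare (eflags_p == 1) of FP operands,
   unordered, with a dying stack top gives mask = 8 + 2 + 1 = 11,
   selecting "fucomip" in the table above.  */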
8168 if (mask >= 24)
8169 abort ();
8170 ret = alt[mask];
8171 if (ret == NULL)
8172 abort ();
8174 return ret;
8178 void
8179 ix86_output_addr_vec_elt (file, value)
8180 FILE *file;
8181 int value;
8183 const char *directive = ASM_LONG;
8185 if (TARGET_64BIT)
8187 #ifdef ASM_QUAD
8188 directive = ASM_QUAD;
8189 #else
8190 abort ();
8191 #endif
8194 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8197 void
8198 ix86_output_addr_diff_elt (file, value, rel)
8199 FILE *file;
8200 int value, rel;
8202 if (TARGET_64BIT)
8203 fprintf (file, "%s%s%d-%s%d\n",
8204 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8205 else if (HAVE_AS_GOTOFF_IN_DATA)
8206 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8207 #if TARGET_MACHO
8208 else if (TARGET_MACHO)
8209 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8210 machopic_function_base_name () + 1);
8211 #endif
8212 else
8213 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8214 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
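/* For example, with HAVE_AS_GOTOFF_IN_DATA and value 3 this emits a
   directive along the lines of ".long .L3@GOTOFF" (assuming the usual
   ".L" local label prefix), while 64-bit targets emit a plain label
   difference instead.  */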
8217 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8218 for the target. */
8220 void
8221 ix86_expand_clear (dest)
8222 rtx dest;
8224 rtx tmp;
8226 /* We play register width games, which are only valid after reload. */
8227 if (!reload_completed)
8228 abort ();
8230 /* Avoid HImode and its attendant prefix byte. */
8231 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8232 dest = gen_rtx_REG (SImode, REGNO (dest));
8234 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8236 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8237 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8239 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8240 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8243 emit_insn (tmp);
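/* The two shapes emitted here correspond to, e.g.:
       movl $0, %eax        ; plain SET, when mov is preferred
       xorl %eax, %eax      ; smaller, but clobbers the flags, hence
                            ; the CLOBBER wrapped in a PARALLEL above
   (illustrative register choice).  */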
8246 /* X is an unchanging MEM. If it is a constant pool reference, return
8247 the constant pool rtx, else NULL. */
8249 static rtx
8250 maybe_get_pool_constant (x)
8251 rtx x;
8253 x = ix86_delegitimize_address (XEXP (x, 0));
8255 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8256 return get_pool_constant (x);
8258 return NULL_RTX;
8261 void
8262 ix86_expand_move (mode, operands)
8263 enum machine_mode mode;
8264 rtx operands[];
8266 int strict = (reload_in_progress || reload_completed);
8267 rtx insn, op0, op1, tmp;
8269 op0 = operands[0];
8270 op1 = operands[1];
8272 if (tls_symbolic_operand (op1, Pmode))
8274 op1 = legitimize_address (op1, op1, VOIDmode);
8275 if (GET_CODE (op0) == MEM)
8277 tmp = gen_reg_rtx (mode);
8278 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8279 op1 = tmp;
8282 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8284 #if TARGET_MACHO
8285 if (MACHOPIC_PURE)
8287 rtx temp = ((reload_in_progress
8288 || ((op0 && GET_CODE (op0) == REG)
8289 && mode == Pmode))
8290 ? op0 : gen_reg_rtx (Pmode));
8291 op1 = machopic_indirect_data_reference (op1, temp);
8292 op1 = machopic_legitimize_pic_address (op1, mode,
8293 temp == op1 ? 0 : temp);
8295 else
8297 if (MACHOPIC_INDIRECT)
8298 op1 = machopic_indirect_data_reference (op1, 0);
8300 if (op0 != op1)
8302 insn = gen_rtx_SET (VOIDmode, op0, op1);
8303 emit_insn (insn);
8305 return;
8306 #endif /* TARGET_MACHO */
8307 if (GET_CODE (op0) == MEM)
8308 op1 = force_reg (Pmode, op1);
8309 else
8311 rtx temp = op0;
8312 if (GET_CODE (temp) != REG)
8313 temp = gen_reg_rtx (Pmode);
8314 temp = legitimize_pic_address (op1, temp);
8315 if (temp == op0)
8316 return;
8317 op1 = temp;
8320 else
8322 if (GET_CODE (op0) == MEM
8323 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8324 || !push_operand (op0, mode))
8325 && GET_CODE (op1) == MEM)
8326 op1 = force_reg (mode, op1);
8328 if (push_operand (op0, mode)
8329 && ! general_no_elim_operand (op1, mode))
8330 op1 = copy_to_mode_reg (mode, op1);
8332 /* Force large constants in 64-bit compilation into a register
8333 to get them CSEed. */
8334 if (TARGET_64BIT && mode == DImode
8335 && immediate_operand (op1, mode)
8336 && !x86_64_zero_extended_value (op1)
8337 && !register_operand (op0, mode)
8338 && optimize && !reload_completed && !reload_in_progress)
8339 op1 = copy_to_mode_reg (mode, op1);
8341 if (FLOAT_MODE_P (mode))
8343 /* If we are loading a floating point constant into a register,
8344 force the value to memory now, since we'll get better code
8345 out of the back end. */
8347 if (strict)
8349 else if (GET_CODE (op1) == CONST_DOUBLE
8350 && register_operand (op0, mode))
8351 op1 = validize_mem (force_const_mem (mode, op1));
8355 insn = gen_rtx_SET (VOIDmode, op0, op1);
8357 emit_insn (insn);
8360 void
8361 ix86_expand_vector_move (mode, operands)
8362 enum machine_mode mode;
8363 rtx operands[];
8365 /* Force constants other than zero into memory. We do not know how
8366 the instructions used to build constants modify the upper 64 bits
8367 of the register; once we have that information we may be able
8368 to handle some of them more efficiently. */
8369 if ((reload_in_progress | reload_completed) == 0
8370 && register_operand (operands[0], mode)
8371 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8372 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8374 /* Make operand1 a register if it isn't already. */
8375 if (!no_new_pseudos
8376 && !register_operand (operands[0], mode)
8377 && !register_operand (operands[1], mode))
8379 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8380 emit_move_insn (operands[0], temp);
8381 return;
8384 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8387 /* Attempt to expand a binary operator. Make the expansion closer to the
8388 actual machine than just general_operand, which would allow 3 separate
8389 memory references (one output, two inputs) in a single insn. */
8391 void
8392 ix86_expand_binary_operator (code, mode, operands)
8393 enum rtx_code code;
8394 enum machine_mode mode;
8395 rtx operands[];
8397 int matching_memory;
8398 rtx src1, src2, dst, op, clob;
8400 dst = operands[0];
8401 src1 = operands[1];
8402 src2 = operands[2];
8404 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8405 if (GET_RTX_CLASS (code) == 'c'
8406 && (rtx_equal_p (dst, src2)
8407 || immediate_operand (src1, mode)))
8409 rtx temp = src1;
8410 src1 = src2;
8411 src2 = temp;
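/* E.g. "x = 5 + x" with a commutative PLUS becomes "x = x + 5" here,
   so the destination matches source 1 and the two-address form
   "addl $5, x" can be used without an extra move.  */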
8414 /* If the destination is memory, and we do not have matching source
8415 operands, do things in registers. */
8416 matching_memory = 0;
8417 if (GET_CODE (dst) == MEM)
8419 if (rtx_equal_p (dst, src1))
8420 matching_memory = 1;
8421 else if (GET_RTX_CLASS (code) == 'c'
8422 && rtx_equal_p (dst, src2))
8423 matching_memory = 2;
8424 else
8425 dst = gen_reg_rtx (mode);
8428 /* The source operands cannot both be in memory. */
8429 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8431 if (matching_memory != 2)
8432 src2 = force_reg (mode, src2);
8433 else
8434 src1 = force_reg (mode, src1);
8437 /* If the operation is not commutable, source 1 cannot be a constant
8438 or non-matching memory. */
8439 if ((CONSTANT_P (src1)
8440 || (!matching_memory && GET_CODE (src1) == MEM))
8441 && GET_RTX_CLASS (code) != 'c')
8442 src1 = force_reg (mode, src1);
8444 /* If optimizing, copy to regs to improve CSE */
8445 if (optimize && ! no_new_pseudos)
8447 if (GET_CODE (dst) == MEM)
8448 dst = gen_reg_rtx (mode);
8449 if (GET_CODE (src1) == MEM)
8450 src1 = force_reg (mode, src1);
8451 if (GET_CODE (src2) == MEM)
8452 src2 = force_reg (mode, src2);
8455 /* Emit the instruction. */
8457 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8458 if (reload_in_progress)
8460 /* Reload doesn't know about the flags register, and doesn't know that
8461 it doesn't want to clobber it. We can only do this with PLUS. */
8462 if (code != PLUS)
8463 abort ();
8464 emit_insn (op);
8466 else
8468 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8469 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8472 /* Fix up the destination if needed. */
8473 if (dst != operands[0])
8474 emit_move_insn (operands[0], dst);
8477 /* Return TRUE or FALSE depending on whether the binary operator meets the
8478 appropriate constraints. */
8481 ix86_binary_operator_ok (code, mode, operands)
8482 enum rtx_code code;
8483 enum machine_mode mode ATTRIBUTE_UNUSED;
8484 rtx operands[3];
8486 /* The source operands cannot both be in memory. */
8487 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8488 return 0;
8489 /* If the operation is not commutable, source 1 cannot be a constant. */
8490 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8491 return 0;
8492 /* If the destination is memory, we must have a matching source operand. */
8493 if (GET_CODE (operands[0]) == MEM
8494 && ! (rtx_equal_p (operands[0], operands[1])
8495 || (GET_RTX_CLASS (code) == 'c'
8496 && rtx_equal_p (operands[0], operands[2]))))
8497 return 0;
8498 /* If the operation is not commutable and the source 1 is memory, we must
8499 have a matching destination. */
8500 if (GET_CODE (operands[1]) == MEM
8501 && GET_RTX_CLASS (code) != 'c'
8502 && ! rtx_equal_p (operands[0], operands[1]))
8503 return 0;
8504 return 1;
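/* Examples of the constraints above: "addl %eax, (%ecx)" is valid only
   because the memory destination matches source 1, while an insn with
   both sources in memory is never valid on IA-32.  */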
8507 /* Attempt to expand a unary operator. Make the expansion closer to the
8508 actual machine than just general_operand, which would allow 2 separate
8509 memory references (one output, one input) in a single insn. */
8511 void
8512 ix86_expand_unary_operator (code, mode, operands)
8513 enum rtx_code code;
8514 enum machine_mode mode;
8515 rtx operands[];
8517 int matching_memory;
8518 rtx src, dst, op, clob;
8520 dst = operands[0];
8521 src = operands[1];
8523 /* If the destination is memory, and we do not have matching source
8524 operands, do things in registers. */
8525 matching_memory = 0;
8526 if (GET_CODE (dst) == MEM)
8528 if (rtx_equal_p (dst, src))
8529 matching_memory = 1;
8530 else
8531 dst = gen_reg_rtx (mode);
8534 /* When source operand is memory, destination must match. */
8535 if (!matching_memory && GET_CODE (src) == MEM)
8536 src = force_reg (mode, src);
8538 /* If optimizing, copy to regs to improve CSE */
8539 if (optimize && ! no_new_pseudos)
8541 if (GET_CODE (dst) == MEM)
8542 dst = gen_reg_rtx (mode);
8543 if (GET_CODE (src) == MEM)
8544 src = force_reg (mode, src);
8547 /* Emit the instruction. */
8549 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8550 if (reload_in_progress || code == NOT)
8552 /* Reload doesn't know about the flags register, and doesn't know that
8553 it doesn't want to clobber it. */
8554 if (code != NOT)
8555 abort ();
8556 emit_insn (op);
8558 else
8560 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8561 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8564 /* Fix up the destination if needed. */
8565 if (dst != operands[0])
8566 emit_move_insn (operands[0], dst);
8569 /* Return TRUE or FALSE depending on whether the unary operator meets the
8570 appropriate constraints. */
8573 ix86_unary_operator_ok (code, mode, operands)
8574 enum rtx_code code ATTRIBUTE_UNUSED;
8575 enum machine_mode mode ATTRIBUTE_UNUSED;
8576 rtx operands[2] ATTRIBUTE_UNUSED;
8578 /* If one of operands is memory, source and destination must match. */
8579 if ((GET_CODE (operands[0]) == MEM
8580 || GET_CODE (operands[1]) == MEM)
8581 && ! rtx_equal_p (operands[0], operands[1]))
8582 return FALSE;
8583 return TRUE;
8586 /* Return TRUE or FALSE depending on whether the first SET in INSN
8587 has source and destination with matching CC modes, and whether the
8588 CC mode is at least as constrained as REQ_MODE. */
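/* This lets a flag-setting insn be re-recognized under a weaker
   requirement: e.g. a compare against zero recorded in CCNOmode can
   also satisfy a plain CCmode request for the same operands.  */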
8591 ix86_match_ccmode (insn, req_mode)
8592 rtx insn;
8593 enum machine_mode req_mode;
8595 rtx set;
8596 enum machine_mode set_mode;
8598 set = PATTERN (insn);
8599 if (GET_CODE (set) == PARALLEL)
8600 set = XVECEXP (set, 0, 0);
8601 if (GET_CODE (set) != SET)
8602 abort ();
8603 if (GET_CODE (SET_SRC (set)) != COMPARE)
8604 abort ();
8606 set_mode = GET_MODE (SET_DEST (set));
8607 switch (set_mode)
8609 case CCNOmode:
8610 if (req_mode != CCNOmode
8611 && (req_mode != CCmode
8612 || XEXP (SET_SRC (set), 1) != const0_rtx))
8613 return 0;
8614 break;
8615 case CCmode:
8616 if (req_mode == CCGCmode)
8617 return 0;
8618 /* FALLTHRU */
8619 case CCGCmode:
8620 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8621 return 0;
8622 /* FALLTHRU */
8623 case CCGOCmode:
8624 if (req_mode == CCZmode)
8625 return 0;
8626 /* FALLTHRU */
8627 case CCZmode:
8628 break;
8630 default:
8631 abort ();
8634 return (GET_MODE (SET_SRC (set)) == set_mode);
8637 /* Generate insn patterns to do an integer compare of OPERANDS. */
8639 static rtx
8640 ix86_expand_int_compare (code, op0, op1)
8641 enum rtx_code code;
8642 rtx op0, op1;
8644 enum machine_mode cmpmode;
8645 rtx tmp, flags;
8647 cmpmode = SELECT_CC_MODE (code, op0, op1);
8648 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8650 /* This is very simple, but making the interface the same as in the
8651 FP case makes the rest of the code easier. */
8652 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8653 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8655 /* Return the test that should be put into the flags user, i.e.
8656 the bcc, scc, or cmov instruction. */
8657 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8660 /* Figure out whether to use ordered or unordered fp comparisons.
8661 Return the appropriate mode to use. */
8663 enum machine_mode
8664 ix86_fp_compare_mode (code)
8665 enum rtx_code code ATTRIBUTE_UNUSED;
8667 /* ??? In order to make all comparisons reversible, we do all comparisons
8668 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8669 between all forms of trapping and nontrapping comparisons, we can make inequality
8670 comparisons trapping again, since it results in better code when using
8671 FCOM based compares. */
8672 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8675 enum machine_mode
8676 ix86_cc_mode (code, op0, op1)
8677 enum rtx_code code;
8678 rtx op0, op1;
8680 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8681 return ix86_fp_compare_mode (code);
8682 switch (code)
8684 /* Only zero flag is needed. */
8685 case EQ: /* ZF=0 */
8686 case NE: /* ZF!=0 */
8687 return CCZmode;
8688 /* Codes needing carry flag. */
8689 case GEU: /* CF=0 */
8690 case GTU: /* CF=0 & ZF=0 */
8691 case LTU: /* CF=1 */
8692 case LEU: /* CF=1 | ZF=1 */
8693 return CCmode;
8694 /* Codes possibly doable only with the sign flag when
8695 comparing against zero. */
8696 case GE: /* SF=OF or SF=0 */
8697 case LT: /* SF<>OF or SF=1 */
8698 if (op1 == const0_rtx)
8699 return CCGOCmode;
8700 else
8701 /* For the other cases the carry flag is not required. */
8702 return CCGCmode;
8703 /* Codes doable only with the sign flag when comparing
8704 against zero, but for which we lack a jump instruction,
8705 so we need to use relational tests against the overflow
8706 flag, which thus needs to be zero. */
8707 case GT: /* ZF=0 & SF=OF */
8708 case LE: /* ZF=1 | SF<>OF */
8709 if (op1 == const0_rtx)
8710 return CCNOmode;
8711 else
8712 return CCGCmode;
8713 /* The strcmp pattern does (use flags), and combine may ask us for
8714 the proper mode. */
8715 case USE:
8716 return CCmode;
8717 default:
8718 abort ();
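/* Worked example: "a >= 0" arrives here as GE against const0_rtx and
   gets CCGOCmode -- only the sign flag is consulted, so the flags may
   equally well come from an insn whose overflow and carry results are
   garbage.  */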
8722 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8725 ix86_use_fcomi_compare (code)
8726 enum rtx_code code ATTRIBUTE_UNUSED;
8728 enum rtx_code swapped_code = swap_condition (code);
8729 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8730 || (ix86_fp_comparison_cost (swapped_code)
8731 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8734 /* Swap, force into registers, or otherwise massage the two operands
8735 to a fp comparison. The operands are updated in place; the new
8736 comparison code is returned. */
8738 static enum rtx_code
8739 ix86_prepare_fp_compare_args (code, pop0, pop1)
8740 enum rtx_code code;
8741 rtx *pop0, *pop1;
8743 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8744 rtx op0 = *pop0, op1 = *pop1;
8745 enum machine_mode op_mode = GET_MODE (op0);
8746 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8748 /* All of the unordered compare instructions only work on registers.
8749 The same is true of the XFmode compare instructions and of the
8750 fcomi compare instructions. */
8752 if (!is_sse
8753 && (fpcmp_mode == CCFPUmode
8754 || op_mode == XFmode
8755 || op_mode == TFmode
8756 || ix86_use_fcomi_compare (code)))
8758 op0 = force_reg (op_mode, op0);
8759 op1 = force_reg (op_mode, op1);
8761 else
8763 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8764 things around if they appear profitable, otherwise force op0
8765 into a register. */
8767 if (standard_80387_constant_p (op0) == 0
8768 || (GET_CODE (op0) == MEM
8769 && ! (standard_80387_constant_p (op1) == 0
8770 || GET_CODE (op1) == MEM)))
8772 rtx tmp;
8773 tmp = op0, op0 = op1, op1 = tmp;
8774 code = swap_condition (code);
8777 if (GET_CODE (op0) != REG)
8778 op0 = force_reg (op_mode, op0);
8780 if (CONSTANT_P (op1))
8782 if (standard_80387_constant_p (op1))
8783 op1 = force_reg (op_mode, op1);
8784 else
8785 op1 = validize_mem (force_const_mem (op_mode, op1));
8789 /* Try to rearrange the comparison to make it cheaper. */
8790 if (ix86_fp_comparison_cost (code)
8791 > ix86_fp_comparison_cost (swap_condition (code))
8792 && (GET_CODE (op1) == REG || !no_new_pseudos))
8794 rtx tmp;
8795 tmp = op0, op0 = op1, op1 = tmp;
8796 code = swap_condition (code);
8797 if (GET_CODE (op0) != REG)
8798 op0 = force_reg (op_mode, op0);
8801 *pop0 = op0;
8802 *pop1 = op1;
8803 return code;
8806 /* Convert the comparison codes we use to represent FP comparisons to the
8807 integer code that will result in a proper branch. Return UNKNOWN if no
8808 such code is available. */
8809 static enum rtx_code
8810 ix86_fp_compare_code_to_integer (code)
8811 enum rtx_code code;
8813 switch (code)
8815 case GT:
8816 return GTU;
8817 case GE:
8818 return GEU;
8819 case ORDERED:
8820 case UNORDERED:
8821 return code;
8822 break;
8823 case UNEQ:
8824 return EQ;
8825 break;
8826 case UNLT:
8827 return LTU;
8828 break;
8829 case UNLE:
8830 return LEU;
8831 break;
8832 case LTGT:
8833 return NE;
8834 break;
8835 default:
8836 return UNKNOWN;
8840 /* Split comparison code CODE into comparisons we can do using branch
8841 instructions. BYPASS_CODE is the comparison code for the branch that
8842 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8843 is not required, its value is set to NIL.
8844 We never require more than two branches. */
8845 static void
8846 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8847 enum rtx_code code, *bypass_code, *first_code, *second_code;
8849 *first_code = code;
8850 *bypass_code = NIL;
8851 *second_code = NIL;
8853 /* The fcomi comparison sets flags as follows:
8855 cmp ZF PF CF
8856 > 0 0 0
8857 < 0 0 1
8858 = 1 0 0
8859 un 1 1 1 */
8861 switch (code)
8863 case GT: /* GTU - CF=0 & ZF=0 */
8864 case GE: /* GEU - CF=0 */
8865 case ORDERED: /* PF=0 */
8866 case UNORDERED: /* PF=1 */
8867 case UNEQ: /* EQ - ZF=1 */
8868 case UNLT: /* LTU - CF=1 */
8869 case UNLE: /* LEU - CF=1 | ZF=1 */
8870 case LTGT: /* EQ - ZF=0 */
8871 break;
8872 case LT: /* LTU - CF=1 - fails on unordered */
8873 *first_code = UNLT;
8874 *bypass_code = UNORDERED;
8875 break;
8876 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8877 *first_code = UNLE;
8878 *bypass_code = UNORDERED;
8879 break;
8880 case EQ: /* EQ - ZF=1 - fails on unordered */
8881 *first_code = UNEQ;
8882 *bypass_code = UNORDERED;
8883 break;
8884 case NE: /* NE - ZF=0 - fails on unordered */
8885 *first_code = LTGT;
8886 *second_code = UNORDERED;
8887 break;
8888 case UNGE: /* GEU - CF=0 - fails on unordered */
8889 *first_code = GE;
8890 *second_code = UNORDERED;
8891 break;
8892 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8893 *first_code = GT;
8894 *second_code = UNORDERED;
8895 break;
8896 default:
8897 abort ();
8899 if (!TARGET_IEEE_FP)
8901 *second_code = NIL;
8902 *bypass_code = NIL;
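/* Worked example: under IEEE math, EQ is split above into first_code
   = UNEQ with bypass_code = UNORDERED: if the operands compare
   unordered we branch around the UNEQ test, so a NaN never satisfies
   the equality.  Without IEEE math the extra branches are discarded
   again immediately above.  */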
8906 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8907 All of the following functions use the number of instructions as their cost metric.
8908 In the future this should be tweaked to compute bytes for optimize_size and to
8909 take into account the performance of various instructions on various CPUs. */
8910 static int
8911 ix86_fp_comparison_arithmetics_cost (code)
8912 enum rtx_code code;
8914 if (!TARGET_IEEE_FP)
8915 return 4;
8916 /* The cost of code output by ix86_expand_fp_compare. */
8917 switch (code)
8919 case UNLE:
8920 case UNLT:
8921 case LTGT:
8922 case GT:
8923 case GE:
8924 case UNORDERED:
8925 case ORDERED:
8926 case UNEQ:
8927 return 4;
8928 break;
8929 case LT:
8930 case NE:
8931 case EQ:
8932 case UNGE:
8933 return 5;
8934 break;
8935 case LE:
8936 case UNGT:
8937 return 6;
8938 break;
8939 default:
8940 abort ();
8944 /* Return cost of comparison done using fcomi operation.
8945 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8946 static int
8947 ix86_fp_comparison_fcomi_cost (code)
8948 enum rtx_code code;
8950 enum rtx_code bypass_code, first_code, second_code;
8951 /* Return an arbitrarily high cost when the instruction is not supported -
8952 this prevents gcc from using it. */
8953 if (!TARGET_CMOVE)
8954 return 1024;
8955 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8956 return (bypass_code != NIL || second_code != NIL) + 2;
8959 /* Return cost of comparison done using sahf operation.
8960 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8961 static int
8962 ix86_fp_comparison_sahf_cost (code)
8963 enum rtx_code code;
8965 enum rtx_code bypass_code, first_code, second_code;
8966 /* Return an arbitrarily high cost when the instruction is not preferred -
8967 this prevents gcc from using it. */
8968 if (!TARGET_USE_SAHF && !optimize_size)
8969 return 1024;
8970 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8971 return (bypass_code != NIL || second_code != NIL) + 3;
8974 /* Compute cost of the comparison done using any method.
8975 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8976 static int
8977 ix86_fp_comparison_cost (code)
8978 enum rtx_code code;
8980 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8981 int min;
8983 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8984 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8986 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8987 if (min > sahf_cost)
8988 min = sahf_cost;
8989 if (min > fcomi_cost)
8990 min = fcomi_cost;
8991 return min;
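/* Example: for EQ under TARGET_IEEE_FP on a TARGET_CMOVE cpu with sahf
   enabled, the arithmetics sequence costs 5, sahf costs 4, and fcomi
   costs 3 (two insns plus one bypass branch), so fcomi wins.  */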
8994 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8996 static rtx
8997 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8998 enum rtx_code code;
8999 rtx op0, op1, scratch;
9000 rtx *second_test;
9001 rtx *bypass_test;
9003 enum machine_mode fpcmp_mode, intcmp_mode;
9004 rtx tmp, tmp2;
9005 int cost = ix86_fp_comparison_cost (code);
9006 enum rtx_code bypass_code, first_code, second_code;
9008 fpcmp_mode = ix86_fp_compare_mode (code);
9009 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9011 if (second_test)
9012 *second_test = NULL_RTX;
9013 if (bypass_test)
9014 *bypass_test = NULL_RTX;
9016 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9018 /* Do fcomi/sahf based test when profitable. */
9019 if ((bypass_code == NIL || bypass_test)
9020 && (second_code == NIL || second_test)
9021 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9023 if (TARGET_CMOVE)
9025 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9026 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9027 tmp);
9028 emit_insn (tmp);
9030 else
9032 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9033 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9034 if (!scratch)
9035 scratch = gen_reg_rtx (HImode);
9036 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9037 emit_insn (gen_x86_sahf_1 (scratch));
9040 /* The FP codes work out to act like unsigned. */
9041 intcmp_mode = fpcmp_mode;
9042 code = first_code;
9043 if (bypass_code != NIL)
9044 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9045 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9046 const0_rtx);
9047 if (second_code != NIL)
9048 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9049 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9050 const0_rtx);
9052 else
9054 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9055 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9056 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9057 if (!scratch)
9058 scratch = gen_reg_rtx (HImode);
9059 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9061 /* In the unordered case, we have to check C2 for NaN's, which
9062 doesn't happen to work out to anything nice combination-wise.
9063 So do some bit twiddling on the value we've got in AH to come
9064 up with an appropriate set of condition codes. */
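/* The status word's condition bits land in the high byte of the
   scratch (AH after an fnstsw %ax style store) as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40; the masks below (0x45, 0x44, 0x40, 0x05,
   0x04, 0x01) are combinations of those bits.  */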
9066 intcmp_mode = CCNOmode;
9067 switch (code)
9069 case GT:
9070 case UNGT:
9071 if (code == GT || !TARGET_IEEE_FP)
9073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9074 code = EQ;
9076 else
9078 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9079 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9080 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9081 intcmp_mode = CCmode;
9082 code = GEU;
9084 break;
9085 case LT:
9086 case UNLT:
9087 if (code == LT && TARGET_IEEE_FP)
9089 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9090 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9091 intcmp_mode = CCmode;
9092 code = EQ;
9094 else
9096 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9097 code = NE;
9099 break;
9100 case GE:
9101 case UNGE:
9102 if (code == GE || !TARGET_IEEE_FP)
9104 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9105 code = EQ;
9107 else
9109 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9110 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9111 GEN_INT (0x01)));
9112 code = NE;
9114 break;
9115 case LE:
9116 case UNLE:
9117 if (code == LE && TARGET_IEEE_FP)
9119 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9120 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9121 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9122 intcmp_mode = CCmode;
9123 code = LTU;
9125 else
9127 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9128 code = NE;
9130 break;
9131 case EQ:
9132 case UNEQ:
9133 if (code == EQ && TARGET_IEEE_FP)
9135 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9136 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9137 intcmp_mode = CCmode;
9138 code = EQ;
9140 else
9142 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9143 code = NE;
9144 break;
9146 break;
9147 case NE:
9148 case LTGT:
9149 if (code == NE && TARGET_IEEE_FP)
9151 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9152 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9153 GEN_INT (0x40)));
9154 code = NE;
9156 else
9158 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9159 code = EQ;
9161 break;
9163 case UNORDERED:
9164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9165 code = NE;
9166 break;
9167 case ORDERED:
9168 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9169 code = EQ;
9170 break;
9172 default:
9173 abort ();
9177 /* Return the test that should be put into the flags user, i.e.
9178 the bcc, scc, or cmov instruction. */
9179 return gen_rtx_fmt_ee (code, VOIDmode,
9180 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9181 const0_rtx);
9185 ix86_expand_compare (code, second_test, bypass_test)
9186 enum rtx_code code;
9187 rtx *second_test, *bypass_test;
9189 rtx op0, op1, ret;
9190 op0 = ix86_compare_op0;
9191 op1 = ix86_compare_op1;
9193 if (second_test)
9194 *second_test = NULL_RTX;
9195 if (bypass_test)
9196 *bypass_test = NULL_RTX;
9198 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9199 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9200 second_test, bypass_test);
9201 else
9202 ret = ix86_expand_int_compare (code, op0, op1);
9204 return ret;
9207 /* Return true if the CODE will result in a nontrivial jump sequence. */
9208 bool
9209 ix86_fp_jump_nontrivial_p (code)
9210 enum rtx_code code;
9212 enum rtx_code bypass_code, first_code, second_code;
9213 if (!TARGET_CMOVE)
9214 return true;
9215 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9216 return bypass_code != NIL || second_code != NIL;
9219 void
9220 ix86_expand_branch (code, label)
9221 enum rtx_code code;
9222 rtx label;
9224 rtx tmp;
9226 switch (GET_MODE (ix86_compare_op0))
9228 case QImode:
9229 case HImode:
9230 case SImode:
9231 simple:
9232 tmp = ix86_expand_compare (code, NULL, NULL);
9233 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9234 gen_rtx_LABEL_REF (VOIDmode, label),
9235 pc_rtx);
9236 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9237 return;
9239 case SFmode:
9240 case DFmode:
9241 case XFmode:
9242 case TFmode:
9244 rtvec vec;
9245 int use_fcomi;
9246 enum rtx_code bypass_code, first_code, second_code;
9248 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9249 &ix86_compare_op1);
9251 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9253 /* Check whether we will use the natural sequence with one jump. If
9254 so, we can expand the jump early. Otherwise delay expansion by
9255 creating a compound insn so as not to confuse the optimizers. */
9256 if (bypass_code == NIL && second_code == NIL
9257 && TARGET_CMOVE)
9259 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9260 gen_rtx_LABEL_REF (VOIDmode, label),
9261 pc_rtx, NULL_RTX);
9263 else
9265 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9266 ix86_compare_op0, ix86_compare_op1);
9267 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9268 gen_rtx_LABEL_REF (VOIDmode, label),
9269 pc_rtx);
9270 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9272 use_fcomi = ix86_use_fcomi_compare (code);
9273 vec = rtvec_alloc (3 + !use_fcomi);
9274 RTVEC_ELT (vec, 0) = tmp;
9275 RTVEC_ELT (vec, 1)
9276 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9277 RTVEC_ELT (vec, 2)
9278 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9279 if (! use_fcomi)
9280 RTVEC_ELT (vec, 3)
9281 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9283 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9285 return;
9288 case DImode:
9289 if (TARGET_64BIT)
9290 goto simple;
9291 /* Expand DImode branch into multiple compare+branch. */
9293 rtx lo[2], hi[2], label2;
9294 enum rtx_code code1, code2, code3;
9296 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9298 tmp = ix86_compare_op0;
9299 ix86_compare_op0 = ix86_compare_op1;
9300 ix86_compare_op1 = tmp;
9301 code = swap_condition (code);
9303 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9304 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9306 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9307 avoid two branches. This costs one extra insn, so disable when
9308 optimizing for size. */
9310 if ((code == EQ || code == NE)
9311 && (!optimize_size
9312 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9314 rtx xor0, xor1;
9316 xor1 = hi[0];
9317 if (hi[1] != const0_rtx)
9318 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9319 NULL_RTX, 0, OPTAB_WIDEN);
9321 xor0 = lo[0];
9322 if (lo[1] != const0_rtx)
9323 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9324 NULL_RTX, 0, OPTAB_WIDEN);
9326 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9327 NULL_RTX, 0, OPTAB_WIDEN);
9329 ix86_compare_op0 = tmp;
9330 ix86_compare_op1 = const0_rtx;
9331 ix86_expand_branch (code, label);
9332 return;
9335 /* Otherwise, if we are doing a less-than or greater-or-equal
9336 comparison, op1 is a constant, and the low word is zero, then
9337 we can just examine the high word. */
9339 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9340 switch (code)
9342 case LT: case LTU: case GE: case GEU:
9343 ix86_compare_op0 = hi[0];
9344 ix86_compare_op1 = hi[1];
9345 ix86_expand_branch (code, label);
9346 return;
9347 default:
9348 break;
9351 /* Otherwise, we need two or three jumps. */
9353 label2 = gen_label_rtx ();
9355 code1 = code;
9356 code2 = swap_condition (code);
9357 code3 = unsigned_condition (code);
9359 switch (code)
9361 case LT: case GT: case LTU: case GTU:
9362 break;
9364 case LE: code1 = LT; code2 = GT; break;
9365 case GE: code1 = GT; code2 = LT; break;
9366 case LEU: code1 = LTU; code2 = GTU; break;
9367 case GEU: code1 = GTU; code2 = LTU; break;
9369 case EQ: code1 = NIL; code2 = NE; break;
9370 case NE: code2 = NIL; break;
9372 default:
9373 abort ();
9377 * a < b =>
9378 * if (hi(a) < hi(b)) goto true;
9379 * if (hi(a) > hi(b)) goto false;
9380 * if (lo(a) < lo(b)) goto true;
9381 * false:
9384 ix86_compare_op0 = hi[0];
9385 ix86_compare_op1 = hi[1];
9387 if (code1 != NIL)
9388 ix86_expand_branch (code1, label);
9389 if (code2 != NIL)
9390 ix86_expand_branch (code2, label2);
9392 ix86_compare_op0 = lo[0];
9393 ix86_compare_op1 = lo[1];
9394 ix86_expand_branch (code3, label);
9396 if (code2 != NIL)
9397 emit_label (label2);
9398 return;
9401 default:
9402 abort ();
9406 /* Split branch based on floating point condition. */
9407 void
9408 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9409 enum rtx_code code;
9410 rtx op1, op2, target1, target2, tmp;
9412 rtx second, bypass;
9413 rtx label = NULL_RTX;
9414 rtx condition;
9415 int bypass_probability = -1, second_probability = -1, probability = -1;
9416 rtx i;
9418 if (target2 != pc_rtx)
9420 rtx tmp = target2;
9421 code = reverse_condition_maybe_unordered (code);
9422 target2 = target1;
9423 target1 = tmp;
9426 condition = ix86_expand_fp_compare (code, op1, op2,
9427 tmp, &second, &bypass);
9429 if (split_branch_probability >= 0)
9431 /* Distribute the probabilities across the jumps.
9432 Assume that BYPASS and SECOND always test
9433 for UNORDERED. */
9434 probability = split_branch_probability;
9436 /* A value of 1 is low enough that there is no need for the probability
9437 to be updated. Later we may run some experiments and see
9438 if unordered values are more frequent in practice. */
9439 if (bypass)
9440 bypass_probability = 1;
9441 if (second)
9442 second_probability = 1;
9444 if (bypass != NULL_RTX)
9446 label = gen_label_rtx ();
9447 i = emit_jump_insn (gen_rtx_SET
9448 (VOIDmode, pc_rtx,
9449 gen_rtx_IF_THEN_ELSE (VOIDmode,
9450 bypass,
9451 gen_rtx_LABEL_REF (VOIDmode,
9452 label),
9453 pc_rtx)));
9454 if (bypass_probability >= 0)
9455 REG_NOTES (i)
9456 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9457 GEN_INT (bypass_probability),
9458 REG_NOTES (i));
9460 i = emit_jump_insn (gen_rtx_SET
9461 (VOIDmode, pc_rtx,
9462 gen_rtx_IF_THEN_ELSE (VOIDmode,
9463 condition, target1, target2)));
9464 if (probability >= 0)
9465 REG_NOTES (i)
9466 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9467 GEN_INT (probability),
9468 REG_NOTES (i));
9469 if (second != NULL_RTX)
9471 i = emit_jump_insn (gen_rtx_SET
9472 (VOIDmode, pc_rtx,
9473 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9474 target2)));
9475 if (second_probability >= 0)
9476 REG_NOTES (i)
9477 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9478 GEN_INT (second_probability),
9479 REG_NOTES (i));
9481 if (label != NULL_RTX)
9482 emit_label (label);
9486 ix86_expand_setcc (code, dest)
9487 enum rtx_code code;
9488 rtx dest;
9490 rtx ret, tmp, tmpreg;
9491 rtx second_test, bypass_test;
9493 if (GET_MODE (ix86_compare_op0) == DImode
9494 && !TARGET_64BIT)
9495 return 0; /* FAIL */
9497 if (GET_MODE (dest) != QImode)
9498 abort ();
9500 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9501 PUT_MODE (ret, QImode);
9503 tmp = dest;
9504 tmpreg = dest;
9506 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9507 if (bypass_test || second_test)
9509 rtx test = second_test;
9510 int bypass = 0;
9511 rtx tmp2 = gen_reg_rtx (QImode);
9512 if (bypass_test)
9514 if (second_test)
9515 abort ();
9516 test = bypass_test;
9517 bypass = 1;
9518 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9520 PUT_MODE (test, QImode);
9521 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9523 if (bypass)
9524 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9525 else
9526 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9529 return 1; /* DONE */
9532 /* Expand a comparison setting or clearing the carry flag. Return true when
9533 successful and set *POP to the operation. */
9534 bool
9535 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9536 rtx op0, op1, *pop;
9537 enum rtx_code code;
9539 enum machine_mode mode =
9540 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9542 /* Do not handle DImode compares that go through the special path.
9543 FP compares are handled further below. */
9544 if (mode == DImode && !TARGET_64BIT)
9545 return false;
9546 if (FLOAT_MODE_P (mode))
9548 rtx second_test = NULL, bypass_test = NULL;
9549 rtx compare_op, compare_seq;
9551 /* Shortcut: the following common codes never translate into carry flag compares. */
9552 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9553 || code == ORDERED || code == UNORDERED)
9554 return false;
9556 /* These comparisons require the zero flag; swap the operands so they won't. */
9557 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9558 && !TARGET_IEEE_FP)
9560 rtx tmp = op0;
9561 op0 = op1;
9562 op1 = tmp;
9563 code = swap_condition (code);
9566 /* Try to expand the comparison and verify that we end up with a carry flag
9567 based comparison. This fails to be true only when we decide to expand the
9568 comparison using arithmetic, which is not a common scenario. */
9569 start_sequence ();
9570 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9571 &second_test, &bypass_test);
9572 compare_seq = get_insns ();
9573 end_sequence ();
9575 if (second_test || bypass_test)
9576 return false;
9577 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9578 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9579 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9580 else
9581 code = GET_CODE (compare_op);
9582 if (code != LTU && code != GEU)
9583 return false;
9584 emit_insn (compare_seq);
9585 *pop = compare_op;
9586 return true;
9588 if (!INTEGRAL_MODE_P (mode))
9589 return false;
9590 switch (code)
9592 case LTU:
9593 case GEU:
9594 break;
9596 /* Convert a==0 into (unsigned)a<1. */
9597 case EQ:
9598 case NE:
9599 if (op1 != const0_rtx)
9600 return false;
9601 op1 = const1_rtx;
9602 code = (code == EQ ? LTU : GEU);
9603 break;
9605 /* Convert a>b into b<a or a>=b+1. */
9606 case GTU:
9607 case LEU:
9608 if (GET_CODE (op1) == CONST_INT)
9610 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9611 /* Bail out on overflow. We could still swap the operands, but
9612 that would force loading the constant into a register. */
9613 if (op1 == const0_rtx
9614 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9615 return false;
9616 code = (code == GTU ? GEU : LTU);
9618 else
9620 rtx tmp = op1;
9621 op1 = op0;
9622 op0 = tmp;
9623 code = (code == GTU ? LTU : GEU);
9625 break;
9627 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9628 case LT:
9629 case GE:
9630 if (mode == DImode || op1 != const0_rtx)
9631 return false;
9632 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9633 code = (code == LT ? GEU : LTU);
9634 break;
9635 case LE:
9636 case GT:
9637 if (mode == DImode || op1 != constm1_rtx)
9638 return false;
9639 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9640 code = (code == LE ? GEU : LTU);
9641 break;
9643 default:
9644 return false;
9646 ix86_compare_op0 = op0;
9647 ix86_compare_op1 = op1;
9648 *pop = ix86_expand_compare (code, NULL, NULL);
9649 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9650 abort ();
9651 return true;
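/* Worked example of the integer path: "a == 0" is rewritten above into
   (unsigned) a < 1, i.e. LTU, so a single "cmpl $1, a" leaves the
   result in the carry flag, ready for the sbb-based sequences in
   ix86_expand_int_movcc.  */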
9655 ix86_expand_int_movcc (operands)
9656 rtx operands[];
9658 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9659 rtx compare_seq, compare_op;
9660 rtx second_test, bypass_test;
9661 enum machine_mode mode = GET_MODE (operands[0]);
9662 bool sign_bit_compare_p = false;
9664 start_sequence ();
9665 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9666 compare_seq = get_insns ();
9667 end_sequence ();
9669 compare_code = GET_CODE (compare_op);
9671 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9672 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9673 sign_bit_compare_p = true;
9675 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9676 HImode insns, we'd be swallowed in word prefix ops. */
9678 if ((mode != HImode || TARGET_FAST_PREFIX)
9679 && (mode != DImode || TARGET_64BIT)
9680 && GET_CODE (operands[2]) == CONST_INT
9681 && GET_CODE (operands[3]) == CONST_INT)
9683 rtx out = operands[0];
9684 HOST_WIDE_INT ct = INTVAL (operands[2]);
9685 HOST_WIDE_INT cf = INTVAL (operands[3]);
9686 HOST_WIDE_INT diff;
9688 diff = ct - cf;
9689 /* Sign bit compares are better done using shifts than by using
9690 sbb. */
9691 if (sign_bit_compare_p
9692 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9693 ix86_compare_op1, &compare_op))
9695 /* Detect overlap between destination and compare sources. */
9696 rtx tmp = out;
9698 if (!sign_bit_compare_p)
9700 bool fpcmp = false;
9702 compare_code = GET_CODE (compare_op);
9704 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9705 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9707 fpcmp = true;
9708 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9711 /* To simplify the rest of the code, restrict to the GEU case. */
9712 if (compare_code == LTU)
9714 HOST_WIDE_INT tmp = ct;
9715 ct = cf;
9716 cf = tmp;
9717 compare_code = reverse_condition (compare_code);
9718 code = reverse_condition (code);
9720 else
9722 if (fpcmp)
9723 PUT_CODE (compare_op,
9724 reverse_condition_maybe_unordered
9725 (GET_CODE (compare_op)));
9726 else
9727 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9729 diff = ct - cf;
9731 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9732 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9733 tmp = gen_reg_rtx (mode);
9735 if (mode == DImode)
9736 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9737 else
9738 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9740 else
9742 if (code == GT || code == GE)
9743 code = reverse_condition (code);
9744 else
9746 HOST_WIDE_INT tmp = ct;
9747 ct = cf;
9748 cf = tmp;
9749 diff = ct - cf;
9751 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9752 ix86_compare_op1, VOIDmode, 0, -1);
9755 if (diff == 1)
9758 * cmpl op0,op1
9759 * sbbl dest,dest
9760 * [addl dest, ct]
9762 * Size 5 - 8.
9764 if (ct)
9765 tmp = expand_simple_binop (mode, PLUS,
9766 tmp, GEN_INT (ct),
9767 copy_rtx (tmp), 1, OPTAB_DIRECT);
9769 else if (cf == -1)
9772 * cmpl op0,op1
9773 * sbbl dest,dest
9774 * orl $ct, dest
9776 * Size 8.
9778 tmp = expand_simple_binop (mode, IOR,
9779 tmp, GEN_INT (ct),
9780 copy_rtx (tmp), 1, OPTAB_DIRECT);
9782 else if (diff == -1 && ct)
9785 * cmpl op0,op1
9786 * sbbl dest,dest
9787 * notl dest
9788 * [addl dest, cf]
9790 * Size 8 - 11.
9792 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9793 if (cf)
9794 tmp = expand_simple_binop (mode, PLUS,
9795 copy_rtx (tmp), GEN_INT (cf),
9796 copy_rtx (tmp), 1, OPTAB_DIRECT);
9798 else
9801 * cmpl op0,op1
9802 * sbbl dest,dest
9803 * [notl dest]
9804 * andl cf - ct, dest
9805 * [addl dest, ct]
9807 * Size 8 - 11.
9810 if (cf == 0)
9812 cf = ct;
9813 ct = 0;
9814 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9817 tmp = expand_simple_binop (mode, AND,
9818 copy_rtx (tmp),
9819 gen_int_mode (cf - ct, mode),
9820 copy_rtx (tmp), 1, OPTAB_DIRECT);
9821 if (ct)
9822 tmp = expand_simple_binop (mode, PLUS,
9823 copy_rtx (tmp), GEN_INT (ct),
9824 copy_rtx (tmp), 1, OPTAB_DIRECT);
9827 if (!rtx_equal_p (tmp, out))
9828 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9830 return 1; /* DONE */
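/* Concrete instance of the diff == 1 case above: for unsigned
   "x = (a >= b) ? 7 : 6" the emitted sequence is roughly
       cmpl %ebx, %eax      ; CF = (a < b)
       sbbl %eax, %eax      ; %eax = CF ? -1 : 0
       addl $7, %eax        ; 6 if a < b, 7 otherwise
   (illustrative register choices).  */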
9833 if (diff < 0)
9835 HOST_WIDE_INT tmp;
9836 tmp = ct, ct = cf, cf = tmp;
9837 diff = -diff;
9838 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9840 /* We may be reversing an unordered compare to a normal compare, which
9841 is not valid in general (we may convert a non-trapping condition
9842 to a trapping one); however, on i386 we currently emit all
9843 comparisons unordered. */
9844 compare_code = reverse_condition_maybe_unordered (compare_code);
9845 code = reverse_condition_maybe_unordered (code);
9847 else
9849 compare_code = reverse_condition (compare_code);
9850 code = reverse_condition (code);
9854 compare_code = NIL;
9855 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9856 && GET_CODE (ix86_compare_op1) == CONST_INT)
9858 if (ix86_compare_op1 == const0_rtx
9859 && (code == LT || code == GE))
9860 compare_code = code;
9861 else if (ix86_compare_op1 == constm1_rtx)
9863 if (code == LE)
9864 compare_code = LT;
9865 else if (code == GT)
9866 compare_code = GE;
9870 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9871 if (compare_code != NIL
9872 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9873 && (cf == -1 || ct == -1))
9875 /* If the lea code below could be used, optimize only
9876 if it results in a 2 insn sequence. */
9878 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9879 || diff == 3 || diff == 5 || diff == 9)
9880 || (compare_code == LT && ct == -1)
9881 || (compare_code == GE && cf == -1))
9884 * notl op1 (if necessary)
9885 * sarl $31, op1
9886 * orl cf, op1
9888 if (ct != -1)
9890 cf = ct;
9891 ct = -1;
9892 code = reverse_condition (code);
9895 out = emit_store_flag (out, code, ix86_compare_op0,
9896 ix86_compare_op1, VOIDmode, 0, -1);
9898 out = expand_simple_binop (mode, IOR,
9899 out, GEN_INT (cf),
9900 out, 1, OPTAB_DIRECT);
9901 if (out != operands[0])
9902 emit_move_insn (operands[0], out);
9904 return 1; /* DONE */
9909 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9910 || diff == 3 || diff == 5 || diff == 9)
9911 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9912 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9915 * xorl dest,dest
9916 * cmpl op1,op2
9917 * setcc dest
9918 * lea cf(dest*(ct-cf)),dest
9920 * Size 14.
9922 * This also catches the degenerate setcc-only case.
9925 rtx tmp;
9926 int nops;
9928 out = emit_store_flag (out, code, ix86_compare_op0,
9929 ix86_compare_op1, VOIDmode, 0, 1);
9931 nops = 0;
9932 /* On x86_64 the lea instruction operates on Pmode, so we need
9933 to get the arithmetic done in the proper mode to match. */
9934 if (diff == 1)
9935 tmp = copy_rtx (out);
9936 else
9938 rtx out1;
9939 out1 = copy_rtx (out);
9940 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9941 nops++;
9942 if (diff & 1)
9944 tmp = gen_rtx_PLUS (mode, tmp, out1);
9945 nops++;
9948 if (cf != 0)
9950 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9951 nops++;
9953 if (!rtx_equal_p (tmp, out))
9955 if (nops == 1)
9956 out = force_operand (tmp, copy_rtx (out));
9957 else
9958 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9960 if (!rtx_equal_p (out, operands[0]))
9961 emit_move_insn (operands[0], copy_rtx (out));
9963 return 1; /* DONE */
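/* Concrete instance of the lea case: "x = (a > b) ? 13 : 8" has
   diff == 5, so after the setcc the result is formed as
       leal 8(%eax,%eax,4), %eax    ; 8 + 5 * (a > b)
   with no branch (illustrative registers).  */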
9967 * General case: Jumpful:
9968 * xorl dest,dest cmpl op1, op2
9969 * cmpl op1, op2 movl ct, dest
9970 * setcc dest jcc 1f
9971 * decl dest movl cf, dest
9972 * andl (cf-ct),dest 1:
9973 * addl ct,dest
9975 * Size 20. Size 14.
9977 * This is reasonably steep, but branch mispredict costs are
9978 * high on modern cpus, so consider failing only if optimizing
9979 * for space.
9982 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9983 && BRANCH_COST >= 2)
9985 if (cf == 0)
9987 cf = ct;
9988 ct = 0;
9989 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9990 /* We may be reversing an unordered compare to a normal compare,
9991 which is not valid in general (we may convert a non-trapping
9992 condition to a trapping one); however, on i386 we currently
9993 emit all comparisons unordered. */
9994 code = reverse_condition_maybe_unordered (code);
9995 else
9997 code = reverse_condition (code);
9998 if (compare_code != NIL)
9999 compare_code = reverse_condition (compare_code);
10003 if (compare_code != NIL)
10005 /* notl op1 (if needed)
10006 sarl $31, op1
10007 andl (cf-ct), op1
10008 addl ct, op1
10010 For x < 0 (resp. x <= -1) there will be no notl,
10011 so if possible swap the constants to get rid of the
10012 complement.
10013 True/false will be -1/0 while code below (store flag
10014 followed by decrement) is 0/-1, so the constants need
10015 to be exchanged once more. */
10017 if (compare_code == GE || !cf)
10019 code = reverse_condition (code);
10020 compare_code = LT;
10022 else
10024 HOST_WIDE_INT tmp = cf;
10025 cf = ct;
10026 ct = tmp;
10029 out = emit_store_flag (out, code, ix86_compare_op0,
10030 ix86_compare_op1, VOIDmode, 0, -1);
10032 else
10034 out = emit_store_flag (out, code, ix86_compare_op0,
10035 ix86_compare_op1, VOIDmode, 0, 1);
10037 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10038 copy_rtx (out), 1, OPTAB_DIRECT);
10041 out = expand_simple_binop (mode, AND, copy_rtx (out),
10042 gen_int_mode (cf - ct, mode),
10043 copy_rtx (out), 1, OPTAB_DIRECT);
10044 if (ct)
10045 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10046 copy_rtx (out), 1, OPTAB_DIRECT);
10047 if (!rtx_equal_p (out, operands[0]))
10048 emit_move_insn (operands[0], copy_rtx (out));
10050 return 1; /* DONE */
10054 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10056 /* Try a few more things with specific constants and a variable. */
10058 optab op;
10059 rtx var, orig_out, out, tmp;
10061 if (BRANCH_COST <= 2)
10062 return 0; /* FAIL */
10064 /* If one of the two operands is an interesting constant, load a
10065 constant with the above and mask it in with a logical operation. */
10067 if (GET_CODE (operands[2]) == CONST_INT)
10069 var = operands[3];
10070 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10071 operands[3] = constm1_rtx, op = and_optab;
10072 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10073 operands[3] = const0_rtx, op = ior_optab;
10074 else
10075 return 0; /* FAIL */
10077 else if (GET_CODE (operands[3]) == CONST_INT)
10079 var = operands[2];
10080 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10081 operands[2] = constm1_rtx, op = and_optab;
10082 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10083 operands[2] = const0_rtx, op = ior_optab;
10084 else
10085 return 0; /* FAIL */
10087 else
10088 return 0; /* FAIL */
10090 orig_out = operands[0];
10091 tmp = gen_reg_rtx (mode);
10092 operands[0] = tmp;
10094 /* Recurse to get the constant loaded. */
10095 if (ix86_expand_int_movcc (operands) == 0)
10096 return 0; /* FAIL */
10098 /* Mask in the interesting variable. */
10099 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10100 OPTAB_WIDEN);
10101 if (!rtx_equal_p (out, orig_out))
10102 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10104 return 1; /* DONE */
10108 * For comparison with above,
10110 * movl cf,dest
10111 * movl ct,tmp
10112 * cmpl op1,op2
10113 * cmovcc tmp,dest
10115 * Size 15.
10118 if (! nonimmediate_operand (operands[2], mode))
10119 operands[2] = force_reg (mode, operands[2]);
10120 if (! nonimmediate_operand (operands[3], mode))
10121 operands[3] = force_reg (mode, operands[3]);
10123 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10125 rtx tmp = gen_reg_rtx (mode);
10126 emit_move_insn (tmp, operands[3]);
10127 operands[3] = tmp;
10129 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10131 rtx tmp = gen_reg_rtx (mode);
10132 emit_move_insn (tmp, operands[2]);
10133 operands[2] = tmp;
10136 if (! register_operand (operands[2], VOIDmode)
10137 && (mode == QImode
10138 || ! register_operand (operands[3], VOIDmode)))
10139 operands[2] = force_reg (mode, operands[2]);
10141 if (mode == QImode
10142 && ! register_operand (operands[3], VOIDmode))
10143 operands[3] = force_reg (mode, operands[3]);
10145 emit_insn (compare_seq);
10146 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10147 gen_rtx_IF_THEN_ELSE (mode,
10148 compare_op, operands[2],
10149 operands[3])));
10150 if (bypass_test)
10151 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10152 gen_rtx_IF_THEN_ELSE (mode,
10153 bypass_test,
10154 copy_rtx (operands[3]),
10155 copy_rtx (operands[0]))));
10156 if (second_test)
10157 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10158 gen_rtx_IF_THEN_ELSE (mode,
10159 second_test,
10160 copy_rtx (operands[2]),
10161 copy_rtx (operands[0]))));
10163 return 1; /* DONE */
10166 int
10167 ix86_expand_fp_movcc (operands)
10168 rtx operands[];
10170 enum rtx_code code;
10171 rtx tmp;
10172 rtx compare_op, second_test, bypass_test;
10174 /* For SF/DFmode conditional moves based on comparisons
10175 in the same mode, we may want to use SSE min/max instructions. */
10176 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10177 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10178 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10179 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10180 && (!TARGET_IEEE_FP
10181 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10182 /* We may be called from the post-reload splitter. */
10183 && (!REG_P (operands[0])
10184 || SSE_REG_P (operands[0])
10185 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10187 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10188 code = GET_CODE (operands[1]);
10190 /* See if we have a (cross) match between the comparison operands and
10191 the conditional move operands. */
10192 if (rtx_equal_p (operands[2], op1))
10194 rtx tmp = op0;
10195 op0 = op1;
10196 op1 = tmp;
10197 code = reverse_condition_maybe_unordered (code);
10199 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10201 /* Check for min operation. */
10202 if (code == LT || code == UNLE)
10204 if (code == UNLE)
10206 rtx tmp = op0;
10207 op0 = op1;
10208 op1 = tmp;
10210 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10211 if (memory_operand (op0, VOIDmode))
10212 op0 = force_reg (GET_MODE (operands[0]), op0);
10213 if (GET_MODE (operands[0]) == SFmode)
10214 emit_insn (gen_minsf3 (operands[0], op0, op1));
10215 else
10216 emit_insn (gen_mindf3 (operands[0], op0, op1));
10217 return 1;
10219 /* Check for max operation. */
10220 if (code == GT || code == UNGE)
10222 if (code == UNGE)
10224 rtx tmp = op0;
10225 op0 = op1;
10226 op1 = tmp;
10228 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10229 if (memory_operand (op0, VOIDmode))
10230 op0 = force_reg (GET_MODE (operands[0]), op0);
10231 if (GET_MODE (operands[0]) == SFmode)
10232 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10233 else
10234 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10235 return 1;
10238 /* Arrange for the condition to satisfy sse_comparison_operator. When
10239 in non-IEEE mode, also try to canonicalize the destination operand
10240 to be first in the comparison - this helps reload avoid extra
10241 moves. */
10242 if (!sse_comparison_operator (operands[1], VOIDmode)
10243 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10245 rtx tmp = ix86_compare_op0;
10246 ix86_compare_op0 = ix86_compare_op1;
10247 ix86_compare_op1 = tmp;
10248 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10249 VOIDmode, ix86_compare_op0,
10250 ix86_compare_op1);
10252 /* Similarly, try to make the result the first operand of the
10253 conditional move. We also don't support the NE comparison on SSE,
10254 so try to avoid it. */
10255 if ((rtx_equal_p (operands[0], operands[3])
10256 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10257 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10259 rtx tmp = operands[2];
10260 operands[2] = operands[3];
10261 operands[3] = tmp;
10262 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10263 (GET_CODE (operands[1])),
10264 VOIDmode, ix86_compare_op0,
10265 ix86_compare_op1);
10267 if (GET_MODE (operands[0]) == SFmode)
10268 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10269 operands[2], operands[3],
10270 ix86_compare_op0, ix86_compare_op1));
10271 else
10272 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10273 operands[2], operands[3],
10274 ix86_compare_op0, ix86_compare_op1));
10275 return 1;
10278 /* The floating point conditional move instructions don't directly
10279 support conditions resulting from a signed integer comparison. */
10281 code = GET_CODE (operands[1]);
10282 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10287 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10289 if (second_test != NULL || bypass_test != NULL)
10290 abort ();
10291 tmp = gen_reg_rtx (QImode);
10292 ix86_expand_setcc (code, tmp);
10293 code = NE;
10294 ix86_compare_op0 = tmp;
10295 ix86_compare_op1 = const0_rtx;
10296 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10298 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10300 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10301 emit_move_insn (tmp, operands[3]);
10302 operands[3] = tmp;
10304 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10306 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10307 emit_move_insn (tmp, operands[2]);
10308 operands[2] = tmp;
10311 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10312 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10313 compare_op,
10314 operands[2],
10315 operands[3])));
10316 if (bypass_test)
10317 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10318 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10319 bypass_test,
10320 operands[3],
10321 operands[0])));
10322 if (second_test)
10323 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10324 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10325 second_test,
10326 operands[2],
10327 operands[0])));
10329 return 1;
10332 /* Expand conditional increment or decrement using adc/sbb instructions.
10333 The default case using setcc followed by the conditional move can be
10334 done by generic code. */
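/* For example, "r += (a < b)" with unsigned operands can be emitted as
       cmpl  %ebx, %eax      ; carry flag = (a < b)
       adcl  $0, %ecx        ; r += carry
   and the decrement case uses sbb instead (an illustrative sketch;
   register choices are up to the allocator).  */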
10335 int
10336 ix86_expand_int_addcc (operands)
10337 rtx operands[];
10339 enum rtx_code code = GET_CODE (operands[1]);
10340 rtx compare_op;
10341 rtx val = const0_rtx;
10342 bool fpcmp = false;
10343 enum machine_mode mode = GET_MODE (operands[0]);
10345 if (operands[3] != const1_rtx
10346 && operands[3] != constm1_rtx)
10347 return 0;
10348 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10349 ix86_compare_op1, &compare_op))
10350 return 0;
10351 code = GET_CODE (compare_op);
10353 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10354 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10356 fpcmp = true;
10357 code = ix86_fp_compare_code_to_integer (code);
10360 if (code != LTU)
10362 val = constm1_rtx;
10363 if (fpcmp)
10364 PUT_CODE (compare_op,
10365 reverse_condition_maybe_unordered
10366 (GET_CODE (compare_op)));
10367 else
10368 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10370 PUT_MODE (compare_op, mode);
10372 /* Construct either adc or sbb insn. */
10373 if ((code == LTU) == (operands[3] == constm1_rtx))
10375 switch (GET_MODE (operands[0]))
10377 case QImode:
10378 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10379 break;
10380 case HImode:
10381 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10382 break;
10383 case SImode:
10384 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10385 break;
10386 case DImode:
10387 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10388 break;
10389 default:
10390 abort ();
10393 else
10395 switch (GET_MODE (operands[0]))
10397 case QImode:
10398 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10399 break;
10400 case HImode:
10401 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10402 break;
10403 case SImode:
10404 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10405 break;
10406 case DImode:
10407 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10408 break;
10409 default:
10410 abort ();
10413 return 1; /* DONE */
10417 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10418 works for floating point parameters and non-offsettable memories.
10419 For pushes, it returns just stack offsets; the values will be saved
10420 in the right order. At most three parts are generated. */
10422 static int
10423 ix86_split_to_parts (operand, parts, mode)
10424 rtx operand;
10425 rtx *parts;
10426 enum machine_mode mode;
10428 int size;
10430 if (!TARGET_64BIT)
10431 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10432 else
10433 size = (GET_MODE_SIZE (mode) + 4) / 8;
10435 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10436 abort ();
10437 if (size < 2 || size > 3)
10438 abort ();
10440 /* Optimize constant pool references to immediates. This is used by fp
10441 moves, which force all constants to memory to allow combining. */
10442 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10444 rtx tmp = maybe_get_pool_constant (operand);
10445 if (tmp)
10446 operand = tmp;
10449 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10451 /* The only non-offsettable memories we handle are pushes. */
10452 if (! push_operand (operand, VOIDmode))
10453 abort ();
10455 operand = copy_rtx (operand);
10456 PUT_MODE (operand, Pmode);
10457 parts[0] = parts[1] = parts[2] = operand;
10459 else if (!TARGET_64BIT)
10461 if (mode == DImode)
10462 split_di (&operand, 1, &parts[0], &parts[1]);
10463 else
10465 if (REG_P (operand))
10467 if (!reload_completed)
10468 abort ();
10469 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10470 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10471 if (size == 3)
10472 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10474 else if (offsettable_memref_p (operand))
10476 operand = adjust_address (operand, SImode, 0);
10477 parts[0] = operand;
10478 parts[1] = adjust_address (operand, SImode, 4);
10479 if (size == 3)
10480 parts[2] = adjust_address (operand, SImode, 8);
10482 else if (GET_CODE (operand) == CONST_DOUBLE)
10484 REAL_VALUE_TYPE r;
10485 long l[4];
10487 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10488 switch (mode)
10490 case XFmode:
10491 case TFmode:
10492 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10493 parts[2] = gen_int_mode (l[2], SImode);
10494 break;
10495 case DFmode:
10496 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10497 break;
10498 default:
10499 abort ();
10501 parts[1] = gen_int_mode (l[1], SImode);
10502 parts[0] = gen_int_mode (l[0], SImode);
10504 else
10505 abort ();
10508 else
10510 if (mode == TImode)
10511 split_ti (&operand, 1, &parts[0], &parts[1]);
10512 if (mode == XFmode || mode == TFmode)
10514 if (REG_P (operand))
10516 if (!reload_completed)
10517 abort ();
10518 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10519 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10521 else if (offsettable_memref_p (operand))
10523 operand = adjust_address (operand, DImode, 0);
10524 parts[0] = operand;
10525 parts[1] = adjust_address (operand, SImode, 8);
10527 else if (GET_CODE (operand) == CONST_DOUBLE)
10529 REAL_VALUE_TYPE r;
10530 long l[3];
10532 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10533 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10534 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10535 if (HOST_BITS_PER_WIDE_INT >= 64)
10536 parts[0]
10537 = gen_int_mode
10538 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10539 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10540 DImode);
10541 else
10542 parts[0] = immed_double_const (l[0], l[1], DImode);
10543 parts[1] = gen_int_mode (l[2], SImode);
10545 else
10546 abort ();
10550 return size;
10553 /* Emit insns to perform a move or push of DI, DF, and XF values.
10554 All the required insns are emitted here (the function itself
10555 returns no value). Operands 2-4 contain the input values
10556 in the correct order; operands 5-7 contain the output values. */
10558 void
10559 ix86_split_long_move (operands)
10560 rtx operands[];
10562 rtx part[2][3];
10563 int nparts;
10564 int push = 0;
10565 int collisions = 0;
10566 enum machine_mode mode = GET_MODE (operands[0]);
10568 /* The DFmode expanders may ask us to move a double.
10569 For a 64-bit target this is a single move. By hiding that fact
10570 here we simplify the i386.md splitters. */
10571 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10573 /* Optimize constant pool reference to immediates. This is used by
10574 fp moves, that force all constants to memory to allow combining. */
10576 if (GET_CODE (operands[1]) == MEM
10577 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10578 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10579 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10580 if (push_operand (operands[0], VOIDmode))
10582 operands[0] = copy_rtx (operands[0]);
10583 PUT_MODE (operands[0], Pmode);
10585 else
10586 operands[0] = gen_lowpart (DImode, operands[0]);
10587 operands[1] = gen_lowpart (DImode, operands[1]);
10588 emit_move_insn (operands[0], operands[1]);
10589 return;
10592 /* The only non-offsettable memory we handle is push. */
10593 if (push_operand (operands[0], VOIDmode))
10594 push = 1;
10595 else if (GET_CODE (operands[0]) == MEM
10596 && ! offsettable_memref_p (operands[0]))
10597 abort ();
10599 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10600 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10602 /* When emitting a push, take care with source operands on the stack. */
10603 if (push && GET_CODE (operands[1]) == MEM
10604 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10606 if (nparts == 3)
10607 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10608 XEXP (part[1][2], 0));
10609 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10610 XEXP (part[1][1], 0));
10613 /* We need to do the copy in the right order in case an address register
10614 of the source overlaps the destination. */
10615 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10617 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10618 collisions++;
10619 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10620 collisions++;
10621 if (nparts == 3
10622 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10623 collisions++;
10625 /* Collision in the middle part can be handled by reordering. */
10626 if (collisions == 1 && nparts == 3
10627 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10629 rtx tmp;
10630 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10631 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10634 /* If there are more collisions, we can't handle it by reordering.
10635 Do an lea to the last part and use only one colliding move. */
10636 else if (collisions > 1)
10638 collisions = 1;
10639 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10640 XEXP (part[1][0], 0)));
10641 part[1][0] = change_address (part[1][0],
10642 TARGET_64BIT ? DImode : SImode,
10643 part[0][nparts - 1]);
10644 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10645 if (nparts == 3)
10646 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10650 if (push)
10652 if (!TARGET_64BIT)
10654 if (nparts == 3)
10656 /* We use only the first 12 bytes of the TFmode value, but for pushing
10657 we are required to adjust the stack as if we were pushing a real
10658 16-byte value. */
10659 if (mode == TFmode && !TARGET_64BIT)
10660 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10661 GEN_INT (-4)));
10662 emit_move_insn (part[0][2], part[1][2]);
10665 else
10667 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10668 register, that is OK - we will just use the larger counterpart. We also
10669 retype memory - this comes from an attempt to avoid the REX prefix on
10670 moving the second half of a TFmode value. */
10671 if (GET_MODE (part[1][1]) == SImode)
10673 if (GET_CODE (part[1][1]) == MEM)
10674 part[1][1] = adjust_address (part[1][1], DImode, 0);
10675 else if (REG_P (part[1][1]))
10676 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10677 else
10678 abort ();
10679 if (GET_MODE (part[1][0]) == SImode)
10680 part[1][0] = part[1][1];
10683 emit_move_insn (part[0][1], part[1][1]);
10684 emit_move_insn (part[0][0], part[1][0]);
10685 return;
10688 /* Choose the correct order so we do not overwrite the source before it is copied. */
10689 if ((REG_P (part[0][0])
10690 && REG_P (part[1][1])
10691 && (REGNO (part[0][0]) == REGNO (part[1][1])
10692 || (nparts == 3
10693 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10694 || (collisions > 0
10695 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10697 if (nparts == 3)
10699 operands[2] = part[0][2];
10700 operands[3] = part[0][1];
10701 operands[4] = part[0][0];
10702 operands[5] = part[1][2];
10703 operands[6] = part[1][1];
10704 operands[7] = part[1][0];
10706 else
10708 operands[2] = part[0][1];
10709 operands[3] = part[0][0];
10710 operands[5] = part[1][1];
10711 operands[6] = part[1][0];
10714 else
10716 if (nparts == 3)
10718 operands[2] = part[0][0];
10719 operands[3] = part[0][1];
10720 operands[4] = part[0][2];
10721 operands[5] = part[1][0];
10722 operands[6] = part[1][1];
10723 operands[7] = part[1][2];
10725 else
10727 operands[2] = part[0][0];
10728 operands[3] = part[0][1];
10729 operands[5] = part[1][0];
10730 operands[6] = part[1][1];
10733 emit_move_insn (operands[2], operands[5]);
10734 emit_move_insn (operands[3], operands[6]);
10735 if (nparts == 3)
10736 emit_move_insn (operands[4], operands[7]);
10738 return;
10741 void
10742 ix86_split_ashldi (operands, scratch)
10743 rtx *operands, scratch;
10745 rtx low[2], high[2];
10746 int count;
10748 if (GET_CODE (operands[2]) == CONST_INT)
10750 split_di (operands, 2, low, high);
10751 count = INTVAL (operands[2]) & 63;
10753 if (count >= 32)
10755 emit_move_insn (high[0], low[1]);
10756 emit_move_insn (low[0], const0_rtx);
10758 if (count > 32)
10759 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10761 else
10763 if (!rtx_equal_p (operands[0], operands[1]))
10764 emit_move_insn (operands[0], operands[1]);
10765 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10766 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10769 else
10771 if (!rtx_equal_p (operands[0], operands[1]))
10772 emit_move_insn (operands[0], operands[1]);
10774 split_di (operands, 1, low, high);
10776 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10777 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10779 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10781 if (! no_new_pseudos)
10782 scratch = force_reg (SImode, const0_rtx);
10783 else
10784 emit_move_insn (scratch, const0_rtx);
10786 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10787 scratch));
10789 else
10790 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
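/* Example for a constant count of 40 in the >= 32 case above:
       movl  %eax, %edx      ; high = low
       xorl  %eax, %eax      ; low = 0
       sall  $8, %edx        ; shift by count - 32
   Counts below 32 instead use the shld/shl pair (a sketch; register
   names are illustrative).  */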
10794 void
10795 ix86_split_ashrdi (operands, scratch)
10796 rtx *operands, scratch;
10798 rtx low[2], high[2];
10799 int count;
10801 if (GET_CODE (operands[2]) == CONST_INT)
10803 split_di (operands, 2, low, high);
10804 count = INTVAL (operands[2]) & 63;
10806 if (count >= 32)
10808 emit_move_insn (low[0], high[1]);
10810 if (! reload_completed)
10811 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10812 else
10814 emit_move_insn (high[0], low[0]);
10815 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10818 if (count > 32)
10819 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10821 else
10823 if (!rtx_equal_p (operands[0], operands[1]))
10824 emit_move_insn (operands[0], operands[1]);
10825 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10826 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10829 else
10831 if (!rtx_equal_p (operands[0], operands[1]))
10832 emit_move_insn (operands[0], operands[1]);
10834 split_di (operands, 1, low, high);
10836 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10837 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10839 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10841 if (! no_new_pseudos)
10842 scratch = gen_reg_rtx (SImode);
10843 emit_move_insn (scratch, high[0]);
10844 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10845 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10846 scratch));
10848 else
10849 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10853 void
10854 ix86_split_lshrdi (operands, scratch)
10855 rtx *operands, scratch;
10857 rtx low[2], high[2];
10858 int count;
10860 if (GET_CODE (operands[2]) == CONST_INT)
10862 split_di (operands, 2, low, high);
10863 count = INTVAL (operands[2]) & 63;
10865 if (count >= 32)
10867 emit_move_insn (low[0], high[1]);
10868 emit_move_insn (high[0], const0_rtx);
10870 if (count > 32)
10871 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10873 else
10875 if (!rtx_equal_p (operands[0], operands[1]))
10876 emit_move_insn (operands[0], operands[1]);
10877 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10878 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10881 else
10883 if (!rtx_equal_p (operands[0], operands[1]))
10884 emit_move_insn (operands[0], operands[1]);
10886 split_di (operands, 1, low, high);
10888 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10889 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10891 /* Heh. By reversing the arguments, we can reuse this pattern. */
10892 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10894 if (! no_new_pseudos)
10895 scratch = force_reg (SImode, const0_rtx);
10896 else
10897 emit_move_insn (scratch, const0_rtx);
10899 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10900 scratch));
10902 else
10903 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10907 /* Helper function for the string operations below. Test whether the
10908 VALUE bits of VARIABLE are clear; if so, jump to the returned label. */
10909 static rtx
10910 ix86_expand_aligntest (variable, value)
10911 rtx variable;
10912 int value;
10914 rtx label = gen_label_rtx ();
10915 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10916 if (GET_MODE (variable) == DImode)
10917 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10918 else
10919 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10920 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10921 1, label);
10922 return label;
10925 /* Adjust COUNTREG by VALUE. */
10926 static void
10927 ix86_adjust_counter (countreg, value)
10928 rtx countreg;
10929 HOST_WIDE_INT value;
10931 if (GET_MODE (countreg) == DImode)
10932 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10933 else
10934 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10937 /* Zero extend EXP, which may be SImode, to a Pmode register. */
10938 rtx
10939 ix86_zero_extend_to_Pmode (exp)
10940 rtx exp;
10942 rtx r;
10943 if (GET_MODE (exp) == VOIDmode)
10944 return force_reg (Pmode, exp);
10945 if (GET_MODE (exp) == Pmode)
10946 return copy_to_mode_reg (Pmode, exp);
10947 r = gen_reg_rtx (Pmode);
10948 emit_insn (gen_zero_extendsidi2 (r, exp));
10949 return r;
10952 /* Expand string move (memcpy) operation. Use i386 string operations when
10953 profitable. expand_clrstr contains similar code. */
10954 int
10955 ix86_expand_movstr (dst, src, count_exp, align_exp)
10956 rtx dst, src, count_exp, align_exp;
10958 rtx srcreg, destreg, countreg;
10959 enum machine_mode counter_mode;
10960 HOST_WIDE_INT align = 0;
10961 unsigned HOST_WIDE_INT count = 0;
10962 rtx insns;
10964 if (GET_CODE (align_exp) == CONST_INT)
10965 align = INTVAL (align_exp);
10967 /* Can't use any of this if the user has appropriated esi or edi. */
10968 if (global_regs[4] || global_regs[5])
10969 return 0;
10971 /* This simple hack avoids all inlining code and simplifies code below. */
10972 if (!TARGET_ALIGN_STRINGOPS)
10973 align = 64;
10975 if (GET_CODE (count_exp) == CONST_INT)
10977 count = INTVAL (count_exp);
10978 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10979 return 0;
10982 /* Figure out the proper mode for the counter. For 32 bits it is always
10983 SImode; for 64 bits use SImode when possible, otherwise DImode.
10984 COUNT is the number of bytes copied when known at compile time. */
10985 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10986 || x86_64_zero_extended_value (count_exp))
10987 counter_mode = SImode;
10988 else
10989 counter_mode = DImode;
10991 start_sequence ();
10993 if (counter_mode != SImode && counter_mode != DImode)
10994 abort ();
10996 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10997 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10999 emit_insn (gen_cld ());
11001 /* When optimizing for size emit simple rep ; movsb instruction for
11002 counts not divisible by 4. */
11004 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11006 countreg = ix86_zero_extend_to_Pmode (count_exp);
11007 if (TARGET_64BIT)
11008 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11009 destreg, srcreg, countreg));
11010 else
11011 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11012 destreg, srcreg, countreg));
11015 /* For constant aligned (or small unaligned) copies use rep movsl
11016 followed by code copying the rest. For PentiumPro ensure 8 byte
11017 alignment to allow rep movsl acceleration. */
11019 else if (count != 0
11020 && (align >= 8
11021 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11022 || optimize_size || count < (unsigned int) 64))
11024 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11025 if (count & ~(size - 1))
11027 countreg = copy_to_mode_reg (counter_mode,
11028 GEN_INT ((count >> (size == 4 ? 2 : 3))
11029 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11030 countreg = ix86_zero_extend_to_Pmode (countreg);
11031 if (size == 4)
11033 if (TARGET_64BIT)
11034 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11035 destreg, srcreg, countreg));
11036 else
11037 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11038 destreg, srcreg, countreg));
11040 else
11041 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11042 destreg, srcreg, countreg));
11044 if (size == 8 && (count & 0x04))
11045 emit_insn (gen_strmovsi (destreg, srcreg));
11046 if (count & 0x02)
11047 emit_insn (gen_strmovhi (destreg, srcreg));
11048 if (count & 0x01)
11049 emit_insn (gen_strmovqi (destreg, srcreg));
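/* For instance, a 32-bit copy with count == 15 and align >= 4 becomes
       movl  $3, %ecx
       rep movsl             ; 12 bytes
       movsw                 ;  2 bytes (count & 2)
       movsb                 ;  1 byte  (count & 1)
   (an illustrative sketch of the insns emitted just above).  */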
11051 /* The generic code based on the glibc implementation:
11052 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11053 allowing accelerated copying there)
11054 - copy the data using rep movsl
11055 - copy the rest. */
11056 else
11058 rtx countreg2;
11059 rtx label = NULL;
11060 int desired_alignment = (TARGET_PENTIUMPRO
11061 && (count == 0 || count >= (unsigned int) 260)
11062 ? 8 : UNITS_PER_WORD);
11064 /* In case we don't know anything about the alignment, default to the
11065 library version, since it is usually equally fast and results in
11066 shorter code.
11068 Also emit the call when we know that the count is large and call
11069 overhead will not be important. */
11070 if (!TARGET_INLINE_ALL_STRINGOPS
11071 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11073 end_sequence ();
11074 return 0;
11077 if (TARGET_SINGLE_STRINGOP)
11078 emit_insn (gen_cld ());
11080 countreg2 = gen_reg_rtx (Pmode);
11081 countreg = copy_to_mode_reg (counter_mode, count_exp);
11083 /* We don't use loops to align the destination or to copy parts smaller
11084 than 4 bytes, because gcc is able to optimize such code better (in
11085 the case the destination or the count really is aligned, gcc is often
11086 able to predict the branches) and also it is friendlier to the
11087 hardware branch prediction.
11089 Using loops would be beneficial for the generic case, because we
11090 could handle small counts using the loops. Many CPUs (such as Athlon)
11091 have large REP prefix setup costs, so this is quite costly.
11093 Maybe we can revisit this decision later or
11094 add some customizability to this code. */
11096 if (count == 0 && align < desired_alignment)
11098 label = gen_label_rtx ();
11099 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11100 LEU, 0, counter_mode, 1, label);
11102 if (align <= 1)
11104 rtx label = ix86_expand_aligntest (destreg, 1);
11105 emit_insn (gen_strmovqi (destreg, srcreg));
11106 ix86_adjust_counter (countreg, 1);
11107 emit_label (label);
11108 LABEL_NUSES (label) = 1;
11110 if (align <= 2)
11112 rtx label = ix86_expand_aligntest (destreg, 2);
11113 emit_insn (gen_strmovhi (destreg, srcreg));
11114 ix86_adjust_counter (countreg, 2);
11115 emit_label (label);
11116 LABEL_NUSES (label) = 1;
11118 if (align <= 4 && desired_alignment > 4)
11120 rtx label = ix86_expand_aligntest (destreg, 4);
11121 emit_insn (gen_strmovsi (destreg, srcreg));
11122 ix86_adjust_counter (countreg, 4);
11123 emit_label (label);
11124 LABEL_NUSES (label) = 1;
11127 if (label && desired_alignment > 4 && !TARGET_64BIT)
11129 emit_label (label);
11130 LABEL_NUSES (label) = 1;
11131 label = NULL_RTX;
11133 if (!TARGET_SINGLE_STRINGOP)
11134 emit_insn (gen_cld ());
11135 if (TARGET_64BIT)
11137 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11138 GEN_INT (3)));
11139 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11140 destreg, srcreg, countreg2));
11142 else
11144 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11145 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11146 destreg, srcreg, countreg2));
11149 if (label)
11151 emit_label (label);
11152 LABEL_NUSES (label) = 1;
11154 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11155 emit_insn (gen_strmovsi (destreg, srcreg));
11156 if ((align <= 4 || count == 0) && TARGET_64BIT)
11158 rtx label = ix86_expand_aligntest (countreg, 4);
11159 emit_insn (gen_strmovsi (destreg, srcreg));
11160 emit_label (label);
11161 LABEL_NUSES (label) = 1;
11163 if (align > 2 && count != 0 && (count & 2))
11164 emit_insn (gen_strmovhi (destreg, srcreg));
11165 if (align <= 2 || count == 0)
11167 rtx label = ix86_expand_aligntest (countreg, 2);
11168 emit_insn (gen_strmovhi (destreg, srcreg));
11169 emit_label (label);
11170 LABEL_NUSES (label) = 1;
11172 if (align > 1 && count != 0 && (count & 1))
11173 emit_insn (gen_strmovqi (destreg, srcreg));
11174 if (align <= 1 || count == 0)
11176 rtx label = ix86_expand_aligntest (countreg, 1);
11177 emit_insn (gen_strmovqi (destreg, srcreg));
11178 emit_label (label);
11179 LABEL_NUSES (label) = 1;
11183 insns = get_insns ();
11184 end_sequence ();
11186 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11187 emit_insn (insns);
11188 return 1;
11191 /* Expand string clear operation (bzero). Use i386 string operations when
11192 profitable. expand_movstr contains similar code. */
11193 int
11194 ix86_expand_clrstr (src, count_exp, align_exp)
11195 rtx src, count_exp, align_exp;
11197 rtx destreg, zeroreg, countreg;
11198 enum machine_mode counter_mode;
11199 HOST_WIDE_INT align = 0;
11200 unsigned HOST_WIDE_INT count = 0;
11202 if (GET_CODE (align_exp) == CONST_INT)
11203 align = INTVAL (align_exp);
11205 /* Can't use any of this if the user has appropriated esi. */
11206 if (global_regs[4])
11207 return 0;
11209 /* This simple hack avoids all inlining code and simplifies code below. */
11210 if (!TARGET_ALIGN_STRINGOPS)
11211 align = 32;
11213 if (GET_CODE (count_exp) == CONST_INT)
11215 count = INTVAL (count_exp);
11216 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11217 return 0;
11219 /* Figure out the proper mode for the counter. For 32 bits it is always
11220 SImode; for 64 bits use SImode when possible, otherwise DImode.
11221 COUNT is the number of bytes cleared when known at compile time. */
11222 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11223 || x86_64_zero_extended_value (count_exp))
11224 counter_mode = SImode;
11225 else
11226 counter_mode = DImode;
11228 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11230 emit_insn (gen_cld ());
11232 /* When optimizing for size emit simple rep ; stosb instruction for
11233 counts not divisible by 4. */
11235 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11237 countreg = ix86_zero_extend_to_Pmode (count_exp);
11238 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11239 if (TARGET_64BIT)
11240 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11241 destreg, countreg));
11242 else
11243 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11244 destreg, countreg));
11246 else if (count != 0
11247 && (align >= 8
11248 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11249 || optimize_size || count < (unsigned int) 64))
11251 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11252 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11253 if (count & ~(size - 1))
11255 countreg = copy_to_mode_reg (counter_mode,
11256 GEN_INT ((count >> (size == 4 ? 2 : 3))
11257 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11258 countreg = ix86_zero_extend_to_Pmode (countreg);
11259 if (size == 4)
11261 if (TARGET_64BIT)
11262 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11263 destreg, countreg));
11264 else
11265 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11266 destreg, countreg));
11268 else
11269 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11270 destreg, countreg));
11272 if (size == 8 && (count & 0x04))
11273 emit_insn (gen_strsetsi (destreg,
11274 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11275 if (count & 0x02)
11276 emit_insn (gen_strsethi (destreg,
11277 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11278 if (count & 0x01)
11279 emit_insn (gen_strsetqi (destreg,
11280 gen_rtx_SUBREG (QImode, zeroreg, 0)));
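/* For instance, a 32-bit clear with count == 7 and suitable alignment
   becomes
       movl  $1, %ecx
       rep stosl             ; 4 bytes
       stosw                 ; count & 2
       stosb                 ; count & 1
   (an illustrative sketch; %eax is assumed to hold the zero value).  */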
11282 else
11284 rtx countreg2;
11285 rtx label = NULL;
11286 /* Compute desired alignment of the string operation. */
11287 int desired_alignment = (TARGET_PENTIUMPRO
11288 && (count == 0 || count >= (unsigned int) 260)
11289 ? 8 : UNITS_PER_WORD);
11291 /* In case we don't know anything about the alignment, default to the
11292 library version, since it is usually equally fast and results in
11293 shorter code.
11295 Also emit the call when we know that the count is large and call
11296 overhead will not be important. */
11297 if (!TARGET_INLINE_ALL_STRINGOPS
11298 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11299 return 0;
11301 if (TARGET_SINGLE_STRINGOP)
11302 emit_insn (gen_cld ());
11304 countreg2 = gen_reg_rtx (Pmode);
11305 countreg = copy_to_mode_reg (counter_mode, count_exp);
11306 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11308 if (count == 0 && align < desired_alignment)
11310 label = gen_label_rtx ();
11311 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11312 LEU, 0, counter_mode, 1, label);
11314 if (align <= 1)
11316 rtx label = ix86_expand_aligntest (destreg, 1);
11317 emit_insn (gen_strsetqi (destreg,
11318 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11319 ix86_adjust_counter (countreg, 1);
11320 emit_label (label);
11321 LABEL_NUSES (label) = 1;
11323 if (align <= 2)
11325 rtx label = ix86_expand_aligntest (destreg, 2);
11326 emit_insn (gen_strsethi (destreg,
11327 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11328 ix86_adjust_counter (countreg, 2);
11329 emit_label (label);
11330 LABEL_NUSES (label) = 1;
11332 if (align <= 4 && desired_alignment > 4)
11334 rtx label = ix86_expand_aligntest (destreg, 4);
11335 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11336 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11337 : zeroreg)));
11338 ix86_adjust_counter (countreg, 4);
11339 emit_label (label);
11340 LABEL_NUSES (label) = 1;
11343 if (label && desired_alignment > 4 && !TARGET_64BIT)
11345 emit_label (label);
11346 LABEL_NUSES (label) = 1;
11347 label = NULL_RTX;
11350 if (!TARGET_SINGLE_STRINGOP)
11351 emit_insn (gen_cld ());
11352 if (TARGET_64BIT)
11354 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11355 GEN_INT (3)));
11356 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11357 destreg, countreg2));
11359 else
11361 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11362 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11363 destreg, countreg2));
11365 if (label)
11367 emit_label (label);
11368 LABEL_NUSES (label) = 1;
11371 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11372 emit_insn (gen_strsetsi (destreg,
11373 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11374 if (TARGET_64BIT && (align <= 4 || count == 0))
11376 rtx label = ix86_expand_aligntest (countreg, 4);
11377 emit_insn (gen_strsetsi (destreg,
11378 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11379 emit_label (label);
11380 LABEL_NUSES (label) = 1;
11382 if (align > 2 && count != 0 && (count & 2))
11383 emit_insn (gen_strsethi (destreg,
11384 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11385 if (align <= 2 || count == 0)
11387 rtx label = ix86_expand_aligntest (countreg, 2);
11388 emit_insn (gen_strsethi (destreg,
11389 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11390 emit_label (label);
11391 LABEL_NUSES (label) = 1;
11393 if (align > 1 && count != 0 && (count & 1))
11394 emit_insn (gen_strsetqi (destreg,
11395 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11396 if (align <= 1 || count == 0)
11398 rtx label = ix86_expand_aligntest (countreg, 1);
11399 emit_insn (gen_strsetqi (destreg,
11400 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11401 emit_label (label);
11402 LABEL_NUSES (label) = 1;
11405 return 1;
11407 /* Expand strlen. */
11408 int
11409 ix86_expand_strlen (out, src, eoschar, align)
11410 rtx out, src, eoschar, align;
11412 rtx addr, scratch1, scratch2, scratch3, scratch4;
11414 /* The generic case of the strlen expander is long. Avoid its
11415 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11417 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11418 && !TARGET_INLINE_ALL_STRINGOPS
11419 && !optimize_size
11420 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11421 return 0;
11423 addr = force_reg (Pmode, XEXP (src, 0));
11424 scratch1 = gen_reg_rtx (Pmode);
11426 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11427 && !optimize_size)
11429 /* Well it seems that some optimizer does not combine a call like
11430 foo(strlen(bar), strlen(bar));
11431 when the move and the subtraction are done here. It calculates
11432 the length just once when these instructions are done inside of
11433 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11434 often used and this uses one fewer register for the lifetime of
11435 output_strlen_unroll(), this is better. */
11437 emit_move_insn (out, addr);
11439 ix86_expand_strlensi_unroll_1 (out, align);
11441 /* strlensi_unroll_1 returns the address of the zero at the end of
11442 the string, like memchr(), so compute the length by subtracting
11443 the start address. */
11444 if (TARGET_64BIT)
11445 emit_insn (gen_subdi3 (out, out, addr));
11446 else
11447 emit_insn (gen_subsi3 (out, out, addr));
11449 else
11451 scratch2 = gen_reg_rtx (Pmode);
11452 scratch3 = gen_reg_rtx (Pmode);
11453 scratch4 = force_reg (Pmode, constm1_rtx);
11455 emit_move_insn (scratch3, addr);
11456 eoschar = force_reg (QImode, eoschar);
11458 emit_insn (gen_cld ());
11459 if (TARGET_64BIT)
11461 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11462 align, scratch4, scratch3));
11463 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11464 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11466 else
11468 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11469 align, scratch4, scratch3));
11470 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11471 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
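/* Worked example of the arithmetic above: the count register starts
   at -1 and repnz scasb decrements it once per byte scanned, including
   the terminator.  For "ab" it ends at -4, and the length is recovered
   as ~(-4) + (-1) = 3 - 1 = 2 = strlen("ab").  */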
11474 return 1;
11477 /* Expand the appropriate insns for doing strlen if not just doing
11478 repnz; scasb
11480 out = result, initialized with the start address
11481 align_rtx = alignment of the address.
11482 scratch = scratch register, initialized with the start address when
11483 not aligned, otherwise undefined
11485 This is just the body. It needs the initialisations mentioned above and
11486 some address computing at the end. These things are done in i386.md. */
11488 static void
11489 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11490 rtx out, align_rtx;
11492 int align;
11493 rtx tmp;
11494 rtx align_2_label = NULL_RTX;
11495 rtx align_3_label = NULL_RTX;
11496 rtx align_4_label = gen_label_rtx ();
11497 rtx end_0_label = gen_label_rtx ();
11498 rtx mem;
11499 rtx tmpreg = gen_reg_rtx (SImode);
11500 rtx scratch = gen_reg_rtx (SImode);
11501 rtx cmp;
11503 align = 0;
11504 if (GET_CODE (align_rtx) == CONST_INT)
11505 align = INTVAL (align_rtx);
11507 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11509 /* Is there a known alignment and is it less than 4? */
11510 if (align < 4)
11512 rtx scratch1 = gen_reg_rtx (Pmode);
11513 emit_move_insn (scratch1, out);
11514 /* Is there a known alignment and is it not 2? */
11515 if (align != 2)
11517 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11518 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11520 /* Leave just the 3 lower bits. */
11521 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11522 NULL_RTX, 0, OPTAB_WIDEN);
11524 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11525 Pmode, 1, align_4_label);
11526 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11527 Pmode, 1, align_2_label);
11528 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11529 Pmode, 1, align_3_label);
11531 else
11533 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11534 check whether the address is aligned to a 4-byte boundary. */
11536 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11537 NULL_RTX, 0, OPTAB_WIDEN);
11539 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11540 Pmode, 1, align_4_label);
11543 mem = gen_rtx_MEM (QImode, out);
11545 /* Now compare the bytes. */
11547 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11548 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11549 QImode, 1, end_0_label);
11551 /* Increment the address. */
11552 if (TARGET_64BIT)
11553 emit_insn (gen_adddi3 (out, out, const1_rtx));
11554 else
11555 emit_insn (gen_addsi3 (out, out, const1_rtx));
11557 /* Not needed with an alignment of 2 */
11558 if (align != 2)
11560 emit_label (align_2_label);
11562 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11563 end_0_label);
11565 if (TARGET_64BIT)
11566 emit_insn (gen_adddi3 (out, out, const1_rtx));
11567 else
11568 emit_insn (gen_addsi3 (out, out, const1_rtx));
11570 emit_label (align_3_label);
11573 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11574 end_0_label);
11576 if (TARGET_64BIT)
11577 emit_insn (gen_adddi3 (out, out, const1_rtx));
11578 else
11579 emit_insn (gen_addsi3 (out, out, const1_rtx));
11582 /* Generate the loop to check 4 bytes at a time. It is not a good idea to
11583 align this loop; doing so only enlarges the code and does not help
11584 speed. */
11585 emit_label (align_4_label);
11587 mem = gen_rtx_MEM (SImode, out);
11588 emit_move_insn (scratch, mem);
11589 if (TARGET_64BIT)
11590 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11591 else
11592 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11594 /* This formula yields a nonzero result iff one of the bytes is zero.
11595 This saves three branches inside the loop and many cycles. */
11597 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11598 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11599 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11600 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11601 gen_int_mode (0x80808080, SImode)));
11602 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11603 align_4_label);
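/* Worked example of the zero-byte test above, for scratch = 0x11003344
   (a zero byte in bits 16-23):
       scratch + (-0x01010101)    = 0x0fff3243
       & ~scratch (= 0xeeffccbb)  = 0x0eff0003
       & 0x80808080               = 0x00800000   -> nonzero, fall out.
   A word with no zero byte yields 0 here, so the loop continues.  */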
11605 if (TARGET_CMOVE)
11607 rtx reg = gen_reg_rtx (SImode);
11608 rtx reg2 = gen_reg_rtx (Pmode);
11609 emit_move_insn (reg, tmpreg);
11610 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11612 /* If zero is not in the first two bytes, move two bytes forward. */
11613 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11614 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11615 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11616 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11617 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11618 reg,
11619 tmpreg)));
11620 /* Emit lea manually to avoid clobbering of flags. */
11621 emit_insn (gen_rtx_SET (SImode, reg2,
11622 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11624 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11625 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11626 emit_insn (gen_rtx_SET (VOIDmode, out,
11627 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11628 reg2,
11629 out)));
11632 else
11634 rtx end_2_label = gen_label_rtx ();
11635 /* Is zero in the first two bytes? */
11637 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11638 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11639 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11640 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11641 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11642 pc_rtx);
11643 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11644 JUMP_LABEL (tmp) = end_2_label;
11646 /* Not in the first two. Move two bytes forward. */
11647 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11648 if (TARGET_64BIT)
11649 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11650 else
11651 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11653 emit_label (end_2_label);
11657 /* Avoid a branch in fixing up the final byte position. */
11658 tmpreg = gen_lowpart (QImode, tmpreg);
11659 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11660 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11661 if (TARGET_64BIT)
11662 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11663 else
11664 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
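/* At this point OUT is 4 bytes past the two-byte pair containing the
   zero, and bit 7 of the low byte of TMPREG is set exactly when the
   lower byte of the pair is the zero.  Doubling TMPREG moves that bit
   into the carry flag, so OUT - 3 - carry lands on the zero byte
   itself (OUT - 4 for the lower byte, OUT - 3 for the upper).  */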
11666 emit_label (end_0_label);
11669 void
11670 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11671 rtx retval, fnaddr, callarg1, callarg2, pop;
11672 int sibcall;
11674 rtx use = NULL, call;
11676 if (pop == const0_rtx)
11677 pop = NULL;
11678 if (TARGET_64BIT && pop)
11679 abort ();
11681 #if TARGET_MACHO
11682 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11683 fnaddr = machopic_indirect_call_target (fnaddr);
11684 #else
11685 /* Static functions and indirect calls don't need the pic register. */
11686 if (! TARGET_64BIT && flag_pic
11687 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11688 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11689 use_reg (&use, pic_offset_table_rtx);
11691 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11693 rtx al = gen_rtx_REG (QImode, 0);
11694 emit_move_insn (al, callarg2);
11695 use_reg (&use, al);
11697 #endif /* TARGET_MACHO */
11699 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11701 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11702 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11704 if (sibcall && TARGET_64BIT
11705 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11707 rtx addr;
11708 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11709 fnaddr = gen_rtx_REG (Pmode, 40);
11710 emit_move_insn (fnaddr, addr);
11711 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11714 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11715 if (retval)
11716 call = gen_rtx_SET (VOIDmode, retval, call);
11717 if (pop)
11719 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11720 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11721 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11724 call = emit_call_insn (call);
11725 if (use)
11726 CALL_INSN_FUNCTION_USAGE (call) = use;
11730 /* Clear stack slot assignments remembered from previous functions.
11731 This is called from INIT_EXPANDERS once before RTL is emitted for each
11732 function. */
11734 static struct machine_function *
11735 ix86_init_machine_status ()
11737 return ggc_alloc_cleared (sizeof (struct machine_function));
11740 /* Return a MEM corresponding to a stack slot with mode MODE.
11741 Allocate a new slot if necessary.
11743 The RTL for a function can have several slots available: N is
11744 which slot to use. */
11746 rtx
11747 assign_386_stack_local (mode, n)
11748 enum machine_mode mode;
11749 int n;
11751 struct stack_local_entry *s;
11753 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11754 abort ();
11756 for (s = ix86_stack_locals; s; s = s->next)
11757 if (s->mode == mode && s->n == n)
11758 return s->rtl;
11760 s = (struct stack_local_entry *)
11761 ggc_alloc (sizeof (struct stack_local_entry));
11762 s->n = n;
11763 s->mode = mode;
11764 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11766 s->next = ix86_stack_locals;
11767 ix86_stack_locals = s;
11768 return s->rtl;
11771 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11773 static GTY(()) rtx ix86_tls_symbol;
11774 static rtx
11775 ix86_tls_get_addr ()
11778 if (!ix86_tls_symbol)
11780 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11781 (TARGET_GNU_TLS && !TARGET_64BIT)
11782 ? "___tls_get_addr"
11783 : "__tls_get_addr");
11786 return ix86_tls_symbol;
11789 /* Calculate the length of the memory address in the instruction
11790 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11792 static int
11793 memory_address_length (addr)
11794 rtx addr;
11796 struct ix86_address parts;
11797 rtx base, index, disp;
11798 int len;
11800 if (GET_CODE (addr) == PRE_DEC
11801 || GET_CODE (addr) == POST_INC
11802 || GET_CODE (addr) == PRE_MODIFY
11803 || GET_CODE (addr) == POST_MODIFY)
11804 return 0;
11806 if (! ix86_decompose_address (addr, &parts))
11807 abort ();
11809 base = parts.base;
11810 index = parts.index;
11811 disp = parts.disp;
11812 len = 0;
11814 /* Register Indirect. */
11815 if (base && !index && !disp)
11817 /* Special cases: ebp and esp need the two-byte modrm form. */
11818 if (addr == stack_pointer_rtx
11819 || addr == arg_pointer_rtx
11820 || addr == frame_pointer_rtx
11821 || addr == hard_frame_pointer_rtx)
11822 len = 1;
11825 /* Direct Addressing. */
11826 else if (disp && !base && !index)
11827 len = 4;
11829 else
11831 /* Find the length of the displacement constant. */
11832 if (disp)
11834 if (GET_CODE (disp) == CONST_INT
11835 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11836 len = 1;
11837 else
11838 len = 4;
11841 /* An index requires the two-byte modrm form. */
11842 if (index)
11843 len += 1;
11846 return len;
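/* Examples of the lengths computed above (beyond the modrm byte itself):
   (%eax) -> 0, (%esp) -> 1 (SIB), 4(%ebp) -> 1 (disp8),
   symbol -> 4 (disp32), 8(%eax,%ebx,2) -> 2 (SIB + disp8)
   (illustrative; the operands come from ix86_decompose_address).  */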
11849 /* Compute the default value for the "length_immediate" attribute. When
11850 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
11851 int
11852 ix86_attr_length_immediate_default (insn, shortform)
11853 rtx insn;
11854 int shortform;
11856 int len = 0;
11857 int i;
11858 extract_insn_cached (insn);
11859 for (i = recog_data.n_operands - 1; i >= 0; --i)
11860 if (CONSTANT_P (recog_data.operand[i]))
11862 if (len)
11863 abort ();
11864 if (shortform
11865 && GET_CODE (recog_data.operand[i]) == CONST_INT
11866 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11867 len = 1;
11868 else
11870 switch (get_attr_mode (insn))
11872 case MODE_QI:
11873 len += 1;
11874 break;
11875 case MODE_HI:
11876 len += 2;
11877 break;
11878 case MODE_SI:
11879 len += 4;
11880 break;
11881 /* Immediates for DImode insns are encoded as 32-bit sign-extended values. */
11882 case MODE_DI:
11883 len += 4;
11884 break;
11885 default:
11886 fatal_insn ("unknown insn mode", insn);
11890 return len;
11892 /* Compute default value for "length_address" attribute. */
11893 int
11894 ix86_attr_length_address_default (insn)
11895 rtx insn;
11897 int i;
11898 extract_insn_cached (insn);
11899 for (i = recog_data.n_operands - 1; i >= 0; --i)
11900 if (GET_CODE (recog_data.operand[i]) == MEM)
11902 return memory_address_length (XEXP (recog_data.operand[i], 0));
11905 return 0;
11908 /* Return the maximum number of instructions a CPU can issue. */
11910 static int
11911 ix86_issue_rate ()
11913 switch (ix86_tune)
11915 case PROCESSOR_PENTIUM:
11916 case PROCESSOR_K6:
11917 return 2;
11919 case PROCESSOR_PENTIUMPRO:
11920 case PROCESSOR_PENTIUM4:
11921 case PROCESSOR_ATHLON:
11922 case PROCESSOR_K8:
11923 return 3;
11925 default:
11926 return 1;
11930 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11931 by DEP_INSN and nothing else set by DEP_INSN. */
11933 static int
11934 ix86_flags_dependant (insn, dep_insn, insn_type)
11935 rtx insn, dep_insn;
11936 enum attr_type insn_type;
11938 rtx set, set2;
11940 /* Simplify the test for uninteresting insns. */
11941 if (insn_type != TYPE_SETCC
11942 && insn_type != TYPE_ICMOV
11943 && insn_type != TYPE_FCMOV
11944 && insn_type != TYPE_IBR)
11945 return 0;
11947 if ((set = single_set (dep_insn)) != 0)
11949 set = SET_DEST (set);
11950 set2 = NULL_RTX;
11952 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11953 && XVECLEN (PATTERN (dep_insn), 0) == 2
11954 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11955 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11957 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11958 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11960 else
11961 return 0;
11963 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11964 return 0;
11966 /* This test is true if the dependent insn reads the flags but
11967 not any other potentially set register. */
11968 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11969 return 0;
11971 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11972 return 0;
11974 return 1;
11977 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11978 address with operands set by DEP_INSN. */
11980 static int
11981 ix86_agi_dependant (insn, dep_insn, insn_type)
11982 rtx insn, dep_insn;
11983 enum attr_type insn_type;
11985 rtx addr;
11987 if (insn_type == TYPE_LEA
11988 && TARGET_PENTIUM)
11990 addr = PATTERN (insn);
11991 if (GET_CODE (addr) == SET)
11993 else if (GET_CODE (addr) == PARALLEL
11994 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11995 addr = XVECEXP (addr, 0, 0);
11996 else
11997 abort ();
11998 addr = SET_SRC (addr);
12000 else
12002 int i;
12003 extract_insn_cached (insn);
12004 for (i = recog_data.n_operands - 1; i >= 0; --i)
12005 if (GET_CODE (recog_data.operand[i]) == MEM)
12007 addr = XEXP (recog_data.operand[i], 0);
12008 goto found;
12010 return 0;
12011 found:;
12014 return modified_in_p (addr, dep_insn);
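/* AGI example on Pentium:
       addl  $4, %ebx        ; DEP_INSN writes %ebx
       movl  (%ebx), %eax    ; INSN uses %ebx to form the address
   modified_in_p detects this, and ix86_adjust_cost charges an extra
   cycle for the address-generation interlock (illustrative sketch).  */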
12017 static int
12018 ix86_adjust_cost (insn, link, dep_insn, cost)
12019 rtx insn, link, dep_insn;
12020 int cost;
12022 enum attr_type insn_type, dep_insn_type;
12023 enum attr_memory memory, dep_memory;
12024 rtx set, set2;
12025 int dep_insn_code_number;
12027 /* Anti and output dependencies have zero cost on all CPUs. */
12028 if (REG_NOTE_KIND (link) != 0)
12029 return 0;
12031 dep_insn_code_number = recog_memoized (dep_insn);
12033 /* If we can't recognize the insns, we can't really do anything. */
12034 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12035 return cost;
12037 insn_type = get_attr_type (insn);
12038 dep_insn_type = get_attr_type (dep_insn);
12040 switch (ix86_tune)
12042 case PROCESSOR_PENTIUM:
12043 /* Address Generation Interlock adds a cycle of latency. */
12044 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12045 cost += 1;
12047 /* ??? Compares pair with jump/setcc. */
12048 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12049 cost = 0;
12051 /* Floating point stores require the value to be ready one cycle earlier. */
12052 if (insn_type == TYPE_FMOV
12053 && get_attr_memory (insn) == MEMORY_STORE
12054 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12055 cost += 1;
12056 break;
12058 case PROCESSOR_PENTIUMPRO:
12059 memory = get_attr_memory (insn);
12060 dep_memory = get_attr_memory (dep_insn);
12062 /* Since we can't represent delayed latencies of load+operation,
12063 increase the cost here for non-imov insns. */
12064 if (dep_insn_type != TYPE_IMOV
12065 && dep_insn_type != TYPE_FMOV
12066 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12067 cost += 1;
12069 /* INT->FP conversion is expensive. */
12070 if (get_attr_fp_int_src (dep_insn))
12071 cost += 5;
12073 /* There is one cycle of extra latency between an FP op and a store. */
12074 if (insn_type == TYPE_FMOV
12075 && (set = single_set (dep_insn)) != NULL_RTX
12076 && (set2 = single_set (insn)) != NULL_RTX
12077 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12078 && GET_CODE (SET_DEST (set2)) == MEM)
12079 cost += 1;
12081 /* Show the reorder buffer's ability to hide the latency of a load by
12082 executing it in parallel with the previous instruction when the
12083 previous instruction is not needed to compute the address. */
12084 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12085 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12087 /* Claim that moves take one cycle, as the core can issue one load
12088 at a time and the next load can start a cycle later. */
12089 if (dep_insn_type == TYPE_IMOV
12090 || dep_insn_type == TYPE_FMOV)
12091 cost = 1;
12092 else if (cost > 1)
12093 cost--;
12095 break;
12097 case PROCESSOR_K6:
12098 memory = get_attr_memory (insn);
12099 dep_memory = get_attr_memory (dep_insn);
12100 /* The esp dependency is resolved before the instruction is really
12101 finished. */
12102 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12103 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12104 return 1;
12106 /* Since we can't represent delayed latencies of load+operation,
12107 increase the cost here for non-imov insns. */
12108 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12109 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12111 /* INT->FP conversion is expensive. */
12112 if (get_attr_fp_int_src (dep_insn))
12113 cost += 5;
12115 /* Show the reorder buffer's ability to hide the latency of a load by
12116 executing it in parallel with the previous instruction when the
12117 previous instruction is not needed to compute the address. */
12118 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12119 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12121 /* Claim that moves take one cycle, as the core can issue one load
12122 at a time and the next load can start a cycle later. */
12123 if (dep_insn_type == TYPE_IMOV
12124 || dep_insn_type == TYPE_FMOV)
12125 cost = 1;
12126 else if (cost > 2)
12127 cost -= 2;
12128 else
12129 cost = 1;
12131 break;
12133 case PROCESSOR_ATHLON:
12134 case PROCESSOR_K8:
12135 memory = get_attr_memory (insn);
12136 dep_memory = get_attr_memory (dep_insn);
12138 /* Show the reorder buffer's ability to hide the latency of a load by
12139 executing it in parallel with the previous instruction when the
12140 previous instruction is not needed to compute the address. */
12141 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12142 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12144 /* Claim that moves take one cycle, as the core can issue one load
12145 at a time and the next load can start a cycle later. */
12146 if (dep_insn_type == TYPE_IMOV
12147 || dep_insn_type == TYPE_FMOV)
12148 cost = 0;
12149 else if (cost >= 3)
12150 cost -= 3;
12151 else
12152 cost = 0;
12155 default:
12156 break;
12159 return cost;
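/* Note (informal): REG_NOTE_KIND (link) is zero only for a true data
   dependence; anti (write-after-read) and output (write-after-write)
   dependencies carry REG_DEP_ANTI or REG_DEP_OUTPUT notes, which is why
   the early test above makes them free on every CPU.  */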
12162 static union
12164 struct ppro_sched_data
12166 rtx decode[3];
12167 int issued_this_cycle;
12168 } ppro;
12169 } ix86_sched_data;
12171 static enum attr_ppro_uops
12172 ix86_safe_ppro_uops (insn)
12173 rtx insn;
12175 if (recog_memoized (insn) >= 0)
12176 return get_attr_ppro_uops (insn);
12177 else
12178 return PPRO_UOPS_MANY;
12181 static void
12182 ix86_dump_ppro_packet (dump)
12183 FILE *dump;
12185 if (ix86_sched_data.ppro.decode[0])
12187 fprintf (dump, "PPRO packet: %d",
12188 INSN_UID (ix86_sched_data.ppro.decode[0]));
12189 if (ix86_sched_data.ppro.decode[1])
12190 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12191 if (ix86_sched_data.ppro.decode[2])
12192 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12193 fputc ('\n', dump);
12197 /* We're beginning a new block. Initialize data structures as necessary. */
12199 static void
12200 ix86_sched_init (dump, sched_verbose, veclen)
12201 FILE *dump ATTRIBUTE_UNUSED;
12202 int sched_verbose ATTRIBUTE_UNUSED;
12203 int veclen ATTRIBUTE_UNUSED;
12205 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12208 /* Shift INSN to SLOT, and shift everything else down. */
12210 static void
12211 ix86_reorder_insn (insnp, slot)
12212 rtx *insnp, *slot;
12214 if (insnp != slot)
12216 rtx insn = *insnp;
12217 do
12218 insnp[0] = insnp[1];
12219 while (++insnp != slot);
12220 *insnp = insn;
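/* Worked example (informal sketch): with the ready queue {w, x, y, z},
   insnp = &x and slot = &z, the do-while above yields {w, y, z, x}:
   x lands in *slot and every insn between the two pointers shifts
   down one position.  */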
12224 static void
12225 ix86_sched_reorder_ppro (ready, e_ready)
12226 rtx *ready;
12227 rtx *e_ready;
12229 rtx decode[3];
12230 enum attr_ppro_uops cur_uops;
12231 int issued_this_cycle;
12232 rtx *insnp;
12233 int i;
12235 /* At this point .ppro.decode contains the state of the three
12236 decoders from the last "cycle". That is, those insns that were
12237 actually independent. But here we're scheduling for the
12238 decoder, and we may find things that are decodable in the
12239 same cycle. */
12241 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12242 issued_this_cycle = 0;
12244 insnp = e_ready;
12245 cur_uops = ix86_safe_ppro_uops (*insnp);
12247 /* If the decoders are empty, and we have a complex insn at the
12248 head of the priority queue, let it issue without complaint. */
12249 if (decode[0] == NULL)
12251 if (cur_uops == PPRO_UOPS_MANY)
12253 decode[0] = *insnp;
12254 goto ppro_done;
12257 /* Otherwise, search for a 2-4 uop insn to issue. */
12258 while (cur_uops != PPRO_UOPS_FEW)
12260 if (insnp == ready)
12261 break;
12262 cur_uops = ix86_safe_ppro_uops (*--insnp);
12265 /* If so, move it to the head of the line. */
12266 if (cur_uops == PPRO_UOPS_FEW)
12267 ix86_reorder_insn (insnp, e_ready);
12269 /* Issue the head of the queue. */
12270 issued_this_cycle = 1;
12271 decode[0] = *e_ready--;
12274 /* Look for simple insns to fill in the other two slots. */
12275 for (i = 1; i < 3; ++i)
12276 if (decode[i] == NULL)
12278 if (ready > e_ready)
12279 goto ppro_done;
12281 insnp = e_ready;
12282 cur_uops = ix86_safe_ppro_uops (*insnp);
12283 while (cur_uops != PPRO_UOPS_ONE)
12285 if (insnp == ready)
12286 break;
12287 cur_uops = ix86_safe_ppro_uops (*--insnp);
12290 /* Found one. Move it to the head of the queue and issue it. */
12291 if (cur_uops == PPRO_UOPS_ONE)
12293 ix86_reorder_insn (insnp, e_ready);
12294 decode[i] = *e_ready--;
12295 issued_this_cycle++;
12296 continue;
12299 /* ??? Didn't find one. Ideally, here we would do a lazy split
12300 of 2-uop insns, issue one and queue the other. */
12303 ppro_done:
12304 if (issued_this_cycle == 0)
12305 issued_this_cycle = 1;
12306 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
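/* Background (informal): the PPro/PII/PIII front end decodes in a
   4-1-1 template -- decoder 0 handles insns of up to four uops while
   decoders 1 and 2 take only single-uop insns.  The routine above
   mirrors that: one PPRO_UOPS_FEW insn in slot 0, PPRO_UOPS_ONE insns
   in the remaining slots, and PPRO_UOPS_MANY (microcoded) insns
   issued alone.  */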
12309 /* We are about to begin issuing insns for this clock cycle.
12310 Override the default sort algorithm to better slot instructions. */
12311 static int
12312 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12313 FILE *dump ATTRIBUTE_UNUSED;
12314 int sched_verbose ATTRIBUTE_UNUSED;
12315 rtx *ready;
12316 int *n_readyp;
12317 int clock_var ATTRIBUTE_UNUSED;
12319 int n_ready = *n_readyp;
12320 rtx *e_ready = ready + n_ready - 1;
12322 /* Make sure key items in ix86_sched_data are initialized even
12323 when we are not going to bother trying to reorder the
12324 ready queue. */
12325 if (n_ready < 2)
12327 ix86_sched_data.ppro.issued_this_cycle = 1;
12328 goto out;
12331 switch (ix86_tune)
12333 default:
12334 break;
12336 case PROCESSOR_PENTIUMPRO:
12337 ix86_sched_reorder_ppro (ready, e_ready);
12338 break;
12341 out:
12342 return ix86_issue_rate ();
12345 /* We are about to issue INSN. Return the number of insns left on the
12346 ready queue that can be issued this cycle. */
12348 static int
12349 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12350 FILE *dump;
12351 int sched_verbose;
12352 rtx insn;
12353 int can_issue_more;
12355 int i;
12356 switch (ix86_tune)
12358 default:
12359 return can_issue_more - 1;
12361 case PROCESSOR_PENTIUMPRO:
12363 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12365 if (uops == PPRO_UOPS_MANY)
12367 if (sched_verbose)
12368 ix86_dump_ppro_packet (dump);
12369 ix86_sched_data.ppro.decode[0] = insn;
12370 ix86_sched_data.ppro.decode[1] = NULL;
12371 ix86_sched_data.ppro.decode[2] = NULL;
12372 if (sched_verbose)
12373 ix86_dump_ppro_packet (dump);
12374 ix86_sched_data.ppro.decode[0] = NULL;
12376 else if (uops == PPRO_UOPS_FEW)
12378 if (sched_verbose)
12379 ix86_dump_ppro_packet (dump);
12380 ix86_sched_data.ppro.decode[0] = insn;
12381 ix86_sched_data.ppro.decode[1] = NULL;
12382 ix86_sched_data.ppro.decode[2] = NULL;
12384 else
12386 for (i = 0; i < 3; ++i)
12387 if (ix86_sched_data.ppro.decode[i] == NULL)
12389 ix86_sched_data.ppro.decode[i] = insn;
12390 break;
12392 if (i == 3)
12393 abort ();
12394 if (i == 2)
12396 if (sched_verbose)
12397 ix86_dump_ppro_packet (dump);
12398 ix86_sched_data.ppro.decode[0] = NULL;
12399 ix86_sched_data.ppro.decode[1] = NULL;
12400 ix86_sched_data.ppro.decode[2] = NULL;
12404 return --ix86_sched_data.ppro.issued_this_cycle;
12408 static int
12409 ia32_use_dfa_pipeline_interface ()
12411 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12412 return 1;
12413 return 0;
12416 /* How many alternative schedules to try. This should be as wide as the
12417 scheduling freedom in the DFA, but no wider. Making this value too
12418 large results in extra work for the scheduler. */
12420 static int
12421 ia32_multipass_dfa_lookahead ()
12423 if (ix86_tune == PROCESSOR_PENTIUM)
12424 return 2;
12425 else
12426 return 0;
12430 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12431 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
12432 appropriate. */
12434 void
12435 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12436 rtx insns;
12437 rtx dstref, srcref, dstreg, srcreg;
12439 rtx insn;
12441 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12442 if (INSN_P (insn))
12443 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12444 dstreg, srcreg);
12447 /* Subroutine of above to actually do the updating by recursively walking
12448 the rtx. */
12450 static void
12451 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12452 rtx x;
12453 rtx dstref, srcref, dstreg, srcreg;
12455 enum rtx_code code = GET_CODE (x);
12456 const char *format_ptr = GET_RTX_FORMAT (code);
12457 int i, j;
12459 if (code == MEM && XEXP (x, 0) == dstreg)
12460 MEM_COPY_ATTRIBUTES (x, dstref);
12461 else if (code == MEM && XEXP (x, 0) == srcreg)
12462 MEM_COPY_ATTRIBUTES (x, srcref);
12464 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12466 if (*format_ptr == 'e')
12467 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12468 dstreg, srcreg);
12469 else if (*format_ptr == 'E')
12470 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12471 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12472 dstreg, srcreg);
12476 /* Compute the alignment given to a constant that is being placed in memory.
12477 EXP is the constant and ALIGN is the alignment that the object would
12478 ordinarily have.
12479 The value of this function is used instead of that alignment to align
12480 the object. */
12482 int
12483 ix86_constant_alignment (exp, align)
12484 tree exp;
12485 int align;
12487 if (TREE_CODE (exp) == REAL_CST)
12489 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12490 return 64;
12491 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12492 return 128;
12494 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12495 && align < 256)
12496 return 256;
12498 return align;
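/* Worked example (informal; alignments are in bits): a double constant
   that would ordinarily get 32-bit alignment is promoted to 64, a
   128-bit-mode constant (e.g. XFmode, per ALIGN_MODE_128) to 128, and
   a string of 31 or more characters to 256, so the wider FP and
   string operations avoid misalignment penalties.  */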
12501 /* Compute the alignment for a static variable.
12502 TYPE is the data type, and ALIGN is the alignment that
12503 the object would ordinarily have. The value of this function is used
12504 instead of that alignment to align the object. */
12506 int
12507 ix86_data_alignment (type, align)
12508 tree type;
12509 int align;
12511 if (AGGREGATE_TYPE_P (type)
12512 && TYPE_SIZE (type)
12513 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12514 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12515 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12516 return 256;
12518 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12519 to a 16-byte boundary. */
12520 if (TARGET_64BIT)
12522 if (AGGREGATE_TYPE_P (type)
12523 && TYPE_SIZE (type)
12524 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12525 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12526 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12527 return 128;
12530 if (TREE_CODE (type) == ARRAY_TYPE)
12532 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12533 return 64;
12534 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12535 return 128;
12537 else if (TREE_CODE (type) == COMPLEX_TYPE)
12540 if (TYPE_MODE (type) == DCmode && align < 64)
12541 return 64;
12542 if (TYPE_MODE (type) == XCmode && align < 128)
12543 return 128;
12545 else if ((TREE_CODE (type) == RECORD_TYPE
12546 || TREE_CODE (type) == UNION_TYPE
12547 || TREE_CODE (type) == QUAL_UNION_TYPE)
12548 && TYPE_FIELDS (type))
12550 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12551 return 64;
12552 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12553 return 128;
12555 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12556 || TREE_CODE (type) == INTEGER_TYPE)
12558 if (TYPE_MODE (type) == DFmode && align < 64)
12559 return 64;
12560 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12561 return 128;
12564 return align;
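/* Worked example (informal): on x86-64 a 20-byte array is 160 bits,
   which passes the >= 128 test above and is therefore aligned to the
   16-byte boundary the ABI requires; an aggregate of 32 bytes or more
   is raised to 256-bit alignment on any target when its default
   alignment is smaller.  ALIGN and the return value are in bits.  */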
12567 /* Compute the alignment for a local variable.
12568 TYPE is the data type, and ALIGN is the alignment that
12569 the object would ordinarily have. The value of this function is used
12570 instead of that alignment to align the object. */
12572 int
12573 ix86_local_alignment (type, align)
12574 tree type;
12575 int align;
12577 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12578 to a 16-byte boundary. */
12579 if (TARGET_64BIT)
12581 if (AGGREGATE_TYPE_P (type)
12582 && TYPE_SIZE (type)
12583 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12584 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12585 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12586 return 128;
12588 if (TREE_CODE (type) == ARRAY_TYPE)
12590 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12591 return 64;
12592 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12593 return 128;
12595 else if (TREE_CODE (type) == COMPLEX_TYPE)
12597 if (TYPE_MODE (type) == DCmode && align < 64)
12598 return 64;
12599 if (TYPE_MODE (type) == XCmode && align < 128)
12600 return 128;
12602 else if ((TREE_CODE (type) == RECORD_TYPE
12603 || TREE_CODE (type) == UNION_TYPE
12604 || TREE_CODE (type) == QUAL_UNION_TYPE)
12605 && TYPE_FIELDS (type))
12607 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12608 return 64;
12609 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12610 return 128;
12612 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12613 || TREE_CODE (type) == INTEGER_TYPE)
12616 if (TYPE_MODE (type) == DFmode && align < 64)
12617 return 64;
12618 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12619 return 128;
12621 return align;
12624 /* Emit RTL insns to initialize the variable parts of a trampoline.
12625 FNADDR is an RTX for the address of the function's pure code.
12626 CXT is an RTX for the static chain value for the function. */
12627 void
12628 x86_initialize_trampoline (tramp, fnaddr, cxt)
12629 rtx tramp, fnaddr, cxt;
12631 if (!TARGET_64BIT)
12633 /* Compute offset from the end of the jmp to the target function. */
12634 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12635 plus_constant (tramp, 10),
12636 NULL_RTX, 1, OPTAB_DIRECT);
12637 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12638 gen_int_mode (0xb9, QImode));
12639 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12640 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12641 gen_int_mode (0xe9, QImode));
12642 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12644 else
12646 int offset = 0;
12647 /* Try to load the address using the shorter movl instead of movabs.
12648 We may want to support movq for kernel mode, but the kernel does not
12649 use trampolines at the moment. */
12650 if (x86_64_zero_extended_value (fnaddr))
12652 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12653 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12654 gen_int_mode (0xbb41, HImode));
12655 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12656 gen_lowpart (SImode, fnaddr));
12657 offset += 6;
12659 else
12661 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12662 gen_int_mode (0xbb49, HImode));
12663 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12664 fnaddr);
12665 offset += 10;
12667 /* Load static chain using movabs to r10. */
12668 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12669 gen_int_mode (0xba49, HImode));
12670 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12671 cxt);
12672 offset += 10;
12673 /* Jump to r11. */
12674 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12675 gen_int_mode (0xff49, HImode));
12676 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12677 gen_int_mode (0xe3, QImode));
12678 offset += 3;
12679 if (offset > TRAMPOLINE_SIZE)
12680 abort ();
12683 #ifdef TRANSFER_FROM_TRAMPOLINE
12684 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12685 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12686 #endif
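/* Trampoline layout produced above (informal summary; the HImode and
   SImode stores write little-endian, so the bytes appear as shown):

   32-bit, 10 bytes:
       b9 <cxt:4>            movl  $cxt, %ecx
       e9 <disp:4>           jmp   fnaddr    (disp = fnaddr - tramp - 10)

   64-bit, 19 bytes when fnaddr zero-extends:
       41 bb <fnaddr:4>      movl  $fnaddr, %r11d
       49 ba <cxt:8>         movabs $cxt, %r10
       41 ff e3              jmp   *%r11

   64-bit, 23 bytes otherwise:
       49 bb <fnaddr:8>      movabs $fnaddr, %r11
       49 ba <cxt:8>         movabs $cxt, %r10
       41 ff e3              jmp   *%r11  */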
12689 #define def_builtin(MASK, NAME, TYPE, CODE) \
12690 do { \
12691 if ((MASK) & target_flags \
12692 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12693 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12694 NULL, NULL_TREE); \
12695 } while (0)
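/* For instance (hypothetical use, matching the 2-operand table below):

       def_builtin (MASK_SSE1, "__builtin_ia32_addps",
                    v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when one of the SSE bits is set in
   target_flags, and the MASK_64BIT test keeps 64-bit-only builtins
   (the MASK_SSE164/MASK_SSE264 entries) off 32-bit targets.  */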
12697 struct builtin_description
12699 const unsigned int mask;
12700 const enum insn_code icode;
12701 const char *const name;
12702 const enum ix86_builtins code;
12703 const enum rtx_code comparison;
12704 const unsigned int flag;
12707 /* Used for builtins that are enabled both by -msse and -msse2. */
12708 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12709 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12710 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12712 static const struct builtin_description bdesc_comi[] =
12714 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12715 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12716 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12717 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12718 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12719 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12720 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12721 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12722 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12723 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12724 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12725 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12729 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12734 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12735 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12736 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12737 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12740 static const struct builtin_description bdesc_2arg[] =
12742 /* SSE */
12743 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12744 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12745 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12746 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12747 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12748 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12749 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12750 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12752 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12753 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12754 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12755 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12756 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12757 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12758 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12759 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12760 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12761 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12762 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12763 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12764 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12765 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12766 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12767 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12768 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12769 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12770 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12771 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12773 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12774 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12775 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12776 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12778 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12779 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12780 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12781 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12783 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12784 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12785 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12786 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12787 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12789 /* MMX */
12790 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12791 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12792 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12793 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12794 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12795 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12796 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12797 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12799 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12800 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12801 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12802 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12803 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12804 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12805 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12806 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12809 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12810 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12812 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12814 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12815 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12817 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12818 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12820 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12821 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12822 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12823 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12824 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12825 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12827 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12828 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12829 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12830 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12832 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12833 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12834 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12835 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12836 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12837 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12839 /* Special. */
12840 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12841 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12842 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12844 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12845 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12846 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12848 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12849 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12850 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12851 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12852 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12853 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12855 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12856 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12857 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12858 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12859 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12860 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12862 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12863 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12864 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12865 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12867 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12868 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12870 /* SSE2 */
12871 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12881 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12882 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12883 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12884 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12885 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12886 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12887 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12888 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12889 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12890 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12891 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12892 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12893 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12894 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12895 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12896 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12897 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12898 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12899 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12901 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12915 /* SSE2 MMX */
12916 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12997 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
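/* Reading these tables (informal): COMPARISON and FLAG feed the SSE
   compare expanders.  A nonzero FLAG requests swapped operands, so
   __builtin_ia32_cmpgtps is listed as { LT, 1 }: a > b is emitted as
   cmpltps with the operands exchanged, because the hardware predicates
   cover only eq/lt/le/unord and their negations.  */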
13002 static const struct builtin_description bdesc_1arg[] =
13004 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13005 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13007 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13008 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13009 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13011 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13012 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13013 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13014 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13015 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13016 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13038 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13039 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
13048 void
13049 ix86_init_builtins ()
13051 if (TARGET_MMX)
13052 ix86_init_mmx_sse_builtins ();
13055 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13056 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13057 builtins. */
13058 static void
13059 ix86_init_mmx_sse_builtins ()
13061 const struct builtin_description * d;
13062 size_t i;
13064 tree pchar_type_node = build_pointer_type (char_type_node);
13065 tree pcchar_type_node = build_pointer_type (
13066 build_type_variant (char_type_node, 1, 0));
13067 tree pfloat_type_node = build_pointer_type (float_type_node);
13068 tree pcfloat_type_node = build_pointer_type (
13069 build_type_variant (float_type_node, 1, 0));
13070 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13071 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13072 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13074 /* Comparisons. */
13075 tree int_ftype_v4sf_v4sf
13076 = build_function_type_list (integer_type_node,
13077 V4SF_type_node, V4SF_type_node, NULL_TREE);
13078 tree v4si_ftype_v4sf_v4sf
13079 = build_function_type_list (V4SI_type_node,
13080 V4SF_type_node, V4SF_type_node, NULL_TREE);
13081 /* MMX/SSE/integer conversions. */
13082 tree int_ftype_v4sf
13083 = build_function_type_list (integer_type_node,
13084 V4SF_type_node, NULL_TREE);
13085 tree int64_ftype_v4sf
13086 = build_function_type_list (long_long_integer_type_node,
13087 V4SF_type_node, NULL_TREE);
13088 tree int_ftype_v8qi
13089 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13090 tree v4sf_ftype_v4sf_int
13091 = build_function_type_list (V4SF_type_node,
13092 V4SF_type_node, integer_type_node, NULL_TREE);
13093 tree v4sf_ftype_v4sf_int64
13094 = build_function_type_list (V4SF_type_node,
13095 V4SF_type_node, long_long_integer_type_node,
13096 NULL_TREE);
13097 tree v4sf_ftype_v4sf_v2si
13098 = build_function_type_list (V4SF_type_node,
13099 V4SF_type_node, V2SI_type_node, NULL_TREE);
13100 tree int_ftype_v4hi_int
13101 = build_function_type_list (integer_type_node,
13102 V4HI_type_node, integer_type_node, NULL_TREE);
13103 tree v4hi_ftype_v4hi_int_int
13104 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13105 integer_type_node, integer_type_node,
13106 NULL_TREE);
13107 /* Miscellaneous. */
13108 tree v8qi_ftype_v4hi_v4hi
13109 = build_function_type_list (V8QI_type_node,
13110 V4HI_type_node, V4HI_type_node, NULL_TREE);
13111 tree v4hi_ftype_v2si_v2si
13112 = build_function_type_list (V4HI_type_node,
13113 V2SI_type_node, V2SI_type_node, NULL_TREE);
13114 tree v4sf_ftype_v4sf_v4sf_int
13115 = build_function_type_list (V4SF_type_node,
13116 V4SF_type_node, V4SF_type_node,
13117 integer_type_node, NULL_TREE);
13118 tree v2si_ftype_v4hi_v4hi
13119 = build_function_type_list (V2SI_type_node,
13120 V4HI_type_node, V4HI_type_node, NULL_TREE);
13121 tree v4hi_ftype_v4hi_int
13122 = build_function_type_list (V4HI_type_node,
13123 V4HI_type_node, integer_type_node, NULL_TREE);
13124 tree v4hi_ftype_v4hi_di
13125 = build_function_type_list (V4HI_type_node,
13126 V4HI_type_node, long_long_unsigned_type_node,
13127 NULL_TREE);
13128 tree v2si_ftype_v2si_di
13129 = build_function_type_list (V2SI_type_node,
13130 V2SI_type_node, long_long_unsigned_type_node,
13131 NULL_TREE);
13132 tree void_ftype_void
13133 = build_function_type (void_type_node, void_list_node);
13134 tree void_ftype_unsigned
13135 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13136 tree unsigned_ftype_void
13137 = build_function_type (unsigned_type_node, void_list_node);
13138 tree di_ftype_void
13139 = build_function_type (long_long_unsigned_type_node, void_list_node);
13140 tree v4sf_ftype_void
13141 = build_function_type (V4SF_type_node, void_list_node);
13142 tree v2si_ftype_v4sf
13143 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13144 /* Loads/stores. */
13145 tree void_ftype_v8qi_v8qi_pchar
13146 = build_function_type_list (void_type_node,
13147 V8QI_type_node, V8QI_type_node,
13148 pchar_type_node, NULL_TREE);
13149 tree v4sf_ftype_pcfloat
13150 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13151 /* @@@ the type is bogus */
13152 tree v4sf_ftype_v4sf_pv2si
13153 = build_function_type_list (V4SF_type_node,
13154 V4SF_type_node, pv2si_type_node, NULL_TREE);
13155 tree void_ftype_pv2si_v4sf
13156 = build_function_type_list (void_type_node,
13157 pv2si_type_node, V4SF_type_node, NULL_TREE);
13158 tree void_ftype_pfloat_v4sf
13159 = build_function_type_list (void_type_node,
13160 pfloat_type_node, V4SF_type_node, NULL_TREE);
13161 tree void_ftype_pdi_di
13162 = build_function_type_list (void_type_node,
13163 pdi_type_node, long_long_unsigned_type_node,
13164 NULL_TREE);
13165 tree void_ftype_pv2di_v2di
13166 = build_function_type_list (void_type_node,
13167 pv2di_type_node, V2DI_type_node, NULL_TREE);
13168 /* Normal vector unops. */
13169 tree v4sf_ftype_v4sf
13170 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13172 /* Normal vector binops. */
13173 tree v4sf_ftype_v4sf_v4sf
13174 = build_function_type_list (V4SF_type_node,
13175 V4SF_type_node, V4SF_type_node, NULL_TREE);
13176 tree v8qi_ftype_v8qi_v8qi
13177 = build_function_type_list (V8QI_type_node,
13178 V8QI_type_node, V8QI_type_node, NULL_TREE);
13179 tree v4hi_ftype_v4hi_v4hi
13180 = build_function_type_list (V4HI_type_node,
13181 V4HI_type_node, V4HI_type_node, NULL_TREE);
13182 tree v2si_ftype_v2si_v2si
13183 = build_function_type_list (V2SI_type_node,
13184 V2SI_type_node, V2SI_type_node, NULL_TREE);
13185 tree di_ftype_di_di
13186 = build_function_type_list (long_long_unsigned_type_node,
13187 long_long_unsigned_type_node,
13188 long_long_unsigned_type_node, NULL_TREE);
13190 tree v2si_ftype_v2sf
13191 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13192 tree v2sf_ftype_v2si
13193 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13194 tree v2si_ftype_v2si
13195 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13196 tree v2sf_ftype_v2sf
13197 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13198 tree v2sf_ftype_v2sf_v2sf
13199 = build_function_type_list (V2SF_type_node,
13200 V2SF_type_node, V2SF_type_node, NULL_TREE);
13201 tree v2si_ftype_v2sf_v2sf
13202 = build_function_type_list (V2SI_type_node,
13203 V2SF_type_node, V2SF_type_node, NULL_TREE);
13204 tree pint_type_node = build_pointer_type (integer_type_node);
13205 tree pcint_type_node = build_pointer_type (
13206 build_type_variant (integer_type_node, 1, 0));
13207 tree pdouble_type_node = build_pointer_type (double_type_node);
13208 tree pcdouble_type_node = build_pointer_type (
13209 build_type_variant (double_type_node, 1, 0));
13210 tree int_ftype_v2df_v2df
13211 = build_function_type_list (integer_type_node,
13212 V2DF_type_node, V2DF_type_node, NULL_TREE);
13214 tree ti_ftype_void
13215 = build_function_type (intTI_type_node, void_list_node);
13216 tree v2di_ftype_void
13217 = build_function_type (V2DI_type_node, void_list_node);
13218 tree ti_ftype_ti_ti
13219 = build_function_type_list (intTI_type_node,
13220 intTI_type_node, intTI_type_node, NULL_TREE);
13221 tree void_ftype_pcvoid
13222 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13223 tree v2di_ftype_di
13224 = build_function_type_list (V2DI_type_node,
13225 long_long_unsigned_type_node, NULL_TREE);
13226 tree di_ftype_v2di
13227 = build_function_type_list (long_long_unsigned_type_node,
13228 V2DI_type_node, NULL_TREE);
13229 tree v4sf_ftype_v4si
13230 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13231 tree v4si_ftype_v4sf
13232 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13233 tree v2df_ftype_v4si
13234 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13235 tree v4si_ftype_v2df
13236 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13237 tree v2si_ftype_v2df
13238 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13239 tree v4sf_ftype_v2df
13240 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13241 tree v2df_ftype_v2si
13242 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13243 tree v2df_ftype_v4sf
13244 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13245 tree int_ftype_v2df
13246 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13247 tree int64_ftype_v2df
13248 = build_function_type_list (long_long_integer_type_node,
13249 V2DF_type_node, NULL_TREE);
13250 tree v2df_ftype_v2df_int
13251 = build_function_type_list (V2DF_type_node,
13252 V2DF_type_node, integer_type_node, NULL_TREE);
13253 tree v2df_ftype_v2df_int64
13254 = build_function_type_list (V2DF_type_node,
13255 V2DF_type_node, long_long_integer_type_node,
13256 NULL_TREE);
13257 tree v4sf_ftype_v4sf_v2df
13258 = build_function_type_list (V4SF_type_node,
13259 V4SF_type_node, V2DF_type_node, NULL_TREE);
13260 tree v2df_ftype_v2df_v4sf
13261 = build_function_type_list (V2DF_type_node,
13262 V2DF_type_node, V4SF_type_node, NULL_TREE);
13263 tree v2df_ftype_v2df_v2df_int
13264 = build_function_type_list (V2DF_type_node,
13265 V2DF_type_node, V2DF_type_node,
13266 integer_type_node,
13267 NULL_TREE);
13268 tree v2df_ftype_v2df_pv2si
13269 = build_function_type_list (V2DF_type_node,
13270 V2DF_type_node, pv2si_type_node, NULL_TREE);
13271 tree void_ftype_pv2si_v2df
13272 = build_function_type_list (void_type_node,
13273 pv2si_type_node, V2DF_type_node, NULL_TREE);
13274 tree void_ftype_pdouble_v2df
13275 = build_function_type_list (void_type_node,
13276 pdouble_type_node, V2DF_type_node, NULL_TREE);
13277 tree void_ftype_pint_int
13278 = build_function_type_list (void_type_node,
13279 pint_type_node, integer_type_node, NULL_TREE);
13280 tree void_ftype_v16qi_v16qi_pchar
13281 = build_function_type_list (void_type_node,
13282 V16QI_type_node, V16QI_type_node,
13283 pchar_type_node, NULL_TREE);
13284 tree v2df_ftype_pcdouble
13285 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13286 tree v2df_ftype_v2df_v2df
13287 = build_function_type_list (V2DF_type_node,
13288 V2DF_type_node, V2DF_type_node, NULL_TREE);
13289 tree v16qi_ftype_v16qi_v16qi
13290 = build_function_type_list (V16QI_type_node,
13291 V16QI_type_node, V16QI_type_node, NULL_TREE);
13292 tree v8hi_ftype_v8hi_v8hi
13293 = build_function_type_list (V8HI_type_node,
13294 V8HI_type_node, V8HI_type_node, NULL_TREE);
13295 tree v4si_ftype_v4si_v4si
13296 = build_function_type_list (V4SI_type_node,
13297 V4SI_type_node, V4SI_type_node, NULL_TREE);
13298 tree v2di_ftype_v2di_v2di
13299 = build_function_type_list (V2DI_type_node,
13300 V2DI_type_node, V2DI_type_node, NULL_TREE);
13301 tree v2di_ftype_v2df_v2df
13302 = build_function_type_list (V2DI_type_node,
13303 V2DF_type_node, V2DF_type_node, NULL_TREE);
13304 tree v2df_ftype_v2df
13305 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13306 tree v2df_ftype_double
13307 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13308 tree v2df_ftype_double_double
13309 = build_function_type_list (V2DF_type_node,
13310 double_type_node, double_type_node, NULL_TREE);
13311 tree int_ftype_v8hi_int
13312 = build_function_type_list (integer_type_node,
13313 V8HI_type_node, integer_type_node, NULL_TREE);
13314 tree v8hi_ftype_v8hi_int_int
13315 = build_function_type_list (V8HI_type_node,
13316 V8HI_type_node, integer_type_node,
13317 integer_type_node, NULL_TREE);
13318 tree v2di_ftype_v2di_int
13319 = build_function_type_list (V2DI_type_node,
13320 V2DI_type_node, integer_type_node, NULL_TREE);
13321 tree v4si_ftype_v4si_int
13322 = build_function_type_list (V4SI_type_node,
13323 V4SI_type_node, integer_type_node, NULL_TREE);
13324 tree v8hi_ftype_v8hi_int
13325 = build_function_type_list (V8HI_type_node,
13326 V8HI_type_node, integer_type_node, NULL_TREE);
13327 tree v8hi_ftype_v8hi_v2di
13328 = build_function_type_list (V8HI_type_node,
13329 V8HI_type_node, V2DI_type_node, NULL_TREE);
13330 tree v4si_ftype_v4si_v2di
13331 = build_function_type_list (V4SI_type_node,
13332 V4SI_type_node, V2DI_type_node, NULL_TREE);
13333 tree v4si_ftype_v8hi_v8hi
13334 = build_function_type_list (V4SI_type_node,
13335 V8HI_type_node, V8HI_type_node, NULL_TREE);
13336 tree di_ftype_v8qi_v8qi
13337 = build_function_type_list (long_long_unsigned_type_node,
13338 V8QI_type_node, V8QI_type_node, NULL_TREE);
13339 tree v2di_ftype_v16qi_v16qi
13340 = build_function_type_list (V2DI_type_node,
13341 V16QI_type_node, V16QI_type_node, NULL_TREE);
13342 tree int_ftype_v16qi
13343 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13344 tree v16qi_ftype_pcchar
13345 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13346 tree void_ftype_pchar_v16qi
13347 = build_function_type_list (void_type_node,
13348 pchar_type_node, V16QI_type_node, NULL_TREE);
13349 tree v4si_ftype_pcint
13350 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13351 tree void_ftype_pcint_v4si
13352 = build_function_type_list (void_type_node,
13353 pcint_type_node, V4SI_type_node, NULL_TREE);
13354 tree v2di_ftype_v2di
13355 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
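/* Naming convention for the nodes above: <return>_ftype_<arg>..., so
   v2df_ftype_v2df_v2df is "V2DF function of two V2DFs".  A minimal
   sketch of how one such node is built, using the same call pattern as
   the declarations above:

     tree fn_type
       = build_function_type_list (V2DF_type_node,
                                   V2DF_type_node, V2DF_type_node,
                                   NULL_TREE);

   build_function_type_list chains the argument types in order, and
   NULL_TREE terminates the list.  */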
13357 /* Add all builtins that are more or less simple operations on two
13358 operands. */
13359 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13361 /* Use one of the operands; the target can have a different mode for
13362 mask-generating compares. */
13363 enum machine_mode mode;
13364 tree type;
13366 if (d->name == 0)
13367 continue;
13368 mode = insn_data[d->icode].operand[1].mode;
13370 switch (mode)
13372 case V16QImode:
13373 type = v16qi_ftype_v16qi_v16qi;
13374 break;
13375 case V8HImode:
13376 type = v8hi_ftype_v8hi_v8hi;
13377 break;
13378 case V4SImode:
13379 type = v4si_ftype_v4si_v4si;
13380 break;
13381 case V2DImode:
13382 type = v2di_ftype_v2di_v2di;
13383 break;
13384 case V2DFmode:
13385 type = v2df_ftype_v2df_v2df;
13386 break;
13387 case TImode:
13388 type = ti_ftype_ti_ti;
13389 break;
13390 case V4SFmode:
13391 type = v4sf_ftype_v4sf_v4sf;
13392 break;
13393 case V8QImode:
13394 type = v8qi_ftype_v8qi_v8qi;
13395 break;
13396 case V4HImode:
13397 type = v4hi_ftype_v4hi_v4hi;
13398 break;
13399 case V2SImode:
13400 type = v2si_ftype_v2si_v2si;
13401 break;
13402 case DImode:
13403 type = di_ftype_di_di;
13404 break;
13406 default:
13407 abort ();
13410 /* Override for comparisons. */
13411 if (d->icode == CODE_FOR_maskcmpv4sf3
13412 || d->icode == CODE_FOR_maskncmpv4sf3
13413 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13414 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13415 type = v4si_ftype_v4sf_v4sf;
13417 if (d->icode == CODE_FOR_maskcmpv2df3
13418 || d->icode == CODE_FOR_maskncmpv2df3
13419 || d->icode == CODE_FOR_vmmaskcmpv2df3
13420 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13421 type = v2di_ftype_v2df_v2df;
13423 def_builtin (d->mask, d->name, type, d->code);
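/* Once registered, each entry in bdesc_2arg is callable from user code
   under its __builtin_ia32_* name; an illustrative sketch (assuming the
   table contains the SSE2 paddw pattern, and compiling with -msse2):

     __v8hi a, b;
     __v8hi sum = __builtin_ia32_paddw128 (a, b);

   which expands through ix86_expand_binop_builtin below.  */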
13426 /* Add the remaining MMX insns with somewhat more complicated types. */
13427 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13428 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13429 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13430 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13431 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13433 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13434 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13435 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13437 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13438 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13440 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13441 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13443 /* comi/ucomi insns. */
13444 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13445 if (d->mask == MASK_SSE2)
13446 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13447 else
13448 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
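/* The comi/ucomi builtins expand to scalar compares of the low vector
   element, returning an int; illustrative use of an SSE1 entry:

     __v4sf a, b;
     int lt = __builtin_ia32_comilt (a, b);

   The SSE2 table entries differ only in taking V2DF operands.  */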
13450 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13451 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13452 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13454 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13455 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13456 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13457 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13458 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13459 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13460 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13461 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13462 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13463 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13464 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13466 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13467 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13469 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13471 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13472 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13473 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13474 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13475 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13476 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13478 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13479 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13480 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13481 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13483 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13484 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13485 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13486 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13488 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13490 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13492 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13493 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13494 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13495 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13496 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13497 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13499 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
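/* The trailing int operand of shufps is an 8-bit immediate selector,
   two bits per result element; e.g. (illustrative)

     __v4sf r = __builtin_ia32_shufps (a, a, 0x1b);

   picks elements 3,2,1,0 and thus reverses A when both inputs are A.  */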
13501 /* Original 3DNow! */
13502 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13503 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13504 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13515 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13516 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13517 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13518 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13519 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13520 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13521 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13523 /* 3DNow! extension as used in the Athlon CPU. */
13524 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13525 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13526 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13527 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13528 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13529 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13531 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13533 /* SSE2 */
13534 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13535 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13582 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13583 def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13590 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13614 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13643 /* Errors in the source file can cause expand_expr to return const0_rtx
13644 where we expect a vector. To avoid crashing, use one of the vector
13645 clear instructions. */
13646 static rtx
13647 safe_vector_operand (x, mode)
13648 rtx x;
13649 enum machine_mode mode;
13651 if (x != const0_rtx)
13652 return x;
13653 x = gen_reg_rtx (mode);
13655 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13656 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13657 : gen_rtx_SUBREG (DImode, x, 0)));
13658 else
13659 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13660 : gen_rtx_SUBREG (V4SFmode, x, 0),
13661 CONST0_RTX (V4SFmode)));
13662 return x;
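/* For example, if expand_expr handed back const0_rtx where a V4SF
   operand was expected, the code above substitutes a freshly zeroed
   SSE register, so the calling expander can still emit its pattern.  */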
13665 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13667 static rtx
13668 ix86_expand_binop_builtin (icode, arglist, target)
13669 enum insn_code icode;
13670 tree arglist;
13671 rtx target;
13673 rtx pat;
13674 tree arg0 = TREE_VALUE (arglist);
13675 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13676 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13677 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13678 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13679 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13680 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13682 if (VECTOR_MODE_P (mode0))
13683 op0 = safe_vector_operand (op0, mode0);
13684 if (VECTOR_MODE_P (mode1))
13685 op1 = safe_vector_operand (op1, mode1);
13687 if (! target
13688 || GET_MODE (target) != tmode
13689 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13690 target = gen_reg_rtx (tmode);
13692 if (GET_MODE (op1) == SImode && mode1 == TImode)
13694 rtx x = gen_reg_rtx (V4SImode);
13695 emit_insn (gen_sse2_loadd (x, op1));
13696 op1 = gen_lowpart (TImode, x);
13699 /* In case the insn wants input operands in modes different from
13700 the result, abort. */
13701 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13702 abort ();
13704 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13705 op0 = copy_to_mode_reg (mode0, op0);
13706 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13707 op1 = copy_to_mode_reg (mode1, op1);
13709 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13710 yet one of the two must not be a memory operand. This is normally
13711 enforced by expanders, but we didn't bother to create one here. */
13712 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13713 op0 = copy_to_mode_reg (mode0, op0);
13715 pat = GEN_FCN (icode) (target, op0, op1);
13716 if (! pat)
13717 return 0;
13718 emit_insn (pat);
13719 return target;
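/* An illustrative call, matching how ix86_expand_builtin below uses
   this helper:

     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

   The caller only chooses the insn code; operand legitimization and
   pattern emission happen here.  */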
13722 /* Subroutine of ix86_expand_builtin to take care of stores. */
13724 static rtx
13725 ix86_expand_store_builtin (icode, arglist)
13726 enum insn_code icode;
13727 tree arglist;
13729 rtx pat;
13730 tree arg0 = TREE_VALUE (arglist);
13731 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13732 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13733 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13734 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13735 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13737 if (VECTOR_MODE_P (mode1))
13738 op1 = safe_vector_operand (op1, mode1);
13740 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13741 op1 = copy_to_mode_reg (mode1, op1);
13743 pat = GEN_FCN (icode) (op0, op1);
13744 if (pat)
13745 emit_insn (pat);
13746 return 0;
13749 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13751 static rtx
13752 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13753 enum insn_code icode;
13754 tree arglist;
13755 rtx target;
13756 int do_load;
13758 rtx pat;
13759 tree arg0 = TREE_VALUE (arglist);
13760 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13761 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13762 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13764 if (! target
13765 || GET_MODE (target) != tmode
13766 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13767 target = gen_reg_rtx (tmode);
13768 if (do_load)
13769 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13770 else
13772 if (VECTOR_MODE_P (mode0))
13773 op0 = safe_vector_operand (op0, mode0);
13775 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13776 op0 = copy_to_mode_reg (mode0, op0);
13779 pat = GEN_FCN (icode) (target, op0);
13780 if (! pat)
13781 return 0;
13782 emit_insn (pat);
13783 return target;
13786 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13787 sqrtss, rsqrtss, rcpss. */
13789 static rtx
13790 ix86_expand_unop1_builtin (icode, arglist, target)
13791 enum insn_code icode;
13792 tree arglist;
13793 rtx target;
13795 rtx pat;
13796 tree arg0 = TREE_VALUE (arglist);
13797 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13798 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13799 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13801 if (! target
13802 || GET_MODE (target) != tmode
13803 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13804 target = gen_reg_rtx (tmode);
13806 if (VECTOR_MODE_P (mode0))
13807 op0 = safe_vector_operand (op0, mode0);
13809 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13810 op0 = copy_to_mode_reg (mode0, op0);
13812 op1 = op0;
13813 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13814 op1 = copy_to_mode_reg (mode0, op1);
13816 pat = GEN_FCN (icode) (target, op0, op1);
13817 if (! pat)
13818 return 0;
13819 emit_insn (pat);
13820 return target;
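/* op1 duplicates op0 because the vm* scalar patterns take two inputs:
   the operation is applied to the low element while the upper elements
   of the other input pass through.  With both inputs the same,
   __builtin_ia32_sqrtss (a), for instance, yields sqrt of a[0] in
   element 0 and a[1..3] unchanged.  */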
13823 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13825 static rtx
13826 ix86_expand_sse_compare (d, arglist, target)
13827 const struct builtin_description *d;
13828 tree arglist;
13829 rtx target;
13831 rtx pat;
13832 tree arg0 = TREE_VALUE (arglist);
13833 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13834 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13835 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13836 rtx op2;
13837 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13838 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13839 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13840 enum rtx_code comparison = d->comparison;
13842 if (VECTOR_MODE_P (mode0))
13843 op0 = safe_vector_operand (op0, mode0);
13844 if (VECTOR_MODE_P (mode1))
13845 op1 = safe_vector_operand (op1, mode1);
13847 /* Swap operands if we have a comparison that isn't available in
13848 hardware. */
13849 if (d->flag)
13851 rtx tmp = gen_reg_rtx (mode1);
13852 emit_move_insn (tmp, op1);
13853 op1 = op0;
13854 op0 = tmp;
13857 if (! target
13858 || GET_MODE (target) != tmode
13859 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13860 target = gen_reg_rtx (tmode);
13862 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13863 op0 = copy_to_mode_reg (mode0, op0);
13864 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13865 op1 = copy_to_mode_reg (mode1, op1);
13867 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13868 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13869 if (! pat)
13870 return 0;
13871 emit_insn (pat);
13872 return target;
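/* The d->flag swap above is how comparisons without a direct hardware
   encoding are handled; illustratively, __builtin_ia32_cmpgtps (a, b)
   is emitted as the hardware's "less than" compare with A and B
   exchanged.  */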
13875 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13877 static rtx
13878 ix86_expand_sse_comi (d, arglist, target)
13879 const struct builtin_description *d;
13880 tree arglist;
13881 rtx target;
13883 rtx pat;
13884 tree arg0 = TREE_VALUE (arglist);
13885 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13886 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13887 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13888 rtx op2;
13889 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13890 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13891 enum rtx_code comparison = d->comparison;
13893 if (VECTOR_MODE_P (mode0))
13894 op0 = safe_vector_operand (op0, mode0);
13895 if (VECTOR_MODE_P (mode1))
13896 op1 = safe_vector_operand (op1, mode1);
13898 /* Swap operands if we have a comparison that isn't available in
13899 hardware. */
13900 if (d->flag)
13902 rtx tmp = op1;
13903 op1 = op0;
13904 op0 = tmp;
13907 target = gen_reg_rtx (SImode);
13908 emit_move_insn (target, const0_rtx);
13909 target = gen_rtx_SUBREG (QImode, target, 0);
13911 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13912 op0 = copy_to_mode_reg (mode0, op0);
13913 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13914 op1 = copy_to_mode_reg (mode1, op1);
13916 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13917 pat = GEN_FCN (d->icode) (op0, op1);
13918 if (! pat)
13919 return 0;
13920 emit_insn (pat);
13921 emit_insn (gen_rtx_SET (VOIDmode,
13922 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13923 gen_rtx_fmt_ee (comparison, QImode,
13924 SET_DEST (pat),
13925 const0_rtx)));
13927 return SUBREG_REG (target);
13930 /* Expand an expression EXP that calls a built-in function,
13931 with result going to TARGET if that's convenient
13932 (and in mode MODE if that's convenient).
13933 SUBTARGET may be used as the target for computing one of EXP's operands.
13934 IGNORE is nonzero if the value is to be ignored. */
13936 rtx
13937 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13938 tree exp;
13939 rtx target;
13940 rtx subtarget ATTRIBUTE_UNUSED;
13941 enum machine_mode mode ATTRIBUTE_UNUSED;
13942 int ignore ATTRIBUTE_UNUSED;
13944 const struct builtin_description *d;
13945 size_t i;
13946 enum insn_code icode;
13947 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13948 tree arglist = TREE_OPERAND (exp, 1);
13949 tree arg0, arg1, arg2;
13950 rtx op0, op1, op2, pat;
13951 enum machine_mode tmode, mode0, mode1, mode2;
13952 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13954 switch (fcode)
13956 case IX86_BUILTIN_EMMS:
13957 emit_insn (gen_emms ());
13958 return 0;
13960 case IX86_BUILTIN_SFENCE:
13961 emit_insn (gen_sfence ());
13962 return 0;
13964 case IX86_BUILTIN_PEXTRW:
13965 case IX86_BUILTIN_PEXTRW128:
13966 icode = (fcode == IX86_BUILTIN_PEXTRW
13967 ? CODE_FOR_mmx_pextrw
13968 : CODE_FOR_sse2_pextrw);
13969 arg0 = TREE_VALUE (arglist);
13970 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13971 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13972 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13973 tmode = insn_data[icode].operand[0].mode;
13974 mode0 = insn_data[icode].operand[1].mode;
13975 mode1 = insn_data[icode].operand[2].mode;
13977 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13978 op0 = copy_to_mode_reg (mode0, op0);
13979 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13981 /* @@@ better error message */
13982 error ("selector must be an immediate");
13983 return gen_reg_rtx (tmode);
13985 if (target == 0
13986 || GET_MODE (target) != tmode
13987 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13988 target = gen_reg_rtx (tmode);
13989 pat = GEN_FCN (icode) (target, op0, op1);
13990 if (! pat)
13991 return 0;
13992 emit_insn (pat);
13993 return target;
13995 case IX86_BUILTIN_PINSRW:
13996 case IX86_BUILTIN_PINSRW128:
13997 icode = (fcode == IX86_BUILTIN_PINSRW
13998 ? CODE_FOR_mmx_pinsrw
13999 : CODE_FOR_sse2_pinsrw);
14000 arg0 = TREE_VALUE (arglist);
14001 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14002 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14003 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14004 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14005 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14006 tmode = insn_data[icode].operand[0].mode;
14007 mode0 = insn_data[icode].operand[1].mode;
14008 mode1 = insn_data[icode].operand[2].mode;
14009 mode2 = insn_data[icode].operand[3].mode;
14011 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14012 op0 = copy_to_mode_reg (mode0, op0);
14013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14014 op1 = copy_to_mode_reg (mode1, op1);
14015 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14017 /* @@@ better error message */
14018 error ("selector must be an immediate");
14019 return const0_rtx;
14021 if (target == 0
14022 || GET_MODE (target) != tmode
14023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14024 target = gen_reg_rtx (tmode);
14025 pat = GEN_FCN (icode) (target, op0, op1, op2);
14026 if (! pat)
14027 return 0;
14028 emit_insn (pat);
14029 return target;
14031 case IX86_BUILTIN_MASKMOVQ:
14032 case IX86_BUILTIN_MASKMOVDQU:
14033 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14034 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14035 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14036 : CODE_FOR_sse2_maskmovdqu));
14037 /* Note the arg order is different from the operand order. */
14038 arg1 = TREE_VALUE (arglist);
14039 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14040 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14041 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14042 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14043 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14044 mode0 = insn_data[icode].operand[0].mode;
14045 mode1 = insn_data[icode].operand[1].mode;
14046 mode2 = insn_data[icode].operand[2].mode;
14048 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14049 op0 = copy_to_mode_reg (mode0, op0);
14050 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14051 op1 = copy_to_mode_reg (mode1, op1);
14052 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14053 op2 = copy_to_mode_reg (mode2, op2);
14054 pat = GEN_FCN (icode) (op0, op1, op2);
14055 if (! pat)
14056 return 0;
14057 emit_insn (pat);
14058 return 0;
14060 case IX86_BUILTIN_SQRTSS:
14061 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14062 case IX86_BUILTIN_RSQRTSS:
14063 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14064 case IX86_BUILTIN_RCPSS:
14065 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14067 case IX86_BUILTIN_LOADAPS:
14068 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14070 case IX86_BUILTIN_LOADUPS:
14071 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14073 case IX86_BUILTIN_STOREAPS:
14074 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14076 case IX86_BUILTIN_STOREUPS:
14077 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14079 case IX86_BUILTIN_LOADSS:
14080 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14082 case IX86_BUILTIN_STORESS:
14083 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14085 case IX86_BUILTIN_LOADHPS:
14086 case IX86_BUILTIN_LOADLPS:
14087 case IX86_BUILTIN_LOADHPD:
14088 case IX86_BUILTIN_LOADLPD:
14089 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14090 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14091 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14092 : CODE_FOR_sse2_movlpd);
14093 arg0 = TREE_VALUE (arglist);
14094 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14095 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14096 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14097 tmode = insn_data[icode].operand[0].mode;
14098 mode0 = insn_data[icode].operand[1].mode;
14099 mode1 = insn_data[icode].operand[2].mode;
14101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14102 op0 = copy_to_mode_reg (mode0, op0);
14103 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14104 if (target == 0
14105 || GET_MODE (target) != tmode
14106 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14107 target = gen_reg_rtx (tmode);
14108 pat = GEN_FCN (icode) (target, op0, op1);
14109 if (! pat)
14110 return 0;
14111 emit_insn (pat);
14112 return target;
14114 case IX86_BUILTIN_STOREHPS:
14115 case IX86_BUILTIN_STORELPS:
14116 case IX86_BUILTIN_STOREHPD:
14117 case IX86_BUILTIN_STORELPD:
14118 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14119 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14120 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14121 : CODE_FOR_sse2_movlpd);
14122 arg0 = TREE_VALUE (arglist);
14123 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14124 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14125 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14126 mode0 = insn_data[icode].operand[1].mode;
14127 mode1 = insn_data[icode].operand[2].mode;
14129 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14131 op1 = copy_to_mode_reg (mode1, op1);
14133 pat = GEN_FCN (icode) (op0, op0, op1);
14134 if (! pat)
14135 return 0;
14136 emit_insn (pat);
14137 return 0;
14139 case IX86_BUILTIN_MOVNTPS:
14140 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14141 case IX86_BUILTIN_MOVNTQ:
14142 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14144 case IX86_BUILTIN_LDMXCSR:
14145 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14146 target = assign_386_stack_local (SImode, 0);
14147 emit_move_insn (target, op0);
14148 emit_insn (gen_ldmxcsr (target));
14149 return 0;
14151 case IX86_BUILTIN_STMXCSR:
14152 target = assign_386_stack_local (SImode, 0);
14153 emit_insn (gen_stmxcsr (target));
14154 return copy_to_mode_reg (SImode, target);
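/* Illustrative user-level pairing of the two cases above:

     unsigned int mxcsr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (mxcsr | 0x8040);   (sets the FTZ and DAZ bits)

   Both go through a 386 stack slot because ldmxcsr/stmxcsr accept only
   memory operands.  */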
14156 case IX86_BUILTIN_SHUFPS:
14157 case IX86_BUILTIN_SHUFPD:
14158 icode = (fcode == IX86_BUILTIN_SHUFPS
14159 ? CODE_FOR_sse_shufps
14160 : CODE_FOR_sse2_shufpd);
14161 arg0 = TREE_VALUE (arglist);
14162 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14163 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14164 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14165 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14166 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14167 tmode = insn_data[icode].operand[0].mode;
14168 mode0 = insn_data[icode].operand[1].mode;
14169 mode1 = insn_data[icode].operand[2].mode;
14170 mode2 = insn_data[icode].operand[3].mode;
14172 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14173 op0 = copy_to_mode_reg (mode0, op0);
14174 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14175 op1 = copy_to_mode_reg (mode1, op1);
14176 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14178 /* @@@ better error message */
14179 error ("mask must be an immediate");
14180 return gen_reg_rtx (tmode);
14182 if (target == 0
14183 || GET_MODE (target) != tmode
14184 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14185 target = gen_reg_rtx (tmode);
14186 pat = GEN_FCN (icode) (target, op0, op1, op2);
14187 if (! pat)
14188 return 0;
14189 emit_insn (pat);
14190 return target;
14192 case IX86_BUILTIN_PSHUFW:
14193 case IX86_BUILTIN_PSHUFD:
14194 case IX86_BUILTIN_PSHUFHW:
14195 case IX86_BUILTIN_PSHUFLW:
14196 icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14197 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14198 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14199 : CODE_FOR_mmx_pshufw);
14200 arg0 = TREE_VALUE (arglist);
14201 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14202 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14203 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14204 tmode = insn_data[icode].operand[0].mode;
14205 mode1 = insn_data[icode].operand[1].mode;
14206 mode2 = insn_data[icode].operand[2].mode;
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14209 op0 = copy_to_mode_reg (mode1, op0);
14210 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14212 /* @@@ better error message */
14213 error ("mask must be an immediate");
14214 return const0_rtx;
14216 if (target == 0
14217 || GET_MODE (target) != tmode
14218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14219 target = gen_reg_rtx (tmode);
14220 pat = GEN_FCN (icode) (target, op0, op1);
14221 if (! pat)
14222 return 0;
14223 emit_insn (pat);
14224 return target;
14226 case IX86_BUILTIN_PSLLDQI128:
14227 case IX86_BUILTIN_PSRLDQI128:
14228 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14229 : CODE_FOR_sse2_lshrti3);
14230 arg0 = TREE_VALUE (arglist);
14231 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14232 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14233 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14234 tmode = insn_data[icode].operand[0].mode;
14235 mode1 = insn_data[icode].operand[1].mode;
14236 mode2 = insn_data[icode].operand[2].mode;
14238 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14240 op0 = copy_to_reg (op0);
14241 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14243 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14245 error ("shift must be an immediate");
14246 return const0_rtx;
14248 target = gen_reg_rtx (V2DImode);
14249 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14250 if (! pat)
14251 return 0;
14252 emit_insn (pat);
14253 return target;
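/* Note the shift amount here is in bits, since the ashlti3/lshrti3
   patterns operate on TImode; <emmintrin.h>-style wrappers are expected
   to scale a byte count by 8 before calling these builtins.  */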
14255 case IX86_BUILTIN_FEMMS:
14256 emit_insn (gen_femms ());
14257 return NULL_RTX;
14259 case IX86_BUILTIN_PAVGUSB:
14260 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14262 case IX86_BUILTIN_PF2ID:
14263 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14265 case IX86_BUILTIN_PFACC:
14266 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14268 case IX86_BUILTIN_PFADD:
14269 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14271 case IX86_BUILTIN_PFCMPEQ:
14272 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14274 case IX86_BUILTIN_PFCMPGE:
14275 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14277 case IX86_BUILTIN_PFCMPGT:
14278 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14280 case IX86_BUILTIN_PFMAX:
14281 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14283 case IX86_BUILTIN_PFMIN:
14284 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14286 case IX86_BUILTIN_PFMUL:
14287 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14289 case IX86_BUILTIN_PFRCP:
14290 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14292 case IX86_BUILTIN_PFRCPIT1:
14293 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14295 case IX86_BUILTIN_PFRCPIT2:
14296 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14298 case IX86_BUILTIN_PFRSQIT1:
14299 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14301 case IX86_BUILTIN_PFRSQRT:
14302 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14304 case IX86_BUILTIN_PFSUB:
14305 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14307 case IX86_BUILTIN_PFSUBR:
14308 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14310 case IX86_BUILTIN_PI2FD:
14311 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14313 case IX86_BUILTIN_PMULHRW:
14314 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14316 case IX86_BUILTIN_PF2IW:
14317 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14319 case IX86_BUILTIN_PFNACC:
14320 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14322 case IX86_BUILTIN_PFPNACC:
14323 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14325 case IX86_BUILTIN_PI2FW:
14326 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14328 case IX86_BUILTIN_PSWAPDSI:
14329 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14331 case IX86_BUILTIN_PSWAPDSF:
14332 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14334 case IX86_BUILTIN_SSE_ZERO:
14335 target = gen_reg_rtx (V4SFmode);
14336 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14337 return target;
14339 case IX86_BUILTIN_MMX_ZERO:
14340 target = gen_reg_rtx (DImode);
14341 emit_insn (gen_mmx_clrdi (target));
14342 return target;
14344 case IX86_BUILTIN_CLRTI:
14345 target = gen_reg_rtx (V2DImode);
14346 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14347 return target;
14350 case IX86_BUILTIN_SQRTSD:
14351 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14352 case IX86_BUILTIN_LOADAPD:
14353 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14354 case IX86_BUILTIN_LOADUPD:
14355 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14357 case IX86_BUILTIN_STOREAPD:
14358 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14359 case IX86_BUILTIN_STOREUPD:
14360 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14362 case IX86_BUILTIN_LOADSD:
14363 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14365 case IX86_BUILTIN_STORESD:
14366 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14368 case IX86_BUILTIN_SETPD1:
14369 target = assign_386_stack_local (DFmode, 0);
14370 arg0 = TREE_VALUE (arglist);
14371 emit_move_insn (adjust_address (target, DFmode, 0),
14372 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14373 op0 = gen_reg_rtx (V2DFmode);
14374 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14375 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14376 return op0;
14378 case IX86_BUILTIN_SETPD:
14379 target = assign_386_stack_local (V2DFmode, 0);
14380 arg0 = TREE_VALUE (arglist);
14381 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14382 emit_move_insn (adjust_address (target, DFmode, 0),
14383 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14384 emit_move_insn (adjust_address (target, DFmode, 8),
14385 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14386 op0 = gen_reg_rtx (V2DFmode);
14387 emit_insn (gen_sse2_movapd (op0, target));
14388 return op0;
14390 case IX86_BUILTIN_LOADRPD:
14391 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14392 gen_reg_rtx (V2DFmode), 1);
14393 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14394 return target;
14396 case IX86_BUILTIN_LOADPD1:
14397 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14398 gen_reg_rtx (V2DFmode), 1);
14399 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14400 return target;
14402 case IX86_BUILTIN_STOREPD1:
14403 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14404 case IX86_BUILTIN_STORERPD:
14405 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14407 case IX86_BUILTIN_CLRPD:
14408 target = gen_reg_rtx (V2DFmode);
14409 emit_insn (gen_sse_clrv2df (target));
14410 return target;
14412 case IX86_BUILTIN_MFENCE:
14413 emit_insn (gen_sse2_mfence ());
14414 return 0;
14415 case IX86_BUILTIN_LFENCE:
14416 emit_insn (gen_sse2_lfence ());
14417 return 0;
14419 case IX86_BUILTIN_CLFLUSH:
14420 arg0 = TREE_VALUE (arglist);
14421 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14422 icode = CODE_FOR_sse2_clflush;
14423 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14424 op0 = copy_to_mode_reg (Pmode, op0);
14426 emit_insn (gen_sse2_clflush (op0));
14427 return 0;
14429 case IX86_BUILTIN_MOVNTPD:
14430 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14431 case IX86_BUILTIN_MOVNTDQ:
14432 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14433 case IX86_BUILTIN_MOVNTI:
14434 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14436 case IX86_BUILTIN_LOADDQA:
14437 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14438 case IX86_BUILTIN_LOADDQU:
14439 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14440 case IX86_BUILTIN_LOADD:
14441 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14443 case IX86_BUILTIN_STOREDQA:
14444 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14445 case IX86_BUILTIN_STOREDQU:
14446 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14447 case IX86_BUILTIN_STORED:
14448 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14450 default:
14451 break;
14454 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14455 if (d->code == fcode)
14457 /* Compares are treated specially. */
14458 if (d->icode == CODE_FOR_maskcmpv4sf3
14459 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14460 || d->icode == CODE_FOR_maskncmpv4sf3
14461 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14462 || d->icode == CODE_FOR_maskcmpv2df3
14463 || d->icode == CODE_FOR_vmmaskcmpv2df3
14464 || d->icode == CODE_FOR_maskncmpv2df3
14465 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14466 return ix86_expand_sse_compare (d, arglist, target);
14468 return ix86_expand_binop_builtin (d->icode, arglist, target);
14471 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14472 if (d->code == fcode)
14473 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14475 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14476 if (d->code == fcode)
14477 return ix86_expand_sse_comi (d, arglist, target);
14479 /* @@@ Should really do something sensible here. */
14480 return 0;
14483 /* Store OPERAND to memory after reload is completed. This means
14484 that we can't easily use assign_stack_local. */
14485 rtx
14486 ix86_force_to_memory (mode, operand)
14487 enum machine_mode mode;
14488 rtx operand;
14490 rtx result;
14491 if (!reload_completed)
14492 abort ();
14493 if (TARGET_64BIT && TARGET_RED_ZONE)
14495 result = gen_rtx_MEM (mode,
14496 gen_rtx_PLUS (Pmode,
14497 stack_pointer_rtx,
14498 GEN_INT (-RED_ZONE_SIZE)));
14499 emit_move_insn (result, operand);
14501 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14503 switch (mode)
14505 case HImode:
14506 case SImode:
14507 operand = gen_lowpart (DImode, operand);
14508 /* FALLTHRU */
14509 case DImode:
14510 emit_insn (
14511 gen_rtx_SET (VOIDmode,
14512 gen_rtx_MEM (DImode,
14513 gen_rtx_PRE_DEC (DImode,
14514 stack_pointer_rtx)),
14515 operand));
14516 break;
14517 default:
14518 abort ();
14520 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14522 else
14524 switch (mode)
14526 case DImode:
14528 rtx operands[2];
14529 split_di (&operand, 1, operands, operands + 1);
14530 emit_insn (
14531 gen_rtx_SET (VOIDmode,
14532 gen_rtx_MEM (SImode,
14533 gen_rtx_PRE_DEC (Pmode,
14534 stack_pointer_rtx)),
14535 operands[1]));
14536 emit_insn (
14537 gen_rtx_SET (VOIDmode,
14538 gen_rtx_MEM (SImode,
14539 gen_rtx_PRE_DEC (Pmode,
14540 stack_pointer_rtx)),
14541 operands[0]));
14543 break;
14544 case HImode:
14545 /* It is better to store HImode values as SImode. */
14546 if (!TARGET_PARTIAL_REG_STALL)
14547 operand = gen_lowpart (SImode, operand);
14548 /* FALLTHRU */
14549 case SImode:
14550 emit_insn (
14551 gen_rtx_SET (VOIDmode,
14552 gen_rtx_MEM (GET_MODE (operand),
14553 gen_rtx_PRE_DEC (SImode,
14554 stack_pointer_rtx)),
14555 operand));
14556 break;
14557 default:
14558 abort ();
14560 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14562 return result;
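/* Illustrative pairing with ix86_free_from_memory below:

     rtx mem = ix86_force_to_memory (SImode, operand);
     ...emit insns that use mem...
     ix86_free_from_memory (SImode);

   Outside the red zone the operand is pushed with PRE_DEC, so the
   matching free has to move the stack pointer back up by the same
   size.  */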
14565 /* Free the operand from memory. */
14566 void
14567 ix86_free_from_memory (mode)
14568 enum machine_mode mode;
14570 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14572 int size;
14574 if (mode == DImode || TARGET_64BIT)
14575 size = 8;
14576 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14577 size = 2;
14578 else
14579 size = 4;
14580 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14581 to a pop or add instruction if registers are available. */
14582 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14583 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14584 GEN_INT (size))));
14588 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14589 QImode must go into class Q_REGS.
14590 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
14591 movdf to do mem-to-mem moves through integer regs. */
14592 enum reg_class
14593 ix86_preferred_reload_class (x, class)
14594 rtx x;
14595 enum reg_class class;
14597 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14598 return NO_REGS;
14599 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14601 /* SSE can't load any constant directly yet. */
14602 if (SSE_CLASS_P (class))
14603 return NO_REGS;
14604 /* Floats can load 0 and 1. */
14605 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14607 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14608 if (MAYBE_SSE_CLASS_P (class))
14609 return (reg_class_subset_p (class, GENERAL_REGS)
14610 ? GENERAL_REGS : FLOAT_REGS);
14611 else
14612 return class;
14614 /* General regs can load everything. */
14615 if (reg_class_subset_p (class, GENERAL_REGS))
14616 return GENERAL_REGS;
14617 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14618 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14619 return NO_REGS;
14621 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14622 return NO_REGS;
14623 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14624 return Q_REGS;
14625 return class;
14628 /* If we are copying between general and FP registers, we need a memory
14629 location. The same is true for SSE and MMX registers.
14631 The macro can't work reliably when one of the CLASSES is a class containing
14632 registers from multiple units (SSE, MMX, integer). We avoid this by never
14633 combining those units in a single alternative in the machine description.
14634 Ensure that this constraint holds to avoid unexpected surprises.
14636 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14637 enforce these sanity checks. */
14638 int
14639 ix86_secondary_memory_needed (class1, class2, mode, strict)
14640 enum reg_class class1, class2;
14641 enum machine_mode mode;
14642 int strict;
14644 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14645 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14646 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14647 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14648 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14649 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14651 if (strict)
14652 abort ();
14653 else
14654 return 1;
14656 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14657 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14658 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14659 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14660 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14662 /* Return the cost of moving data from a register in class CLASS1 to
14663 one in class CLASS2.
14665 It is not required that the cost always equal 2 when FROM is the same as TO;
14666 on some machines it is expensive to move between registers if they are not
14667 general registers. */
14668 int
14669 ix86_register_move_cost (mode, class1, class2)
14670 enum machine_mode mode;
14671 enum reg_class class1, class2;
14673 /* In case we require secondary memory, compute the cost of the store
14674 followed by the load. In order to avoid bad register allocation choices,
14675 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14677 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14679 int cost = 1;
14681 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14682 MEMORY_MOVE_COST (mode, class1, 1));
14683 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14684 MEMORY_MOVE_COST (mode, class2, 1));
14686 /* In case of copying from a general purpose register we may emit multiple
14687 stores followed by a single load, causing a memory size mismatch stall.
14688 Count this as an arbitrarily high cost of 20. */
14689 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14690 cost += 20;
14692 /* In the case of FP/MMX moves, the registers actually overlap, and we
14693 have to switch modes in order to treat them differently. */
14694 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14695 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14696 cost += 20;
14698 return cost;
14701 /* Moves between the SSE/MMX and integer units are expensive. */
14702 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14703 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14704 return ix86_cost->mmxsse_to_integer;
14705 if (MAYBE_FLOAT_CLASS_P (class1))
14706 return ix86_cost->fp_move;
14707 if (MAYBE_SSE_CLASS_P (class1))
14708 return ix86_cost->sse_move;
14709 if (MAYBE_MMX_CLASS_P (class1))
14710 return ix86_cost->mmx_move;
14711 return 2;
14714 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14715 int
14716 ix86_hard_regno_mode_ok (regno, mode)
14717 int regno;
14718 enum machine_mode mode;
14720 /* Flags, and only flags, can hold CCmode values. */
14721 if (CC_REGNO_P (regno))
14722 return GET_MODE_CLASS (mode) == MODE_CC;
14723 if (GET_MODE_CLASS (mode) == MODE_CC
14724 || GET_MODE_CLASS (mode) == MODE_RANDOM
14725 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14726 return 0;
14727 if (FP_REGNO_P (regno))
14728 return VALID_FP_MODE_P (mode);
14729 if (SSE_REGNO_P (regno))
14730 return VALID_SSE_REG_MODE (mode);
14731 if (MMX_REGNO_P (regno))
14732 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14733 /* We handle both integer and float values in the general purpose registers.
14734 In the future we should be able to handle vector modes as well. */
14735 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14736 return 0;
14737 /* Take care with QImode values - they can live in non-QI regs, but then
14738 they cause partial register stalls. */
14739 if (regno < 4 || mode != QImode || TARGET_64BIT)
14740 return 1;
14741 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
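/* Illustrative: in 32-bit mode only the four registers with low byte
   subregs (%al, %bl, %cl, %dl; regno < 4) have true QImode variants, so
   QImode is allowed elsewhere only when partial register stalls are not
   a concern, or when reload is already fixing up placements.  */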

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in non
   Q_REGS classes.  */
int
ix86_memory_move_cost (mode, class, in)
     enum machine_mode mode;
     enum reg_class class;
     int in;
{
  if (FLOAT_CLASS_P (class))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
        case TFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
        return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
                : ix86_cost->movzbl_load);
      else
        return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
                : ix86_cost->int_store[0] + 4);
      break;
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute the number of 32-bit moves needed.  TFmode is moved
         as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
              * ((int) GET_MODE_SIZE (mode)
                 + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
}
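
/* Worked example (illustrative): loading DImode into a general register
   in 32-bit mode falls through to the default case; with UNITS_PER_WORD
   == 4 the cost is ix86_cost->int_load[2] * (8 + 4 - 1) / 4, i.e. two
   32-bit loads' worth: int_load[2] * 2.  */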

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (x, code, outer_code, total)
     rtx x;
     int code, outer_code;
     int *total;
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        *total = 0;
      else
        switch (standard_80387_constant_p (x))
          {
          case 1: /* 0.0 */
            *total = 1;
            break;
          default: /* Other constants */
            *total = 2;
            break;
          case 0:
          case -1:
            /* Start with (MEM (SYMBOL_REF)), since that's where
               it'll probably end up.  Add a penalty for size.  */
            *total = (COSTS_N_INSNS (1)
                      + (flag_pic != 0)
                      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
            break;
          }
      return true;

    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = COSTS_N_INSNS (ix86_cost->add);
      else
        *total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = COSTS_N_INSNS (ix86_cost->add);
              return false;
            }
          if ((value == 2 || value == 3)
              && !TARGET_DECOMPOSE_LEA
              && ix86_cost->lea <= ix86_cost->shift_const)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
              else
                *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
            }
        }
      else
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            *total = COSTS_N_INSNS (ix86_cost->shift_const);
          else
            *total = COSTS_N_INSNS (ix86_cost->shift_var);
        }
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          int nbits;

          for (nbits = 0; value != 0; value >>= 1)
            nbits++;

          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + nbits * ix86_cost->mult_bit);
        }
      else
        {
          /* This is arbitrary.  */
          *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
                                  + 7 * ix86_cost->mult_bit);
        }
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
        *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
               && GET_MODE_CLASS (mode) == MODE_INT
               && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = COSTS_N_INSNS (ix86_cost->lea);
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
                  *total += rtx_cost (XEXP (x, 1), outer_code);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = COSTS_N_INSNS (ix86_cost->lea);
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
              *total += rtx_cost (XEXP (x, 1), outer_code);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fadd);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
        {
          *total = (COSTS_N_INSNS (ix86_cost->add) * 2
                    + (rtx_cost (XEXP (x, 0), outer_code)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
        {
          *total = COSTS_N_INSNS (ix86_cost->fchs);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
        *total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
        *total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
        *total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
        *total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    default:
      return false;
    }
}
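
/* Worked example (illustrative): for (mult:SI (reg:SI r) (const_int 5)),
   the loop in the MULT case counts nbits = 3 (bits up to and including
   the highest set bit of 5), so the cost is
   COSTS_N_INSNS (mult_init[2] + 3 * mult_bit) with the values taken from
   the active cost table.  */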

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\tpushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
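
/* Illustrative output sketch (non-MACHOPIC_PURE case), pieced together
   directly from the fprintf calls above for a hypothetical symbol "_foo";
   the stub and binder labels really come from GEN_BINDER_NAME_FOR_STUB
   and GEN_SYMBOL_NAME_FOR_SYMBOL, so the names here are only a guess:

       L_foo$stub:
               .indirect_symbol _foo
               jmp *L1$lz
       L_foo$stub_binder:
               pushl $L1$lz
               jmp dyld_stub_binding_helper
       L1$lz:
               .indirect_symbol _foo
               .long L_foo$stub_binder
*/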

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array, as we do not allocate some
     registers at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
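
/* Resulting order, roughly (illustrative): call-clobbered general
   registers first, then call-saved general registers, then x87 or SSE
   registers depending on TARGET_SSE_MATH, with MMX registers last.
   Preferring call-used registers keeps leaf functions from having to
   save and restore callee-saved registers.  */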

#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
        type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
                 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
            && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
           || ((is_attribute_p ("gcc_struct", name)
                && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
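
/* Usage sketch (illustrative, not compiled): the attribute spellings the
   handler above accepts.  Mixing both on one type triggers the
   "incompatible attribute ignored" warning.  */
#if 0
struct __attribute__ ((ms_struct)) example_ms { int a : 3; char b; };
struct __attribute__ ((gcc_struct)) example_gcc { int a : 3; char b; };
#endif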

static bool
ix86_ms_bitfield_layout_p (record_type)
     tree record_type;
{
  return ((TARGET_USE_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

/* Return an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (function)
     tree function;
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type)) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_fntype_regparm (type) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
         arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
        if (TREE_VALUE (parm) == void_type_node)
          break;
      /* If the argument list ends in void, the number of arguments is
         fixed and the this parameter is passed in %eax.  */
      if (parm)
        return gen_rtx_REG (SImode, 0);
    }

  if (aggregate_value_p (TREE_TYPE (type)))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
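
/* Illustrative summary: for a 32-bit regparm function with a fixed
   argument list, this lands in %eax (hard register 0); otherwise it
   lives on the stack at 4(%esp), or at 8(%esp) when a hidden
   aggregate-return pointer occupies the first slot.  */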

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file ATTRIBUTE_UNUSED;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
        {
          if (!x86_64_general_operand (xops[0], DImode))
            {
              tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
              xops[1] = tmp;
              output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
              xops[0] = tmp;
              xops[1] = this;
            }
          output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
        }
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = XEXP (xops[0], 0);
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
          {
            tmp = gen_rtx_REG (SImode, 2 /* ECX */);
            output_set_got (tmp);

            xops[1] = tmp;
            output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
            output_asm_insn ("jmp\t{*}%1", xops);
          }
    }
}
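
/* Illustrative emitted code (32-bit, non-PIC, this on the stack, small
   DELTA, no VCALL_OFFSET) - only a sketch, since the real output depends
   on the operands computed above:

       addl    $delta, 4(%esp)
       jmp     target_function
*/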

int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
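
/* Example (illustrative): without -malign-double on ia32, a double (or
   long long) struct field is capped at 32-bit alignment, so in

       struct s { char c; double d; };

   d is placed at offset 4 rather than 8, matching the traditional
   System V ia32 ABI layout.  */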

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (file, labelno)
     FILE *file;
     int labelno ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    {
      if (flag_pic)
        {
#ifndef NO_PROFILE_COUNTERS
          fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif
          fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
        }
      else
        {
#ifndef NO_PROFILE_COUNTERS
          fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
          fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
        }
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
               LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
               PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
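
/* Illustrative output (32-bit, non-PIC, counters enabled), pieced
   together from the fprintf calls above, assuming LPREFIX ".L",
   PROFILE_COUNT_REGISTER "edx" and MCOUNT_NAME "mcount" - all three are
   target-defined and vary by OS:

       movl    $.LP0, %edx
       call    mcount
*/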

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by inserting
   a NOP just before the RET instructions in such cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
        continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
        if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
          break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index >= 0
                && !(e->flags & EDGE_FALLTHRU))
              insert = 1;
        }
      if (!insert)
        {
          prev = prev_active_insn (ret);
          if (prev && GET_CODE (prev) == JUMP_INSN
              && any_condjump_p (prev))
            insert = 1;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
          if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
            insert = 1;
        }
      if (insert)
        emit_insn_before (gen_nop (), ret);
    }
}
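
/* Effect (illustrative): a hot epilogue such as

       jne     .L5
       ret

   becomes

       jne     .L5
       nop
       ret

   so the RET no longer immediately follows a conditional branch,
   avoiding the Athlon/K8 return penalty described above.  */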

/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (p, data)
     rtx *p;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
bool
x86_extended_reg_mentioned_p (insn)
     rtx insn;
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
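
/* Example (illustrative): "addl %r8d, %eax" mentions %r8d, one of the
   REX-only registers (r8-r15, xmm8-xmm15), so
   x86_extended_reg_mentioned_p returns true for that insn, while
   "addl %ecx, %eax" yields false.  */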

/* Generate an unsigned DImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (operands)
     rtx operands[2];
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode;

  out = operands[0];
  in = force_reg (DImode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
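
/* The trick above, numerically (illustrative): when the DImode value has
   its top bit set, a signed FLOAT would misread it, so we compute
   (x >> 1) | (x & 1) - halving while folding the low bit back in so
   rounding stays correct - convert that as a signed value, and double
   the result.  E.g. x = 2^63 converts via 2^62, and 2 * 2^62 = 2^63.  */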

#include "gt-i386.h"