gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
52 #endif
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
60 : 4)
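/* Usage sketch (illustrative only; the field name is an assumption about
   struct processor_costs in i386.h): the five-element multiply and divide
   arrays in the cost tables below are indexed with this macro, e.g.

       cost = ix86_cost->mult_init[MODE_INDEX (SImode)];   selects slot 2

   QImode..DImode map to slots 0..3 and every other mode shares slot 4.  */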
62 /* Processor costs (relative to an add) */
63 static const
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
74 0, /* "large" insn */
75 2, /* MOVE_RATIO */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
98 1, /* Branch cost */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
105 };
107 /* Processor costs (relative to an add) */
108 static const
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
120 3, /* MOVE_RATIO */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
143 1, /* Branch cost */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
150 };
152 static const
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
164 3, /* MOVE_RATIO */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
187 1, /* Branch cost */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
194 };
196 static const
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
208 6, /* MOVE_RATIO */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
231 2, /* Branch cost */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
238 };
240 static const
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
252 6, /* MOVE_RATIO */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
275 2, /* Branch cost */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
282 };
284 static const
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
296 4, /* MOVE_RATIO */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
319 1, /* Branch cost */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
326 };
328 static const
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
340 9, /* MOVE_RATIO */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
363 2, /* Branch cost */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
370 };
372 static const
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
384 9, /* MOVE_RATIO */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
407 2, /* Branch cost */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
414 };
416 static const
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
428 6, /* MOVE_RATIO */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
451 2, /* Branch cost */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
458 };
460 const struct processor_costs *ix86_cost = &pentium_cost;
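/* Orientation note: this pointer is re-targeted by override_options below;
   it becomes &size_cost when optimizing for size and otherwise
   processor_target_table[ix86_tune].cost, so a cost query is simply a field
   read such as (a sketch only; field names follow the initializer comments
   above, and COSTS_N_INSNS is the usual rtl cost convention):

       *total = COSTS_N_INSNS (ix86_cost->add);
*/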
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
527 /* Some CPU cores are not able to predict more than 4 branch instructions in
528 the 16 byte window. */
529 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;
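/* Orientation note: each bitmask above is a set of processors, with bit N
   standing for (1 << PROCESSOR_xxx).  A heuristic is enabled by masking the
   flag with the bit of the CPU being tuned for (or targeted), exactly as
   override_options does further down, e.g.

       if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
         ...

   i386.h is assumed to wrap the common tests in TARGET_* convenience macros.  */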
531 /* In case the average insn count for single function invocation is
532 lower than this constant, emit fast (but longer) prologue and
533 epilogue code. */
534 #define FAST_PROLOGUE_INSN_COUNT 20
536 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
537 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
538 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
539 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
541 /* Array of the smallest class containing reg number REGNO, indexed by
542 REGNO. Used by REGNO_REG_CLASS in i386.h. */
544 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
545 {
546 /* ax, dx, cx, bx */
547 AREG, DREG, CREG, BREG,
548 /* si, di, bp, sp */
549 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
550 /* FP registers */
551 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
552 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
553 /* arg pointer */
554 NON_Q_REGS,
555 /* flags, fpsr, dirflag, frame */
556 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
557 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
558 SSE_REGS, SSE_REGS,
559 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
560 MMX_REGS, MMX_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 SSE_REGS, SSE_REGS,
565 };
567 /* The "default" register map used in 32bit mode. */
569 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
570 {
571 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
572 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
573 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
574 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
575 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
578 };
580 static int const x86_64_int_parameter_registers[6] =
581 {
582 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
583 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
584 };
586 static int const x86_64_int_return_registers[4] =
587 {
588 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
589 };
591 /* The "default" register map used in 64bit mode. */
592 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
593 {
594 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
595 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
596 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
597 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
598 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
599 8,9,10,11,12,13,14,15, /* extended integer registers */
600 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
601 };
603 /* Define the register numbers to be used in Dwarf debugging information.
604 The SVR4 reference port C compiler uses the following register numbers
605 in its Dwarf output code:
606 0 for %eax (gcc regno = 0)
607 1 for %ecx (gcc regno = 2)
608 2 for %edx (gcc regno = 1)
609 3 for %ebx (gcc regno = 3)
610 4 for %esp (gcc regno = 7)
611 5 for %ebp (gcc regno = 6)
612 6 for %esi (gcc regno = 4)
613 7 for %edi (gcc regno = 5)
614 The following three DWARF register numbers are never generated by
615 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
616 believes these numbers have these meanings.
617 8 for %eip (no gcc equivalent)
618 9 for %eflags (gcc regno = 17)
619 10 for %trapno (no gcc equivalent)
620 It is not at all clear how we should number the FP stack registers
621 for the x86 architecture. If the version of SDB on x86/svr4 were
622 a bit less brain dead with respect to floating-point then we would
623 have a precedent to follow with respect to DWARF register numbers
624 for x86 FP registers, but the SDB on x86/svr4 is so completely
625 broken with respect to FP registers that it is hardly worth thinking
626 of it as something to strive for compatibility with.
627 The version of x86/svr4 SDB I have at the moment does (partially)
628 seem to believe that DWARF register number 11 is associated with
629 the x86 register %st(0), but that's about all. Higher DWARF
630 register numbers don't seem to be associated with anything in
631 particular, and even for DWARF regno 11, SDB only seems to under-
632 stand that it should say that a variable lives in %st(0) (when
633 asked via an `=' command) if we said it was in DWARF regno 11,
634 but SDB still prints garbage when asked for the value of the
635 variable in question (via a `/' command).
636 (Also note that the labels SDB prints for various FP stack regs
637 when doing an `x' command are all wrong.)
638 Note that these problems generally don't affect the native SVR4
639 C compiler because it doesn't allow the use of -O with -g and
640 because when it is *not* optimizing, it allocates a memory
641 location for each floating-point variable, and the memory
642 location is what gets described in the DWARF AT_location
643 attribute for the variable in question.
644 Regardless of the severe mental illness of the x86/svr4 SDB, we
645 do something sensible here and we use the following DWARF
646 register numbers. Note that these are all stack-top-relative
647 numbers.
648 11 for %st(0) (gcc regno = 8)
649 12 for %st(1) (gcc regno = 9)
650 13 for %st(2) (gcc regno = 10)
651 14 for %st(3) (gcc regno = 11)
652 15 for %st(4) (gcc regno = 12)
653 16 for %st(5) (gcc regno = 13)
654 17 for %st(6) (gcc regno = 14)
655 18 for %st(7) (gcc regno = 15)
656 */
657 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
658 {
659 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
660 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
661 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
662 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
663 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
666 };
668 /* Test and compare insns in i386.md store the information needed to
669 generate branch and scc insns here. */
671 rtx ix86_compare_op0 = NULL_RTX;
672 rtx ix86_compare_op1 = NULL_RTX;
674 #define MAX_386_STACK_LOCALS 3
675 /* Size of the register save area. */
676 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
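/* Worked example (assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8 from i386.h): the register
   save area is 6*8 + 8*16 = 176 bytes, the size the x86-64 psABI reserves
   for spilling the integer and SSE argument registers in varargs functions.  */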
678 /* Define the structure for the machine field in struct function. */
680 struct stack_local_entry GTY(())
681 {
682 unsigned short mode;
683 unsigned short n;
684 rtx rtl;
685 struct stack_local_entry *next;
686 };
688 /* Structure describing stack frame layout.
689 Stack grows downward:
691 [arguments]
692 <- ARG_POINTER
693 saved pc
695 saved frame pointer if frame_pointer_needed
696 <- HARD_FRAME_POINTER
697 [saved regs]
699 [padding1] \
701 [va_arg registers] (
702 > to_allocate <- FRAME_POINTER
703 [frame] (
705 [padding2] /
706 */
707 struct ix86_frame
708 {
709 int nregs;
710 int padding1;
711 int va_arg_size;
712 HOST_WIDE_INT frame;
713 int padding2;
714 int outgoing_arguments_size;
715 int red_zone_size;
717 HOST_WIDE_INT to_allocate;
718 /* The offsets relative to ARG_POINTER. */
719 HOST_WIDE_INT frame_pointer_offset;
720 HOST_WIDE_INT hard_frame_pointer_offset;
721 HOST_WIDE_INT stack_pointer_offset;
723 /* When save_regs_using_mov is set, emit prologue using
724 move instead of push instructions. */
725 bool save_regs_using_mov;
726 };
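/* Orientation sketch (not derived from ix86_compute_frame_layout itself):
   the three *_offset fields are byte distances measured downward from
   ARG_POINTER, so they satisfy
   hard_frame_pointer_offset <= frame_pointer_offset <= stack_pointer_offset;
   in 32-bit code with a frame pointer the first is typically 8 bytes
   (saved return address plus saved %ebp).  */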
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
732 /* Parsed value. */
733 enum cmodel ix86_cmodel;
734 /* Asm dialect. */
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
737 /* TLS dialect. */
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which CPU we are scheduling for. */
745 enum processor_type ix86_tune;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_tune_string; /* for -mtune=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* True if the SSE prefetch instruction is not a NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
761 int ix86_regparm;
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand (rtx, enum machine_mode);
789 static int tls_symbolic_operand_1 (rtx, enum tls_model);
790 static void output_pic_addr_const (FILE *, rtx, int);
791 static void put_condition_code (enum rtx_code, enum machine_mode,
792 int, int, FILE *);
793 static const char *get_some_local_dynamic_name (void);
794 static int get_some_local_dynamic_name_1 (rtx *, void *);
795 static rtx maybe_get_pool_constant (rtx);
796 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
797 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
798 rtx *);
799 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
800 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
801 enum machine_mode);
802 static rtx get_thread_pointer (int);
803 static rtx legitimize_tls_address (rtx, enum tls_model, int);
804 static void get_pc_thunk_name (char [32], unsigned int);
805 static rtx gen_push (rtx);
806 static int memory_address_length (rtx addr);
807 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
808 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
809 static struct machine_function * ix86_init_machine_status (void);
810 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
811 static int ix86_nsaved_regs (void);
812 static void ix86_emit_save_regs (void);
813 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
814 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
815 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
816 static HOST_WIDE_INT ix86_GOT_alias_set (void);
817 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
818 static rtx ix86_expand_aligntest (rtx, int);
819 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
820 static int ix86_issue_rate (void);
821 static int ix86_adjust_cost (rtx, rtx, rtx, int);
822 static int ia32_use_dfa_pipeline_interface (void);
823 static int ia32_multipass_dfa_lookahead (void);
824 static void ix86_init_mmx_sse_builtins (void);
825 static rtx x86_this_parameter (tree);
826 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
827 HOST_WIDE_INT, tree);
828 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
829 static void x86_file_start (void);
830 static void ix86_reorg (void);
831 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
832 static tree ix86_build_builtin_va_list (void);
833 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
834 tree, int *, int);
836 struct ix86_address
837 {
838 rtx base, index, disp;
839 HOST_WIDE_INT scale;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
841 };
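/* Illustrative example: ix86_decompose_address fills this structure from an
   address of the general shape base + index*scale + disp (plus an optional
   segment).  A sketch of the expected decomposition of
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12)):
   base = %ebx, index = %eax, scale = 4, disp = const_int 12, seg = SEG_DEFAULT.  */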
843 static int ix86_decompose_address (rtx, struct ix86_address *);
844 static int ix86_address_cost (rtx);
845 static bool ix86_cannot_force_const_mem (rtx);
846 static rtx ix86_delegitimize_address (rtx);
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi (const struct builtin_description *,
850 tree, rtx);
851 static rtx ix86_expand_sse_compare (const struct builtin_description *,
852 tree, rtx);
853 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
854 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
855 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_store_builtin (enum insn_code, tree);
857 static rtx safe_vector_operand (rtx, enum machine_mode);
858 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
859 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
860 enum rtx_code *, enum rtx_code *);
861 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
862 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
863 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
864 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
865 static int ix86_fp_comparison_cost (enum rtx_code code);
866 static unsigned int ix86_select_alt_pic_regnum (void);
867 static int ix86_save_reg (unsigned int, int);
868 static void ix86_compute_frame_layout (struct ix86_frame *);
869 static int ix86_comp_type_attributes (tree, tree);
870 static int ix86_function_regparm (tree, tree);
871 const struct attribute_spec ix86_attribute_table[];
872 static bool ix86_function_ok_for_sibcall (tree, tree);
873 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
874 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
875 static int ix86_value_regno (enum machine_mode);
876 static bool contains_128bit_aligned_vector_p (tree);
877 static bool ix86_ms_bitfield_layout_p (tree);
878 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
879 static int extended_reg_mentioned_1 (rtx *, void *);
880 static bool ix86_rtx_costs (rtx, int, int, int *);
881 static int min_insn_size (rtx);
882 static tree ix86_md_asm_clobbers (tree clobbers);
884 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
885 static void ix86_svr3_asm_out_constructor (rtx, int);
886 #endif
888 /* Register class used for passing a given 64-bit part of an argument.
889 These represent classes as documented by the psABI, with the exception
890 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
891 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
893 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
894 whenever possible (the upper half does contain padding).
895 */
896 enum x86_64_reg_class
897 {
898 X86_64_NO_CLASS,
899 X86_64_INTEGER_CLASS,
900 X86_64_INTEGERSI_CLASS,
901 X86_64_SSE_CLASS,
902 X86_64_SSESF_CLASS,
903 X86_64_SSEDF_CLASS,
904 X86_64_SSEUP_CLASS,
905 X86_64_X87_CLASS,
906 X86_64_X87UP_CLASS,
907 X86_64_MEMORY_CLASS
908 };
909 static const char * const x86_64_reg_class_name[] =
910 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
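/* Illustrative example (a rough psABI sketch, not output of classify_argument):
   for a 16-byte aggregate such as struct { double d; int i; }, the first
   eightbyte (the double) classifies as X86_64_SSEDF_CLASS and the second
   (the int plus padding) as X86_64_INTEGERSI_CLASS, which is exactly where
   the cheaper DFmode and SImode moves mentioned above pay off.  */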
912 #define MAX_CLASSES 4
913 static int classify_argument (enum machine_mode, tree,
914 enum x86_64_reg_class [MAX_CLASSES], int);
915 static int examine_argument (enum machine_mode, tree, int, int *, int *);
916 static rtx construct_container (enum machine_mode, tree, int, int, int,
917 const int *, int);
918 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
919 enum x86_64_reg_class);
921 /* Table of constants used by fldpi, fldln2, etc.... */
922 static REAL_VALUE_TYPE ext_80387_constants_table [5];
923 static bool ext_80387_constants_init = 0;
924 static void init_ext_80387_constants (void);
926 /* Initialize the GCC target structure. */
927 #undef TARGET_ATTRIBUTE_TABLE
928 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
929 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
930 # undef TARGET_MERGE_DECL_ATTRIBUTES
931 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
932 #endif
934 #undef TARGET_COMP_TYPE_ATTRIBUTES
935 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
937 #undef TARGET_INIT_BUILTINS
938 #define TARGET_INIT_BUILTINS ix86_init_builtins
940 #undef TARGET_EXPAND_BUILTIN
941 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
943 #undef TARGET_ASM_FUNCTION_EPILOGUE
944 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
946 #undef TARGET_ASM_OPEN_PAREN
947 #define TARGET_ASM_OPEN_PAREN ""
948 #undef TARGET_ASM_CLOSE_PAREN
949 #define TARGET_ASM_CLOSE_PAREN ""
951 #undef TARGET_ASM_ALIGNED_HI_OP
952 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
953 #undef TARGET_ASM_ALIGNED_SI_OP
954 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #ifdef ASM_QUAD
956 #undef TARGET_ASM_ALIGNED_DI_OP
957 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
958 #endif
960 #undef TARGET_ASM_UNALIGNED_HI_OP
961 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
962 #undef TARGET_ASM_UNALIGNED_SI_OP
963 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
964 #undef TARGET_ASM_UNALIGNED_DI_OP
965 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
967 #undef TARGET_SCHED_ADJUST_COST
968 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
969 #undef TARGET_SCHED_ISSUE_RATE
970 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
971 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
972 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
973 ia32_use_dfa_pipeline_interface
974 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
975 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
976 ia32_multipass_dfa_lookahead
978 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
979 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
981 #ifdef HAVE_AS_TLS
982 #undef TARGET_HAVE_TLS
983 #define TARGET_HAVE_TLS true
984 #endif
985 #undef TARGET_CANNOT_FORCE_CONST_MEM
986 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
988 #undef TARGET_DELEGITIMIZE_ADDRESS
989 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
991 #undef TARGET_MS_BITFIELD_LAYOUT_P
992 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
994 #undef TARGET_ASM_OUTPUT_MI_THUNK
995 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
996 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
997 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
999 #undef TARGET_ASM_FILE_START
1000 #define TARGET_ASM_FILE_START x86_file_start
1002 #undef TARGET_RTX_COSTS
1003 #define TARGET_RTX_COSTS ix86_rtx_costs
1004 #undef TARGET_ADDRESS_COST
1005 #define TARGET_ADDRESS_COST ix86_address_cost
1007 #undef TARGET_FIXED_CONDITION_CODE_REGS
1008 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1009 #undef TARGET_CC_MODES_COMPATIBLE
1010 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 #undef TARGET_BUILD_BUILTIN_VA_LIST
1016 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1018 #undef TARGET_MD_ASM_CLOBBERS
1019 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1021 #undef TARGET_PROMOTE_PROTOTYPES
1022 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1024 #undef TARGET_SETUP_INCOMING_VARARGS
1025 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1027 struct gcc_target targetm = TARGET_INITIALIZER;
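/* Orientation note: each #undef/#define pair above overrides one hook slot
   of TARGET_INITIALIZER, so the targetm structure built here is how generic
   code reaches the ix86_* and x86_* implementations in this file (costs,
   scheduling, attribute handling, thunk output, and so on).  */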
1029 /* The svr4 ABI for the i386 says that records and unions are returned
1030 in memory. */
1031 #ifndef DEFAULT_PCC_STRUCT_RETURN
1032 #define DEFAULT_PCC_STRUCT_RETURN 1
1033 #endif
1035 /* Sometimes certain combinations of command options do not make
1036 sense on a particular target machine. You can define a macro
1037 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1038 defined, is executed once just after all the command options have
1039 been parsed.
1041 Don't use this macro to turn on various extra optimizations for
1042 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1044 void
1045 override_options (void)
1047 int i;
1048 /* Comes from final.c -- no real reason to change it. */
1049 #define MAX_CODE_ALIGN 16
1051 static struct ptt
1053 const struct processor_costs *cost; /* Processor costs */
1054 const int target_enable; /* Target flags to enable. */
1055 const int target_disable; /* Target flags to disable. */
1056 const int align_loop; /* Default alignments. */
1057 const int align_loop_max_skip;
1058 const int align_jump;
1059 const int align_jump_max_skip;
1060 const int align_func;
1062 const processor_target_table[PROCESSOR_max] =
1064 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1065 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1066 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1067 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1068 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1069 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1070 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1071 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1074 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1075 static struct pta
1077 const char *const name; /* processor name or nickname. */
1078 const enum processor_type processor;
1079 const enum pta_flags
1081 PTA_SSE = 1,
1082 PTA_SSE2 = 2,
1083 PTA_SSE3 = 4,
1084 PTA_MMX = 8,
1085 PTA_PREFETCH_SSE = 16,
1086 PTA_3DNOW = 32,
1087 PTA_3DNOW_A = 64,
1088 PTA_64BIT = 128
1089 } flags;
1091 const processor_alias_table[] =
1093 {"i386", PROCESSOR_I386, 0},
1094 {"i486", PROCESSOR_I486, 0},
1095 {"i586", PROCESSOR_PENTIUM, 0},
1096 {"pentium", PROCESSOR_PENTIUM, 0},
1097 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1098 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1099 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1100 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1101 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1102 {"i686", PROCESSOR_PENTIUMPRO, 0},
1103 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1104 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1105 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1106 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1107 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1108 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1109 | PTA_MMX | PTA_PREFETCH_SSE},
1110 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1111 | PTA_MMX | PTA_PREFETCH_SSE},
1112 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1113 | PTA_MMX | PTA_PREFETCH_SSE},
1114 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1115 | PTA_MMX | PTA_PREFETCH_SSE},
1116 {"k6", PROCESSOR_K6, PTA_MMX},
1117 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1118 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1119 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1120 | PTA_3DNOW_A},
1121 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1122 | PTA_3DNOW | PTA_3DNOW_A},
1123 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1124 | PTA_3DNOW_A | PTA_SSE},
1125 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1126 | PTA_3DNOW_A | PTA_SSE},
1127 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1128 | PTA_3DNOW_A | PTA_SSE},
1129 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1130 | PTA_SSE | PTA_SSE2 },
1131 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1132 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1133 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1134 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1135 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1136 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1137 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1138 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1141 int const pta_size = ARRAY_SIZE (processor_alias_table);
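/* Illustrative walk-through: the loops below look up both -march= and -mtune=
   in this table.  For example, -march=athlon-xp selects PROCESSOR_ATHLON and,
   unless the corresponding option was given explicitly on the command line,
   ORs MASK_MMX, MASK_3DNOW, MASK_3DNOW_A and MASK_SSE into target_flags and
   sets x86_prefetch_sse.  */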
1143 /* Set the default values for switches whose default depends on TARGET_64BIT
1144 in case they weren't overwritten by command line options. */
1145 if (TARGET_64BIT)
1147 if (flag_omit_frame_pointer == 2)
1148 flag_omit_frame_pointer = 1;
1149 if (flag_asynchronous_unwind_tables == 2)
1150 flag_asynchronous_unwind_tables = 1;
1151 if (flag_pcc_struct_return == 2)
1152 flag_pcc_struct_return = 0;
1154 else
1156 if (flag_omit_frame_pointer == 2)
1157 flag_omit_frame_pointer = 0;
1158 if (flag_asynchronous_unwind_tables == 2)
1159 flag_asynchronous_unwind_tables = 0;
1160 if (flag_pcc_struct_return == 2)
1161 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1164 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1165 SUBTARGET_OVERRIDE_OPTIONS;
1166 #endif
1168 if (!ix86_tune_string && ix86_arch_string)
1169 ix86_tune_string = ix86_arch_string;
1170 if (!ix86_tune_string)
1171 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1172 if (!ix86_arch_string)
1173 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1175 if (ix86_cmodel_string != 0)
1177 if (!strcmp (ix86_cmodel_string, "small"))
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1179 else if (flag_pic)
1180 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1181 else if (!strcmp (ix86_cmodel_string, "32"))
1182 ix86_cmodel = CM_32;
1183 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1184 ix86_cmodel = CM_KERNEL;
1185 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1186 ix86_cmodel = CM_MEDIUM;
1187 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1188 ix86_cmodel = CM_LARGE;
1189 else
1190 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1192 else
1194 ix86_cmodel = CM_32;
1195 if (TARGET_64BIT)
1196 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1198 if (ix86_asm_string != 0)
1200 if (!strcmp (ix86_asm_string, "intel"))
1201 ix86_asm_dialect = ASM_INTEL;
1202 else if (!strcmp (ix86_asm_string, "att"))
1203 ix86_asm_dialect = ASM_ATT;
1204 else
1205 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1207 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1208 error ("code model `%s' not supported in the %s bit mode",
1209 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1210 if (ix86_cmodel == CM_LARGE)
1211 sorry ("code model `large' not supported yet");
1212 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1213 sorry ("%i-bit mode not compiled in",
1214 (target_flags & MASK_64BIT) ? 64 : 32);
1216 for (i = 0; i < pta_size; i++)
1217 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1219 ix86_arch = processor_alias_table[i].processor;
1220 /* Default cpu tuning to the architecture. */
1221 ix86_tune = ix86_arch;
1222 if (processor_alias_table[i].flags & PTA_MMX
1223 && !(target_flags_explicit & MASK_MMX))
1224 target_flags |= MASK_MMX;
1225 if (processor_alias_table[i].flags & PTA_3DNOW
1226 && !(target_flags_explicit & MASK_3DNOW))
1227 target_flags |= MASK_3DNOW;
1228 if (processor_alias_table[i].flags & PTA_3DNOW_A
1229 && !(target_flags_explicit & MASK_3DNOW_A))
1230 target_flags |= MASK_3DNOW_A;
1231 if (processor_alias_table[i].flags & PTA_SSE
1232 && !(target_flags_explicit & MASK_SSE))
1233 target_flags |= MASK_SSE;
1234 if (processor_alias_table[i].flags & PTA_SSE2
1235 && !(target_flags_explicit & MASK_SSE2))
1236 target_flags |= MASK_SSE2;
1237 if (processor_alias_table[i].flags & PTA_SSE3
1238 && !(target_flags_explicit & MASK_SSE3))
1239 target_flags |= MASK_SSE3;
1240 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1241 x86_prefetch_sse = true;
1242 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1243 error ("CPU you selected does not support x86-64 instruction set");
1244 break;
1247 if (i == pta_size)
1248 error ("bad value (%s) for -march= switch", ix86_arch_string);
1250 for (i = 0; i < pta_size; i++)
1251 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1253 ix86_tune = processor_alias_table[i].processor;
1254 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1255 error ("CPU you selected does not support x86-64 instruction set");
1256 break;
1258 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1259 x86_prefetch_sse = true;
1260 if (i == pta_size)
1261 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1263 if (optimize_size)
1264 ix86_cost = &size_cost;
1265 else
1266 ix86_cost = processor_target_table[ix86_tune].cost;
1267 target_flags |= processor_target_table[ix86_tune].target_enable;
1268 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1270 /* Arrange to set up i386_stack_locals for all functions. */
1271 init_machine_status = ix86_init_machine_status;
1273 /* Validate -mregparm= value. */
1274 if (ix86_regparm_string)
1276 i = atoi (ix86_regparm_string);
1277 if (i < 0 || i > REGPARM_MAX)
1278 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1279 else
1280 ix86_regparm = i;
1282 else
1283 if (TARGET_64BIT)
1284 ix86_regparm = REGPARM_MAX;
1286 /* If the user has provided any of the -malign-* options,
1287 warn and use that value only if -falign-* is not set.
1288 Remove this code in GCC 3.2 or later. */
1289 if (ix86_align_loops_string)
1291 warning ("-malign-loops is obsolete, use -falign-loops");
1292 if (align_loops == 0)
1294 i = atoi (ix86_align_loops_string);
1295 if (i < 0 || i > MAX_CODE_ALIGN)
1296 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1297 else
1298 align_loops = 1 << i;
1302 if (ix86_align_jumps_string)
1304 warning ("-malign-jumps is obsolete, use -falign-jumps");
1305 if (align_jumps == 0)
1307 i = atoi (ix86_align_jumps_string);
1308 if (i < 0 || i > MAX_CODE_ALIGN)
1309 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1310 else
1311 align_jumps = 1 << i;
1315 if (ix86_align_funcs_string)
1317 warning ("-malign-functions is obsolete, use -falign-functions");
1318 if (align_functions == 0)
1320 i = atoi (ix86_align_funcs_string);
1321 if (i < 0 || i > MAX_CODE_ALIGN)
1322 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1323 else
1324 align_functions = 1 << i;
1328 /* Default align_* from the processor table. */
1329 if (align_loops == 0)
1331 align_loops = processor_target_table[ix86_tune].align_loop;
1332 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1334 if (align_jumps == 0)
1336 align_jumps = processor_target_table[ix86_tune].align_jump;
1337 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1339 if (align_functions == 0)
1341 align_functions = processor_target_table[ix86_tune].align_func;
1344 /* Validate -mpreferred-stack-boundary= value, or provide default.
1345 The default of 128 bits is for Pentium III's SSE __m128, but we
1346 don't want additional code to keep the stack aligned when
1347 optimizing for code size. */
1348 ix86_preferred_stack_boundary = (optimize_size
1349 ? TARGET_64BIT ? 128 : 32
1350 : 128);
1351 if (ix86_preferred_stack_boundary_string)
1353 i = atoi (ix86_preferred_stack_boundary_string);
1354 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1355 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1356 TARGET_64BIT ? 4 : 2);
1357 else
1358 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
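/* Worked example: the option value is a power-of-two exponent in bytes while
   ix86_preferred_stack_boundary is kept in bits (BITS_PER_UNIT == 8), so
   -mpreferred-stack-boundary=4 gives (1 << 4) * 8 = 128 bits, i.e. 16 bytes,
   matching the default above; the 32-bit minimum of 2 corresponds to 32 bits.  */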
1361 /* Validate -mbranch-cost= value, or provide default. */
1362 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1363 if (ix86_branch_cost_string)
1365 i = atoi (ix86_branch_cost_string);
1366 if (i < 0 || i > 5)
1367 error ("-mbranch-cost=%d is not between 0 and 5", i);
1368 else
1369 ix86_branch_cost = i;
1372 if (ix86_tls_dialect_string)
1374 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1375 ix86_tls_dialect = TLS_DIALECT_GNU;
1376 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1377 ix86_tls_dialect = TLS_DIALECT_SUN;
1378 else
1379 error ("bad value (%s) for -mtls-dialect= switch",
1380 ix86_tls_dialect_string);
1383 /* Keep nonleaf frame pointers. */
1384 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1385 flag_omit_frame_pointer = 1;
1387 /* If we're doing fast math, we don't care about comparison order
1388 wrt NaNs. This lets us use a shorter comparison sequence. */
1389 if (flag_unsafe_math_optimizations)
1390 target_flags &= ~MASK_IEEE_FP;
1392 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1393 since the insns won't need emulation. */
1394 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1395 target_flags &= ~MASK_NO_FANCY_MATH_387;
1397 /* Turn on SSE2 builtins for -msse3. */
1398 if (TARGET_SSE3)
1399 target_flags |= MASK_SSE2;
1401 /* Turn on SSE builtins for -msse2. */
1402 if (TARGET_SSE2)
1403 target_flags |= MASK_SSE;
1405 if (TARGET_64BIT)
1407 if (TARGET_ALIGN_DOUBLE)
1408 error ("-malign-double makes no sense in the 64bit mode");
1409 if (TARGET_RTD)
1410 error ("-mrtd calling convention not supported in the 64bit mode");
1411 /* Enable by default the SSE and MMX builtins. */
1412 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1413 ix86_fpmath = FPMATH_SSE;
1415 else
1417 ix86_fpmath = FPMATH_387;
1418 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1419 when the programmer takes care to keep the stack from being destroyed. */
1420 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1421 target_flags |= MASK_NO_RED_ZONE;
1424 if (ix86_fpmath_string != 0)
1426 if (! strcmp (ix86_fpmath_string, "387"))
1427 ix86_fpmath = FPMATH_387;
1428 else if (! strcmp (ix86_fpmath_string, "sse"))
1430 if (!TARGET_SSE)
1432 warning ("SSE instruction set disabled, using 387 arithmetics");
1433 ix86_fpmath = FPMATH_387;
1435 else
1436 ix86_fpmath = FPMATH_SSE;
1438 else if (! strcmp (ix86_fpmath_string, "387,sse")
1439 || ! strcmp (ix86_fpmath_string, "sse,387"))
1441 if (!TARGET_SSE)
1443 warning ("SSE instruction set disabled, using 387 arithmetics");
1444 ix86_fpmath = FPMATH_387;
1446 else if (!TARGET_80387)
1448 warning ("387 instruction set disabled, using SSE arithmetics");
1449 ix86_fpmath = FPMATH_SSE;
1451 else
1452 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1454 else
1455 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1458 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1459 on by -msse. */
1460 if (TARGET_SSE)
1462 target_flags |= MASK_MMX;
1463 x86_prefetch_sse = true;
1466 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1467 if (TARGET_3DNOW)
1469 target_flags |= MASK_MMX;
1470 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1471 extensions it adds. */
1472 if (x86_3dnow_a & (1 << ix86_arch))
1473 target_flags |= MASK_3DNOW_A;
1475 if ((x86_accumulate_outgoing_args & TUNEMASK)
1476 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1477 && !optimize_size)
1478 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1480 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1482 char *p;
1483 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1484 p = strchr (internal_label_prefix, 'X');
1485 internal_label_prefix_len = p - internal_label_prefix;
1486 *p = '\0';
1490 void
1491 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1493 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1494 make the problem with not enough registers even worse. */
1495 #ifdef INSN_SCHEDULING
1496 if (level > 1)
1497 flag_schedule_insns = 0;
1498 #endif
1500 /* The default values of these switches depend on TARGET_64BIT,
1501 which is not known at this moment. Mark these values with 2 and
1502 let the user override them. In case there is no command line option
1503 specifying them, we will set the defaults in override_options. */
1504 if (optimize >= 1)
1505 flag_omit_frame_pointer = 2;
1506 flag_pcc_struct_return = 2;
1507 flag_asynchronous_unwind_tables = 2;
1510 /* Table of valid machine attributes. */
1511 const struct attribute_spec ix86_attribute_table[] =
1512 {
1513 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1514 /* Stdcall attribute says callee is responsible for popping arguments
1515 if they are not variable. */
1516 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1517 /* Fastcall attribute says callee is responsible for popping arguments if
1518 they are not variable; the first two integer arguments go in ECX and EDX. */
1519 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1520 /* Cdecl attribute says the callee is a normal C declaration */
1521 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1522 /* Regparm attribute specifies how many integer arguments are to be
1523 passed in registers. */
1524 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1525 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1526 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1527 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1528 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1529 #endif
1530 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1531 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1532 { NULL, 0, 0, false, false, false, NULL }
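/* Illustrative examples (not part of the original source) of how these
   attributes appear in user code; the resulting calling conventions are
   implemented by the attribute handlers and argument-passing code below:

       int __attribute__((stdcall))    f (int a, int b);        - callee pops 8 bytes
       int __attribute__((fastcall))   g (int a, int b);        - a in ECX, b in EDX
       int __attribute__((regparm(3))) h (int a, int b, int c); - a, b, c in EAX, EDX, ECX  */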
1535 /* Decide whether we can make a sibling call to a function. DECL is the
1536 declaration of the function being targeted by the call and EXP is the
1537 CALL_EXPR representing the call. */
1539 static bool
1540 ix86_function_ok_for_sibcall (tree decl, tree exp)
1542 /* If we are generating position-independent code, we cannot sibcall
1543 optimize any indirect call, or a direct call to a global function,
1544 as the PLT requires %ebx be live. */
1545 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1546 return false;
1548 /* If we are returning floats on the 80387 register stack, we cannot
1549 make a sibcall from a function that doesn't return a float to a
1550 function that does or, conversely, from a function that does return
1551 a float to a function that doesn't; the necessary stack adjustment
1552 would not be executed. */
1553 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1554 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1555 return false;
1557 /* If this call is indirect, we'll need to be able to use a call-clobbered
1558 register for the address of the target function. Make sure that all
1559 such registers are not used for passing parameters. */
1560 if (!decl && !TARGET_64BIT)
1562 tree type;
1564 /* We're looking at the CALL_EXPR, we need the type of the function. */
1565 type = TREE_OPERAND (exp, 0); /* pointer expression */
1566 type = TREE_TYPE (type); /* pointer type */
1567 type = TREE_TYPE (type); /* function type */
1569 if (ix86_function_regparm (type, NULL) >= 3)
1571 /* ??? Need to count the actual number of registers to be used,
1572 not the possible number of registers. Fix later. */
1573 return false;
1577 /* Otherwise okay. That also includes certain types of indirect calls. */
1578 return true;
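/* Example (illustrative, not from the original source): when compiling with
   -fpic on ia32,

       extern int bar (int);
       int foo (int x) { return bar (x); }

   the tail call to the global function bar cannot become a sibcall, since
   the call goes through the PLT and the PLT requires %ebx to be live. */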
1581 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1582 arguments as in struct attribute_spec.handler. */
1583 static tree
1584 ix86_handle_cdecl_attribute (tree *node, tree name,
1585 tree args ATTRIBUTE_UNUSED,
1586 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1588 if (TREE_CODE (*node) != FUNCTION_TYPE
1589 && TREE_CODE (*node) != METHOD_TYPE
1590 && TREE_CODE (*node) != FIELD_DECL
1591 && TREE_CODE (*node) != TYPE_DECL)
1593 warning ("`%s' attribute only applies to functions",
1594 IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1597 else
1599 if (is_attribute_p ("fastcall", name))
1601 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1603 error ("fastcall and stdcall attributes are not compatible");
1605 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1607 error ("fastcall and regparm attributes are not compatible");
1610 else if (is_attribute_p ("stdcall", name))
1612 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1614 error ("fastcall and stdcall attributes are not compatible");
1619 if (TARGET_64BIT)
1621 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1622 *no_add_attrs = true;
1625 return NULL_TREE;
1628 /* Handle a "regparm" attribute;
1629 arguments as in struct attribute_spec.handler. */
1630 static tree
1631 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1632 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1634 if (TREE_CODE (*node) != FUNCTION_TYPE
1635 && TREE_CODE (*node) != METHOD_TYPE
1636 && TREE_CODE (*node) != FIELD_DECL
1637 && TREE_CODE (*node) != TYPE_DECL)
1639 warning ("`%s' attribute only applies to functions",
1640 IDENTIFIER_POINTER (name));
1641 *no_add_attrs = true;
1643 else
1645 tree cst;
1647 cst = TREE_VALUE (args);
1648 if (TREE_CODE (cst) != INTEGER_CST)
1650 warning ("`%s' attribute requires an integer constant argument",
1651 IDENTIFIER_POINTER (name));
1652 *no_add_attrs = true;
1654 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1656 warning ("argument to `%s' attribute larger than %d",
1657 IDENTIFIER_POINTER (name), REGPARM_MAX);
1658 *no_add_attrs = true;
1661 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1663 error ("fastcall and regparm attributes are not compatible");
1667 return NULL_TREE;
1670 /* Return 0 if the attributes for two types are incompatible, 1 if they
1671 are compatible, and 2 if they are nearly compatible (which causes a
1672 warning to be generated). */
1674 static int
1675 ix86_comp_type_attributes (tree type1, tree type2)
1677 /* Check for mismatch of non-default calling convention. */
1678 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1680 if (TREE_CODE (type1) != FUNCTION_TYPE)
1681 return 1;
1683 /* Check for mismatched fastcall types */
1684 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1685 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1686 return 0;
1688 /* Check for mismatched return types (cdecl vs stdcall). */
1689 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1690 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1691 return 0;
1692 if (ix86_function_regparm (type1, NULL)
1693 != ix86_function_regparm (type2, NULL))
1694 return 0;
1695 return 1;
1698 /* Return the regparm value for a function with the indicated TYPE and DECL.
1699 DECL may be NULL when calling the function indirectly
1700 or considering a libcall. */
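/* Illustrative example (an assumption, not from the original comment): on
   ia32 with -funit-at-a-time and no profiling, a purely local function such
   as

       static int use_me (int a, int b, int c) { return a + b + c; }

   can be given regparm 3 by the cgraph-based code below, a fastcall type
   yields 2, and otherwise the -mregparm=/regparm attribute value is used. */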
1702 static int
1703 ix86_function_regparm (tree type, tree decl)
1705 tree attr;
1706 int regparm = ix86_regparm;
1707 bool user_convention = false;
1709 if (!TARGET_64BIT)
1711 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1712 if (attr)
1714 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1715 user_convention = true;
1718 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1720 regparm = 2;
1721 user_convention = true;
1724 /* Use register calling convention for local functions when possible. */
1725 if (!TARGET_64BIT && !user_convention && decl
1726 && flag_unit_at_a_time && !profile_flag)
1728 struct cgraph_local_info *i = cgraph_local_info (decl);
1729 if (i && i->local)
1731 /* We can't use regparm(3) for nested functions as these use
1732 static chain pointer in third argument. */
1733 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1734 regparm = 2;
1735 else
1736 regparm = 3;
1740 return regparm;
1743 /* Return true if EAX is live at the start of the function. Used by
1744 ix86_expand_prologue to determine if we need special help before
1745 calling allocate_stack_worker. */
1747 static bool
1748 ix86_eax_live_at_start_p (void)
1750 /* Cheat. Don't bother working forward from ix86_function_regparm
1751 to the function type to whether an actual argument is located in
1752 eax. Instead just look at cfg info, which is still close enough
1753 to correct at this point. This gives false positives for broken
1754 functions that might use uninitialized data that happens to be
1755 allocated in eax, but who cares? */
1756 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1759 /* Value is the number of bytes of arguments automatically
1760 popped when returning from a subroutine call.
1761 FUNDECL is the declaration node of the function (as a tree),
1762 FUNTYPE is the data type of the function (as a tree),
1763 or for a library call it is an identifier node for the subroutine name.
1764 SIZE is the number of bytes of arguments passed on the stack.
1766 On the 80386, the RTD insn may be used to pop them if the number
1767 of args is fixed, but if the number is variable then the caller
1768 must pop them all. RTD can't be used for library calls now
1769 because the library is compiled with the Unix compiler.
1770 Use of RTD is a selectable option, since it is incompatible with
1771 standard Unix calling sequences. If the option is not selected,
1772 the caller must always pop the args.
1774 The attribute stdcall is equivalent to RTD on a per module basis. */
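/* A small illustration (not taken from the original text): for

       void __attribute__((stdcall)) f (int a, int b);

   this function returns SIZE (8 bytes here), so the callee pops its own
   arguments with "ret $8", while for a cdecl or varargs function it returns
   0 and the caller adjusts the stack after the call. */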
1777 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1779 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1781 /* Cdecl functions override -mrtd, and never pop the stack. */
1782 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1784 /* Stdcall and fastcall functions will pop the stack if not
1785 variable args. */
1786 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1787 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1788 rtd = 1;
1790 if (rtd
1791 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1792 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1793 == void_type_node)))
1794 return size;
1797 /* Lose any fake structure return argument if it is passed on the stack. */
1798 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1799 && !TARGET_64BIT)
1801 int nregs = ix86_function_regparm (funtype, fundecl);
1803 if (!nregs)
1804 return GET_MODE_SIZE (Pmode);
1807 return 0;
1810 /* Argument support functions. */
1812 /* Return true when register may be used to pass function parameters. */
1813 bool
1814 ix86_function_arg_regno_p (int regno)
1816 int i;
1817 if (!TARGET_64BIT)
1818 return (regno < REGPARM_MAX
1819 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1820 if (SSE_REGNO_P (regno) && TARGET_SSE)
1821 return true;
1822 /* RAX is used as hidden argument to va_arg functions. */
1823 if (!regno)
1824 return true;
1825 for (i = 0; i < REGPARM_MAX; i++)
1826 if (regno == x86_64_int_parameter_registers[i])
1827 return true;
1828 return false;
1831 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1832 for a call to a function whose data type is FNTYPE.
1833 For a library call, FNTYPE is 0. */
1835 void
1836 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1837 tree fntype, /* tree ptr for function decl */
1838 rtx libname, /* SYMBOL_REF of library name or 0 */
1839 tree fndecl)
1841 static CUMULATIVE_ARGS zero_cum;
1842 tree param, next_param;
1844 if (TARGET_DEBUG_ARG)
1846 fprintf (stderr, "\ninit_cumulative_args (");
1847 if (fntype)
1848 fprintf (stderr, "fntype code = %s, ret code = %s",
1849 tree_code_name[(int) TREE_CODE (fntype)],
1850 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1851 else
1852 fprintf (stderr, "no fntype");
1854 if (libname)
1855 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1858 *cum = zero_cum;
1860 /* Set up the number of registers to use for passing arguments. */
1861 if (fntype)
1862 cum->nregs = ix86_function_regparm (fntype, fndecl);
1863 else
1864 cum->nregs = ix86_regparm;
1865 cum->sse_nregs = SSE_REGPARM_MAX;
1866 cum->mmx_nregs = MMX_REGPARM_MAX;
1867 cum->warn_sse = true;
1868 cum->warn_mmx = true;
1869 cum->maybe_vaarg = false;
1871 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1872 if (fntype && !TARGET_64BIT)
1874 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1876 cum->nregs = 2;
1877 cum->fastcall = 1;
1882 /* Determine if this function has variable arguments.  This is
1883 indicated by the last argument being 'void_type_node' if there
1884 are no variable arguments.  If there are variable arguments, then
1885 we won't pass anything in registers.  */
1887 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1889 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1890 param != 0; param = next_param)
1892 next_param = TREE_CHAIN (param);
1893 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1895 if (!TARGET_64BIT)
1897 cum->nregs = 0;
1898 cum->sse_nregs = 0;
1899 cum->mmx_nregs = 0;
1900 cum->warn_sse = 0;
1901 cum->warn_mmx = 0;
1902 cum->fastcall = 0;
1904 cum->maybe_vaarg = true;
1908 if ((!fntype && !libname)
1909 || (fntype && !TYPE_ARG_TYPES (fntype)))
1910 cum->maybe_vaarg = 1;
1912 if (TARGET_DEBUG_ARG)
1913 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1915 return;
1918 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1919 The goal of this code is to classify each 8-byte chunk of the incoming
1920 argument by register class and assign registers accordingly. */
1922 /* Return the union class of CLASS1 and CLASS2.
1923 See the x86-64 PS ABI for details. */
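/* For instance (illustrative): merging X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS (rule #4), merging
   X86_64_SSE_CLASS with X86_64_SSEDF_CLASS yields X86_64_SSE_CLASS
   (rule #6), and anything merged with X86_64_MEMORY_CLASS stays MEMORY
   (rule #3). */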
1925 static enum x86_64_reg_class
1926 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1928 /* Rule #1: If both classes are equal, this is the resulting class. */
1929 if (class1 == class2)
1930 return class1;
1932 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1933 the other class. */
1934 if (class1 == X86_64_NO_CLASS)
1935 return class2;
1936 if (class2 == X86_64_NO_CLASS)
1937 return class1;
1939 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1940 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1941 return X86_64_MEMORY_CLASS;
1943 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1944 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1945 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1946 return X86_64_INTEGERSI_CLASS;
1947 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1948 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1949 return X86_64_INTEGER_CLASS;
1951 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1952 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1953 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1954 return X86_64_MEMORY_CLASS;
1956 /* Rule #6: Otherwise class SSE is used. */
1957 return X86_64_SSE_CLASS;
1960 /* Classify the argument of type TYPE and mode MODE.
1961 CLASSES will be filled by the register class used to pass each word
1962 of the operand. The number of words is returned. In case the parameter
1963 should be passed in memory, 0 is returned. As a special case for zero
1964 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1966 BIT_OFFSET is used internally for handling records; it specifies the
1967 offset in bits modulo 256 to avoid overflow cases.
1969 See the x86-64 PS ABI for details.
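/* Worked example (illustrative, not part of the ABI text): for

       struct s { double d; int a; int b; };   16 bytes, i.e. two 8-byte words

   the first word is classified X86_64_SSEDF_CLASS (the double) and the
   second X86_64_INTEGER_CLASS (the two ints merged), so classify_argument
   returns 2 with classes[] = { SSEDF, INTEGER }. */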
1972 static int
1973 classify_argument (enum machine_mode mode, tree type,
1974 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1976 HOST_WIDE_INT bytes =
1977 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1978 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1980 /* Variable sized entities are always passed/returned in memory. */
1981 if (bytes < 0)
1982 return 0;
1984 if (mode != VOIDmode
1985 && MUST_PASS_IN_STACK (mode, type))
1986 return 0;
1988 if (type && AGGREGATE_TYPE_P (type))
1990 int i;
1991 tree field;
1992 enum x86_64_reg_class subclasses[MAX_CLASSES];
1994 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1995 if (bytes > 16)
1996 return 0;
1998 for (i = 0; i < words; i++)
1999 classes[i] = X86_64_NO_CLASS;
2001 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2002 signal the memory class, so handle this as a special case. */
2003 if (!words)
2005 classes[0] = X86_64_NO_CLASS;
2006 return 1;
2009 /* Classify each field of record and merge classes. */
2010 if (TREE_CODE (type) == RECORD_TYPE)
2012 /* For C++ classes, first merge in the fields of the base classes. */
2013 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2015 tree bases = TYPE_BINFO_BASETYPES (type);
2016 int n_bases = TREE_VEC_LENGTH (bases);
2017 int i;
2019 for (i = 0; i < n_bases; ++i)
2021 tree binfo = TREE_VEC_ELT (bases, i);
2022 int num;
2023 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2024 tree type = BINFO_TYPE (binfo);
2026 num = classify_argument (TYPE_MODE (type),
2027 type, subclasses,
2028 (offset + bit_offset) % 256);
2029 if (!num)
2030 return 0;
2031 for (i = 0; i < num; i++)
2033 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2034 classes[i + pos] =
2035 merge_classes (subclasses[i], classes[i + pos]);
2039 /* And now merge the fields of the structure. */
2040 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2042 if (TREE_CODE (field) == FIELD_DECL)
2044 int num;
2046 /* Bitfields are always classified as integer. Handle them
2047 early, since later code would consider them to be
2048 misaligned integers. */
2049 if (DECL_BIT_FIELD (field))
2051 for (i = int_bit_position (field) / 8 / 8;
2052 i < (int_bit_position (field)
2053 + tree_low_cst (DECL_SIZE (field), 0)
2054 + 63) / 8 / 8; i++)
2055 classes[i] =
2056 merge_classes (X86_64_INTEGER_CLASS,
2057 classes[i]);
2059 else
2061 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2062 TREE_TYPE (field), subclasses,
2063 (int_bit_position (field)
2064 + bit_offset) % 256);
2065 if (!num)
2066 return 0;
2067 for (i = 0; i < num; i++)
2069 int pos =
2070 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2071 classes[i + pos] =
2072 merge_classes (subclasses[i], classes[i + pos]);
2078 /* Arrays are handled as small records. */
2079 else if (TREE_CODE (type) == ARRAY_TYPE)
2081 int num;
2082 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2083 TREE_TYPE (type), subclasses, bit_offset);
2084 if (!num)
2085 return 0;
2087 /* The partial classes are now full classes. */
2088 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2089 subclasses[0] = X86_64_SSE_CLASS;
2090 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2091 subclasses[0] = X86_64_INTEGER_CLASS;
2093 for (i = 0; i < words; i++)
2094 classes[i] = subclasses[i % num];
2096 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2097 else if (TREE_CODE (type) == UNION_TYPE
2098 || TREE_CODE (type) == QUAL_UNION_TYPE)
2100 /* For C++ classes, first merge in the fields of the base classes. */
2101 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2103 tree bases = TYPE_BINFO_BASETYPES (type);
2104 int n_bases = TREE_VEC_LENGTH (bases);
2105 int i;
2107 for (i = 0; i < n_bases; ++i)
2109 tree binfo = TREE_VEC_ELT (bases, i);
2110 int num;
2111 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2112 tree type = BINFO_TYPE (binfo);
2114 num = classify_argument (TYPE_MODE (type),
2115 type, subclasses,
2116 (offset + (bit_offset % 64)) % 256);
2117 if (!num)
2118 return 0;
2119 for (i = 0; i < num; i++)
2121 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2122 classes[i + pos] =
2123 merge_classes (subclasses[i], classes[i + pos]);
2127 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2129 if (TREE_CODE (field) == FIELD_DECL)
2131 int num;
2132 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2133 TREE_TYPE (field), subclasses,
2134 bit_offset);
2135 if (!num)
2136 return 0;
2137 for (i = 0; i < num; i++)
2138 classes[i] = merge_classes (subclasses[i], classes[i]);
2142 else if (TREE_CODE (type) == SET_TYPE)
2144 if (bytes <= 4)
2146 classes[0] = X86_64_INTEGERSI_CLASS;
2147 return 1;
2149 else if (bytes <= 8)
2151 classes[0] = X86_64_INTEGER_CLASS;
2152 return 1;
2154 else if (bytes <= 12)
2156 classes[0] = X86_64_INTEGER_CLASS;
2157 classes[1] = X86_64_INTEGERSI_CLASS;
2158 return 2;
2160 else
2162 classes[0] = X86_64_INTEGER_CLASS;
2163 classes[1] = X86_64_INTEGER_CLASS;
2164 return 2;
2167 else
2168 abort ();
2170 /* Final merger cleanup. */
2171 for (i = 0; i < words; i++)
2173 /* If one class is MEMORY, everything should be passed in
2174 memory. */
2175 if (classes[i] == X86_64_MEMORY_CLASS)
2176 return 0;
2178 /* The X86_64_SSEUP_CLASS should always be preceded by
2179 X86_64_SSE_CLASS. */
2180 if (classes[i] == X86_64_SSEUP_CLASS
2181 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2182 classes[i] = X86_64_SSE_CLASS;
2184 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2185 if (classes[i] == X86_64_X87UP_CLASS
2186 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2187 classes[i] = X86_64_SSE_CLASS;
2189 return words;
2192 /* Compute the alignment needed.  We align all types to natural boundaries
2193 with the exception of XFmode, which is aligned to 64 bits. */
2194 if (mode != VOIDmode && mode != BLKmode)
2196 int mode_alignment = GET_MODE_BITSIZE (mode);
2198 if (mode == XFmode)
2199 mode_alignment = 128;
2200 else if (mode == XCmode)
2201 mode_alignment = 256;
2202 /* Misaligned fields are always returned in memory. */
2203 if (bit_offset % mode_alignment)
2204 return 0;
2207 /* Classification of atomic types. */
2208 switch (mode)
2210 case DImode:
2211 case SImode:
2212 case HImode:
2213 case QImode:
2214 case CSImode:
2215 case CHImode:
2216 case CQImode:
2217 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2218 classes[0] = X86_64_INTEGERSI_CLASS;
2219 else
2220 classes[0] = X86_64_INTEGER_CLASS;
2221 return 1;
2222 case CDImode:
2223 case TImode:
2224 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2225 return 2;
2226 case CTImode:
2227 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2228 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2229 return 4;
2230 case SFmode:
2231 if (!(bit_offset % 64))
2232 classes[0] = X86_64_SSESF_CLASS;
2233 else
2234 classes[0] = X86_64_SSE_CLASS;
2235 return 1;
2236 case DFmode:
2237 classes[0] = X86_64_SSEDF_CLASS;
2238 return 1;
2239 case XFmode:
2240 classes[0] = X86_64_X87_CLASS;
2241 classes[1] = X86_64_X87UP_CLASS;
2242 return 2;
2243 case TFmode:
2244 case TCmode:
2245 return 0;
2246 case XCmode:
2247 classes[0] = X86_64_X87_CLASS;
2248 classes[1] = X86_64_X87UP_CLASS;
2249 classes[2] = X86_64_X87_CLASS;
2250 classes[3] = X86_64_X87UP_CLASS;
2251 return 4;
2252 case DCmode:
2253 classes[0] = X86_64_SSEDF_CLASS;
2254 classes[1] = X86_64_SSEDF_CLASS;
2255 return 2;
2256 case SCmode:
2257 classes[0] = X86_64_SSE_CLASS;
2258 return 1;
2259 case V4SFmode:
2260 case V4SImode:
2261 case V16QImode:
2262 case V8HImode:
2263 case V2DFmode:
2264 case V2DImode:
2265 classes[0] = X86_64_SSE_CLASS;
2266 classes[1] = X86_64_SSEUP_CLASS;
2267 return 2;
2268 case V2SFmode:
2269 case V2SImode:
2270 case V4HImode:
2271 case V8QImode:
2272 return 0;
2273 case BLKmode:
2274 case VOIDmode:
2275 return 0;
2276 default:
2277 abort ();
2281 /* Examine the argument and set the number of registers required in each
2282 class.  Return 0 iff the parameter should be passed in memory. */
2283 static int
2284 examine_argument (enum machine_mode mode, tree type, int in_return,
2285 int *int_nregs, int *sse_nregs)
2287 enum x86_64_reg_class class[MAX_CLASSES];
2288 int n = classify_argument (mode, type, class, 0);
2290 *int_nregs = 0;
2291 *sse_nregs = 0;
2292 if (!n)
2293 return 0;
2294 for (n--; n >= 0; n--)
2295 switch (class[n])
2297 case X86_64_INTEGER_CLASS:
2298 case X86_64_INTEGERSI_CLASS:
2299 (*int_nregs)++;
2300 break;
2301 case X86_64_SSE_CLASS:
2302 case X86_64_SSESF_CLASS:
2303 case X86_64_SSEDF_CLASS:
2304 (*sse_nregs)++;
2305 break;
2306 case X86_64_NO_CLASS:
2307 case X86_64_SSEUP_CLASS:
2308 break;
2309 case X86_64_X87_CLASS:
2310 case X86_64_X87UP_CLASS:
2311 if (!in_return)
2312 return 0;
2313 break;
2314 case X86_64_MEMORY_CLASS:
2315 abort ();
2317 return 1;
2319 /* Construct container for the argument used by GCC interface. See
2320 FUNCTION_ARG for the detailed description. */
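/* Continuing the struct example above (an illustration, assuming it is the
   first argument of a call): the container built here is a PARALLEL holding
   (reg:DF xmm0) at byte offset 0 and (reg:DI di) at byte offset 8, so the
   structure is split between one SSE and one integer register. */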
2321 static rtx
2322 construct_container (enum machine_mode mode, tree type, int in_return,
2323 int nintregs, int nsseregs, const int * intreg,
2324 int sse_regno)
2326 enum machine_mode tmpmode;
2327 int bytes =
2328 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2329 enum x86_64_reg_class class[MAX_CLASSES];
2330 int n;
2331 int i;
2332 int nexps = 0;
2333 int needed_sseregs, needed_intregs;
2334 rtx exp[MAX_CLASSES];
2335 rtx ret;
2337 n = classify_argument (mode, type, class, 0);
2338 if (TARGET_DEBUG_ARG)
2340 if (!n)
2341 fprintf (stderr, "Memory class\n");
2342 else
2344 fprintf (stderr, "Classes:");
2345 for (i = 0; i < n; i++)
2347 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2349 fprintf (stderr, "\n");
2352 if (!n)
2353 return NULL;
2354 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2355 return NULL;
2356 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2357 return NULL;
2359 /* First construct the simple cases.  Avoid SCmode, since we want to use
2360 a single register to pass this type. */
2361 if (n == 1 && mode != SCmode)
2362 switch (class[0])
2364 case X86_64_INTEGER_CLASS:
2365 case X86_64_INTEGERSI_CLASS:
2366 return gen_rtx_REG (mode, intreg[0]);
2367 case X86_64_SSE_CLASS:
2368 case X86_64_SSESF_CLASS:
2369 case X86_64_SSEDF_CLASS:
2370 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2371 case X86_64_X87_CLASS:
2372 return gen_rtx_REG (mode, FIRST_STACK_REG);
2373 case X86_64_NO_CLASS:
2374 /* Zero sized array, struct or class. */
2375 return NULL;
2376 default:
2377 abort ();
2379 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2380 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2381 if (n == 2
2382 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2383 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2384 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2385 && class[1] == X86_64_INTEGER_CLASS
2386 && (mode == CDImode || mode == TImode || mode == TFmode)
2387 && intreg[0] + 1 == intreg[1])
2388 return gen_rtx_REG (mode, intreg[0]);
2389 if (n == 4
2390 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2391 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2392 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2394 /* Otherwise figure out the entries of the PARALLEL. */
2395 for (i = 0; i < n; i++)
2397 switch (class[i])
2399 case X86_64_NO_CLASS:
2400 break;
2401 case X86_64_INTEGER_CLASS:
2402 case X86_64_INTEGERSI_CLASS:
2403 /* Merge TImodes on aligned occasions here too. */
2404 if (i * 8 + 8 > bytes)
2405 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2406 else if (class[i] == X86_64_INTEGERSI_CLASS)
2407 tmpmode = SImode;
2408 else
2409 tmpmode = DImode;
2410 /* We've requested 24 bytes we don't have a mode for.  Use DImode. */
2411 if (tmpmode == BLKmode)
2412 tmpmode = DImode;
2413 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2414 gen_rtx_REG (tmpmode, *intreg),
2415 GEN_INT (i*8));
2416 intreg++;
2417 break;
2418 case X86_64_SSESF_CLASS:
2419 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2420 gen_rtx_REG (SFmode,
2421 SSE_REGNO (sse_regno)),
2422 GEN_INT (i*8));
2423 sse_regno++;
2424 break;
2425 case X86_64_SSEDF_CLASS:
2426 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2427 gen_rtx_REG (DFmode,
2428 SSE_REGNO (sse_regno)),
2429 GEN_INT (i*8));
2430 sse_regno++;
2431 break;
2432 case X86_64_SSE_CLASS:
2433 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2434 tmpmode = TImode;
2435 else
2436 tmpmode = DImode;
2437 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2438 gen_rtx_REG (tmpmode,
2439 SSE_REGNO (sse_regno)),
2440 GEN_INT (i*8));
2441 if (tmpmode == TImode)
2442 i++;
2443 sse_regno++;
2444 break;
2445 default:
2446 abort ();
2449 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2450 for (i = 0; i < nexps; i++)
2451 XVECEXP (ret, 0, i) = exp [i];
2452 return ret;
2455 /* Update the data in CUM to advance over an argument
2456 of mode MODE and data type TYPE.
2457 (TYPE is null for libcalls where that information may not be available.) */
2459 void
2460 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2461 enum machine_mode mode, /* current arg mode */
2462 tree type, /* type of the argument or 0 if lib support */
2463 int named) /* whether or not the argument was named */
2465 int bytes =
2466 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2467 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2469 if (TARGET_DEBUG_ARG)
2470 fprintf (stderr,
2471 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2472 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2473 if (TARGET_64BIT)
2475 int int_nregs, sse_nregs;
2476 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2477 cum->words += words;
2478 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2480 cum->nregs -= int_nregs;
2481 cum->sse_nregs -= sse_nregs;
2482 cum->regno += int_nregs;
2483 cum->sse_regno += sse_nregs;
2485 else
2486 cum->words += words;
2488 else
2490 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2491 && (!type || !AGGREGATE_TYPE_P (type)))
2493 cum->sse_words += words;
2494 cum->sse_nregs -= 1;
2495 cum->sse_regno += 1;
2496 if (cum->sse_nregs <= 0)
2498 cum->sse_nregs = 0;
2499 cum->sse_regno = 0;
2502 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2503 && (!type || !AGGREGATE_TYPE_P (type)))
2505 cum->mmx_words += words;
2506 cum->mmx_nregs -= 1;
2507 cum->mmx_regno += 1;
2508 if (cum->mmx_nregs <= 0)
2510 cum->mmx_nregs = 0;
2511 cum->mmx_regno = 0;
2514 else
2516 cum->words += words;
2517 cum->nregs -= words;
2518 cum->regno += words;
2520 if (cum->nregs <= 0)
2522 cum->nregs = 0;
2523 cum->regno = 0;
2527 return;
2530 /* Define where to put the arguments to a function.
2531 Value is zero to push the argument on the stack,
2532 or a hard register in which to store the argument.
2534 MODE is the argument's machine mode.
2535 TYPE is the data type of the argument (as a tree).
2536 This is null for libcalls where that information may
2537 not be available.
2538 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2539 the preceding args and about the function being called.
2540 NAMED is nonzero if this argument is a named parameter
2541 (otherwise it is an extra parameter matching an ellipsis). */
2544 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2545 enum machine_mode mode, /* current arg mode */
2546 tree type, /* type of the argument or 0 if lib support */
2547 int named) /* != 0 for normal args, == 0 for ... args */
2549 rtx ret = NULL_RTX;
2550 int bytes =
2551 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2552 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2553 static bool warnedsse, warnedmmx;
2555 /* Handle a hidden AL argument containing the number of SSE registers used by
2556 varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
2557 avoid any AL settings. */
2558 if (mode == VOIDmode)
2560 if (TARGET_64BIT)
2561 return GEN_INT (cum->maybe_vaarg
2562 ? (cum->sse_nregs < 0
2563 ? SSE_REGPARM_MAX
2564 : cum->sse_regno)
2565 : -1);
2566 else
2567 return constm1_rtx;
2569 if (TARGET_64BIT)
2570 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2571 &x86_64_int_parameter_registers [cum->regno],
2572 cum->sse_regno);
2573 else
2574 switch (mode)
2576 /* For now, pass fp/complex values on the stack. */
2577 default:
2578 break;
2580 case BLKmode:
2581 if (bytes < 0)
2582 break;
2583 /* FALLTHRU */
2584 case DImode:
2585 case SImode:
2586 case HImode:
2587 case QImode:
2588 if (words <= cum->nregs)
2590 int regno = cum->regno;
2592 /* Fastcall allocates the first two DWORD (SImode) or
2593 smaller arguments to ECX and EDX. */
2594 if (cum->fastcall)
2596 if (mode == BLKmode || mode == DImode)
2597 break;
2599 /* ECX, not EAX, is the first allocated register. */
2600 if (regno == 0)
2601 regno = 2;
2603 ret = gen_rtx_REG (mode, regno);
2605 break;
2606 case TImode:
2607 case V16QImode:
2608 case V8HImode:
2609 case V4SImode:
2610 case V2DImode:
2611 case V4SFmode:
2612 case V2DFmode:
2613 if (!type || !AGGREGATE_TYPE_P (type))
2615 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2617 warnedsse = true;
2618 warning ("SSE vector argument without SSE enabled "
2619 "changes the ABI");
2621 if (cum->sse_nregs)
2622 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2624 break;
2625 case V8QImode:
2626 case V4HImode:
2627 case V2SImode:
2628 case V2SFmode:
2629 if (!type || !AGGREGATE_TYPE_P (type))
2631 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2633 warnedmmx = true;
2634 warning ("MMX vector argument without MMX enabled "
2635 "changes the ABI");
2637 if (cum->mmx_nregs)
2638 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2640 break;
2643 if (TARGET_DEBUG_ARG)
2645 fprintf (stderr,
2646 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2647 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2649 if (ret)
2650 print_simple_rtl (stderr, ret);
2651 else
2652 fprintf (stderr, ", stack");
2654 fprintf (stderr, " )\n");
2657 return ret;
2660 /* A C expression that indicates when an argument must be passed by
2661 reference. If nonzero for an argument, a copy of that argument is
2662 made in memory and a pointer to the argument is passed instead of
2663 the argument itself. The pointer is passed in whatever way is
2664 appropriate for passing a pointer to that type. */
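/* Illustrative note (an assumption about typical callers, not from the
   original source): on x86-64 this triggers only for types whose size is
   not a compile-time constant (int_size_in_bytes returns -1); such values
   are passed by a hidden pointer, while every fixed-size type goes through
   the register/stack classification above. */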
2667 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2668 enum machine_mode mode ATTRIBUTE_UNUSED,
2669 tree type, int named ATTRIBUTE_UNUSED)
2671 if (!TARGET_64BIT)
2672 return 0;
2674 if (type && int_size_in_bytes (type) == -1)
2676 if (TARGET_DEBUG_ARG)
2677 fprintf (stderr, "function_arg_pass_by_reference\n");
2678 return 1;
2681 return 0;
2684 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2685 passing ABI. */
2686 static bool
2687 contains_128bit_aligned_vector_p (tree type)
2689 enum machine_mode mode = TYPE_MODE (type);
2690 if (SSE_REG_MODE_P (mode)
2691 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2692 return true;
2693 if (TYPE_ALIGN (type) < 128)
2694 return false;
2696 if (AGGREGATE_TYPE_P (type))
2698 /* Walk the aggregates recursively. */
2699 if (TREE_CODE (type) == RECORD_TYPE
2700 || TREE_CODE (type) == UNION_TYPE
2701 || TREE_CODE (type) == QUAL_UNION_TYPE)
2703 tree field;
2705 if (TYPE_BINFO (type) != NULL
2706 && TYPE_BINFO_BASETYPES (type) != NULL)
2708 tree bases = TYPE_BINFO_BASETYPES (type);
2709 int n_bases = TREE_VEC_LENGTH (bases);
2710 int i;
2712 for (i = 0; i < n_bases; ++i)
2714 tree binfo = TREE_VEC_ELT (bases, i);
2715 tree type = BINFO_TYPE (binfo);
2717 if (contains_128bit_aligned_vector_p (type))
2718 return true;
2721 /* And now merge the fields of the structure. */
2722 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2724 if (TREE_CODE (field) == FIELD_DECL
2725 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2726 return true;
2729 /* Just for use if some language passes arrays by value. */
2730 else if (TREE_CODE (type) == ARRAY_TYPE)
2732 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2733 return true;
2735 else
2736 abort ();
2738 return false;
2741 /* Gives the alignment boundary, in bits, of an argument with the
2742 specified mode and type. */
2745 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2747 int align;
2748 if (type)
2749 align = TYPE_ALIGN (type);
2750 else
2751 align = GET_MODE_ALIGNMENT (mode);
2752 if (align < PARM_BOUNDARY)
2753 align = PARM_BOUNDARY;
2754 if (!TARGET_64BIT)
2756 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2757 make an exception for SSE modes since these require 128bit
2758 alignment.
2760 The handling here differs from field_alignment. ICC aligns MMX
2761 arguments to 4 byte boundaries, while structure fields are aligned
2762 to 8 byte boundaries. */
2763 if (!type)
2765 if (!SSE_REG_MODE_P (mode))
2766 align = PARM_BOUNDARY;
2768 else
2770 if (!contains_128bit_aligned_vector_p (type))
2771 align = PARM_BOUNDARY;
2774 if (align > 128)
2775 align = 128;
2776 return align;
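/* Examples (illustrative): on ia32 a plain int or double argument gets
   PARM_BOUNDARY (32 bits), while an SSE vector argument such as __m128
   (V4SFmode), or a structure containing one, is aligned to 128 bits; the
   result is always capped at 128. */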
2779 /* Return true if N is a possible register number of function value. */
2780 bool
2781 ix86_function_value_regno_p (int regno)
2783 if (!TARGET_64BIT)
2785 return ((regno) == 0
2786 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2787 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2789 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2790 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2791 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2794 /* Define how to find the value returned by a function.
2795 VALTYPE is the data type of the value (as a tree).
2796 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2797 otherwise, FUNC is 0. */
2799 ix86_function_value (tree valtype)
2801 if (TARGET_64BIT)
2803 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2804 REGPARM_MAX, SSE_REGPARM_MAX,
2805 x86_64_int_return_registers, 0);
2806 /* For zero sized structures, construct_container returns NULL, but we need
2807 to keep the rest of the compiler happy by returning a meaningful value. */
2808 if (!ret)
2809 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2810 return ret;
2812 else
2813 return gen_rtx_REG (TYPE_MODE (valtype),
2814 ix86_value_regno (TYPE_MODE (valtype)));
2817 /* Return nonzero iff TYPE is returned in memory. */
2819 ix86_return_in_memory (tree type)
2821 int needed_intregs, needed_sseregs, size;
2822 enum machine_mode mode = TYPE_MODE (type);
2824 if (TARGET_64BIT)
2825 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2827 if (mode == BLKmode)
2828 return 1;
2830 size = int_size_in_bytes (type);
2832 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2833 return 0;
2835 if (VECTOR_MODE_P (mode) || mode == TImode)
2837 /* User-created vectors small enough to fit in EAX. */
2838 if (size < 8)
2839 return 0;
2841 /* MMX/3dNow values are returned on the stack, since we've
2842 got to EMMS/FEMMS before returning. */
2843 if (size == 8)
2844 return 1;
2846 /* SSE values are returned in XMM0. */
2847 /* ??? Except when it doesn't exist? We have a choice of
2848 either (1) being abi incompatible with a -march switch,
2849 or (2) generating an error here. Given no good solution,
2850 I think the safest thing is one warning. The user won't
2851 be able to use -Werror, but.... */
2852 if (size == 16)
2854 static bool warned;
2856 if (TARGET_SSE)
2857 return 0;
2859 if (!warned)
2861 warned = true;
2862 warning ("SSE vector return without SSE enabled "
2863 "changes the ABI");
2865 return 1;
2869 if (mode == XFmode)
2870 return 0;
2872 if (size > 12)
2873 return 1;
2874 return 0;
2877 /* Define how to find the value returned by a library function
2878 assuming the value has mode MODE. */
2880 ix86_libcall_value (enum machine_mode mode)
2882 if (TARGET_64BIT)
2884 switch (mode)
2886 case SFmode:
2887 case SCmode:
2888 case DFmode:
2889 case DCmode:
2890 return gen_rtx_REG (mode, FIRST_SSE_REG);
2891 case XFmode:
2892 case XCmode:
2893 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2894 case TFmode:
2895 case TCmode:
2896 return NULL;
2897 default:
2898 return gen_rtx_REG (mode, 0);
2901 else
2902 return gen_rtx_REG (mode, ix86_value_regno (mode));
2905 /* Given a mode, return the register to use for a return value. */
2907 static int
2908 ix86_value_regno (enum machine_mode mode)
2910 /* Floating point return values in %st(0). */
2911 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2912 return FIRST_FLOAT_REG;
2913 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2914 we prevent this case when sse is not available. */
2915 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2916 return FIRST_SSE_REG;
2917 /* Everything else in %eax. */
2918 return 0;
2921 /* Create the va_list data type. */
2923 static tree
2924 ix86_build_builtin_va_list (void)
2926 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2928 /* For i386 we use plain pointer to argument area. */
2929 if (!TARGET_64BIT)
2930 return build_pointer_type (char_type_node);
2932 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2933 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2935 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2936 unsigned_type_node);
2937 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2938 unsigned_type_node);
2939 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2940 ptr_type_node);
2941 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2942 ptr_type_node);
2944 DECL_FIELD_CONTEXT (f_gpr) = record;
2945 DECL_FIELD_CONTEXT (f_fpr) = record;
2946 DECL_FIELD_CONTEXT (f_ovf) = record;
2947 DECL_FIELD_CONTEXT (f_sav) = record;
2949 TREE_CHAIN (record) = type_decl;
2950 TYPE_NAME (record) = type_decl;
2951 TYPE_FIELDS (record) = f_gpr;
2952 TREE_CHAIN (f_gpr) = f_fpr;
2953 TREE_CHAIN (f_fpr) = f_ovf;
2954 TREE_CHAIN (f_ovf) = f_sav;
2956 layout_type (record);
2958 /* The correct type is an array type of one element. */
2959 return build_array_type (record, build_index_type (size_zero_node));
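/* For reference (an illustrative sketch, not a declaration emitted by the
   compiler): on x86-64 the record built above roughly corresponds to

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } __builtin_va_list[1];

   while on i386 va_list is just a plain character pointer. */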
2962 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
2964 static void
2965 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2966 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2967 int no_rtl)
2969 CUMULATIVE_ARGS next_cum;
2970 rtx save_area = NULL_RTX, mem;
2971 rtx label;
2972 rtx label_ref;
2973 rtx tmp_reg;
2974 rtx nsse_reg;
2975 int set;
2976 tree fntype;
2977 int stdarg_p;
2978 int i;
2980 if (!TARGET_64BIT)
2981 return;
2983 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2984 ix86_save_varrargs_registers = 1;
2986 cfun->stack_alignment_needed = 128;
2988 fntype = TREE_TYPE (current_function_decl);
2989 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2990 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2991 != void_type_node));
2993 /* For varargs, we do not want to skip the dummy va_dcl argument.
2994 For stdargs, we do want to skip the last named argument. */
2995 next_cum = *cum;
2996 if (stdarg_p)
2997 function_arg_advance (&next_cum, mode, type, 1);
2999 if (!no_rtl)
3000 save_area = frame_pointer_rtx;
3002 set = get_varargs_alias_set ();
3004 for (i = next_cum.regno; i < ix86_regparm; i++)
3006 mem = gen_rtx_MEM (Pmode,
3007 plus_constant (save_area, i * UNITS_PER_WORD));
3008 set_mem_alias_set (mem, set);
3009 emit_move_insn (mem, gen_rtx_REG (Pmode,
3010 x86_64_int_parameter_registers[i]));
3013 if (next_cum.sse_nregs)
3015 /* Now emit code to save SSE registers.  The AX parameter contains the number
3016 of SSE parameter registers used to call this function.  We use the
3017 sse_prologue_save insn template, which produces a computed jump across
3018 the SSE saves.  We need some preparation work to get this working. */
3020 label = gen_label_rtx ();
3021 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3023 /* Compute the address to jump to:
3024 label - 4*eax + nnamed_sse_arguments*4 */
3025 tmp_reg = gen_reg_rtx (Pmode);
3026 nsse_reg = gen_reg_rtx (Pmode);
3027 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3028 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3029 gen_rtx_MULT (Pmode, nsse_reg,
3030 GEN_INT (4))));
3031 if (next_cum.sse_regno)
3032 emit_move_insn
3033 (nsse_reg,
3034 gen_rtx_CONST (DImode,
3035 gen_rtx_PLUS (DImode,
3036 label_ref,
3037 GEN_INT (next_cum.sse_regno * 4))));
3038 else
3039 emit_move_insn (nsse_reg, label_ref);
3040 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3042 /* Compute the address of the memory block we save into.  We always use a
3043 pointer pointing 127 bytes after the first byte to store - this is needed
3044 to keep the instruction size limited to 4 bytes. */
3045 tmp_reg = gen_reg_rtx (Pmode);
3046 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3047 plus_constant (save_area,
3048 8 * REGPARM_MAX + 127)));
3049 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3050 set_mem_alias_set (mem, set);
3051 set_mem_align (mem, BITS_PER_WORD);
3053 /* And finally do the dirty job! */
3054 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3055 GEN_INT (next_cum.sse_regno), label));
3060 /* Implement va_start. */
3062 void
3063 ix86_va_start (tree valist, rtx nextarg)
3065 HOST_WIDE_INT words, n_gpr, n_fpr;
3066 tree f_gpr, f_fpr, f_ovf, f_sav;
3067 tree gpr, fpr, ovf, sav, t;
3069 /* Only the 64-bit target needs something special. */
3070 if (!TARGET_64BIT)
3072 std_expand_builtin_va_start (valist, nextarg);
3073 return;
3076 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3077 f_fpr = TREE_CHAIN (f_gpr);
3078 f_ovf = TREE_CHAIN (f_fpr);
3079 f_sav = TREE_CHAIN (f_ovf);
3081 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3082 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3083 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3084 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3085 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3087 /* Count number of gp and fp argument registers used. */
3088 words = current_function_args_info.words;
3089 n_gpr = current_function_args_info.regno;
3090 n_fpr = current_function_args_info.sse_regno;
3092 if (TARGET_DEBUG_ARG)
3093 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3094 (int) words, (int) n_gpr, (int) n_fpr);
3096 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3097 build_int_2 (n_gpr * 8, 0));
3098 TREE_SIDE_EFFECTS (t) = 1;
3099 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3101 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3102 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3103 TREE_SIDE_EFFECTS (t) = 1;
3104 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3106 /* Find the overflow area. */
3107 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3108 if (words != 0)
3109 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3110 build_int_2 (words * UNITS_PER_WORD, 0));
3111 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3112 TREE_SIDE_EFFECTS (t) = 1;
3113 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3115 /* Find the register save area.
3116 The function prologue saves it right above the stack frame. */
3117 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3118 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3119 TREE_SIDE_EFFECTS (t) = 1;
3120 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
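/* Example (illustrative): for int f (int a, ...) on x86-64, one integer
   register is consumed by the named argument, so va_start sets
   gp_offset = 1 * 8 = 8 and fp_offset = 0 * 16 + 8 * REGPARM_MAX (48 with
   the six integer argument registers); overflow_arg_area points just past
   the named stack arguments and reg_save_area at the block saved by the
   prologue code above. */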
3123 /* Implement va_arg. */
3125 ix86_va_arg (tree valist, tree type)
3127 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3128 tree f_gpr, f_fpr, f_ovf, f_sav;
3129 tree gpr, fpr, ovf, sav, t;
3130 int size, rsize;
3131 rtx lab_false, lab_over = NULL_RTX;
3132 rtx addr_rtx, r;
3133 rtx container;
3134 int indirect_p = 0;
3136 /* Only the 64-bit target needs something special. */
3137 if (!TARGET_64BIT)
3139 return std_expand_builtin_va_arg (valist, type);
3142 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3143 f_fpr = TREE_CHAIN (f_gpr);
3144 f_ovf = TREE_CHAIN (f_fpr);
3145 f_sav = TREE_CHAIN (f_ovf);
3147 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3148 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3149 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3150 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3151 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3153 size = int_size_in_bytes (type);
3154 if (size == -1)
3156 /* Passed by reference. */
3157 indirect_p = 1;
3158 type = build_pointer_type (type);
3159 size = int_size_in_bytes (type);
3161 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3163 container = construct_container (TYPE_MODE (type), type, 0,
3164 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3166 /* Pull the value out of the saved registers ...  */
3169 addr_rtx = gen_reg_rtx (Pmode);
3171 if (container)
3173 rtx int_addr_rtx, sse_addr_rtx;
3174 int needed_intregs, needed_sseregs;
3175 int need_temp;
3177 lab_over = gen_label_rtx ();
3178 lab_false = gen_label_rtx ();
3180 examine_argument (TYPE_MODE (type), type, 0,
3181 &needed_intregs, &needed_sseregs);
3184 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3185 || TYPE_ALIGN (type) > 128);
3187 /* In case we are passing a structure, verify that it is a consecutive block
3188 in the register save area.  If not, we need to do moves. */
3189 if (!need_temp && !REG_P (container))
3191 /* Verify that all registers are strictly consecutive */
3192 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3194 int i;
3196 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3198 rtx slot = XVECEXP (container, 0, i);
3199 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3200 || INTVAL (XEXP (slot, 1)) != i * 16)
3201 need_temp = 1;
3204 else
3206 int i;
3208 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3210 rtx slot = XVECEXP (container, 0, i);
3211 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3212 || INTVAL (XEXP (slot, 1)) != i * 8)
3213 need_temp = 1;
3217 if (!need_temp)
3219 int_addr_rtx = addr_rtx;
3220 sse_addr_rtx = addr_rtx;
3222 else
3224 int_addr_rtx = gen_reg_rtx (Pmode);
3225 sse_addr_rtx = gen_reg_rtx (Pmode);
3227 /* First ensure that we fit completely in registers. */
3228 if (needed_intregs)
3230 emit_cmp_and_jump_insns (expand_expr
3231 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3232 GEN_INT ((REGPARM_MAX - needed_intregs +
3233 1) * 8), GE, const1_rtx, SImode,
3234 1, lab_false);
3236 if (needed_sseregs)
3238 emit_cmp_and_jump_insns (expand_expr
3239 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3240 GEN_INT ((SSE_REGPARM_MAX -
3241 needed_sseregs + 1) * 16 +
3242 REGPARM_MAX * 8), GE, const1_rtx,
3243 SImode, 1, lab_false);
3246 /* Compute index to start of area used for integer regs. */
3247 if (needed_intregs)
3249 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3250 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3251 if (r != int_addr_rtx)
3252 emit_move_insn (int_addr_rtx, r);
3254 if (needed_sseregs)
3256 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3257 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3258 if (r != sse_addr_rtx)
3259 emit_move_insn (sse_addr_rtx, r);
3261 if (need_temp)
3263 int i;
3264 rtx mem;
3265 rtx x;
3267 /* Never use the memory itself, as it has the alias set. */
3268 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3269 mem = gen_rtx_MEM (BLKmode, x);
3270 force_operand (x, addr_rtx);
3271 set_mem_alias_set (mem, get_varargs_alias_set ());
3272 set_mem_align (mem, BITS_PER_UNIT);
3274 for (i = 0; i < XVECLEN (container, 0); i++)
3276 rtx slot = XVECEXP (container, 0, i);
3277 rtx reg = XEXP (slot, 0);
3278 enum machine_mode mode = GET_MODE (reg);
3279 rtx src_addr;
3280 rtx src_mem;
3281 int src_offset;
3282 rtx dest_mem;
3284 if (SSE_REGNO_P (REGNO (reg)))
3286 src_addr = sse_addr_rtx;
3287 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3289 else
3291 src_addr = int_addr_rtx;
3292 src_offset = REGNO (reg) * 8;
3294 src_mem = gen_rtx_MEM (mode, src_addr);
3295 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3296 src_mem = adjust_address (src_mem, mode, src_offset);
3297 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3298 emit_move_insn (dest_mem, src_mem);
3302 if (needed_intregs)
3305 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3306 build_int_2 (needed_intregs * 8, 0));
3307 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3308 TREE_SIDE_EFFECTS (t) = 1;
3309 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3311 if (needed_sseregs)
3314 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3315 build_int_2 (needed_sseregs * 16, 0));
3316 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3317 TREE_SIDE_EFFECTS (t) = 1;
3318 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3321 emit_jump_insn (gen_jump (lab_over));
3322 emit_barrier ();
3323 emit_label (lab_false);
3326 /* ... otherwise out of the overflow area. */
3328 /* Care for on-stack alignment if needed. */
3329 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3330 t = ovf;
3331 else
3333 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3334 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3335 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3337 t = save_expr (t);
3339 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3340 if (r != addr_rtx)
3341 emit_move_insn (addr_rtx, r);
3344 build (PLUS_EXPR, TREE_TYPE (t), t,
3345 build_int_2 (rsize * UNITS_PER_WORD, 0));
3346 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3347 TREE_SIDE_EFFECTS (t) = 1;
3348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3350 if (container)
3351 emit_label (lab_over);
3353 if (indirect_p)
3355 r = gen_rtx_MEM (Pmode, addr_rtx);
3356 set_mem_alias_set (r, get_varargs_alias_set ());
3357 emit_move_insn (addr_rtx, r);
3360 return addr_rtx;
3363 /* Return nonzero if OP is either an i387 or SSE fp register. */
3365 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3367 return ANY_FP_REG_P (op);
3370 /* Return nonzero if OP is an i387 fp register. */
3372 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3374 return FP_REG_P (op);
3377 /* Return nonzero if OP is a non-fp register_operand. */
3379 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3381 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3384 /* Return nonzero if OP is a register operand other than an
3385 i387 fp register. */
3387 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3389 return register_operand (op, mode) && !FP_REG_P (op);
3392 /* Return nonzero if OP is general operand representable on x86_64. */
3395 x86_64_general_operand (rtx op, enum machine_mode mode)
3397 if (!TARGET_64BIT)
3398 return general_operand (op, mode);
3399 if (nonimmediate_operand (op, mode))
3400 return 1;
3401 return x86_64_sign_extended_value (op);
3404 /* Return nonzero if OP is a general operand representable on x86_64
3405 as either a sign extended or zero extended constant. */
3408 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3410 if (!TARGET_64BIT)
3411 return general_operand (op, mode);
3412 if (nonimmediate_operand (op, mode))
3413 return 1;
3414 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3417 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3420 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3422 if (!TARGET_64BIT)
3423 return nonmemory_operand (op, mode);
3424 if (register_operand (op, mode))
3425 return 1;
3426 return x86_64_sign_extended_value (op);
3429 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3432 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3434 if (!TARGET_64BIT || !flag_pic)
3435 return nonmemory_operand (op, mode);
3436 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3437 return 1;
3438 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3439 return 1;
3440 return 0;
3443 /* Return nonzero if OPNUM's MEM should be matched
3444 in movabs* patterns. */
3447 ix86_check_movabs (rtx insn, int opnum)
3449 rtx set, mem;
3451 set = PATTERN (insn);
3452 if (GET_CODE (set) == PARALLEL)
3453 set = XVECEXP (set, 0, 0);
3454 if (GET_CODE (set) != SET)
3455 abort ();
3456 mem = XEXP (set, opnum);
3457 while (GET_CODE (mem) == SUBREG)
3458 mem = SUBREG_REG (mem);
3459 if (GET_CODE (mem) != MEM)
3460 abort ();
3461 return (volatile_ok || !MEM_VOLATILE_P (mem));
3464 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3467 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3469 if (!TARGET_64BIT)
3470 return nonmemory_operand (op, mode);
3471 if (register_operand (op, mode))
3472 return 1;
3473 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3476 /* Return nonzero if OP is immediate operand representable on x86_64. */
3479 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3481 if (!TARGET_64BIT)
3482 return immediate_operand (op, mode);
3483 return x86_64_sign_extended_value (op);
3486 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value. */
3489 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3491 return x86_64_zero_extended_value (op);
3494 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3495 for shift & compare patterns, as shifting by 0 does not change flags),
3496 else return zero. */
3499 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3501 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3504 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3505 reference and a constant. */
3508 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3510 switch (GET_CODE (op))
3512 case SYMBOL_REF:
3513 case LABEL_REF:
3514 return 1;
3516 case CONST:
3517 op = XEXP (op, 0);
3518 if (GET_CODE (op) == SYMBOL_REF
3519 || GET_CODE (op) == LABEL_REF
3520 || (GET_CODE (op) == UNSPEC
3521 && (XINT (op, 1) == UNSPEC_GOT
3522 || XINT (op, 1) == UNSPEC_GOTOFF
3523 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3524 return 1;
3525 if (GET_CODE (op) != PLUS
3526 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3527 return 0;
3529 op = XEXP (op, 0);
3530 if (GET_CODE (op) == SYMBOL_REF
3531 || GET_CODE (op) == LABEL_REF)
3532 return 1;
3533 /* Only @GOTOFF gets offsets. */
3534 if (GET_CODE (op) != UNSPEC
3535 || XINT (op, 1) != UNSPEC_GOTOFF)
3536 return 0;
3538 op = XVECEXP (op, 0, 0);
3539 if (GET_CODE (op) == SYMBOL_REF
3540 || GET_CODE (op) == LABEL_REF)
3541 return 1;
3542 return 0;
3544 default:
3545 return 0;
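/* Illustrative sketch (not from the original source): the forms accepted
   above include plain symbols and offsetted symbols such as

       (symbol_ref "foo")
       (const (plus (symbol_ref "foo") (const_int 4)))

   as well as @GOT, @GOTOFF and @GOTPCREL unspecs wrapped in a CONST. */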
3549 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3552 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3554 if (GET_CODE (op) != CONST)
3555 return 0;
3556 op = XEXP (op, 0);
3557 if (TARGET_64BIT)
3559 if (GET_CODE (op) == UNSPEC
3560 && XINT (op, 1) == UNSPEC_GOTPCREL)
3561 return 1;
3562 if (GET_CODE (op) == PLUS
3563 && GET_CODE (XEXP (op, 0)) == UNSPEC
3564 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3565 return 1;
3567 else
3569 if (GET_CODE (op) == UNSPEC)
3570 return 1;
3571 if (GET_CODE (op) != PLUS
3572 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3573 return 0;
3574 op = XEXP (op, 0);
3575 if (GET_CODE (op) == UNSPEC)
3576 return 1;
3578 return 0;
3581 /* Return true if OP is a symbolic operand that resolves locally. */
3583 static int
3584 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3586 if (GET_CODE (op) == CONST
3587 && GET_CODE (XEXP (op, 0)) == PLUS
3588 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3589 op = XEXP (XEXP (op, 0), 0);
3591 if (GET_CODE (op) == LABEL_REF)
3592 return 1;
3594 if (GET_CODE (op) != SYMBOL_REF)
3595 return 0;
3597 if (SYMBOL_REF_LOCAL_P (op))
3598 return 1;
3600 /* There is, however, a not insubstantial body of code in the rest of
3601 the compiler that assumes it can just stick the results of
3602 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3603 /* ??? This is a hack. Should update the body of the compiler to
3604 always create a DECL and invoke targetm.encode_section_info. */
3605 if (strncmp (XSTR (op, 0), internal_label_prefix,
3606 internal_label_prefix_len) == 0)
3607 return 1;
3609 return 0;
3612 /* Test for various thread-local symbols. */
3615 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3617 if (GET_CODE (op) != SYMBOL_REF)
3618 return 0;
3619 return SYMBOL_REF_TLS_MODEL (op);
3622 static inline int
3623 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3625 if (GET_CODE (op) != SYMBOL_REF)
3626 return 0;
3627 return SYMBOL_REF_TLS_MODEL (op) == kind;
3631 global_dynamic_symbolic_operand (rtx op,
3632 enum machine_mode mode ATTRIBUTE_UNUSED)
3634 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3638 local_dynamic_symbolic_operand (rtx op,
3639 enum machine_mode mode ATTRIBUTE_UNUSED)
3641 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3645 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3647 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3651 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3653 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3656 /* Test for a valid operand for a call instruction. Don't allow the
3657 arg pointer register or virtual regs since they may decay into
3658 reg + const, which the patterns can't handle. */
3661 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3663 /* Disallow indirect through a virtual register. This leads to
3664 compiler aborts when trying to eliminate them. */
3665 if (GET_CODE (op) == REG
3666 && (op == arg_pointer_rtx
3667 || op == frame_pointer_rtx
3668 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3669 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3670 return 0;
3672 /* Disallow `call 1234'. Due to varying assembler lameness this
3673 gets either rejected or translated to `call .+1234'. */
3674 if (GET_CODE (op) == CONST_INT)
3675 return 0;
3677 /* Explicitly allow SYMBOL_REF even if pic. */
3678 if (GET_CODE (op) == SYMBOL_REF)
3679 return 1;
3681 /* Otherwise we can allow any general_operand in the address. */
3682 return general_operand (op, Pmode);
3685 /* Test for a valid operand for a call instruction. Don't allow the
3686 arg pointer register or virtual regs since they may decay into
3687 reg + const, which the patterns can't handle. */
3690 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3692 /* Disallow indirect through a virtual register. This leads to
3693 compiler aborts when trying to eliminate them. */
3694 if (GET_CODE (op) == REG
3695 && (op == arg_pointer_rtx
3696 || op == frame_pointer_rtx
3697 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3698 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3699 return 0;
3701 /* Explicitly allow SYMBOL_REF even if pic. */
3702 if (GET_CODE (op) == SYMBOL_REF)
3703 return 1;
3705 /* Otherwise we can only allow register operands. */
3706 return register_operand (op, Pmode);
3710 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3712 if (GET_CODE (op) == CONST
3713 && GET_CODE (XEXP (op, 0)) == PLUS
3714 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3715 op = XEXP (XEXP (op, 0), 0);
3716 return GET_CODE (op) == SYMBOL_REF;
3719 /* Match exactly zero and one. */
3722 const0_operand (rtx op, enum machine_mode mode)
3724 return op == CONST0_RTX (mode);
3728 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3730 return op == const1_rtx;
3733 /* Match 2, 4, or 8. Used for leal multiplicands. */
3736 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3738 return (GET_CODE (op) == CONST_INT
3739 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3743 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3745 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3749 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3751 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3755 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3757 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3761 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3763 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3767 /* True if this is a constant appropriate for an increment or decrement. */
3770 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3773 flags register, since the carry flag is not set. */
3774 if (TARGET_PENTIUM4 && !optimize_size)
3775 return 0;
3776 return op == const1_rtx || op == constm1_rtx;
3779 /* Return nonzero if OP is acceptable as operand of DImode shift
3780 expander. */
3783 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3785 if (TARGET_64BIT)
3786 return nonimmediate_operand (op, mode);
3787 else
3788 return register_operand (op, mode);
3791 /* Return false if this is the stack pointer, or any other fake
3792 register eliminable to the stack pointer. Otherwise, this is
3793 a register operand.
3795 This is used to prevent esp from being used as an index reg,
3796 which would only happen in pathological cases. */
3799 reg_no_sp_operand (rtx op, enum machine_mode mode)
3801 rtx t = op;
3802 if (GET_CODE (t) == SUBREG)
3803 t = SUBREG_REG (t);
3804 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3805 return 0;
3807 return register_operand (op, mode);
3811 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3813 return MMX_REG_P (op);
3816 /* Return false if this is any eliminable register. Otherwise
3817 general_operand. */
3820 general_no_elim_operand (rtx op, enum machine_mode mode)
3822 rtx t = op;
3823 if (GET_CODE (t) == SUBREG)
3824 t = SUBREG_REG (t);
3825 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3826 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3827 || t == virtual_stack_dynamic_rtx)
3828 return 0;
3829 if (REG_P (t)
3830 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3831 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3832 return 0;
3834 return general_operand (op, mode);
3837 /* Return false if this is any eliminable register. Otherwise
3838 register_operand or const_int. */
3841 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3843 rtx t = op;
3844 if (GET_CODE (t) == SUBREG)
3845 t = SUBREG_REG (t);
3846 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3847 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3848 || t == virtual_stack_dynamic_rtx)
3849 return 0;
3851 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3854 /* Return false if this is any eliminable register or stack register,
3855 otherwise work like register_operand. */
3858 index_register_operand (rtx op, enum machine_mode mode)
3860 rtx t = op;
3861 if (GET_CODE (t) == SUBREG)
3862 t = SUBREG_REG (t);
3863 if (!REG_P (t))
3864 return 0;
3865 if (t == arg_pointer_rtx
3866 || t == frame_pointer_rtx
3867 || t == virtual_incoming_args_rtx
3868 || t == virtual_stack_vars_rtx
3869 || t == virtual_stack_dynamic_rtx
3870 || REGNO (t) == STACK_POINTER_REGNUM)
3871 return 0;
3873 return general_operand (op, mode);
3876 /* Return true if op is a Q_REGS class register. */
3879 q_regs_operand (rtx op, enum machine_mode mode)
3881 if (mode != VOIDmode && GET_MODE (op) != mode)
3882 return 0;
3883 if (GET_CODE (op) == SUBREG)
3884 op = SUBREG_REG (op);
3885 return ANY_QI_REG_P (op);
3888 /* Return true if op is a flags register. */
3891 flags_reg_operand (rtx op, enum machine_mode mode)
3893 if (mode != VOIDmode && GET_MODE (op) != mode)
3894 return 0;
3895 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3898 /* Return true if op is a NON_Q_REGS class register. */
3901 non_q_regs_operand (rtx op, enum machine_mode mode)
3903 if (mode != VOIDmode && GET_MODE (op) != mode)
3904 return 0;
3905 if (GET_CODE (op) == SUBREG)
3906 op = SUBREG_REG (op);
3907 return NON_QI_REG_P (op);
3911 zero_extended_scalar_load_operand (rtx op,
3912 enum machine_mode mode ATTRIBUTE_UNUSED)
3914 unsigned n_elts;
3915 if (GET_CODE (op) != MEM)
3916 return 0;
3917 op = maybe_get_pool_constant (op);
3918 if (!op)
3919 return 0;
3920 if (GET_CODE (op) != CONST_VECTOR)
3921 return 0;
3922 n_elts =
3923 (GET_MODE_SIZE (GET_MODE (op)) /
3924 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3925 for (n_elts--; n_elts > 0; n_elts--)
3927 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3928 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3929 return 0;
3931 return 1;
3934 /* Return 1 when OP is operand acceptable for standard SSE move. */
3936 vector_move_operand (rtx op, enum machine_mode mode)
3938 if (nonimmediate_operand (op, mode))
3939 return 1;
3940 if (GET_MODE (op) != mode && mode != VOIDmode)
3941 return 0;
3942 return (op == CONST0_RTX (GET_MODE (op)));
3945 /* Return true if OP is a valid address that does not contain
3946 a segment override. */
3949 no_seg_address_operand (rtx op, enum machine_mode mode)
3951 struct ix86_address parts;
3953 if (! address_operand (op, mode))
3954 return 0;
3956 if (! ix86_decompose_address (op, &parts))
3957 abort ();
3959 return parts.seg == SEG_DEFAULT;
3962 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3963 insns. */
3965 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3967 enum rtx_code code = GET_CODE (op);
3968 switch (code)
3970 /* Operations supported directly. */
3971 case EQ:
3972 case LT:
3973 case LE:
3974 case UNORDERED:
3975 case NE:
3976 case UNGE:
3977 case UNGT:
3978 case ORDERED:
3979 return 1;
3980 /* These are equivalent to ones above in non-IEEE comparisons. */
3981 case UNEQ:
3982 case UNLT:
3983 case UNLE:
3984 case LTGT:
3985 case GE:
3986 case GT:
3987 return !TARGET_IEEE_FP;
3988 default:
3989 return 0;
3992 /* Return 1 if OP is a valid comparison operator in valid mode. */
3994 ix86_comparison_operator (rtx op, enum machine_mode mode)
3996 enum machine_mode inmode;
3997 enum rtx_code code = GET_CODE (op);
3998 if (mode != VOIDmode && GET_MODE (op) != mode)
3999 return 0;
4000 if (!COMPARISON_P (op))
4001 return 0;
4002 inmode = GET_MODE (XEXP (op, 0));
4004 if (inmode == CCFPmode || inmode == CCFPUmode)
4006 enum rtx_code second_code, bypass_code;
4007 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4008 return (bypass_code == NIL && second_code == NIL);
4010 switch (code)
4012 case EQ: case NE:
4013 return 1;
4014 case LT: case GE:
4015 if (inmode == CCmode || inmode == CCGCmode
4016 || inmode == CCGOCmode || inmode == CCNOmode)
4017 return 1;
4018 return 0;
4019 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4020 if (inmode == CCmode)
4021 return 1;
4022 return 0;
4023 case GT: case LE:
4024 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4025 return 1;
4026 return 0;
4027 default:
4028 return 0;
4032 /* Return 1 if OP is a valid comparison operator testing whether the
4033 carry flag is set. */
4035 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4037 enum machine_mode inmode;
4038 enum rtx_code code = GET_CODE (op);
4040 if (mode != VOIDmode && GET_MODE (op) != mode)
4041 return 0;
4042 if (!COMPARISON_P (op))
4043 return 0;
4044 inmode = GET_MODE (XEXP (op, 0));
4045 if (GET_CODE (XEXP (op, 0)) != REG
4046 || REGNO (XEXP (op, 0)) != 17
4047 || XEXP (op, 1) != const0_rtx)
4048 return 0;
4050 if (inmode == CCFPmode || inmode == CCFPUmode)
4052 enum rtx_code second_code, bypass_code;
4054 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4055 if (bypass_code != NIL || second_code != NIL)
4056 return 0;
4057 code = ix86_fp_compare_code_to_integer (code);
4059 else if (inmode != CCmode)
4060 return 0;
4061 return code == LTU;
4064 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4067 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4069 enum machine_mode inmode;
4070 enum rtx_code code = GET_CODE (op);
4072 if (mode != VOIDmode && GET_MODE (op) != mode)
4073 return 0;
4074 if (!COMPARISON_P (op))
4075 return 0;
4076 inmode = GET_MODE (XEXP (op, 0));
4077 if (inmode == CCFPmode || inmode == CCFPUmode)
4079 enum rtx_code second_code, bypass_code;
4081 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4082 if (bypass_code != NIL || second_code != NIL)
4083 return 0;
4084 code = ix86_fp_compare_code_to_integer (code);
4086 /* The i387 supports only a limited set of condition codes. */
4087 switch (code)
4089 case LTU: case GTU: case LEU: case GEU:
4090 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4091 return 1;
4092 return 0;
4093 case ORDERED: case UNORDERED:
4094 case EQ: case NE:
4095 return 1;
4096 default:
4097 return 0;
4101 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4104 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4106 switch (GET_CODE (op))
4108 case MULT:
4109 /* Modern CPUs have the same latency for HImode and SImode multiply,
4110 but the 386 and 486 do HImode multiply faster. */
4111 return ix86_tune > PROCESSOR_I486;
4112 case PLUS:
4113 case AND:
4114 case IOR:
4115 case XOR:
4116 case ASHIFT:
4117 return 1;
4118 default:
4119 return 0;
4123 /* Nearly general operand, but accept any const_double, since we wish
4124 to be able to drop them into memory rather than have them get pulled
4125 into registers. */
4128 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4130 if (mode != VOIDmode && mode != GET_MODE (op))
4131 return 0;
4132 if (GET_CODE (op) == CONST_DOUBLE)
4133 return 1;
4134 return general_operand (op, mode);
4137 /* Match an SI or HImode register for a zero_extract. */
4140 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4142 int regno;
4143 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4144 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4145 return 0;
4147 if (!register_operand (op, VOIDmode))
4148 return 0;
4150 /* Be careful to accept only registers having upper parts. */
4151 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4152 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4155 /* Return 1 if this is a valid binary floating-point operation.
4156 OP is the expression matched, and MODE is its mode. */
4159 binary_fp_operator (rtx op, enum machine_mode mode)
4161 if (mode != VOIDmode && mode != GET_MODE (op))
4162 return 0;
4164 switch (GET_CODE (op))
4166 case PLUS:
4167 case MINUS:
4168 case MULT:
4169 case DIV:
4170 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4172 default:
4173 return 0;
4178 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4180 return GET_CODE (op) == MULT;
4184 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4186 return GET_CODE (op) == DIV;
4190 arith_or_logical_operator (rtx op, enum machine_mode mode)
4192 return ((mode == VOIDmode || GET_MODE (op) == mode)
4193 && ARITHMETIC_P (op));
4196 /* Returns 1 if OP is memory operand with a displacement. */
4199 memory_displacement_operand (rtx op, enum machine_mode mode)
4201 struct ix86_address parts;
4203 if (! memory_operand (op, mode))
4204 return 0;
4206 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4207 abort ();
4209 return parts.disp != NULL_RTX;
4212 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4213 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4215 ??? It seems likely that this will only work because cmpsi is an
4216 expander, and no actual insns use this. */
4219 cmpsi_operand (rtx op, enum machine_mode mode)
4221 if (nonimmediate_operand (op, mode))
4222 return 1;
4224 if (GET_CODE (op) == AND
4225 && GET_MODE (op) == SImode
4226 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4227 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4228 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4229 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4230 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4231 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4232 return 1;
4234 return 0;
4237 /* Returns 1 if OP is a memory operand that cannot be represented by the
4238 modRM array. */
4241 long_memory_operand (rtx op, enum machine_mode mode)
4243 if (! memory_operand (op, mode))
4244 return 0;
4246 return memory_address_length (op) != 0;
4249 /* Return nonzero if the rtx is known aligned. */
4252 aligned_operand (rtx op, enum machine_mode mode)
4254 struct ix86_address parts;
4256 if (!general_operand (op, mode))
4257 return 0;
4259 /* Registers and immediate operands are always "aligned". */
4260 if (GET_CODE (op) != MEM)
4261 return 1;
4263 /* Don't even try to do any aligned optimizations with volatiles. */
4264 if (MEM_VOLATILE_P (op))
4265 return 0;
4267 op = XEXP (op, 0);
4269 /* Pushes and pops are only valid on the stack pointer. */
4270 if (GET_CODE (op) == PRE_DEC
4271 || GET_CODE (op) == POST_INC)
4272 return 1;
4274 /* Decode the address. */
4275 if (! ix86_decompose_address (op, &parts))
4276 abort ();
4278 /* Look for some component that isn't known to be aligned. */
4279 if (parts.index)
4281 if (parts.scale < 4
4282 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4283 return 0;
4285 if (parts.base)
4287 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4288 return 0;
4290 if (parts.disp)
4292 if (GET_CODE (parts.disp) != CONST_INT
4293 || (INTVAL (parts.disp) & 3) != 0)
4294 return 0;
4297 /* Didn't find one -- this must be an aligned address. */
4298 return 1;
4301 /* Initialize the table of extra 80387 mathematical constants. */
4303 static void
4304 init_ext_80387_constants (void)
4306 static const char * cst[5] =
4308 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4309 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4310 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4311 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4312 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4314 int i;
4316 for (i = 0; i < 5; i++)
4318 real_from_string (&ext_80387_constants_table[i], cst[i]);
4319 /* Ensure each constant is rounded to XFmode precision. */
4320 real_convert (&ext_80387_constants_table[i],
4321 XFmode, &ext_80387_constants_table[i]);
4324 ext_80387_constants_init = 1;
4327 /* Return true if the constant is something that can be loaded with
4328 a special instruction. */
4331 standard_80387_constant_p (rtx x)
4333 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4334 return -1;
4336 if (x == CONST0_RTX (GET_MODE (x)))
4337 return 1;
4338 if (x == CONST1_RTX (GET_MODE (x)))
4339 return 2;
4341 /* For XFmode constants, try to find a special 80387 instruction when
4342 optimizing for size or on those CPUs that benefit from them. */
4343 if (GET_MODE (x) == XFmode
4344 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4346 REAL_VALUE_TYPE r;
4347 int i;
4349 if (! ext_80387_constants_init)
4350 init_ext_80387_constants ();
4352 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4353 for (i = 0; i < 5; i++)
4354 if (real_identical (&r, &ext_80387_constants_table[i]))
4355 return i + 3;
4358 return 0;
4361 /* Return the opcode of the special instruction to be used to load
4362 the constant X. */
4364 const char *
4365 standard_80387_constant_opcode (rtx x)
4367 switch (standard_80387_constant_p (x))
4369 case 1:
4370 return "fldz";
4371 case 2:
4372 return "fld1";
4373 case 3:
4374 return "fldlg2";
4375 case 4:
4376 return "fldln2";
4377 case 5:
4378 return "fldl2e";
4379 case 6:
4380 return "fldl2t";
4381 case 7:
4382 return "fldpi";
4384 abort ();
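/* A rough usage sketch (hypothetical caller, not part of this file): a move
   pattern would typically classify the constant first and then ask for the
   mnemonic, e.g.

       if (standard_80387_constant_p (operands[1]) > 0)
         return standard_80387_constant_opcode (operands[1]);

   where a return value of 1 maps to fldz, 2 to fld1, and 3..7 to the
   extended constants set up by init_ext_80387_constants above. */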
4387 /* Return the CONST_DOUBLE representing the 80387 constant that is
4388 loaded by the specified special instruction. The argument IDX
4389 matches the return value from standard_80387_constant_p. */
4392 standard_80387_constant_rtx (int idx)
4394 int i;
4396 if (! ext_80387_constants_init)
4397 init_ext_80387_constants ();
4399 switch (idx)
4401 case 3:
4402 case 4:
4403 case 5:
4404 case 6:
4405 case 7:
4406 i = idx - 3;
4407 break;
4409 default:
4410 abort ();
4413 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4414 XFmode);
4417 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4420 standard_sse_constant_p (rtx x)
4422 if (x == const0_rtx)
4423 return 1;
4424 return (x == CONST0_RTX (GET_MODE (x)));
4427 /* Returns 1 if OP contains a symbol reference */
4430 symbolic_reference_mentioned_p (rtx op)
4432 const char *fmt;
4433 int i;
4435 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4436 return 1;
4438 fmt = GET_RTX_FORMAT (GET_CODE (op));
4439 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4441 if (fmt[i] == 'E')
4443 int j;
4445 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4446 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4447 return 1;
4450 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4451 return 1;
4454 return 0;
4457 /* Return 1 if it is appropriate to emit `ret' instructions in the
4458 body of a function. Do this only if the epilogue is simple, needing a
4459 couple of insns. Prior to reloading, we can't tell how many registers
4460 must be saved, so return 0 then. Return 0 if there is no frame
4461 marker to de-allocate.
4463 If NON_SAVING_SETJMP is defined and true, then it is not possible
4464 for the epilogue to be simple, so return 0. This is a special case
4465 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4466 until final, but jump_optimize may need to know sooner if a
4467 `return' is OK. */
4470 ix86_can_use_return_insn_p (void)
4472 struct ix86_frame frame;
4474 #ifdef NON_SAVING_SETJMP
4475 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4476 return 0;
4477 #endif
4479 if (! reload_completed || frame_pointer_needed)
4480 return 0;
4482 /* Don't allow popping more than 32K bytes of arguments, since that's all
4483 we can do with one instruction. */
4484 if (current_function_pops_args
4485 && current_function_args_size >= 32768)
4486 return 0;
4488 ix86_compute_frame_layout (&frame);
4489 return frame.to_allocate == 0 && frame.nregs == 0;
4492 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4494 x86_64_sign_extended_value (rtx value)
4496 switch (GET_CODE (value))
4498 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4499 to be at least 32 and thus all acceptable constants are
4500 represented as CONST_INT. */
4501 case CONST_INT:
4502 if (HOST_BITS_PER_WIDE_INT == 32)
4503 return 1;
4504 else
4506 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4507 return trunc_int_for_mode (val, SImode) == val;
4509 break;
4511 /* For certain code models, the symbolic references are known to fit.
4512 In the CM_SMALL_PIC model we know a reference fits if it is local to
4513 the shared library. Don't count TLS SYMBOL_REFs here, since they
4514 should fit only inside an UNSPEC, handled below. */
4515 case SYMBOL_REF:
4516 /* TLS symbols are not constant. */
4517 if (tls_symbolic_operand (value, Pmode))
4518 return false;
4519 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4521 /* For certain code models, the code is near as well. */
4522 case LABEL_REF:
4523 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4524 || ix86_cmodel == CM_KERNEL);
4526 /* We also may accept the offsetted memory references in certain special
4527 cases. */
4528 case CONST:
4529 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4530 switch (XINT (XEXP (value, 0), 1))
4532 case UNSPEC_GOTPCREL:
4533 case UNSPEC_DTPOFF:
4534 case UNSPEC_GOTNTPOFF:
4535 case UNSPEC_NTPOFF:
4536 return 1;
4537 default:
4538 break;
4540 if (GET_CODE (XEXP (value, 0)) == PLUS)
4542 rtx op1 = XEXP (XEXP (value, 0), 0);
4543 rtx op2 = XEXP (XEXP (value, 0), 1);
4544 HOST_WIDE_INT offset;
4546 if (ix86_cmodel == CM_LARGE)
4547 return 0;
4548 if (GET_CODE (op2) != CONST_INT)
4549 return 0;
4550 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4551 switch (GET_CODE (op1))
4553 case SYMBOL_REF:
4554 /* For CM_SMALL assume that the latest object is 16MB below the
4555 end of the 31-bit boundary. We may also accept pretty
4556 large negative constants, knowing that all objects are
4557 in the positive half of the address space. */
4558 if (ix86_cmodel == CM_SMALL
4559 && offset < 16*1024*1024
4560 && trunc_int_for_mode (offset, SImode) == offset)
4561 return 1;
4562 /* For CM_KERNEL we know that all objects reside in the
4563 negative half of the 32-bit address space. We must not
4564 accept negative offsets, since they may fall just outside
4565 that range, but we may accept pretty large positive ones. */
4566 if (ix86_cmodel == CM_KERNEL
4567 && offset > 0
4568 && trunc_int_for_mode (offset, SImode) == offset)
4569 return 1;
4570 break;
4571 case LABEL_REF:
4572 /* These conditions are similar to SYMBOL_REF ones, just the
4573 constraints for code models differ. */
4574 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4575 && offset < 16*1024*1024
4576 && trunc_int_for_mode (offset, SImode) == offset)
4577 return 1;
4578 if (ix86_cmodel == CM_KERNEL
4579 && offset > 0
4580 && trunc_int_for_mode (offset, SImode) == offset)
4581 return 1;
4582 break;
4583 case UNSPEC:
4584 switch (XINT (op1, 1))
4586 case UNSPEC_DTPOFF:
4587 case UNSPEC_NTPOFF:
4588 if (offset > 0
4589 && trunc_int_for_mode (offset, SImode) == offset)
4590 return 1;
4592 break;
4593 default:
4594 return 0;
4597 return 0;
4598 default:
4599 return 0;
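/* Illustration (assumed example, not from the original source): on a 64-bit
   host, CONST_INTs such as 0x7fffffff and -0x80000000 are accepted, while
   0x80000000 is rejected, since sign-extending its low 32 bits would give
   0xffffffff80000000 rather than the original value. */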
4603 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4605 x86_64_zero_extended_value (rtx value)
4607 switch (GET_CODE (value))
4609 case CONST_DOUBLE:
4610 if (HOST_BITS_PER_WIDE_INT == 32)
4611 return (GET_MODE (value) == VOIDmode
4612 && !CONST_DOUBLE_HIGH (value));
4613 else
4614 return 0;
4615 case CONST_INT:
4616 if (HOST_BITS_PER_WIDE_INT == 32)
4617 return INTVAL (value) >= 0;
4618 else
4619 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4620 break;
4622 /* For certain code models, the symbolic references are known to fit. */
4623 case SYMBOL_REF:
4624 /* TLS symbols are not constant. */
4625 if (tls_symbolic_operand (value, Pmode))
4626 return false;
4627 return ix86_cmodel == CM_SMALL;
4629 /* For certain code models, the code is near as well. */
4630 case LABEL_REF:
4631 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4633 /* We also may accept the offsetted memory references in certain special
4634 cases. */
4635 case CONST:
4636 if (GET_CODE (XEXP (value, 0)) == PLUS)
4638 rtx op1 = XEXP (XEXP (value, 0), 0);
4639 rtx op2 = XEXP (XEXP (value, 0), 1);
4641 if (ix86_cmodel == CM_LARGE)
4642 return 0;
4643 switch (GET_CODE (op1))
4645 case SYMBOL_REF:
4646 return 0;
4647 /* For small code model we may accept pretty large positive
4648 offsets, since one bit is available for free. Negative
4649 offsets are limited by the size of NULL pointer area
4650 specified by the ABI. */
4651 if (ix86_cmodel == CM_SMALL
4652 && GET_CODE (op2) == CONST_INT
4653 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4654 && (trunc_int_for_mode (INTVAL (op2), SImode)
4655 == INTVAL (op2)))
4656 return 1;
4657 /* ??? For the kernel, we may accept adjustment of
4658 -0x10000000, since we know that it will just convert
4659 negative address space to positive, but perhaps this
4660 is not worthwhile. */
4661 break;
4662 case LABEL_REF:
4663 /* These conditions are similar to SYMBOL_REF ones, just the
4664 constraints for code models differ. */
4665 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4666 && GET_CODE (op2) == CONST_INT
4667 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4668 && (trunc_int_for_mode (INTVAL (op2), SImode)
4669 == INTVAL (op2)))
4670 return 1;
4671 break;
4672 default:
4673 return 0;
4676 return 0;
4677 default:
4678 return 0;
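/* Illustration (assumed example): the zero-extended test accepts CONST_INTs
   in the range 0 .. 0xffffffff, e.g. 0x80000000 or 0xffffffff, but rejects
   any negative value, since the instruction clears the high 32 bits of the
   destination rather than sign-extending the immediate. */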
4682 /* Value should be nonzero if functions must have frame pointers.
4683 Zero means the frame pointer need not be set up (and parms may
4684 be accessed via the stack pointer) in functions that seem suitable. */
4687 ix86_frame_pointer_required (void)
4689 /* If we accessed previous frames, then the generated code expects
4690 to be able to access the saved ebp value in our frame. */
4691 if (cfun->machine->accesses_prev_frame)
4692 return 1;
4694 /* Several x86 os'es need a frame pointer for other reasons,
4695 usually pertaining to setjmp. */
4696 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4697 return 1;
4699 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4700 the frame pointer by default. Turn it back on now if we've not
4701 got a leaf function. */
4702 if (TARGET_OMIT_LEAF_FRAME_POINTER
4703 && (!current_function_is_leaf))
4704 return 1;
4706 if (current_function_profile)
4707 return 1;
4709 return 0;
4712 /* Record that the current function accesses previous call frames. */
4714 void
4715 ix86_setup_frame_addresses (void)
4717 cfun->machine->accesses_prev_frame = 1;
4720 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4721 # define USE_HIDDEN_LINKONCE 1
4722 #else
4723 # define USE_HIDDEN_LINKONCE 0
4724 #endif
4726 static int pic_labels_used;
4728 /* Fills in the label name that should be used for a pc thunk for
4729 the given register. */
4731 static void
4732 get_pc_thunk_name (char name[32], unsigned int regno)
4734 if (USE_HIDDEN_LINKONCE)
4735 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4736 else
4737 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
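/* For example (illustrative): with hidden/one-only support the thunk for
   %ebx is named "__i686.get_pc_thunk.bx"; otherwise an internal label is
   generated from the prefix "LPR" and the register number. */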
4741 /* This function emits the thunks used by -fpic code: each one loads its
4742 register with the return address of the caller and then returns. */
4744 void
4745 ix86_file_end (void)
4747 rtx xops[2];
4748 int regno;
4750 for (regno = 0; regno < 8; ++regno)
4752 char name[32];
4754 if (! ((pic_labels_used >> regno) & 1))
4755 continue;
4757 get_pc_thunk_name (name, regno);
4759 if (USE_HIDDEN_LINKONCE)
4761 tree decl;
4763 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4764 error_mark_node);
4765 TREE_PUBLIC (decl) = 1;
4766 TREE_STATIC (decl) = 1;
4767 DECL_ONE_ONLY (decl) = 1;
4769 (*targetm.asm_out.unique_section) (decl, 0);
4770 named_section (decl, NULL, 0);
4772 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4773 fputs ("\t.hidden\t", asm_out_file);
4774 assemble_name (asm_out_file, name);
4775 fputc ('\n', asm_out_file);
4776 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4778 else
4780 text_section ();
4781 ASM_OUTPUT_LABEL (asm_out_file, name);
4784 xops[0] = gen_rtx_REG (SImode, regno);
4785 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4786 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4787 output_asm_insn ("ret", xops);
4790 if (NEED_INDICATE_EXEC_STACK)
4791 file_end_indicate_exec_stack ();
4794 /* Emit code for the SET_GOT patterns. */
4796 const char *
4797 output_set_got (rtx dest)
4799 rtx xops[3];
4801 xops[0] = dest;
4802 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4804 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4806 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4808 if (!flag_pic)
4809 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4810 else
4811 output_asm_insn ("call\t%a2", xops);
4813 #if TARGET_MACHO
4814 /* Output the "canonical" label name ("Lxx$pb") here too. This
4815 is what will be referred to by the Mach-O PIC subsystem. */
4816 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4817 #endif
4818 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4819 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4821 if (flag_pic)
4822 output_asm_insn ("pop{l}\t%0", xops);
4824 else
4826 char name[32];
4827 get_pc_thunk_name (name, REGNO (dest));
4828 pic_labels_used |= 1 << REGNO (dest);
4830 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4831 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4832 output_asm_insn ("call\t%X2", xops);
4835 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4836 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4837 else if (!TARGET_MACHO)
4838 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4840 return "";
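/* Roughly (illustrative; register names and labels assumed): without deep
   branch prediction the PIC register is set up as

       call  1f
     1: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

   while with deep branch prediction a balanced call/ret pair is used:

       call  __i686.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx  */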
4843 /* Generate a "push" pattern for input ARG. */
4845 static rtx
4846 gen_push (rtx arg)
4848 return gen_rtx_SET (VOIDmode,
4849 gen_rtx_MEM (Pmode,
4850 gen_rtx_PRE_DEC (Pmode,
4851 stack_pointer_rtx)),
4852 arg);
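/* Illustration (assumed 32-bit target): for ARG = %eax the generated pattern
   is (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax)), i.e. an ordinary
   "pushl %eax". */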
4855 /* Return >= 0 if there is an unused call-clobbered register available
4856 for the entire function. */
4858 static unsigned int
4859 ix86_select_alt_pic_regnum (void)
4861 if (current_function_is_leaf && !current_function_profile)
4863 int i;
4864 for (i = 2; i >= 0; --i)
4865 if (!regs_ever_live[i])
4866 return i;
4869 return INVALID_REGNUM;
4872 /* Return 1 if we need to save REGNO. */
4873 static int
4874 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4876 if (pic_offset_table_rtx
4877 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4878 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4879 || current_function_profile
4880 || current_function_calls_eh_return
4881 || current_function_uses_const_pool))
4883 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4884 return 0;
4885 return 1;
4888 if (current_function_calls_eh_return && maybe_eh_return)
4890 unsigned i;
4891 for (i = 0; ; i++)
4893 unsigned test = EH_RETURN_DATA_REGNO (i);
4894 if (test == INVALID_REGNUM)
4895 break;
4896 if (test == regno)
4897 return 1;
4901 return (regs_ever_live[regno]
4902 && !call_used_regs[regno]
4903 && !fixed_regs[regno]
4904 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4907 /* Return number of registers to be saved on the stack. */
4909 static int
4910 ix86_nsaved_regs (void)
4912 int nregs = 0;
4913 int regno;
4915 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4916 if (ix86_save_reg (regno, true))
4917 nregs++;
4918 return nregs;
4921 /* Return the offset between two registers, one to be eliminated, and the other
4922 its replacement, at the start of a routine. */
4924 HOST_WIDE_INT
4925 ix86_initial_elimination_offset (int from, int to)
4927 struct ix86_frame frame;
4928 ix86_compute_frame_layout (&frame);
4930 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4931 return frame.hard_frame_pointer_offset;
4932 else if (from == FRAME_POINTER_REGNUM
4933 && to == HARD_FRAME_POINTER_REGNUM)
4934 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4935 else
4937 if (to != STACK_POINTER_REGNUM)
4938 abort ();
4939 else if (from == ARG_POINTER_REGNUM)
4940 return frame.stack_pointer_offset;
4941 else if (from != FRAME_POINTER_REGNUM)
4942 abort ();
4943 else
4944 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4948 /* Fill the ix86_frame structure describing the frame of the current function. */
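/* A rough picture of the layout computed below, from higher to lower
   addresses (illustrative; some areas may be empty):

       return address
       saved %ebp (if the frame pointer is needed)
       register save area
       va-arg save area
       padding1
       local variables
       outgoing arguments area
       padding2                   <- new stack pointer

   The adjustment at the end may fold part of this into the red zone below
   the stack pointer for x86-64 leaf functions. */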
4950 static void
4951 ix86_compute_frame_layout (struct ix86_frame *frame)
4953 HOST_WIDE_INT total_size;
4954 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4955 HOST_WIDE_INT offset;
4956 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4957 HOST_WIDE_INT size = get_frame_size ();
4959 frame->nregs = ix86_nsaved_regs ();
4960 total_size = size;
4962 /* During a reload iteration the number of registers saved can change.
4963 Recompute the value as needed. Do not recompute when the number of registers
4964 didn't change, as reload makes multiple calls to this function and does not
4965 expect the decision to change within a single iteration. */
4966 if (!optimize_size
4967 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4969 int count = frame->nregs;
4971 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4972 /* The fast prologue uses move instead of push to save registers. This
4973 is significantly longer, but it also executes faster, as modern hardware
4974 can execute the moves in parallel but cannot do so for push/pop.
4976 Be careful about choosing which prologue to emit: when the function takes
4977 many instructions to execute we may use the slow version, as well as when
4978 the function is known to be outside a hot spot (this is known only with
4979 feedback). Weight the size of the function by the number of registers
4980 to save, as it is cheap to use one or two push instructions but very
4981 slow to use many of them. */
4982 if (count)
4983 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4984 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4985 || (flag_branch_probabilities
4986 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4987 cfun->machine->use_fast_prologue_epilogue = false;
4988 else
4989 cfun->machine->use_fast_prologue_epilogue
4990 = !expensive_function_p (count);
4992 if (TARGET_PROLOGUE_USING_MOVE
4993 && cfun->machine->use_fast_prologue_epilogue)
4994 frame->save_regs_using_mov = true;
4995 else
4996 frame->save_regs_using_mov = false;
4999 /* Skip return address and saved base pointer. */
5000 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5002 frame->hard_frame_pointer_offset = offset;
5004 /* Do some sanity checking of stack_alignment_needed and
5005 preferred_alignment, since the i386 port is the only one using these
5006 features, and they may break easily. */
5008 if (size && !stack_alignment_needed)
5009 abort ();
5010 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5011 abort ();
5012 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5013 abort ();
5014 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5015 abort ();
5017 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5018 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5020 /* Register save area */
5021 offset += frame->nregs * UNITS_PER_WORD;
5023 /* Va-arg area */
5024 if (ix86_save_varrargs_registers)
5026 offset += X86_64_VARARGS_SIZE;
5027 frame->va_arg_size = X86_64_VARARGS_SIZE;
5029 else
5030 frame->va_arg_size = 0;
5032 /* Align start of frame for local function. */
5033 frame->padding1 = ((offset + stack_alignment_needed - 1)
5034 & -stack_alignment_needed) - offset;
5036 offset += frame->padding1;
5038 /* Frame pointer points here. */
5039 frame->frame_pointer_offset = offset;
5041 offset += size;
5043 /* Add the outgoing arguments area. It can be skipped if we eliminated
5044 all the function calls as dead code.
5045 Skipping is, however, impossible when the function calls alloca: the
5046 alloca expander assumes that the last current_function_outgoing_args_size
5047 bytes of the stack frame are unused. */
5048 if (ACCUMULATE_OUTGOING_ARGS
5049 && (!current_function_is_leaf || current_function_calls_alloca))
5051 offset += current_function_outgoing_args_size;
5052 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5054 else
5055 frame->outgoing_arguments_size = 0;
5057 /* Align stack boundary. Only needed if we're calling another function
5058 or using alloca. */
5059 if (!current_function_is_leaf || current_function_calls_alloca)
5060 frame->padding2 = ((offset + preferred_alignment - 1)
5061 & -preferred_alignment) - offset;
5062 else
5063 frame->padding2 = 0;
5065 offset += frame->padding2;
5067 /* We've reached end of stack frame. */
5068 frame->stack_pointer_offset = offset;
5070 /* Size prologue needs to allocate. */
5071 frame->to_allocate =
5072 (size + frame->padding1 + frame->padding2
5073 + frame->outgoing_arguments_size + frame->va_arg_size);
5075 if ((!frame->to_allocate && frame->nregs <= 1)
5076 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5077 frame->save_regs_using_mov = false;
5079 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5080 && current_function_is_leaf)
5082 frame->red_zone_size = frame->to_allocate;
5083 if (frame->save_regs_using_mov)
5084 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5085 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5086 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5088 else
5089 frame->red_zone_size = 0;
5090 frame->to_allocate -= frame->red_zone_size;
5091 frame->stack_pointer_offset -= frame->red_zone_size;
5092 #if 0
5093 fprintf (stderr, "nregs: %i\n", frame->nregs);
5094 fprintf (stderr, "size: %i\n", size);
5095 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5096 fprintf (stderr, "padding1: %i\n", frame->padding1);
5097 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5098 fprintf (stderr, "padding2: %i\n", frame->padding2);
5099 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5100 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5101 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5102 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5103 frame->hard_frame_pointer_offset);
5104 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5105 #endif
5108 /* Emit code to save registers in the prologue. */
5110 static void
5111 ix86_emit_save_regs (void)
5113 int regno;
5114 rtx insn;
5116 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5117 if (ix86_save_reg (regno, true))
5119 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5120 RTX_FRAME_RELATED_P (insn) = 1;
5124 /* Emit code to save registers using MOV insns. The first register
5125 is stored at POINTER + OFFSET. */
5126 static void
5127 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5129 int regno;
5130 rtx insn;
5132 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5133 if (ix86_save_reg (regno, true))
5135 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5136 Pmode, offset),
5137 gen_rtx_REG (Pmode, regno));
5138 RTX_FRAME_RELATED_P (insn) = 1;
5139 offset += UNITS_PER_WORD;
5143 /* Expand a prologue or epilogue stack adjustment.
5144 The pattern exists to put a dependency on all ebp-based memory accesses.
5145 STYLE should be negative if the instructions should be marked as frame
5146 related, zero if the %r11 register is live and cannot be freely used,
5147 and positive otherwise. */
5149 static void
5150 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5152 rtx insn;
5154 if (! TARGET_64BIT)
5155 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5156 else if (x86_64_immediate_operand (offset, DImode))
5157 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5158 else
5160 rtx r11;
5161 /* r11 is used by indirect sibcall return as well, set before the
5162 epilogue and used after the epilogue. ATM indirect sibcall
5163 shouldn't be used together with huge frame sizes in one
5164 function because of the frame_size check in sibcall.c. */
5165 if (style == 0)
5166 abort ();
5167 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5168 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5169 if (style < 0)
5170 RTX_FRAME_RELATED_P (insn) = 1;
5171 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5172 offset));
5174 if (style < 0)
5175 RTX_FRAME_RELATED_P (insn) = 1;
5178 /* Expand the prologue into a bunch of separate insns. */
5180 void
5181 ix86_expand_prologue (void)
5183 rtx insn;
5184 bool pic_reg_used;
5185 struct ix86_frame frame;
5186 HOST_WIDE_INT allocate;
5188 ix86_compute_frame_layout (&frame);
5190 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5191 slower on all targets. Also sdb doesn't like it. */
5193 if (frame_pointer_needed)
5195 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5196 RTX_FRAME_RELATED_P (insn) = 1;
5198 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5199 RTX_FRAME_RELATED_P (insn) = 1;
5202 allocate = frame.to_allocate;
5204 if (!frame.save_regs_using_mov)
5205 ix86_emit_save_regs ();
5206 else
5207 allocate += frame.nregs * UNITS_PER_WORD;
5209 /* When using the red zone we may start saving registers before allocating
5210 the stack frame, saving one cycle of the prologue. */
5211 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5212 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5213 : stack_pointer_rtx,
5214 -frame.nregs * UNITS_PER_WORD);
5216 if (allocate == 0)
5218 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5219 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5220 GEN_INT (-allocate), -1);
5221 else
5223 /* Only valid for Win32. */
5224 rtx eax = gen_rtx_REG (SImode, 0);
5225 bool eax_live = ix86_eax_live_at_start_p ();
5227 if (TARGET_64BIT)
5228 abort ();
5230 if (eax_live)
5232 emit_insn (gen_push (eax));
5233 allocate -= 4;
5236 insn = emit_move_insn (eax, GEN_INT (allocate));
5237 RTX_FRAME_RELATED_P (insn) = 1;
5239 insn = emit_insn (gen_allocate_stack_worker (eax));
5240 RTX_FRAME_RELATED_P (insn) = 1;
5242 if (eax_live)
5244 rtx t = plus_constant (stack_pointer_rtx, allocate);
5245 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5249 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5251 if (!frame_pointer_needed || !frame.to_allocate)
5252 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5253 else
5254 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5255 -frame.nregs * UNITS_PER_WORD);
5258 pic_reg_used = false;
5259 if (pic_offset_table_rtx
5260 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5261 || current_function_profile))
5263 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5265 if (alt_pic_reg_used != INVALID_REGNUM)
5266 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5268 pic_reg_used = true;
5271 if (pic_reg_used)
5273 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5275 /* Even with accurate pre-reload life analysis, we can wind up
5276 deleting all references to the pic register after reload.
5277 Consider if cross-jumping unifies two sides of a branch
5278 controlled by a comparison vs the only read from a global.
5279 In which case, allow the set_got to be deleted, though we're
5280 too late to do anything about the ebx save in the prologue. */
5281 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5284 /* Prevent function calls from being scheduled before the call to mcount.
5285 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
5286 if (current_function_profile)
5287 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
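/* A typical instance of the prologue expanded above (illustrative: 32-bit,
   frame pointer needed, registers saved with pushes):

       pushl %ebp
       movl  %esp, %ebp
       pushl %ebx
       subl  $N, %esp

   where N is frame.to_allocate; with the mov-based saving strategy the
   pushes are replaced by movs into the already allocated frame. */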
5290 /* Emit code to restore saved registers using MOV insns. First register
5291 is restored from POINTER + OFFSET. */
5292 static void
5293 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5294 int maybe_eh_return)
5296 int regno;
5297 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5299 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5300 if (ix86_save_reg (regno, maybe_eh_return))
5302 /* Ensure that adjust_address won't be forced to produce pointer
5303 out of range allowed by x86-64 instruction set. */
5304 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5306 rtx r11;
5308 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5309 emit_move_insn (r11, GEN_INT (offset));
5310 emit_insn (gen_adddi3 (r11, r11, pointer));
5311 base_address = gen_rtx_MEM (Pmode, r11);
5312 offset = 0;
5314 emit_move_insn (gen_rtx_REG (Pmode, regno),
5315 adjust_address (base_address, Pmode, offset));
5316 offset += UNITS_PER_WORD;
5320 /* Restore function stack, frame, and registers. */
5322 void
5323 ix86_expand_epilogue (int style)
5325 int regno;
5326 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5327 struct ix86_frame frame;
5328 HOST_WIDE_INT offset;
5330 ix86_compute_frame_layout (&frame);
5332 /* Calculate start of saved registers relative to ebp. Special care
5333 must be taken for the normal return case of a function using
5334 eh_return: the eax and edx registers are marked as saved, but not
5335 restored along this path. */
5336 offset = frame.nregs;
5337 if (current_function_calls_eh_return && style != 2)
5338 offset -= 2;
5339 offset *= -UNITS_PER_WORD;
5341 /* If we're only restoring one register and sp is not valid, then
5342 use a move instruction to restore the register, since it's
5343 less work than reloading sp and popping the register.
5345 The default code results in a stack adjustment using an add/lea instruction,
5346 while this code results in a LEAVE instruction (or discrete equivalent),
5347 so it is profitable in some other cases as well, especially when there
5348 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5349 is set and there is exactly one register to pop. This heuristic may need
5350 some tuning in the future. */
5351 if ((!sp_valid && frame.nregs <= 1)
5352 || (TARGET_EPILOGUE_USING_MOVE
5353 && cfun->machine->use_fast_prologue_epilogue
5354 && (frame.nregs > 1 || frame.to_allocate))
5355 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5356 || (frame_pointer_needed && TARGET_USE_LEAVE
5357 && cfun->machine->use_fast_prologue_epilogue
5358 && frame.nregs == 1)
5359 || current_function_calls_eh_return)
5361 /* Restore registers. We can use ebp or esp to address the memory
5362 locations. If both are available, default to ebp, since offsets
5363 are known to be small. Only exception is esp pointing directly to the
5364 end of block of saved registers, where we may simplify addressing
5365 mode. */
5367 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5368 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5369 frame.to_allocate, style == 2);
5370 else
5371 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5372 offset, style == 2);
5374 /* eh_return epilogues need %ecx added to the stack pointer. */
5375 if (style == 2)
5377 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5379 if (frame_pointer_needed)
5381 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5382 tmp = plus_constant (tmp, UNITS_PER_WORD);
5383 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5385 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5386 emit_move_insn (hard_frame_pointer_rtx, tmp);
5388 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5389 const0_rtx, style);
5391 else
5393 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5394 tmp = plus_constant (tmp, (frame.to_allocate
5395 + frame.nregs * UNITS_PER_WORD));
5396 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5399 else if (!frame_pointer_needed)
5400 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5401 GEN_INT (frame.to_allocate
5402 + frame.nregs * UNITS_PER_WORD),
5403 style);
5404 /* If not an i386, mov & pop is faster than "leave". */
5405 else if (TARGET_USE_LEAVE || optimize_size
5406 || !cfun->machine->use_fast_prologue_epilogue)
5407 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5408 else
5410 pro_epilogue_adjust_stack (stack_pointer_rtx,
5411 hard_frame_pointer_rtx,
5412 const0_rtx, style);
5413 if (TARGET_64BIT)
5414 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5415 else
5416 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5419 else
5421 /* First step is to deallocate the stack frame so that we can
5422 pop the registers. */
5423 if (!sp_valid)
5425 if (!frame_pointer_needed)
5426 abort ();
5427 pro_epilogue_adjust_stack (stack_pointer_rtx,
5428 hard_frame_pointer_rtx,
5429 GEN_INT (offset), style);
5431 else if (frame.to_allocate)
5432 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5433 GEN_INT (frame.to_allocate), style);
5435 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5436 if (ix86_save_reg (regno, false))
5438 if (TARGET_64BIT)
5439 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5440 else
5441 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5443 if (frame_pointer_needed)
5445 /* Leave results in shorter dependency chains on CPUs that are
5446 able to grok it fast. */
5447 if (TARGET_USE_LEAVE)
5448 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5449 else if (TARGET_64BIT)
5450 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5451 else
5452 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5456 /* Sibcall epilogues don't want a return instruction. */
5457 if (style == 0)
5458 return;
5460 if (current_function_pops_args && current_function_args_size)
5462 rtx popc = GEN_INT (current_function_pops_args);
5464 /* i386 can only pop 64K bytes. If asked to pop more, pop
5465 return address, do explicit add, and jump indirectly to the
5466 caller. */
5468 if (current_function_pops_args >= 65536)
5470 rtx ecx = gen_rtx_REG (SImode, 2);
5472 /* There is no "pascal" calling convention in 64bit ABI. */
5473 if (TARGET_64BIT)
5474 abort ();
5476 emit_insn (gen_popsi1 (ecx));
5477 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5478 emit_jump_insn (gen_return_indirect_internal (ecx));
5480 else
5481 emit_jump_insn (gen_return_pop_internal (popc));
5483 else
5484 emit_jump_insn (gen_return_internal ());
5487 /* Reset from the function's potential modifications. */
5489 static void
5490 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5491 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5493 if (pic_offset_table_rtx)
5494 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5497 /* Extract the parts of an RTL expression that is a valid memory address
5498 for an instruction. Return 0 if the structure of the address is
5499 grossly off. Return -1 if the address contains ASHIFT, so it is not
5500 strictly valid, but still used for computing length of lea instruction. */
5502 static int
5503 ix86_decompose_address (rtx addr, struct ix86_address *out)
5505 rtx base = NULL_RTX;
5506 rtx index = NULL_RTX;
5507 rtx disp = NULL_RTX;
5508 HOST_WIDE_INT scale = 1;
5509 rtx scale_rtx = NULL_RTX;
5510 int retval = 1;
5511 enum ix86_address_seg seg = SEG_DEFAULT;
5513 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5514 base = addr;
5515 else if (GET_CODE (addr) == PLUS)
5517 rtx addends[4], op;
5518 int n = 0, i;
5520 op = addr;
5523 if (n >= 4)
5524 return 0;
5525 addends[n++] = XEXP (op, 1);
5526 op = XEXP (op, 0);
5528 while (GET_CODE (op) == PLUS);
5529 if (n >= 4)
5530 return 0;
5531 addends[n] = op;
5533 for (i = n; i >= 0; --i)
5535 op = addends[i];
5536 switch (GET_CODE (op))
5538 case MULT:
5539 if (index)
5540 return 0;
5541 index = XEXP (op, 0);
5542 scale_rtx = XEXP (op, 1);
5543 break;
5545 case UNSPEC:
5546 if (XINT (op, 1) == UNSPEC_TP
5547 && TARGET_TLS_DIRECT_SEG_REFS
5548 && seg == SEG_DEFAULT)
5549 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5550 else
5551 return 0;
5552 break;
5554 case REG:
5555 case SUBREG:
5556 if (!base)
5557 base = op;
5558 else if (!index)
5559 index = op;
5560 else
5561 return 0;
5562 break;
5564 case CONST:
5565 case CONST_INT:
5566 case SYMBOL_REF:
5567 case LABEL_REF:
5568 if (disp)
5569 return 0;
5570 disp = op;
5571 break;
5573 default:
5574 return 0;
5578 else if (GET_CODE (addr) == MULT)
5580 index = XEXP (addr, 0); /* index*scale */
5581 scale_rtx = XEXP (addr, 1);
5583 else if (GET_CODE (addr) == ASHIFT)
5585 rtx tmp;
5587 /* We're called for lea too, which implements ashift on occasion. */
5588 index = XEXP (addr, 0);
5589 tmp = XEXP (addr, 1);
5590 if (GET_CODE (tmp) != CONST_INT)
5591 return 0;
5592 scale = INTVAL (tmp);
5593 if ((unsigned HOST_WIDE_INT) scale > 3)
5594 return 0;
5595 scale = 1 << scale;
5596 retval = -1;
5598 else
5599 disp = addr; /* displacement */
5601 /* Extract the integral value of scale. */
5602 if (scale_rtx)
5604 if (GET_CODE (scale_rtx) != CONST_INT)
5605 return 0;
5606 scale = INTVAL (scale_rtx);
5609 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5610 if (base && index && scale == 1
5611 && (index == arg_pointer_rtx
5612 || index == frame_pointer_rtx
5613 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5615 rtx tmp = base;
5616 base = index;
5617 index = tmp;
5620 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5621 if ((base == hard_frame_pointer_rtx
5622 || base == frame_pointer_rtx
5623 || base == arg_pointer_rtx) && !disp)
5624 disp = const0_rtx;
5626 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5627 Avoid this by transforming to [%esi+0]. */
5628 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5629 && base && !index && !disp
5630 && REG_P (base)
5631 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5632 disp = const0_rtx;
5634 /* Special case: encode reg+reg instead of reg*2. */
5635 if (!base && index && scale && scale == 2)
5636 base = index, scale = 1;
5638 /* Special case: scaling cannot be encoded without base or displacement. */
5639 if (!base && !disp && index && scale != 1)
5640 disp = const0_rtx;
5642 out->base = base;
5643 out->index = index;
5644 out->disp = disp;
5645 out->scale = scale;
5646 out->seg = seg;
5648 return retval;
5651 /* Return cost of the memory address x.
5652 For i386, it is better to use a complex address than let gcc copy
5653 the address into a reg and make a new pseudo. But not if the address
5654 requires two regs - that would mean more pseudos with longer
5655 lifetimes. */
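 /* A worked example (pseudo1 and pseudo2 are placeholder pseudo
    registers, considered before reload): for
    (plus (reg pseudo1) (const_int 8)) the cost is 1 - 1 + 1 = 1,
    while for (plus (reg pseudo1) (reg pseudo2)) it is 1 + 1 + 1 = 3,
    so addresses needing two registers are penalized as described
    above.  */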
5656 static int
5657 ix86_address_cost (rtx x)
5659 struct ix86_address parts;
5660 int cost = 1;
5662 if (!ix86_decompose_address (x, &parts))
5663 abort ();
5665 /* More complex memory references are better. */
5666 if (parts.disp && parts.disp != const0_rtx)
5667 cost--;
5668 if (parts.seg != SEG_DEFAULT)
5669 cost--;
5671 /* Attempt to minimize number of registers in the address. */
5672 if ((parts.base
5673 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5674 || (parts.index
5675 && (!REG_P (parts.index)
5676 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5677 cost++;
5679 if (parts.base
5680 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5681 && parts.index
5682 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5683 && parts.base != parts.index)
5684 cost++;
5686 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5687 since its predecode logic can't detect the length of instructions
5688 and decoding degenerates to the vector decoder. Increase the cost of such
5689 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5690 to split such addresses or even to refuse them entirely.
5692 The following addressing modes are affected:
5693 [base+scale*index]
5694 [scale*index+disp]
5695 [base+index]
5697 The first and last case may be avoidable by explicitly coding the zero into
5698 the memory address, but I don't have an AMD-K6 machine handy to check this
5699 theory. */
5701 if (TARGET_K6
5702 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5703 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5704 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5705 cost += 10;
5707 return cost;
5710 /* If X is a machine specific address (i.e. a symbol or label being
5711 referenced as a displacement from the GOT implemented using an
5712 UNSPEC), then return the base term. Otherwise return X. */
5715 ix86_find_base_term (rtx x)
5717 rtx term;
5719 if (TARGET_64BIT)
5721 if (GET_CODE (x) != CONST)
5722 return x;
5723 term = XEXP (x, 0);
5724 if (GET_CODE (term) == PLUS
5725 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5726 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5727 term = XEXP (term, 0);
5728 if (GET_CODE (term) != UNSPEC
5729 || XINT (term, 1) != UNSPEC_GOTPCREL)
5730 return x;
5732 term = XVECEXP (term, 0, 0);
5734 if (GET_CODE (term) != SYMBOL_REF
5735 && GET_CODE (term) != LABEL_REF)
5736 return x;
5738 return term;
5741 term = ix86_delegitimize_address (x);
5743 if (GET_CODE (term) != SYMBOL_REF
5744 && GET_CODE (term) != LABEL_REF)
5745 return x;
5747 return term;
5750 /* Determine if a given RTX is a valid constant. We already know this
5751 satisfies CONSTANT_P. */
5753 bool
5754 legitimate_constant_p (rtx x)
5756 rtx inner;
5758 switch (GET_CODE (x))
5760 case SYMBOL_REF:
5761 /* TLS symbols are not constant. */
5762 if (tls_symbolic_operand (x, Pmode))
5763 return false;
5764 break;
5766 case CONST:
5767 inner = XEXP (x, 0);
5769 /* Offsets of TLS symbols are never valid.
5770 Discourage CSE from creating them. */
5771 if (GET_CODE (inner) == PLUS
5772 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5773 return false;
5775 if (GET_CODE (inner) == PLUS)
5777 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5778 return false;
5779 inner = XEXP (inner, 0);
5782 /* Only some unspecs are valid as "constants". */
5783 if (GET_CODE (inner) == UNSPEC)
5784 switch (XINT (inner, 1))
5786 case UNSPEC_TPOFF:
5787 case UNSPEC_NTPOFF:
5788 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5789 case UNSPEC_DTPOFF:
5790 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5791 default:
5792 return false;
5794 break;
5796 default:
5797 break;
5800 /* Otherwise we handle everything else in the move patterns. */
5801 return true;
5804 /* Determine if it's legal to put X into the constant pool. This
5805 is not possible for the address of thread-local symbols, which
5806 is checked above. */
5808 static bool
5809 ix86_cannot_force_const_mem (rtx x)
5811 return !legitimate_constant_p (x);
5814 /* Determine if a given RTX is a valid constant address. */
5816 bool
5817 constant_address_p (rtx x)
5819 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5822 /* Nonzero if the constant value X is a legitimate general operand
5823 when generating PIC code. It is given that flag_pic is on and
5824 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5826 bool
5827 legitimate_pic_operand_p (rtx x)
5829 rtx inner;
5831 switch (GET_CODE (x))
5833 case CONST:
5834 inner = XEXP (x, 0);
5836 /* Only some unspecs are valid as "constants". */
5837 if (GET_CODE (inner) == UNSPEC)
5838 switch (XINT (inner, 1))
5840 case UNSPEC_TPOFF:
5841 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5842 default:
5843 return false;
5845 /* FALLTHRU */
5847 case SYMBOL_REF:
5848 case LABEL_REF:
5849 return legitimate_pic_address_disp_p (x);
5851 default:
5852 return true;
5856 /* Determine if a given CONST RTX is a valid memory displacement
5857 in PIC mode. */
5860 legitimate_pic_address_disp_p (rtx disp)
5862 bool saw_plus;
5864 /* In 64bit mode we can allow direct addresses of symbols and labels
5865 when they are not dynamic symbols. */
5866 if (TARGET_64BIT)
5868 /* TLS references should always be enclosed in UNSPEC. */
5869 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5870 return 0;
5871 if (GET_CODE (disp) == SYMBOL_REF
5872 && ix86_cmodel == CM_SMALL_PIC
5873 && SYMBOL_REF_LOCAL_P (disp))
5874 return 1;
5875 if (GET_CODE (disp) == LABEL_REF)
5876 return 1;
5877 if (GET_CODE (disp) == CONST
5878 && GET_CODE (XEXP (disp, 0)) == PLUS)
5880 rtx op0 = XEXP (XEXP (disp, 0), 0);
5881 rtx op1 = XEXP (XEXP (disp, 0), 1);
5883 /* TLS references should always be enclosed in UNSPEC. */
5884 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5885 return 0;
5886 if (((GET_CODE (op0) == SYMBOL_REF
5887 && ix86_cmodel == CM_SMALL_PIC
5888 && SYMBOL_REF_LOCAL_P (op0))
5889 || GET_CODE (op0) == LABEL_REF)
5890 && GET_CODE (op1) == CONST_INT
5891 && INTVAL (op1) < 16*1024*1024
5892 && INTVAL (op1) >= -16*1024*1024)
5893 return 1;
5896 if (GET_CODE (disp) != CONST)
5897 return 0;
5898 disp = XEXP (disp, 0);
5900 if (TARGET_64BIT)
5902 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
5903 of GOT table references. We should not need these anyway. */
5904 if (GET_CODE (disp) != UNSPEC
5905 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5906 return 0;
5908 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5909 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5910 return 0;
5911 return 1;
5914 saw_plus = false;
5915 if (GET_CODE (disp) == PLUS)
5917 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5918 return 0;
5919 disp = XEXP (disp, 0);
5920 saw_plus = true;
5923 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5924 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5926 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5927 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5928 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5930 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5931 if (! strcmp (sym_name, "<pic base>"))
5932 return 1;
5936 if (GET_CODE (disp) != UNSPEC)
5937 return 0;
5939 switch (XINT (disp, 1))
5941 case UNSPEC_GOT:
5942 if (saw_plus)
5943 return false;
5944 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5945 case UNSPEC_GOTOFF:
5946 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5947 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5948 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5949 return false;
5950 case UNSPEC_GOTTPOFF:
5951 case UNSPEC_GOTNTPOFF:
5952 case UNSPEC_INDNTPOFF:
5953 if (saw_plus)
5954 return false;
5955 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5956 case UNSPEC_NTPOFF:
5957 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5958 case UNSPEC_DTPOFF:
5959 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5962 return 0;
5965 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5966 memory address for an instruction. The MODE argument is the machine mode
5967 for the MEM expression that wants to use this address.
5969 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5970 convert common non-canonical forms to canonical form so that they will
5971 be recognized. */
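 /* For example (illustrative hard registers):
    (plus (reg %ebx) (mult (reg %ecx) (const_int 4))) is accepted,
    since the base and index are valid Pmode registers and 4 is an
    allowed scale factor, whereas a scale of 3, or %esp used as the
    index register, is rejected by the checks below.  */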
5974 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5976 struct ix86_address parts;
5977 rtx base, index, disp;
5978 HOST_WIDE_INT scale;
5979 const char *reason = NULL;
5980 rtx reason_rtx = NULL_RTX;
5982 if (TARGET_DEBUG_ADDR)
5984 fprintf (stderr,
5985 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5986 GET_MODE_NAME (mode), strict);
5987 debug_rtx (addr);
5990 if (ix86_decompose_address (addr, &parts) <= 0)
5992 reason = "decomposition failed";
5993 goto report_error;
5996 base = parts.base;
5997 index = parts.index;
5998 disp = parts.disp;
5999 scale = parts.scale;
6001 /* Validate base register.
6003 Don't allow SUBREGs here; it can lead to spill failures when the base
6004 is one word out of a two-word structure, which is represented internally
6005 as a DImode int. */
6007 if (base)
6009 reason_rtx = base;
6011 if (GET_CODE (base) != REG)
6013 reason = "base is not a register";
6014 goto report_error;
6017 if (GET_MODE (base) != Pmode)
6019 reason = "base is not in Pmode";
6020 goto report_error;
6023 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6024 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6026 reason = "base is not valid";
6027 goto report_error;
6031 /* Validate index register.
6033 Don't allow SUBREGs here; it can lead to spill failures when the index
6034 is one word out of a two-word structure, which is represented internally
6035 as a DImode int. */
6037 if (index)
6039 reason_rtx = index;
6041 if (GET_CODE (index) != REG)
6043 reason = "index is not a register";
6044 goto report_error;
6047 if (GET_MODE (index) != Pmode)
6049 reason = "index is not in Pmode";
6050 goto report_error;
6053 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6054 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6056 reason = "index is not valid";
6057 goto report_error;
6061 /* Validate scale factor. */
6062 if (scale != 1)
6064 reason_rtx = GEN_INT (scale);
6065 if (!index)
6067 reason = "scale without index";
6068 goto report_error;
6071 if (scale != 2 && scale != 4 && scale != 8)
6073 reason = "scale is not a valid multiplier";
6074 goto report_error;
6078 /* Validate displacement. */
6079 if (disp)
6081 reason_rtx = disp;
6083 if (GET_CODE (disp) == CONST
6084 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6085 switch (XINT (XEXP (disp, 0), 1))
6087 case UNSPEC_GOT:
6088 case UNSPEC_GOTOFF:
6089 case UNSPEC_GOTPCREL:
6090 if (!flag_pic)
6091 abort ();
6092 goto is_legitimate_pic;
6094 case UNSPEC_GOTTPOFF:
6095 case UNSPEC_GOTNTPOFF:
6096 case UNSPEC_INDNTPOFF:
6097 case UNSPEC_NTPOFF:
6098 case UNSPEC_DTPOFF:
6099 break;
6101 default:
6102 reason = "invalid address unspec";
6103 goto report_error;
6106 else if (flag_pic && (SYMBOLIC_CONST (disp)
6107 #if TARGET_MACHO
6108 && !machopic_operand_p (disp)
6109 #endif
6112 is_legitimate_pic:
6113 if (TARGET_64BIT && (index || base))
6115 /* foo@dtpoff(%rX) is ok. */
6116 if (GET_CODE (disp) != CONST
6117 || GET_CODE (XEXP (disp, 0)) != PLUS
6118 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6119 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6120 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6121 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6123 reason = "non-constant pic memory reference";
6124 goto report_error;
6127 else if (! legitimate_pic_address_disp_p (disp))
6129 reason = "displacement is an invalid pic construct";
6130 goto report_error;
6133 /* This code used to verify that a symbolic pic displacement
6134 includes the pic_offset_table_rtx register.
6136 While this is a good idea, unfortunately these constructs may
6137 be created by the "adds using lea" optimization for incorrect
6138 code like:
6140 int a;
6141 int foo(int i)
6143 return *(&a+i);
6146 This code is nonsensical, but results in addressing the
6147 GOT table with pic_offset_table_rtx as the base. We can't
6148 just reject it easily, since it gets matched by the
6149 "addsi3" pattern, which later gets split to lea in the
6150 case where the output register differs from the input. While this
6151 could be handled by a separate addsi pattern for this case
6152 that never results in lea, disabling this test seems to be
6153 the easier and correct fix for the crash. */
6155 else if (GET_CODE (disp) != LABEL_REF
6156 && GET_CODE (disp) != CONST_INT
6157 && (GET_CODE (disp) != CONST
6158 || !legitimate_constant_p (disp))
6159 && (GET_CODE (disp) != SYMBOL_REF
6160 || !legitimate_constant_p (disp)))
6162 reason = "displacement is not constant";
6163 goto report_error;
6165 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6167 reason = "displacement is out of range";
6168 goto report_error;
6172 /* Everything looks valid. */
6173 if (TARGET_DEBUG_ADDR)
6174 fprintf (stderr, "Success.\n");
6175 return TRUE;
6177 report_error:
6178 if (TARGET_DEBUG_ADDR)
6180 fprintf (stderr, "Error: %s\n", reason);
6181 debug_rtx (reason_rtx);
6183 return FALSE;
6186 /* Return a unique alias set for the GOT. */
6188 static HOST_WIDE_INT
6189 ix86_GOT_alias_set (void)
6191 static HOST_WIDE_INT set = -1;
6192 if (set == -1)
6193 set = new_alias_set ();
6194 return set;
6197 /* Return a legitimate reference for ORIG (an address) using the
6198 register REG. If REG is 0, a new pseudo is generated.
6200 There are two types of references that must be handled:
6202 1. Global data references must load the address from the GOT, via
6203 the PIC reg. An insn is emitted to do this load, and the reg is
6204 returned.
6206 2. Static data references, constant pool addresses, and code labels
6207 compute the address as an offset from the GOT, whose base is in
6208 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6209 differentiate them from global data objects. The returned
6210 address is the PIC reg + an unspec constant.
6212 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6213 reg also appears in the address. */
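 /* For example (sym is a placeholder SYMBOL_REF): a local symbol is
    rewritten as

        (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))

    while a global symbol becomes a load from the GOT,

        (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))

    and on 64-bit targets a RIP-relative GOT load,

        (mem (const (unspec [sym] UNSPEC_GOTPCREL)))

    is used instead, as emitted by the code below.  */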
6216 legitimize_pic_address (rtx orig, rtx reg)
6218 rtx addr = orig;
6219 rtx new = orig;
6220 rtx base;
6222 #if TARGET_MACHO
6223 if (reg == 0)
6224 reg = gen_reg_rtx (Pmode);
6225 /* Use the generic Mach-O PIC machinery. */
6226 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6227 #endif
6229 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6230 new = addr;
6231 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6233 /* This symbol may be referenced via a displacement from the PIC
6234 base address (@GOTOFF). */
6236 if (reload_in_progress)
6237 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6238 if (GET_CODE (addr) == CONST)
6239 addr = XEXP (addr, 0);
6240 if (GET_CODE (addr) == PLUS)
6242 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6243 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6245 else
6246 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6247 new = gen_rtx_CONST (Pmode, new);
6248 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6250 if (reg != 0)
6252 emit_move_insn (reg, new);
6253 new = reg;
6256 else if (GET_CODE (addr) == SYMBOL_REF)
6258 if (TARGET_64BIT)
6260 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6261 new = gen_rtx_CONST (Pmode, new);
6262 new = gen_rtx_MEM (Pmode, new);
6263 RTX_UNCHANGING_P (new) = 1;
6264 set_mem_alias_set (new, ix86_GOT_alias_set ());
6266 if (reg == 0)
6267 reg = gen_reg_rtx (Pmode);
6268 /* Use gen_movsi directly, otherwise the address is loaded
6269 into a register for CSE. We don't want to CSE these addresses;
6270 instead we CSE addresses from the GOT table, so skip this. */
6271 emit_insn (gen_movsi (reg, new));
6272 new = reg;
6274 else
6276 /* This symbol must be referenced via a load from the
6277 Global Offset Table (@GOT). */
6279 if (reload_in_progress)
6280 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6281 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6282 new = gen_rtx_CONST (Pmode, new);
6283 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6284 new = gen_rtx_MEM (Pmode, new);
6285 RTX_UNCHANGING_P (new) = 1;
6286 set_mem_alias_set (new, ix86_GOT_alias_set ());
6288 if (reg == 0)
6289 reg = gen_reg_rtx (Pmode);
6290 emit_move_insn (reg, new);
6291 new = reg;
6294 else
6296 if (GET_CODE (addr) == CONST)
6298 addr = XEXP (addr, 0);
6300 /* We must match stuff we generated before. Assume the only
6301 unspecs that can get here are ours. Not that we could do
6302 anything with them anyway.... */
6303 if (GET_CODE (addr) == UNSPEC
6304 || (GET_CODE (addr) == PLUS
6305 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6306 return orig;
6307 if (GET_CODE (addr) != PLUS)
6308 abort ();
6310 if (GET_CODE (addr) == PLUS)
6312 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6314 /* Check first to see if this is a constant offset from a @GOTOFF
6315 symbol reference. */
6316 if (local_symbolic_operand (op0, Pmode)
6317 && GET_CODE (op1) == CONST_INT)
6319 if (!TARGET_64BIT)
6321 if (reload_in_progress)
6322 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6323 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6324 UNSPEC_GOTOFF);
6325 new = gen_rtx_PLUS (Pmode, new, op1);
6326 new = gen_rtx_CONST (Pmode, new);
6327 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6329 if (reg != 0)
6331 emit_move_insn (reg, new);
6332 new = reg;
6335 else
6337 if (INTVAL (op1) < -16*1024*1024
6338 || INTVAL (op1) >= 16*1024*1024)
6339 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6342 else
6344 base = legitimize_pic_address (XEXP (addr, 0), reg);
6345 new = legitimize_pic_address (XEXP (addr, 1),
6346 base == reg ? NULL_RTX : reg);
6348 if (GET_CODE (new) == CONST_INT)
6349 new = plus_constant (base, INTVAL (new));
6350 else
6352 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6354 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6355 new = XEXP (new, 1);
6357 new = gen_rtx_PLUS (Pmode, base, new);
6362 return new;
6365 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6367 static rtx
6368 get_thread_pointer (int to_reg)
6370 rtx tp, reg, insn;
6372 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6373 if (!to_reg)
6374 return tp;
6376 reg = gen_reg_rtx (Pmode);
6377 insn = gen_rtx_SET (VOIDmode, reg, tp);
6378 insn = emit_insn (insn);
6380 return reg;
6383 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6384 false if we expect this to be used for a memory address and true if
6385 we expect to load the address into a register. */
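 /* A rough illustration (the variable name is a placeholder): for

        static __thread int x;

    accessed under the local-exec model with GNU TLS, the address is
    formed as thread_pointer + x@NTPOFF (see UNSPEC_NTPOFF below),
    while the initial-exec model first loads the offset from the GOT
    (UNSPEC_GOTNTPOFF / UNSPEC_GOTTPOFF / UNSPEC_INDNTPOFF) and then
    combines it with the thread pointer.  */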
6387 static rtx
6388 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6390 rtx dest, base, off, pic;
6391 int type;
6393 switch (model)
6395 case TLS_MODEL_GLOBAL_DYNAMIC:
6396 dest = gen_reg_rtx (Pmode);
6397 if (TARGET_64BIT)
6399 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6401 start_sequence ();
6402 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6403 insns = get_insns ();
6404 end_sequence ();
6406 emit_libcall_block (insns, dest, rax, x);
6408 else
6409 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6410 break;
6412 case TLS_MODEL_LOCAL_DYNAMIC:
6413 base = gen_reg_rtx (Pmode);
6414 if (TARGET_64BIT)
6416 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6418 start_sequence ();
6419 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6420 insns = get_insns ();
6421 end_sequence ();
6423 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6424 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6425 emit_libcall_block (insns, base, rax, note);
6427 else
6428 emit_insn (gen_tls_local_dynamic_base_32 (base));
6430 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6431 off = gen_rtx_CONST (Pmode, off);
6433 return gen_rtx_PLUS (Pmode, base, off);
6435 case TLS_MODEL_INITIAL_EXEC:
6436 if (TARGET_64BIT)
6438 pic = NULL;
6439 type = UNSPEC_GOTNTPOFF;
6441 else if (flag_pic)
6443 if (reload_in_progress)
6444 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6445 pic = pic_offset_table_rtx;
6446 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6448 else if (!TARGET_GNU_TLS)
6450 pic = gen_reg_rtx (Pmode);
6451 emit_insn (gen_set_got (pic));
6452 type = UNSPEC_GOTTPOFF;
6454 else
6456 pic = NULL;
6457 type = UNSPEC_INDNTPOFF;
6460 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6461 off = gen_rtx_CONST (Pmode, off);
6462 if (pic)
6463 off = gen_rtx_PLUS (Pmode, pic, off);
6464 off = gen_rtx_MEM (Pmode, off);
6465 RTX_UNCHANGING_P (off) = 1;
6466 set_mem_alias_set (off, ix86_GOT_alias_set ());
6468 if (TARGET_64BIT || TARGET_GNU_TLS)
6470 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6471 off = force_reg (Pmode, off);
6472 return gen_rtx_PLUS (Pmode, base, off);
6474 else
6476 base = get_thread_pointer (true);
6477 dest = gen_reg_rtx (Pmode);
6478 emit_insn (gen_subsi3 (dest, base, off));
6480 break;
6482 case TLS_MODEL_LOCAL_EXEC:
6483 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6484 (TARGET_64BIT || TARGET_GNU_TLS)
6485 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6486 off = gen_rtx_CONST (Pmode, off);
6488 if (TARGET_64BIT || TARGET_GNU_TLS)
6490 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6491 return gen_rtx_PLUS (Pmode, base, off);
6493 else
6495 base = get_thread_pointer (true);
6496 dest = gen_reg_rtx (Pmode);
6497 emit_insn (gen_subsi3 (dest, base, off));
6499 break;
6501 default:
6502 abort ();
6505 return dest;
6508 /* Try machine-dependent ways of modifying an illegitimate address
6509 to be legitimate. If we find one, return the new, valid address.
6510 This macro is used in only one place: `memory_address' in explow.c.
6512 OLDX is the address as it was before break_out_memory_refs was called.
6513 In some cases it is useful to look at this to decide what needs to be done.
6515 MODE and WIN are passed so that this macro can use
6516 GO_IF_LEGITIMATE_ADDRESS.
6518 It is always safe for this macro to do nothing. It exists to recognize
6519 opportunities to optimize the output.
6521 For the 80386, we handle X+REG by loading X into a register R and
6522 using R+REG. R will go in a general reg and indexing will be used.
6523 However, if REG is a broken-out memory address or multiplication,
6524 nothing needs to be done because REG can certainly go in a general reg.
6526 When -fpic is used, special handling is needed for symbolic references.
6527 See comments by legitimize_pic_address in i386.c for details. */
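 /* For example: a (plus (reg) (ashift (reg) (const_int 2))) address is
    canonicalized below by turning the shift into
    (mult (reg) (const_int 4)) and moving the multiply to the first
    operand, giving the (plus (mult (reg) (const)) (reg)) form that
    GO_IF_LEGITIMATE_ADDRESS recognizes.  */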
6530 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6532 int changed = 0;
6533 unsigned log;
6535 if (TARGET_DEBUG_ADDR)
6537 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6538 GET_MODE_NAME (mode));
6539 debug_rtx (x);
6542 log = tls_symbolic_operand (x, mode);
6543 if (log)
6544 return legitimize_tls_address (x, log, false);
6546 if (flag_pic && SYMBOLIC_CONST (x))
6547 return legitimize_pic_address (x, 0);
6549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6550 if (GET_CODE (x) == ASHIFT
6551 && GET_CODE (XEXP (x, 1)) == CONST_INT
6552 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6554 changed = 1;
6555 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6556 GEN_INT (1 << log));
6559 if (GET_CODE (x) == PLUS)
6561 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6563 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6564 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6565 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6567 changed = 1;
6568 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6569 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6570 GEN_INT (1 << log));
6573 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6574 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6575 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6577 changed = 1;
6578 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6579 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6580 GEN_INT (1 << log));
6583 /* Put multiply first if it isn't already. */
6584 if (GET_CODE (XEXP (x, 1)) == MULT)
6586 rtx tmp = XEXP (x, 0);
6587 XEXP (x, 0) = XEXP (x, 1);
6588 XEXP (x, 1) = tmp;
6589 changed = 1;
6592 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6593 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6594 created by virtual register instantiation, register elimination, and
6595 similar optimizations. */
6596 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6598 changed = 1;
6599 x = gen_rtx_PLUS (Pmode,
6600 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6601 XEXP (XEXP (x, 1), 0)),
6602 XEXP (XEXP (x, 1), 1));
6605 /* Canonicalize
6606 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6607 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6608 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6609 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6610 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6611 && CONSTANT_P (XEXP (x, 1)))
6613 rtx constant;
6614 rtx other = NULL_RTX;
6616 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6618 constant = XEXP (x, 1);
6619 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6621 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6623 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6624 other = XEXP (x, 1);
6626 else
6627 constant = 0;
6629 if (constant)
6631 changed = 1;
6632 x = gen_rtx_PLUS (Pmode,
6633 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6634 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6635 plus_constant (other, INTVAL (constant)));
6639 if (changed && legitimate_address_p (mode, x, FALSE))
6640 return x;
6642 if (GET_CODE (XEXP (x, 0)) == MULT)
6644 changed = 1;
6645 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6648 if (GET_CODE (XEXP (x, 1)) == MULT)
6650 changed = 1;
6651 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6654 if (changed
6655 && GET_CODE (XEXP (x, 1)) == REG
6656 && GET_CODE (XEXP (x, 0)) == REG)
6657 return x;
6659 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6661 changed = 1;
6662 x = legitimize_pic_address (x, 0);
6665 if (changed && legitimate_address_p (mode, x, FALSE))
6666 return x;
6668 if (GET_CODE (XEXP (x, 0)) == REG)
6670 rtx temp = gen_reg_rtx (Pmode);
6671 rtx val = force_operand (XEXP (x, 1), temp);
6672 if (val != temp)
6673 emit_move_insn (temp, val);
6675 XEXP (x, 1) = temp;
6676 return x;
6679 else if (GET_CODE (XEXP (x, 1)) == REG)
6681 rtx temp = gen_reg_rtx (Pmode);
6682 rtx val = force_operand (XEXP (x, 0), temp);
6683 if (val != temp)
6684 emit_move_insn (temp, val);
6686 XEXP (x, 0) = temp;
6687 return x;
6691 return x;
6694 /* Print an integer constant expression in assembler syntax. Addition
6695 and subtraction are the only arithmetic that may appear in these
6696 expressions. FILE is the stdio stream to write to, X is the rtx, and
6697 CODE is the operand print code from the output string. */
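 /* For example (sym is a placeholder symbol):
    (const (unspec [sym] UNSPEC_GOTOFF)) is printed as "sym@GOTOFF",
    and with CODE 'P' a non-local SYMBOL_REF is printed as "sym@PLT",
    matching the cases handled below.  */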
6699 static void
6700 output_pic_addr_const (FILE *file, rtx x, int code)
6702 char buf[256];
6704 switch (GET_CODE (x))
6706 case PC:
6707 if (flag_pic)
6708 putc ('.', file);
6709 else
6710 abort ();
6711 break;
6713 case SYMBOL_REF:
6714 assemble_name (file, XSTR (x, 0));
6715 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6716 fputs ("@PLT", file);
6717 break;
6719 case LABEL_REF:
6720 x = XEXP (x, 0);
6721 /* FALLTHRU */
6722 case CODE_LABEL:
6723 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6724 assemble_name (asm_out_file, buf);
6725 break;
6727 case CONST_INT:
6728 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6729 break;
6731 case CONST:
6732 /* This used to output parentheses around the expression,
6733 but that does not work on the 386 (either ATT or BSD assembler). */
6734 output_pic_addr_const (file, XEXP (x, 0), code);
6735 break;
6737 case CONST_DOUBLE:
6738 if (GET_MODE (x) == VOIDmode)
6740 /* We can use %d if the number is <32 bits and positive. */
6741 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6742 fprintf (file, "0x%lx%08lx",
6743 (unsigned long) CONST_DOUBLE_HIGH (x),
6744 (unsigned long) CONST_DOUBLE_LOW (x));
6745 else
6746 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6748 else
6749 /* We can't handle floating point constants;
6750 PRINT_OPERAND must handle them. */
6751 output_operand_lossage ("floating constant misused");
6752 break;
6754 case PLUS:
6755 /* Some assemblers need integer constants to appear first. */
6756 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6758 output_pic_addr_const (file, XEXP (x, 0), code);
6759 putc ('+', file);
6760 output_pic_addr_const (file, XEXP (x, 1), code);
6762 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6764 output_pic_addr_const (file, XEXP (x, 1), code);
6765 putc ('+', file);
6766 output_pic_addr_const (file, XEXP (x, 0), code);
6768 else
6769 abort ();
6770 break;
6772 case MINUS:
6773 if (!TARGET_MACHO)
6774 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6775 output_pic_addr_const (file, XEXP (x, 0), code);
6776 putc ('-', file);
6777 output_pic_addr_const (file, XEXP (x, 1), code);
6778 if (!TARGET_MACHO)
6779 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6780 break;
6782 case UNSPEC:
6783 if (XVECLEN (x, 0) != 1)
6784 abort ();
6785 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6786 switch (XINT (x, 1))
6788 case UNSPEC_GOT:
6789 fputs ("@GOT", file);
6790 break;
6791 case UNSPEC_GOTOFF:
6792 fputs ("@GOTOFF", file);
6793 break;
6794 case UNSPEC_GOTPCREL:
6795 fputs ("@GOTPCREL(%rip)", file);
6796 break;
6797 case UNSPEC_GOTTPOFF:
6798 /* FIXME: This might be @TPOFF in Sun ld too. */
6799 fputs ("@GOTTPOFF", file);
6800 break;
6801 case UNSPEC_TPOFF:
6802 fputs ("@TPOFF", file);
6803 break;
6804 case UNSPEC_NTPOFF:
6805 if (TARGET_64BIT)
6806 fputs ("@TPOFF", file);
6807 else
6808 fputs ("@NTPOFF", file);
6809 break;
6810 case UNSPEC_DTPOFF:
6811 fputs ("@DTPOFF", file);
6812 break;
6813 case UNSPEC_GOTNTPOFF:
6814 if (TARGET_64BIT)
6815 fputs ("@GOTTPOFF(%rip)", file);
6816 else
6817 fputs ("@GOTNTPOFF", file);
6818 break;
6819 case UNSPEC_INDNTPOFF:
6820 fputs ("@INDNTPOFF", file);
6821 break;
6822 default:
6823 output_operand_lossage ("invalid UNSPEC as operand");
6824 break;
6826 break;
6828 default:
6829 output_operand_lossage ("invalid expression as operand");
6833 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6834 We need to handle our special PIC relocations. */
6836 void
6837 i386_dwarf_output_addr_const (FILE *file, rtx x)
6839 #ifdef ASM_QUAD
6840 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6841 #else
6842 if (TARGET_64BIT)
6843 abort ();
6844 fprintf (file, "%s", ASM_LONG);
6845 #endif
6846 if (flag_pic)
6847 output_pic_addr_const (file, x, '\0');
6848 else
6849 output_addr_const (file, x);
6850 fputc ('\n', file);
6853 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6854 We need to emit DTP-relative relocations. */
6856 void
6857 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6859 fputs (ASM_LONG, file);
6860 output_addr_const (file, x);
6861 fputs ("@DTPOFF", file);
6862 switch (size)
6864 case 4:
6865 break;
6866 case 8:
6867 fputs (", 0", file);
6868 break;
6869 default:
6870 abort ();
6874 /* In the name of slightly smaller debug output, and to cater to
6875 general assembler lossage, recognize PIC+GOTOFF and turn it back
6876 into a direct symbol reference. */
6878 static rtx
6879 ix86_delegitimize_address (rtx orig_x)
6881 rtx x = orig_x, y;
6883 if (GET_CODE (x) == MEM)
6884 x = XEXP (x, 0);
6886 if (TARGET_64BIT)
6888 if (GET_CODE (x) != CONST
6889 || GET_CODE (XEXP (x, 0)) != UNSPEC
6890 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6891 || GET_CODE (orig_x) != MEM)
6892 return orig_x;
6893 return XVECEXP (XEXP (x, 0), 0, 0);
6896 if (GET_CODE (x) != PLUS
6897 || GET_CODE (XEXP (x, 1)) != CONST)
6898 return orig_x;
6900 if (GET_CODE (XEXP (x, 0)) == REG
6901 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6902 /* %ebx + GOT/GOTOFF */
6903 y = NULL;
6904 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6906 /* %ebx + %reg * scale + GOT/GOTOFF */
6907 y = XEXP (x, 0);
6908 if (GET_CODE (XEXP (y, 0)) == REG
6909 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6910 y = XEXP (y, 1);
6911 else if (GET_CODE (XEXP (y, 1)) == REG
6912 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6913 y = XEXP (y, 0);
6914 else
6915 return orig_x;
6916 if (GET_CODE (y) != REG
6917 && GET_CODE (y) != MULT
6918 && GET_CODE (y) != ASHIFT)
6919 return orig_x;
6921 else
6922 return orig_x;
6924 x = XEXP (XEXP (x, 1), 0);
6925 if (GET_CODE (x) == UNSPEC
6926 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6927 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6929 if (y)
6930 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6931 return XVECEXP (x, 0, 0);
6934 if (GET_CODE (x) == PLUS
6935 && GET_CODE (XEXP (x, 0)) == UNSPEC
6936 && GET_CODE (XEXP (x, 1)) == CONST_INT
6937 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6938 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6939 && GET_CODE (orig_x) != MEM)))
6941 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6942 if (y)
6943 return gen_rtx_PLUS (Pmode, y, x);
6944 return x;
6947 return orig_x;
6950 static void
6951 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6952 int fp, FILE *file)
6954 const char *suffix;
6956 if (mode == CCFPmode || mode == CCFPUmode)
6958 enum rtx_code second_code, bypass_code;
6959 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6960 if (bypass_code != NIL || second_code != NIL)
6961 abort ();
6962 code = ix86_fp_compare_code_to_integer (code);
6963 mode = CCmode;
6965 if (reverse)
6966 code = reverse_condition (code);
6968 switch (code)
6970 case EQ:
6971 suffix = "e";
6972 break;
6973 case NE:
6974 suffix = "ne";
6975 break;
6976 case GT:
6977 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6978 abort ();
6979 suffix = "g";
6980 break;
6981 case GTU:
6982 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6983 Those same assemblers have the same but opposite lossage on cmov. */
6984 if (mode != CCmode)
6985 abort ();
6986 suffix = fp ? "nbe" : "a";
6987 break;
6988 case LT:
6989 if (mode == CCNOmode || mode == CCGOCmode)
6990 suffix = "s";
6991 else if (mode == CCmode || mode == CCGCmode)
6992 suffix = "l";
6993 else
6994 abort ();
6995 break;
6996 case LTU:
6997 if (mode != CCmode)
6998 abort ();
6999 suffix = "b";
7000 break;
7001 case GE:
7002 if (mode == CCNOmode || mode == CCGOCmode)
7003 suffix = "ns";
7004 else if (mode == CCmode || mode == CCGCmode)
7005 suffix = "ge";
7006 else
7007 abort ();
7008 break;
7009 case GEU:
7010 /* ??? As above. */
7011 if (mode != CCmode)
7012 abort ();
7013 suffix = fp ? "nb" : "ae";
7014 break;
7015 case LE:
7016 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7017 abort ();
7018 suffix = "le";
7019 break;
7020 case LEU:
7021 if (mode != CCmode)
7022 abort ();
7023 suffix = "be";
7024 break;
7025 case UNORDERED:
7026 suffix = fp ? "u" : "p";
7027 break;
7028 case ORDERED:
7029 suffix = fp ? "nu" : "np";
7030 break;
7031 default:
7032 abort ();
7034 fputs (suffix, file);
7037 /* Print the name of register X to FILE based on its machine mode and number.
7038 If CODE is 'w', pretend the mode is HImode.
7039 If CODE is 'b', pretend the mode is QImode.
7040 If CODE is 'k', pretend the mode is SImode.
7041 If CODE is 'q', pretend the mode is DImode.
7042 If CODE is 'h', pretend the reg is the `high' byte register.
7043 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
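 /* For example, if X is hard register 0 and the ATT dialect is in
    use, code 'k' prints "%eax", 'w' prints "%ax", 'b' prints "%al"
    and 'h' prints "%ah"; for an AMD extended register such as r8,
    code 'k' prints "%r8d".  */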
7045 void
7046 print_reg (rtx x, int code, FILE *file)
7048 if (REGNO (x) == ARG_POINTER_REGNUM
7049 || REGNO (x) == FRAME_POINTER_REGNUM
7050 || REGNO (x) == FLAGS_REG
7051 || REGNO (x) == FPSR_REG)
7052 abort ();
7054 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7055 putc ('%', file);
7057 if (code == 'w' || MMX_REG_P (x))
7058 code = 2;
7059 else if (code == 'b')
7060 code = 1;
7061 else if (code == 'k')
7062 code = 4;
7063 else if (code == 'q')
7064 code = 8;
7065 else if (code == 'y')
7066 code = 3;
7067 else if (code == 'h')
7068 code = 0;
7069 else
7070 code = GET_MODE_SIZE (GET_MODE (x));
7072 /* Irritatingly, the AMD extended registers use a different naming convention
7073 from the normal registers. */
7074 if (REX_INT_REG_P (x))
7076 if (!TARGET_64BIT)
7077 abort ();
7078 switch (code)
7080 case 0:
7081 error ("extended registers have no high halves");
7082 break;
7083 case 1:
7084 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7085 break;
7086 case 2:
7087 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7088 break;
7089 case 4:
7090 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7091 break;
7092 case 8:
7093 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7094 break;
7095 default:
7096 error ("unsupported operand size for extended register");
7097 break;
7099 return;
7101 switch (code)
7103 case 3:
7104 if (STACK_TOP_P (x))
7106 fputs ("st(0)", file);
7107 break;
7109 /* FALLTHRU */
7110 case 8:
7111 case 4:
7112 case 12:
7113 if (! ANY_FP_REG_P (x))
7114 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7115 /* FALLTHRU */
7116 case 16:
7117 case 2:
7118 normal:
7119 fputs (hi_reg_name[REGNO (x)], file);
7120 break;
7121 case 1:
7122 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7123 goto normal;
7124 fputs (qi_reg_name[REGNO (x)], file);
7125 break;
7126 case 0:
7127 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7128 goto normal;
7129 fputs (qi_high_reg_name[REGNO (x)], file);
7130 break;
7131 default:
7132 abort ();
7136 /* Locate some local-dynamic symbol still in use by this function
7137 so that we can print its name in some tls_local_dynamic_base
7138 pattern. */
7140 static const char *
7141 get_some_local_dynamic_name (void)
7143 rtx insn;
7145 if (cfun->machine->some_ld_name)
7146 return cfun->machine->some_ld_name;
7148 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7149 if (INSN_P (insn)
7150 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7151 return cfun->machine->some_ld_name;
7153 abort ();
7156 static int
7157 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7159 rtx x = *px;
7161 if (GET_CODE (x) == SYMBOL_REF
7162 && local_dynamic_symbolic_operand (x, Pmode))
7164 cfun->machine->some_ld_name = XSTR (x, 0);
7165 return 1;
7168 return 0;
7171 /* Meaning of CODE:
7172 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7173 C -- print opcode suffix for set/cmov insn.
7174 c -- like C, but print reversed condition
7175 F,f -- likewise, but for floating-point.
7176 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7177 otherwise nothing
7178 R -- print the prefix for register names.
7179 z -- print the opcode suffix for the size of the current operand.
7180 * -- print a star (in certain assembler syntax)
7181 A -- print an absolute memory reference.
7182 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7183 s -- print a shift double count, followed by the assembler's argument
7184 delimiter.
7185 b -- print the QImode name of the register for the indicated operand.
7186 %b0 would print %al if operands[0] is reg 0.
7187 w -- likewise, print the HImode name of the register.
7188 k -- likewise, print the SImode name of the register.
7189 q -- likewise, print the DImode name of the register.
7190 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7191 y -- print "st(0)" instead of "st" as a register.
7192 D -- print condition for SSE cmp instruction.
7193 P -- if PIC, print an @PLT suffix.
7194 X -- don't print any sort of PIC '@' suffix for a symbol.
7195 & -- print some in-use local-dynamic symbol name.
7198 void
7199 print_operand (FILE *file, rtx x, int code)
7201 if (code)
7203 switch (code)
7205 case '*':
7206 if (ASSEMBLER_DIALECT == ASM_ATT)
7207 putc ('*', file);
7208 return;
7210 case '&':
7211 assemble_name (file, get_some_local_dynamic_name ());
7212 return;
7214 case 'A':
7215 if (ASSEMBLER_DIALECT == ASM_ATT)
7216 putc ('*', file);
7217 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7219 /* Intel syntax. For absolute addresses, registers should not
7220 be surrounded by brackets. */
7221 if (GET_CODE (x) != REG)
7223 putc ('[', file);
7224 PRINT_OPERAND (file, x, 0);
7225 putc (']', file);
7226 return;
7229 else
7230 abort ();
7232 PRINT_OPERAND (file, x, 0);
7233 return;
7236 case 'L':
7237 if (ASSEMBLER_DIALECT == ASM_ATT)
7238 putc ('l', file);
7239 return;
7241 case 'W':
7242 if (ASSEMBLER_DIALECT == ASM_ATT)
7243 putc ('w', file);
7244 return;
7246 case 'B':
7247 if (ASSEMBLER_DIALECT == ASM_ATT)
7248 putc ('b', file);
7249 return;
7251 case 'Q':
7252 if (ASSEMBLER_DIALECT == ASM_ATT)
7253 putc ('l', file);
7254 return;
7256 case 'S':
7257 if (ASSEMBLER_DIALECT == ASM_ATT)
7258 putc ('s', file);
7259 return;
7261 case 'T':
7262 if (ASSEMBLER_DIALECT == ASM_ATT)
7263 putc ('t', file);
7264 return;
7266 case 'z':
7267 /* 387 opcodes don't get size suffixes if the operands are
7268 registers. */
7269 if (STACK_REG_P (x))
7270 return;
7272 /* Likewise if using Intel opcodes. */
7273 if (ASSEMBLER_DIALECT == ASM_INTEL)
7274 return;
7276 /* Derive the opcode suffix from the size of the operand. */
7277 switch (GET_MODE_SIZE (GET_MODE (x)))
7279 case 2:
7280 #ifdef HAVE_GAS_FILDS_FISTS
7281 putc ('s', file);
7282 #endif
7283 return;
7285 case 4:
7286 if (GET_MODE (x) == SFmode)
7288 putc ('s', file);
7289 return;
7291 else
7292 putc ('l', file);
7293 return;
7295 case 12:
7296 case 16:
7297 putc ('t', file);
7298 return;
7300 case 8:
7301 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7303 #ifdef GAS_MNEMONICS
7304 putc ('q', file);
7305 #else
7306 putc ('l', file);
7307 putc ('l', file);
7308 #endif
7310 else
7311 putc ('l', file);
7312 return;
7314 default:
7315 abort ();
7318 case 'b':
7319 case 'w':
7320 case 'k':
7321 case 'q':
7322 case 'h':
7323 case 'y':
7324 case 'X':
7325 case 'P':
7326 break;
7328 case 's':
7329 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7331 PRINT_OPERAND (file, x, 0);
7332 putc (',', file);
7334 return;
7336 case 'D':
7337 /* A little bit of braindamage here. The SSE compare instructions
7338 use completely different names for the comparisons than the
7339 fp conditional moves do. */
7340 switch (GET_CODE (x))
7342 case EQ:
7343 case UNEQ:
7344 fputs ("eq", file);
7345 break;
7346 case LT:
7347 case UNLT:
7348 fputs ("lt", file);
7349 break;
7350 case LE:
7351 case UNLE:
7352 fputs ("le", file);
7353 break;
7354 case UNORDERED:
7355 fputs ("unord", file);
7356 break;
7357 case NE:
7358 case LTGT:
7359 fputs ("neq", file);
7360 break;
7361 case UNGE:
7362 case GE:
7363 fputs ("nlt", file);
7364 break;
7365 case UNGT:
7366 case GT:
7367 fputs ("nle", file);
7368 break;
7369 case ORDERED:
7370 fputs ("ord", file);
7371 break;
7372 default:
7373 abort ();
7374 break;
7376 return;
7377 case 'O':
7378 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7379 if (ASSEMBLER_DIALECT == ASM_ATT)
7381 switch (GET_MODE (x))
7383 case HImode: putc ('w', file); break;
7384 case SImode:
7385 case SFmode: putc ('l', file); break;
7386 case DImode:
7387 case DFmode: putc ('q', file); break;
7388 default: abort ();
7390 putc ('.', file);
7392 #endif
7393 return;
7394 case 'C':
7395 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7396 return;
7397 case 'F':
7398 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7399 if (ASSEMBLER_DIALECT == ASM_ATT)
7400 putc ('.', file);
7401 #endif
7402 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7403 return;
7405 /* Like above, but reverse condition */
7406 case 'c':
7407 /* Check to see if the argument to %c is really a constant
7408 and not a condition code which needs to be reversed. */
7409 if (!COMPARISON_P (x))
7411 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7412 return;
7414 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7415 return;
7416 case 'f':
7417 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7418 if (ASSEMBLER_DIALECT == ASM_ATT)
7419 putc ('.', file);
7420 #endif
7421 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7422 return;
7423 case '+':
7425 rtx x;
7427 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7428 return;
7430 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7431 if (x)
7433 int pred_val = INTVAL (XEXP (x, 0));
7435 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7436 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7438 int taken = pred_val > REG_BR_PROB_BASE / 2;
7439 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7441 /* Emit hints only in the case where the default branch prediction
7442 heuristics would fail. */
7443 if (taken != cputaken)
7445 /* We use the 3e (DS) prefix for taken branches and
7446 the 2e (CS) prefix for not-taken branches. */
7447 if (taken)
7448 fputs ("ds ; ", file);
7449 else
7450 fputs ("cs ; ", file);
7454 return;
7456 default:
7457 output_operand_lossage ("invalid operand code `%c'", code);
7461 if (GET_CODE (x) == REG)
7462 print_reg (x, code, file);
7464 else if (GET_CODE (x) == MEM)
7466 /* No `byte ptr' prefix for call instructions. */
7467 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7469 const char * size;
7470 switch (GET_MODE_SIZE (GET_MODE (x)))
7472 case 1: size = "BYTE"; break;
7473 case 2: size = "WORD"; break;
7474 case 4: size = "DWORD"; break;
7475 case 8: size = "QWORD"; break;
7476 case 12: size = "XWORD"; break;
7477 case 16: size = "XMMWORD"; break;
7478 default:
7479 abort ();
7482 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7483 if (code == 'b')
7484 size = "BYTE";
7485 else if (code == 'w')
7486 size = "WORD";
7487 else if (code == 'k')
7488 size = "DWORD";
7490 fputs (size, file);
7491 fputs (" PTR ", file);
7494 x = XEXP (x, 0);
7495 /* Avoid (%rip) for call operands. */
7496 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7497 && GET_CODE (x) != CONST_INT)
7498 output_addr_const (file, x);
7499 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7500 output_operand_lossage ("invalid constraints for operand");
7501 else
7502 output_address (x);
7505 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7507 REAL_VALUE_TYPE r;
7508 long l;
7510 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7511 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7513 if (ASSEMBLER_DIALECT == ASM_ATT)
7514 putc ('$', file);
7515 fprintf (file, "0x%08lx", l);
7518 /* These float cases don't actually occur as immediate operands. */
7519 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7521 char dstr[30];
7523 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7524 fprintf (file, "%s", dstr);
7527 else if (GET_CODE (x) == CONST_DOUBLE
7528 && GET_MODE (x) == XFmode)
7530 char dstr[30];
7532 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7533 fprintf (file, "%s", dstr);
7536 else
7538 if (code != 'P')
7540 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7542 if (ASSEMBLER_DIALECT == ASM_ATT)
7543 putc ('$', file);
7545 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7546 || GET_CODE (x) == LABEL_REF)
7548 if (ASSEMBLER_DIALECT == ASM_ATT)
7549 putc ('$', file);
7550 else
7551 fputs ("OFFSET FLAT:", file);
7554 if (GET_CODE (x) == CONST_INT)
7555 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7556 else if (flag_pic)
7557 output_pic_addr_const (file, x, code);
7558 else
7559 output_addr_const (file, x);
7563 /* Print a memory operand whose address is ADDR. */
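 /* For example, an address with base %ebx, index %ecx, scale 4 and
    displacement 8 is printed as "8(%ebx,%ecx,4)" in ATT syntax and,
    typically, as "[ebx+8+ecx*4]" in Intel syntax by the code below.  */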
7565 void
7566 print_operand_address (FILE *file, rtx addr)
7568 struct ix86_address parts;
7569 rtx base, index, disp;
7570 int scale;
7572 if (! ix86_decompose_address (addr, &parts))
7573 abort ();
7575 base = parts.base;
7576 index = parts.index;
7577 disp = parts.disp;
7578 scale = parts.scale;
7580 switch (parts.seg)
7582 case SEG_DEFAULT:
7583 break;
7584 case SEG_FS:
7585 case SEG_GS:
7586 if (USER_LABEL_PREFIX[0] == 0)
7587 putc ('%', file);
7588 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7589 break;
7590 default:
7591 abort ();
7594 if (!base && !index)
7596 /* A displacement-only address requires special attention. */
7598 if (GET_CODE (disp) == CONST_INT)
7600 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7602 if (USER_LABEL_PREFIX[0] == 0)
7603 putc ('%', file);
7604 fputs ("ds:", file);
7606 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7608 else if (flag_pic)
7609 output_pic_addr_const (file, disp, 0);
7610 else
7611 output_addr_const (file, disp);
7613 /* Use the one byte shorter RIP-relative addressing in 64-bit mode. */
7614 if (TARGET_64BIT
7615 && ((GET_CODE (disp) == SYMBOL_REF
7616 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7617 || GET_CODE (disp) == LABEL_REF
7618 || (GET_CODE (disp) == CONST
7619 && GET_CODE (XEXP (disp, 0)) == PLUS
7620 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7621 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7622 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7623 fputs ("(%rip)", file);
7625 else
7627 if (ASSEMBLER_DIALECT == ASM_ATT)
7629 if (disp)
7631 if (flag_pic)
7632 output_pic_addr_const (file, disp, 0);
7633 else if (GET_CODE (disp) == LABEL_REF)
7634 output_asm_label (disp);
7635 else
7636 output_addr_const (file, disp);
7639 putc ('(', file);
7640 if (base)
7641 print_reg (base, 0, file);
7642 if (index)
7644 putc (',', file);
7645 print_reg (index, 0, file);
7646 if (scale != 1)
7647 fprintf (file, ",%d", scale);
7649 putc (')', file);
7651 else
7653 rtx offset = NULL_RTX;
7655 if (disp)
7657 /* Pull out the offset of a symbol; print any symbol itself. */
7658 if (GET_CODE (disp) == CONST
7659 && GET_CODE (XEXP (disp, 0)) == PLUS
7660 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7662 offset = XEXP (XEXP (disp, 0), 1);
7663 disp = gen_rtx_CONST (VOIDmode,
7664 XEXP (XEXP (disp, 0), 0));
7667 if (flag_pic)
7668 output_pic_addr_const (file, disp, 0);
7669 else if (GET_CODE (disp) == LABEL_REF)
7670 output_asm_label (disp);
7671 else if (GET_CODE (disp) == CONST_INT)
7672 offset = disp;
7673 else
7674 output_addr_const (file, disp);
7677 putc ('[', file);
7678 if (base)
7680 print_reg (base, 0, file);
7681 if (offset)
7683 if (INTVAL (offset) >= 0)
7684 putc ('+', file);
7685 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7688 else if (offset)
7689 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7690 else
7691 putc ('0', file);
7693 if (index)
7695 putc ('+', file);
7696 print_reg (index, 0, file);
7697 if (scale != 1)
7698 fprintf (file, "*%d", scale);
7700 putc (']', file);
7705 bool
7706 output_addr_const_extra (FILE *file, rtx x)
7708 rtx op;
7710 if (GET_CODE (x) != UNSPEC)
7711 return false;
7713 op = XVECEXP (x, 0, 0);
7714 switch (XINT (x, 1))
7716 case UNSPEC_GOTTPOFF:
7717 output_addr_const (file, op);
7718 /* FIXME: This might be @TPOFF in Sun ld. */
7719 fputs ("@GOTTPOFF", file);
7720 break;
7721 case UNSPEC_TPOFF:
7722 output_addr_const (file, op);
7723 fputs ("@TPOFF", file);
7724 break;
7725 case UNSPEC_NTPOFF:
7726 output_addr_const (file, op);
7727 if (TARGET_64BIT)
7728 fputs ("@TPOFF", file);
7729 else
7730 fputs ("@NTPOFF", file);
7731 break;
7732 case UNSPEC_DTPOFF:
7733 output_addr_const (file, op);
7734 fputs ("@DTPOFF", file);
7735 break;
7736 case UNSPEC_GOTNTPOFF:
7737 output_addr_const (file, op);
7738 if (TARGET_64BIT)
7739 fputs ("@GOTTPOFF(%rip)", file);
7740 else
7741 fputs ("@GOTNTPOFF", file);
7742 break;
7743 case UNSPEC_INDNTPOFF:
7744 output_addr_const (file, op);
7745 fputs ("@INDNTPOFF", file);
7746 break;
7748 default:
7749 return false;
7752 return true;
7755 /* Split one or more DImode RTL references into pairs of SImode
7756 references. The RTL can be REG, offsettable MEM, integer constant, or
7757 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7758 split and "num" is its length. lo_half and hi_half are output arrays
7759 that parallel "operands". */
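 /* For example, (mem:DI (reg)) splits into lo_half (mem:SI (reg)) and
    hi_half (mem:SI (plus (reg) (const_int 4))), and a DImode constant
    such as 0x100000002 splits into the SImode constants 2 (low part)
    and 1 (high part) on this little-endian target.  */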
7761 void
7762 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7764 while (num--)
7766 rtx op = operands[num];
7768 /* simplify_subreg refuses to split volatile memory addresses,
7769 but we still have to handle them. */
7770 if (GET_CODE (op) == MEM)
7772 lo_half[num] = adjust_address (op, SImode, 0);
7773 hi_half[num] = adjust_address (op, SImode, 4);
7775 else
7777 lo_half[num] = simplify_gen_subreg (SImode, op,
7778 GET_MODE (op) == VOIDmode
7779 ? DImode : GET_MODE (op), 0);
7780 hi_half[num] = simplify_gen_subreg (SImode, op,
7781 GET_MODE (op) == VOIDmode
7782 ? DImode : GET_MODE (op), 4);
7786 /* Split one or more TImode RTL references into pairs of DImode
7787 references. The RTL can be REG, offsettable MEM, integer constant, or
7788 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7789 split and "num" is its length. lo_half and hi_half are output arrays
7790 that parallel "operands". */
7792 void
7793 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7795 while (num--)
7797 rtx op = operands[num];
7799 /* simplify_subreg refuses to split volatile memory addresses, but we
7800 still have to handle them. */
7801 if (GET_CODE (op) == MEM)
7803 lo_half[num] = adjust_address (op, DImode, 0);
7804 hi_half[num] = adjust_address (op, DImode, 8);
7806 else
7808 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7809 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7814 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7815 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7816 is the expression of the binary operation. The output may either be
7817 emitted here, or returned to the caller, like all output_* functions.
7819 There is no guarantee that the operands are the same mode, as they
7820 might be within FLOAT or FLOAT_EXTEND expressions. */
7822 #ifndef SYSV386_COMPAT
7823 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7824 wants to fix the assemblers because that causes incompatibility
7825 with gcc. No-one wants to fix gcc because that causes
7826 incompatibility with assemblers... You can use the option of
7827 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7828 #define SYSV386_COMPAT 1
7829 #endif
7831 const char *
7832 output_387_binary_op (rtx insn, rtx *operands)
7834 static char buf[30];
7835 const char *p;
7836 const char *ssep;
7837 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7839 #ifdef ENABLE_CHECKING
7840 /* Even if we do not want to check the inputs, this documents the input
7841 constraints, which helps in understanding the following code. */
7842 if (STACK_REG_P (operands[0])
7843 && ((REG_P (operands[1])
7844 && REGNO (operands[0]) == REGNO (operands[1])
7845 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7846 || (REG_P (operands[2])
7847 && REGNO (operands[0]) == REGNO (operands[2])
7848 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7849 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7850 ; /* ok */
7851 else if (!is_sse)
7852 abort ();
7853 #endif
7855 switch (GET_CODE (operands[3]))
7857 case PLUS:
7858 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7859 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7860 p = "fiadd";
7861 else
7862 p = "fadd";
7863 ssep = "add";
7864 break;
7866 case MINUS:
7867 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7868 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7869 p = "fisub";
7870 else
7871 p = "fsub";
7872 ssep = "sub";
7873 break;
7875 case MULT:
7876 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7877 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7878 p = "fimul";
7879 else
7880 p = "fmul";
7881 ssep = "mul";
7882 break;
7884 case DIV:
7885 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7886 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7887 p = "fidiv";
7888 else
7889 p = "fdiv";
7890 ssep = "div";
7891 break;
7893 default:
7894 abort ();
7897 if (is_sse)
7899 strcpy (buf, ssep);
7900 if (GET_MODE (operands[0]) == SFmode)
7901 strcat (buf, "ss\t{%2, %0|%0, %2}");
7902 else
7903 strcat (buf, "sd\t{%2, %0|%0, %2}");
7904 return buf;
7906 strcpy (buf, p);
7908 switch (GET_CODE (operands[3]))
7910 case MULT:
7911 case PLUS:
7912 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7914 rtx temp = operands[2];
7915 operands[2] = operands[1];
7916 operands[1] = temp;
7919 /* We know operands[0] == operands[1]. */
7921 if (GET_CODE (operands[2]) == MEM)
7923 p = "%z2\t%2";
7924 break;
7927 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7929 if (STACK_TOP_P (operands[0]))
7930 /* How is it that we are storing to a dead operand[2]?
7931 Well, presumably operands[1] is dead too. We can't
7932 store the result to st(0) as st(0) gets popped on this
7933 instruction. Instead store to operands[2] (which I
7934 think has to be st(1)). st(1) will be popped later.
7935 gcc <= 2.8.1 didn't have this check and generated
7936 assembly code that the Unixware assembler rejected. */
7937 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7938 else
7939 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7940 break;
7943 if (STACK_TOP_P (operands[0]))
7944 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7945 else
7946 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7947 break;
7949 case MINUS:
7950 case DIV:
7951 if (GET_CODE (operands[1]) == MEM)
7953 p = "r%z1\t%1";
7954 break;
7957 if (GET_CODE (operands[2]) == MEM)
7959 p = "%z2\t%2";
7960 break;
7963 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7965 #if SYSV386_COMPAT
7966 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7967 derived assemblers, confusingly reverse the direction of
7968 the operation for fsub{r} and fdiv{r} when the
7969 destination register is not st(0). The Intel assembler
7970 doesn't have this brain damage. Read !SYSV386_COMPAT to
7971 figure out what the hardware really does. */
7972 if (STACK_TOP_P (operands[0]))
7973 p = "{p\t%0, %2|rp\t%2, %0}";
7974 else
7975 p = "{rp\t%2, %0|p\t%0, %2}";
7976 #else
7977 if (STACK_TOP_P (operands[0]))
7978 /* As above for fmul/fadd, we can't store to st(0). */
7979 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7980 else
7981 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7982 #endif
7983 break;
7986 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7988 #if SYSV386_COMPAT
7989 if (STACK_TOP_P (operands[0]))
7990 p = "{rp\t%0, %1|p\t%1, %0}";
7991 else
7992 p = "{p\t%1, %0|rp\t%0, %1}";
7993 #else
7994 if (STACK_TOP_P (operands[0]))
7995 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7996 else
7997 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7998 #endif
7999 break;
8002 if (STACK_TOP_P (operands[0]))
8004 if (STACK_TOP_P (operands[1]))
8005 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8006 else
8007 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8008 break;
8010 else if (STACK_TOP_P (operands[1]))
8012 #if SYSV386_COMPAT
8013 p = "{\t%1, %0|r\t%0, %1}";
8014 #else
8015 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8016 #endif
8018 else
8020 #if SYSV386_COMPAT
8021 p = "{r\t%2, %0|\t%0, %2}";
8022 #else
8023 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8024 #endif
8026 break;
8028 default:
8029 abort ();
8032 strcat (buf, p);
8033 return buf;
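/* E.g. the code above produces "addss\t{%2, %0|%0, %2}" for an SFmode SSE
addition, and "fadd%z2\t%2" for a 387 addition whose operands[2] is a MEM. */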
8036 /* Output code to initialize the control word copies used by the
8037 trunc?f?i patterns. NORMAL is set to the current control word, while
8038 ROUND_DOWN is set to a copy that rounds toward zero. */
8039 void
8040 emit_i387_cw_initialization (rtx normal, rtx round_down)
8042 rtx reg = gen_reg_rtx (HImode);
8044 emit_insn (gen_x86_fnstcw_1 (normal));
8045 emit_move_insn (reg, normal);
8046 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8047 && !TARGET_64BIT)
8048 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8049 else
8050 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8051 emit_move_insn (round_down, reg);
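/* Either way, the rounding control field (bits 10 and 11) of the saved
control word is forced to 0b11, i.e. round toward zero (truncation), which
is the rounding the trunc?f?i patterns require. */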
8054 /* Output code for INSN to convert a float to a signed int. OPERANDS
8055 are the insn operands. The output may be [HSD]Imode and the input
8056 operand may be [SDX]Fmode. */
8058 const char *
8059 output_fix_trunc (rtx insn, rtx *operands)
8061 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8062 int dimode_p = GET_MODE (operands[0]) == DImode;
8064 /* Jump through a hoop or two for DImode, since the hardware has no
8065 non-popping instruction. We used to do this a different way, but
8066 that was somewhat fragile and broke with post-reload splitters. */
8067 if (dimode_p && !stack_top_dies)
8068 output_asm_insn ("fld\t%y1", operands);
8070 if (!STACK_TOP_P (operands[1]))
8071 abort ();
8073 if (GET_CODE (operands[0]) != MEM)
8074 abort ();
8076 output_asm_insn ("fldcw\t%3", operands);
8077 if (stack_top_dies || dimode_p)
8078 output_asm_insn ("fistp%z0\t%0", operands);
8079 else
8080 output_asm_insn ("fist%z0\t%0", operands);
8081 output_asm_insn ("fldcw\t%2", operands);
8083 return "";
8086 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8087 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8088 when fucom should be used. */
8090 const char *
8091 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8093 int stack_top_dies;
8094 rtx cmp_op0 = operands[0];
8095 rtx cmp_op1 = operands[1];
8096 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8098 if (eflags_p == 2)
8100 cmp_op0 = cmp_op1;
8101 cmp_op1 = operands[2];
8103 if (is_sse)
8105 if (GET_MODE (operands[0]) == SFmode)
8106 if (unordered_p)
8107 return "ucomiss\t{%1, %0|%0, %1}";
8108 else
8109 return "comiss\t{%1, %0|%0, %1}";
8110 else
8111 if (unordered_p)
8112 return "ucomisd\t{%1, %0|%0, %1}";
8113 else
8114 return "comisd\t{%1, %0|%0, %1}";
8117 if (! STACK_TOP_P (cmp_op0))
8118 abort ();
8120 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8122 if (STACK_REG_P (cmp_op1)
8123 && stack_top_dies
8124 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8125 && REGNO (cmp_op1) != FIRST_STACK_REG)
8127 /* If the top of the 387 stack dies, and the other operand
8128 is also a stack register that dies, then this must be a
8129 `fcompp' float compare. */
8131 if (eflags_p == 1)
8133 /* There is no double popping fcomi variant. Fortunately,
8134 eflags is immune from the fstp's cc clobbering. */
8135 if (unordered_p)
8136 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8137 else
8138 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8139 return "fstp\t%y0";
8141 else
8143 if (eflags_p == 2)
8145 if (unordered_p)
8146 return "fucompp\n\tfnstsw\t%0";
8147 else
8148 return "fcompp\n\tfnstsw\t%0";
8150 else
8152 if (unordered_p)
8153 return "fucompp";
8154 else
8155 return "fcompp";
8159 else
8161 /* The mask below is eflags_p << 3 | intmode << 2 | unordered_p << 1 | stack_top_dies. */
8163 static const char * const alt[24] =
8165 "fcom%z1\t%y1",
8166 "fcomp%z1\t%y1",
8167 "fucom%z1\t%y1",
8168 "fucomp%z1\t%y1",
8170 "ficom%z1\t%y1",
8171 "ficomp%z1\t%y1",
8172 NULL,
8173 NULL,
8175 "fcomi\t{%y1, %0|%0, %y1}",
8176 "fcomip\t{%y1, %0|%0, %y1}",
8177 "fucomi\t{%y1, %0|%0, %y1}",
8178 "fucomip\t{%y1, %0|%0, %y1}",
8180 NULL,
8181 NULL,
8182 NULL,
8183 NULL,
8185 "fcom%z2\t%y2\n\tfnstsw\t%0",
8186 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8187 "fucom%z2\t%y2\n\tfnstsw\t%0",
8188 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8190 "ficom%z2\t%y2\n\tfnstsw\t%0",
8191 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8192 NULL,
8193 NULL
8196 int mask;
8197 const char *ret;
8199 mask = eflags_p << 3;
8200 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8201 mask |= unordered_p << 1;
8202 mask |= stack_top_dies;
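/* For example, an fcomi-style compare (eflags_p == 1) of FP operands with
unordered_p set and a dying stack top gives mask = 8 + 0 + 2 + 1 = 11,
which selects "fucomip\t{%y1, %0|%0, %y1}" above. */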
8204 if (mask >= 24)
8205 abort ();
8206 ret = alt[mask];
8207 if (ret == NULL)
8208 abort ();
8210 return ret;
8214 void
8215 ix86_output_addr_vec_elt (FILE *file, int value)
8217 const char *directive = ASM_LONG;
8219 if (TARGET_64BIT)
8221 #ifdef ASM_QUAD
8222 directive = ASM_QUAD;
8223 #else
8224 abort ();
8225 #endif
8228 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
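/* E.g. for value == 42 this emits ".long .L42" on a typical 32-bit ELF
target, or ".quad .L42" when TARGET_64BIT and the assembler provides
ASM_QUAD. */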
8231 void
8232 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8234 if (TARGET_64BIT)
8235 fprintf (file, "%s%s%d-%s%d\n",
8236 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8237 else if (HAVE_AS_GOTOFF_IN_DATA)
8238 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8239 #if TARGET_MACHO
8240 else if (TARGET_MACHO)
8242 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8243 machopic_output_function_base_name (file);
8244 fprintf(file, "\n");
8246 #endif
8247 else
8248 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8249 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8252 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8253 for the target. */
8255 void
8256 ix86_expand_clear (rtx dest)
8258 rtx tmp;
8260 /* We play register width games, which are only valid after reload. */
8261 if (!reload_completed)
8262 abort ();
8264 /* Avoid HImode and its attendant prefix byte. */
8265 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8266 dest = gen_rtx_REG (SImode, REGNO (dest));
8268 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8270 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8271 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8273 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8274 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8277 emit_insn (tmp);
8280 /* X is an unchanging MEM. If it is a constant pool reference, return
8281 the constant pool rtx, else NULL. */
8283 static rtx
8284 maybe_get_pool_constant (rtx x)
8286 x = ix86_delegitimize_address (XEXP (x, 0));
8288 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8289 return get_pool_constant (x);
8291 return NULL_RTX;
8294 void
8295 ix86_expand_move (enum machine_mode mode, rtx operands[])
8297 int strict = (reload_in_progress || reload_completed);
8298 rtx op0, op1;
8299 enum tls_model model;
8301 op0 = operands[0];
8302 op1 = operands[1];
8304 model = tls_symbolic_operand (op1, Pmode);
8305 if (model)
8307 op1 = legitimize_tls_address (op1, model, true);
8308 op1 = force_operand (op1, op0);
8309 if (op1 == op0)
8310 return;
8313 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8315 #if TARGET_MACHO
8316 if (MACHOPIC_PURE)
8318 rtx temp = ((reload_in_progress
8319 || ((op0 && GET_CODE (op0) == REG)
8320 && mode == Pmode))
8321 ? op0 : gen_reg_rtx (Pmode));
8322 op1 = machopic_indirect_data_reference (op1, temp);
8323 op1 = machopic_legitimize_pic_address (op1, mode,
8324 temp == op1 ? 0 : temp);
8326 else if (MACHOPIC_INDIRECT)
8327 op1 = machopic_indirect_data_reference (op1, 0);
8328 if (op0 == op1)
8329 return;
8330 #else
8331 if (GET_CODE (op0) == MEM)
8332 op1 = force_reg (Pmode, op1);
8333 else
8335 rtx temp = op0;
8336 if (GET_CODE (temp) != REG)
8337 temp = gen_reg_rtx (Pmode);
8338 temp = legitimize_pic_address (op1, temp);
8339 if (temp == op0)
8340 return;
8341 op1 = temp;
8343 #endif /* TARGET_MACHO */
8345 else
8347 if (GET_CODE (op0) == MEM
8348 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8349 || !push_operand (op0, mode))
8350 && GET_CODE (op1) == MEM)
8351 op1 = force_reg (mode, op1);
8353 if (push_operand (op0, mode)
8354 && ! general_no_elim_operand (op1, mode))
8355 op1 = copy_to_mode_reg (mode, op1);
8357 /* Force large constants in 64bit compilation into a register
8358 to get them CSEed. */
8359 if (TARGET_64BIT && mode == DImode
8360 && immediate_operand (op1, mode)
8361 && !x86_64_zero_extended_value (op1)
8362 && !register_operand (op0, mode)
8363 && optimize && !reload_completed && !reload_in_progress)
8364 op1 = copy_to_mode_reg (mode, op1);
8366 if (FLOAT_MODE_P (mode))
8368 /* If we are loading a floating point constant to a register,
8369 force the value to memory now, since we'll get better code
8370 out of the back end. */
8372 if (strict)
8374 else if (GET_CODE (op1) == CONST_DOUBLE)
8376 op1 = validize_mem (force_const_mem (mode, op1));
8377 if (!register_operand (op0, mode))
8379 rtx temp = gen_reg_rtx (mode);
8380 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8381 emit_move_insn (op0, temp);
8382 return;
8388 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8391 void
8392 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8394 /* Force constants other than zero into memory. We do not know how
8395 the instructions used to build constants modify the upper 64 bits
8396 of the register; once we have that information we may be able
8397 to handle some of them more efficiently. */
8398 if ((reload_in_progress | reload_completed) == 0
8399 && register_operand (operands[0], mode)
8400 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8401 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8403 /* Make operand1 a register if it isn't already. */
8404 if (!no_new_pseudos
8405 && !register_operand (operands[0], mode)
8406 && !register_operand (operands[1], mode))
8408 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8409 emit_move_insn (operands[0], temp);
8410 return;
8413 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8416 /* Attempt to expand a binary operator. Make the expansion closer to the
8417 actual machine than just general_operand, which would allow 3 separate
8418 memory references (one output, two input) in a single insn. */
8420 void
8421 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8422 rtx operands[])
8424 int matching_memory;
8425 rtx src1, src2, dst, op, clob;
8427 dst = operands[0];
8428 src1 = operands[1];
8429 src2 = operands[2];
8431 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8432 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8433 && (rtx_equal_p (dst, src2)
8434 || immediate_operand (src1, mode)))
8436 rtx temp = src1;
8437 src1 = src2;
8438 src2 = temp;
8441 /* If the destination is memory, and we do not have matching source
8442 operands, do things in registers. */
8443 matching_memory = 0;
8444 if (GET_CODE (dst) == MEM)
8446 if (rtx_equal_p (dst, src1))
8447 matching_memory = 1;
8448 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8449 && rtx_equal_p (dst, src2))
8450 matching_memory = 2;
8451 else
8452 dst = gen_reg_rtx (mode);
8455 /* Both source operands cannot be in memory. */
8456 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8458 if (matching_memory != 2)
8459 src2 = force_reg (mode, src2);
8460 else
8461 src1 = force_reg (mode, src1);
8464 /* If the operation is not commutative, source 1 cannot be a constant
8465 or non-matching memory. */
8466 if ((CONSTANT_P (src1)
8467 || (!matching_memory && GET_CODE (src1) == MEM))
8468 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8469 src1 = force_reg (mode, src1);
8471 /* If optimizing, copy to regs to improve CSE */
8472 if (optimize && ! no_new_pseudos)
8474 if (GET_CODE (dst) == MEM)
8475 dst = gen_reg_rtx (mode);
8476 if (GET_CODE (src1) == MEM)
8477 src1 = force_reg (mode, src1);
8478 if (GET_CODE (src2) == MEM)
8479 src2 = force_reg (mode, src2);
8482 /* Emit the instruction. */
8484 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8485 if (reload_in_progress)
8487 /* Reload doesn't know about the flags register, and doesn't know that
8488 it doesn't want to clobber it. We can only do this with PLUS. */
8489 if (code != PLUS)
8490 abort ();
8491 emit_insn (op);
8493 else
8495 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8496 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8499 /* Fix up the destination if needed. */
8500 if (dst != operands[0])
8501 emit_move_insn (operands[0], dst);
8504 /* Return TRUE or FALSE depending on whether the binary operator meets the
8505 appropriate constraints. */
8508 ix86_binary_operator_ok (enum rtx_code code,
8509 enum machine_mode mode ATTRIBUTE_UNUSED,
8510 rtx operands[3])
8512 /* Both source operands cannot be in memory. */
8513 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8514 return 0;
8515 /* If the operation is not commutative, source 1 cannot be a constant. */
8516 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8517 return 0;
8518 /* If the destination is memory, we must have a matching source operand. */
8519 if (GET_CODE (operands[0]) == MEM
8520 && ! (rtx_equal_p (operands[0], operands[1])
8521 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8522 && rtx_equal_p (operands[0], operands[2]))))
8523 return 0;
8524 /* If the operation is not commutative and source 1 is memory, we must
8525 have a matching destination. */
8526 if (GET_CODE (operands[1]) == MEM
8527 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8528 && ! rtx_equal_p (operands[0], operands[1]))
8529 return 0;
8530 return 1;
8533 /* Attempt to expand a unary operator. Make the expansion closer to the
8534 actual machine than just general_operand, which would allow 2 separate
8535 memory references (one output, one input) in a single insn. */
8537 void
8538 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8539 rtx operands[])
8541 int matching_memory;
8542 rtx src, dst, op, clob;
8544 dst = operands[0];
8545 src = operands[1];
8547 /* If the destination is memory, and we do not have matching source
8548 operands, do things in registers. */
8549 matching_memory = 0;
8550 if (GET_CODE (dst) == MEM)
8552 if (rtx_equal_p (dst, src))
8553 matching_memory = 1;
8554 else
8555 dst = gen_reg_rtx (mode);
8558 /* When source operand is memory, destination must match. */
8559 if (!matching_memory && GET_CODE (src) == MEM)
8560 src = force_reg (mode, src);
8562 /* If optimizing, copy to regs to improve CSE */
8563 if (optimize && ! no_new_pseudos)
8565 if (GET_CODE (dst) == MEM)
8566 dst = gen_reg_rtx (mode);
8567 if (GET_CODE (src) == MEM)
8568 src = force_reg (mode, src);
8571 /* Emit the instruction. */
8573 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8574 if (reload_in_progress || code == NOT)
8576 /* Reload doesn't know about the flags register, and doesn't know that
8577 it doesn't want to clobber it. */
8578 if (code != NOT)
8579 abort ();
8580 emit_insn (op);
8582 else
8584 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8585 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8588 /* Fix up the destination if needed. */
8589 if (dst != operands[0])
8590 emit_move_insn (operands[0], dst);
8593 /* Return TRUE or FALSE depending on whether the unary operator meets the
8594 appropriate constraints. */
8597 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8598 enum machine_mode mode ATTRIBUTE_UNUSED,
8599 rtx operands[2] ATTRIBUTE_UNUSED)
8601 /* If one of operands is memory, source and destination must match. */
8602 if ((GET_CODE (operands[0]) == MEM
8603 || GET_CODE (operands[1]) == MEM)
8604 && ! rtx_equal_p (operands[0], operands[1]))
8605 return FALSE;
8606 return TRUE;
8609 /* Return TRUE or FALSE depending on whether the first SET in INSN
8610 has source and destination with matching CC modes, and that the
8611 CC mode is at least as constrained as REQ_MODE. */
8614 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8616 rtx set;
8617 enum machine_mode set_mode;
8619 set = PATTERN (insn);
8620 if (GET_CODE (set) == PARALLEL)
8621 set = XVECEXP (set, 0, 0);
8622 if (GET_CODE (set) != SET)
8623 abort ();
8624 if (GET_CODE (SET_SRC (set)) != COMPARE)
8625 abort ();
8627 set_mode = GET_MODE (SET_DEST (set));
8628 switch (set_mode)
8630 case CCNOmode:
8631 if (req_mode != CCNOmode
8632 && (req_mode != CCmode
8633 || XEXP (SET_SRC (set), 1) != const0_rtx))
8634 return 0;
8635 break;
8636 case CCmode:
8637 if (req_mode == CCGCmode)
8638 return 0;
8639 /* FALLTHRU */
8640 case CCGCmode:
8641 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8642 return 0;
8643 /* FALLTHRU */
8644 case CCGOCmode:
8645 if (req_mode == CCZmode)
8646 return 0;
8647 /* FALLTHRU */
8648 case CCZmode:
8649 break;
8651 default:
8652 abort ();
8655 return (GET_MODE (SET_SRC (set)) == set_mode);
8658 /* Generate insn patterns to do an integer compare of OPERANDS. */
8660 static rtx
8661 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8663 enum machine_mode cmpmode;
8664 rtx tmp, flags;
8666 cmpmode = SELECT_CC_MODE (code, op0, op1);
8667 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8669 /* This is very simple, but making the interface the same as in the
8670 FP case makes the rest of the code easier. */
8671 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8672 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8674 /* Return the test that should be put into the flags user, i.e.
8675 the bcc, scc, or cmov instruction. */
8676 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
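/* For instance, for (gt op0 op1) with op1 not const0_rtx this emits
(set (reg:CCGC flags) (compare:CCGC op0 op1)) and returns
(gt (reg:CCGC flags) (const_int 0)). */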
8679 /* Figure out whether to use ordered or unordered fp comparisons.
8680 Return the appropriate mode to use. */
8682 enum machine_mode
8683 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8685 /* ??? In order to make all comparisons reversible, we do all comparisons
8686 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8687 all forms of trapping and nontrapping comparisons, we can make inequality
8688 comparisons trapping again, since that results in better code when using
8689 FCOM based compares. */
8690 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8693 enum machine_mode
8694 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8696 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8697 return ix86_fp_compare_mode (code);
8698 switch (code)
8700 /* Only zero flag is needed. */
8701 case EQ: /* ZF=0 */
8702 case NE: /* ZF!=0 */
8703 return CCZmode;
8704 /* Codes needing carry flag. */
8705 case GEU: /* CF=0 */
8706 case GTU: /* CF=0 & ZF=0 */
8707 case LTU: /* CF=1 */
8708 case LEU: /* CF=1 | ZF=1 */
8709 return CCmode;
8710 /* Codes possibly doable only with sign flag when
8711 comparing against zero. */
8712 case GE: /* SF=OF or SF=0 */
8713 case LT: /* SF<>OF or SF=1 */
8714 if (op1 == const0_rtx)
8715 return CCGOCmode;
8716 else
8717 /* For other cases Carry flag is not required. */
8718 return CCGCmode;
8719 /* Codes doable only with the sign flag when comparing
8720 against zero, but we miss the jump instruction for it,
8721 so we need to use relational tests against overflow,
8722 which thus needs to be zero. */
8723 case GT: /* ZF=0 & SF=OF */
8724 case LE: /* ZF=1 | SF<>OF */
8725 if (op1 == const0_rtx)
8726 return CCNOmode;
8727 else
8728 return CCGCmode;
8729 /* The strcmp pattern does (use flags), and combine may ask us for the
8730 proper mode. */
8731 case USE:
8732 return CCmode;
8733 default:
8734 abort ();
8738 /* Return the fixed registers used for condition codes. */
8740 static bool
8741 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8743 *p1 = FLAGS_REG;
8744 *p2 = FPSR_REG;
8745 return true;
8748 /* If two condition code modes are compatible, return a condition code
8749 mode which is compatible with both. Otherwise, return
8750 VOIDmode. */
8752 static enum machine_mode
8753 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8755 if (m1 == m2)
8756 return m1;
8758 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8759 return VOIDmode;
8761 if ((m1 == CCGCmode && m2 == CCGOCmode)
8762 || (m1 == CCGOCmode && m2 == CCGCmode))
8763 return CCGCmode;
8765 switch (m1)
8767 default:
8768 abort ();
8770 case CCmode:
8771 case CCGCmode:
8772 case CCGOCmode:
8773 case CCNOmode:
8774 case CCZmode:
8775 switch (m2)
8777 default:
8778 return VOIDmode;
8780 case CCmode:
8781 case CCGCmode:
8782 case CCGOCmode:
8783 case CCNOmode:
8784 case CCZmode:
8785 return CCmode;
8788 case CCFPmode:
8789 case CCFPUmode:
8790 /* These are only compatible with themselves, which we already
8791 checked above. */
8792 return VOIDmode;
8796 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8799 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8801 enum rtx_code swapped_code = swap_condition (code);
8802 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8803 || (ix86_fp_comparison_cost (swapped_code)
8804 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8807 /* Swap, force into registers, or otherwise massage the two operands
8808 to a fp comparison. The operands are updated in place; the new
8809 comparison code is returned. */
8811 static enum rtx_code
8812 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8814 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8815 rtx op0 = *pop0, op1 = *pop1;
8816 enum machine_mode op_mode = GET_MODE (op0);
8817 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8819 /* All of the unordered compare instructions only work on registers.
8820 The same is true of the XFmode compare instructions. The same is
8821 true of the fcomi compare instructions. */
8823 if (!is_sse
8824 && (fpcmp_mode == CCFPUmode
8825 || op_mode == XFmode
8826 || ix86_use_fcomi_compare (code)))
8828 op0 = force_reg (op_mode, op0);
8829 op1 = force_reg (op_mode, op1);
8831 else
8833 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8834 things around if they appear profitable, otherwise force op0
8835 into a register. */
8837 if (standard_80387_constant_p (op0) == 0
8838 || (GET_CODE (op0) == MEM
8839 && ! (standard_80387_constant_p (op1) == 0
8840 || GET_CODE (op1) == MEM)))
8842 rtx tmp;
8843 tmp = op0, op0 = op1, op1 = tmp;
8844 code = swap_condition (code);
8847 if (GET_CODE (op0) != REG)
8848 op0 = force_reg (op_mode, op0);
8850 if (CONSTANT_P (op1))
8852 if (standard_80387_constant_p (op1))
8853 op1 = force_reg (op_mode, op1);
8854 else
8855 op1 = validize_mem (force_const_mem (op_mode, op1));
8859 /* Try to rearrange the comparison to make it cheaper. */
8860 if (ix86_fp_comparison_cost (code)
8861 > ix86_fp_comparison_cost (swap_condition (code))
8862 && (GET_CODE (op1) == REG || !no_new_pseudos))
8864 rtx tmp;
8865 tmp = op0, op0 = op1, op1 = tmp;
8866 code = swap_condition (code);
8867 if (GET_CODE (op0) != REG)
8868 op0 = force_reg (op_mode, op0);
8871 *pop0 = op0;
8872 *pop1 = op1;
8873 return code;
8876 /* Convert the comparison codes we use to represent an FP comparison to the
8877 integer code that will result in a proper branch. Return UNKNOWN if no such
8878 code is available. */
8879 static enum rtx_code
8880 ix86_fp_compare_code_to_integer (enum rtx_code code)
8882 switch (code)
8884 case GT:
8885 return GTU;
8886 case GE:
8887 return GEU;
8888 case ORDERED:
8889 case UNORDERED:
8890 return code;
8891 break;
8892 case UNEQ:
8893 return EQ;
8894 break;
8895 case UNLT:
8896 return LTU;
8897 break;
8898 case UNLE:
8899 return LEU;
8900 break;
8901 case LTGT:
8902 return NE;
8903 break;
8904 default:
8905 return UNKNOWN;
8909 /* Split comparison code CODE into comparisons we can do using branch
8910 instructions. BYPASS_CODE is the comparison code for the branch that will
8911 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8912 is not required, its code is set to NIL.
8913 We never require more than two branches. */
8914 static void
8915 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8916 enum rtx_code *first_code,
8917 enum rtx_code *second_code)
8919 *first_code = code;
8920 *bypass_code = NIL;
8921 *second_code = NIL;
8923 /* The fcomi comparison sets flags as follows:
8925 cmp ZF PF CF
8926 > 0 0 0
8927 < 0 0 1
8928 = 1 0 0
8929 un 1 1 1 */
8931 switch (code)
8933 case GT: /* GTU - CF=0 & ZF=0 */
8934 case GE: /* GEU - CF=0 */
8935 case ORDERED: /* PF=0 */
8936 case UNORDERED: /* PF=1 */
8937 case UNEQ: /* EQ - ZF=1 */
8938 case UNLT: /* LTU - CF=1 */
8939 case UNLE: /* LEU - CF=1 | ZF=1 */
8940 case LTGT: /* EQ - ZF=0 */
8941 break;
8942 case LT: /* LTU - CF=1 - fails on unordered */
8943 *first_code = UNLT;
8944 *bypass_code = UNORDERED;
8945 break;
8946 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8947 *first_code = UNLE;
8948 *bypass_code = UNORDERED;
8949 break;
8950 case EQ: /* EQ - ZF=1 - fails on unordered */
8951 *first_code = UNEQ;
8952 *bypass_code = UNORDERED;
8953 break;
8954 case NE: /* NE - ZF=0 - fails on unordered */
8955 *first_code = LTGT;
8956 *second_code = UNORDERED;
8957 break;
8958 case UNGE: /* GEU - CF=0 - fails on unordered */
8959 *first_code = GE;
8960 *second_code = UNORDERED;
8961 break;
8962 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8963 *first_code = GT;
8964 *second_code = UNORDERED;
8965 break;
8966 default:
8967 abort ();
8969 if (!TARGET_IEEE_FP)
8971 *second_code = NIL;
8972 *bypass_code = NIL;
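/* To summarize: a bypass branch jumps around the main test when the operands
are unordered and the main flag test would otherwise be spuriously true
(e.g. LT is tested as UNLT with an UNORDERED bypass), while a second branch
is added when the main test would be spuriously false on unordered operands
(e.g. NE is tested as LTGT plus an extra UNORDERED branch). */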
8976 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8977 All following functions use the number of instructions as a cost metric.
8978 In the future this should be tweaked to compute bytes for optimize_size and
8979 take into account the performance of various instructions on various CPUs. */
8980 static int
8981 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8983 if (!TARGET_IEEE_FP)
8984 return 4;
8985 /* The cost of code output by ix86_expand_fp_compare. */
8986 switch (code)
8988 case UNLE:
8989 case UNLT:
8990 case LTGT:
8991 case GT:
8992 case GE:
8993 case UNORDERED:
8994 case ORDERED:
8995 case UNEQ:
8996 return 4;
8997 break;
8998 case LT:
8999 case NE:
9000 case EQ:
9001 case UNGE:
9002 return 5;
9003 break;
9004 case LE:
9005 case UNGT:
9006 return 6;
9007 break;
9008 default:
9009 abort ();
9013 /* Return cost of comparison done using fcomi operation.
9014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9015 static int
9016 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9018 enum rtx_code bypass_code, first_code, second_code;
9019 /* Return an arbitrarily high cost when the instruction is not supported;
9020 this prevents gcc from using it. */
9021 if (!TARGET_CMOVE)
9022 return 1024;
9023 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9024 return (bypass_code != NIL || second_code != NIL) + 2;
9027 /* Return cost of comparison done using sahf operation.
9028 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9029 static int
9030 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9032 enum rtx_code bypass_code, first_code, second_code;
9033 /* Return an arbitrarily high cost when the instruction is not preferred;
9034 this prevents gcc from using it. */
9035 if (!TARGET_USE_SAHF && !optimize_size)
9036 return 1024;
9037 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9038 return (bypass_code != NIL || second_code != NIL) + 3;
9041 /* Compute cost of the comparison done using any method.
9042 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9043 static int
9044 ix86_fp_comparison_cost (enum rtx_code code)
9046 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9047 int min;
9049 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9050 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9052 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9053 if (min > sahf_cost)
9054 min = sahf_cost;
9055 if (min > fcomi_cost)
9056 min = fcomi_cost;
9057 return min;
9060 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9062 static rtx
9063 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9064 rtx *second_test, rtx *bypass_test)
9066 enum machine_mode fpcmp_mode, intcmp_mode;
9067 rtx tmp, tmp2;
9068 int cost = ix86_fp_comparison_cost (code);
9069 enum rtx_code bypass_code, first_code, second_code;
9071 fpcmp_mode = ix86_fp_compare_mode (code);
9072 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9074 if (second_test)
9075 *second_test = NULL_RTX;
9076 if (bypass_test)
9077 *bypass_test = NULL_RTX;
9079 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9081 /* Do fcomi/sahf based test when profitable. */
9082 if ((bypass_code == NIL || bypass_test)
9083 && (second_code == NIL || second_test)
9084 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9086 if (TARGET_CMOVE)
9088 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9089 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9090 tmp);
9091 emit_insn (tmp);
9093 else
9095 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9096 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9097 if (!scratch)
9098 scratch = gen_reg_rtx (HImode);
9099 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9100 emit_insn (gen_x86_sahf_1 (scratch));
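/* sahf copies AH into the low byte of EFLAGS, so C3, C2 and C0 from the
FPU status word land in ZF, PF and CF respectively -- the same flag layout
that fcomi produces, which is why both paths can share the code below. */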
9103 /* The FP codes work out to act like unsigned. */
9104 intcmp_mode = fpcmp_mode;
9105 code = first_code;
9106 if (bypass_code != NIL)
9107 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9108 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9109 const0_rtx);
9110 if (second_code != NIL)
9111 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9112 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9113 const0_rtx);
9115 else
9117 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9118 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9119 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9120 if (!scratch)
9121 scratch = gen_reg_rtx (HImode);
9122 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9124 /* In the unordered case, we have to check C2 for NaNs, which
9125 doesn't happen to work out to anything nice combination-wise.
9126 So do some bit twiddling on the value we've got in AH to come
9127 up with an appropriate set of condition codes. */
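/* After fnstsw, AH holds the FPU condition bits: 0x01 is C0 (acts as a carry
flag), 0x04 is C2 (set for unordered/NaN operands), and 0x40 is C3 (acts as
a zero flag); a test against 0x45 therefore checks all three at once. */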
9129 intcmp_mode = CCNOmode;
9130 switch (code)
9132 case GT:
9133 case UNGT:
9134 if (code == GT || !TARGET_IEEE_FP)
9136 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9137 code = EQ;
9139 else
9141 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9142 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9143 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9144 intcmp_mode = CCmode;
9145 code = GEU;
9147 break;
9148 case LT:
9149 case UNLT:
9150 if (code == LT && TARGET_IEEE_FP)
9152 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9153 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9154 intcmp_mode = CCmode;
9155 code = EQ;
9157 else
9159 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9160 code = NE;
9162 break;
9163 case GE:
9164 case UNGE:
9165 if (code == GE || !TARGET_IEEE_FP)
9167 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9168 code = EQ;
9170 else
9172 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9173 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9174 GEN_INT (0x01)));
9175 code = NE;
9177 break;
9178 case LE:
9179 case UNLE:
9180 if (code == LE && TARGET_IEEE_FP)
9182 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9183 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9184 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9185 intcmp_mode = CCmode;
9186 code = LTU;
9188 else
9190 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9191 code = NE;
9193 break;
9194 case EQ:
9195 case UNEQ:
9196 if (code == EQ && TARGET_IEEE_FP)
9198 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9199 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9200 intcmp_mode = CCmode;
9201 code = EQ;
9203 else
9205 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9206 code = NE;
9207 break;
9209 break;
9210 case NE:
9211 case LTGT:
9212 if (code == NE && TARGET_IEEE_FP)
9214 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9215 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9216 GEN_INT (0x40)));
9217 code = NE;
9219 else
9221 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9222 code = EQ;
9224 break;
9226 case UNORDERED:
9227 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9228 code = NE;
9229 break;
9230 case ORDERED:
9231 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9232 code = EQ;
9233 break;
9235 default:
9236 abort ();
9240 /* Return the test that should be put into the flags user, i.e.
9241 the bcc, scc, or cmov instruction. */
9242 return gen_rtx_fmt_ee (code, VOIDmode,
9243 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9244 const0_rtx);
9248 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9250 rtx op0, op1, ret;
9251 op0 = ix86_compare_op0;
9252 op1 = ix86_compare_op1;
9254 if (second_test)
9255 *second_test = NULL_RTX;
9256 if (bypass_test)
9257 *bypass_test = NULL_RTX;
9259 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9260 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9261 second_test, bypass_test);
9262 else
9263 ret = ix86_expand_int_compare (code, op0, op1);
9265 return ret;
9268 /* Return true if the CODE will result in a nontrivial jump sequence. */
9269 bool
9270 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9272 enum rtx_code bypass_code, first_code, second_code;
9273 if (!TARGET_CMOVE)
9274 return true;
9275 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9276 return bypass_code != NIL || second_code != NIL;
9279 void
9280 ix86_expand_branch (enum rtx_code code, rtx label)
9282 rtx tmp;
9284 switch (GET_MODE (ix86_compare_op0))
9286 case QImode:
9287 case HImode:
9288 case SImode:
9289 simple:
9290 tmp = ix86_expand_compare (code, NULL, NULL);
9291 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9292 gen_rtx_LABEL_REF (VOIDmode, label),
9293 pc_rtx);
9294 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9295 return;
9297 case SFmode:
9298 case DFmode:
9299 case XFmode:
9301 rtvec vec;
9302 int use_fcomi;
9303 enum rtx_code bypass_code, first_code, second_code;
9305 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9306 &ix86_compare_op1);
9308 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9310 /* Check whether we will use the natural sequence with one jump. If
9311 so, we can expand the jump early. Otherwise delay expansion by
9312 creating a compound insn so as not to confuse the optimizers. */
9313 if (bypass_code == NIL && second_code == NIL
9314 && TARGET_CMOVE)
9316 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9317 gen_rtx_LABEL_REF (VOIDmode, label),
9318 pc_rtx, NULL_RTX);
9320 else
9322 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9323 ix86_compare_op0, ix86_compare_op1);
9324 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9325 gen_rtx_LABEL_REF (VOIDmode, label),
9326 pc_rtx);
9327 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9329 use_fcomi = ix86_use_fcomi_compare (code);
9330 vec = rtvec_alloc (3 + !use_fcomi);
9331 RTVEC_ELT (vec, 0) = tmp;
9332 RTVEC_ELT (vec, 1)
9333 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9334 RTVEC_ELT (vec, 2)
9335 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9336 if (! use_fcomi)
9337 RTVEC_ELT (vec, 3)
9338 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9340 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9342 return;
9345 case DImode:
9346 if (TARGET_64BIT)
9347 goto simple;
9348 /* Expand DImode branch into multiple compare+branch. */
9350 rtx lo[2], hi[2], label2;
9351 enum rtx_code code1, code2, code3;
9353 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9355 tmp = ix86_compare_op0;
9356 ix86_compare_op0 = ix86_compare_op1;
9357 ix86_compare_op1 = tmp;
9358 code = swap_condition (code);
9360 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9361 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9363 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9364 avoid two branches. This costs one extra insn, so disable when
9365 optimizing for size. */
9367 if ((code == EQ || code == NE)
9368 && (!optimize_size
9369 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9371 rtx xor0, xor1;
9373 xor1 = hi[0];
9374 if (hi[1] != const0_rtx)
9375 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9376 NULL_RTX, 0, OPTAB_WIDEN);
9378 xor0 = lo[0];
9379 if (lo[1] != const0_rtx)
9380 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9381 NULL_RTX, 0, OPTAB_WIDEN);
9383 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9384 NULL_RTX, 0, OPTAB_WIDEN);
9386 ix86_compare_op0 = tmp;
9387 ix86_compare_op1 = const0_rtx;
9388 ix86_expand_branch (code, label);
9389 return;
9392 /* Otherwise, if we are doing a less-than or greater-than-or-equal
9393 comparison, op1 is a constant, and the low word is zero, then we can
9394 just examine the high word. */
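/* For example, for an unsigned a < 0x100000000 (hi = 1, lo = 0) it is
enough to test hi(a) < 1. */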
9396 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9397 switch (code)
9399 case LT: case LTU: case GE: case GEU:
9400 ix86_compare_op0 = hi[0];
9401 ix86_compare_op1 = hi[1];
9402 ix86_expand_branch (code, label);
9403 return;
9404 default:
9405 break;
9408 /* Otherwise, we need two or three jumps. */
9410 label2 = gen_label_rtx ();
9412 code1 = code;
9413 code2 = swap_condition (code);
9414 code3 = unsigned_condition (code);
9416 switch (code)
9418 case LT: case GT: case LTU: case GTU:
9419 break;
9421 case LE: code1 = LT; code2 = GT; break;
9422 case GE: code1 = GT; code2 = LT; break;
9423 case LEU: code1 = LTU; code2 = GTU; break;
9424 case GEU: code1 = GTU; code2 = LTU; break;
9426 case EQ: code1 = NIL; code2 = NE; break;
9427 case NE: code2 = NIL; break;
9429 default:
9430 abort ();
9434 * a < b =>
9435 * if (hi(a) < hi(b)) goto true;
9436 * if (hi(a) > hi(b)) goto false;
9437 * if (lo(a) < lo(b)) goto true;
9438 * false:
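* For example, a <= b uses code1 = LT, code2 = GT and code3 = LEU:
* if (hi(a) < hi(b)) goto true;
* if (hi(a) > hi(b)) goto label2;
* if ((unsigned) lo(a) <= lo(b)) goto true;
* label2: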
9441 ix86_compare_op0 = hi[0];
9442 ix86_compare_op1 = hi[1];
9444 if (code1 != NIL)
9445 ix86_expand_branch (code1, label);
9446 if (code2 != NIL)
9447 ix86_expand_branch (code2, label2);
9449 ix86_compare_op0 = lo[0];
9450 ix86_compare_op1 = lo[1];
9451 ix86_expand_branch (code3, label);
9453 if (code2 != NIL)
9454 emit_label (label2);
9455 return;
9458 default:
9459 abort ();
9463 /* Split branch based on floating point condition. */
9464 void
9465 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9466 rtx target1, rtx target2, rtx tmp)
9468 rtx second, bypass;
9469 rtx label = NULL_RTX;
9470 rtx condition;
9471 int bypass_probability = -1, second_probability = -1, probability = -1;
9472 rtx i;
9474 if (target2 != pc_rtx)
9476 rtx tmp = target2;
9477 code = reverse_condition_maybe_unordered (code);
9478 target2 = target1;
9479 target1 = tmp;
9482 condition = ix86_expand_fp_compare (code, op1, op2,
9483 tmp, &second, &bypass);
9485 if (split_branch_probability >= 0)
9487 /* Distribute the probabilities across the jumps.
9488 Assume that BYPASS and SECOND always test
9489 for UNORDERED. */
9490 probability = split_branch_probability;
9492 /* A value of 1 is low enough that there is no need for the probability
9493 to be updated. Later we may run some experiments and see
9494 if unordered values are more frequent in practice. */
9495 if (bypass)
9496 bypass_probability = 1;
9497 if (second)
9498 second_probability = 1;
9500 if (bypass != NULL_RTX)
9502 label = gen_label_rtx ();
9503 i = emit_jump_insn (gen_rtx_SET
9504 (VOIDmode, pc_rtx,
9505 gen_rtx_IF_THEN_ELSE (VOIDmode,
9506 bypass,
9507 gen_rtx_LABEL_REF (VOIDmode,
9508 label),
9509 pc_rtx)));
9510 if (bypass_probability >= 0)
9511 REG_NOTES (i)
9512 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9513 GEN_INT (bypass_probability),
9514 REG_NOTES (i));
9516 i = emit_jump_insn (gen_rtx_SET
9517 (VOIDmode, pc_rtx,
9518 gen_rtx_IF_THEN_ELSE (VOIDmode,
9519 condition, target1, target2)));
9520 if (probability >= 0)
9521 REG_NOTES (i)
9522 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9523 GEN_INT (probability),
9524 REG_NOTES (i));
9525 if (second != NULL_RTX)
9527 i = emit_jump_insn (gen_rtx_SET
9528 (VOIDmode, pc_rtx,
9529 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9530 target2)));
9531 if (second_probability >= 0)
9532 REG_NOTES (i)
9533 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9534 GEN_INT (second_probability),
9535 REG_NOTES (i));
9537 if (label != NULL_RTX)
9538 emit_label (label);
9542 ix86_expand_setcc (enum rtx_code code, rtx dest)
9544 rtx ret, tmp, tmpreg, equiv;
9545 rtx second_test, bypass_test;
9547 if (GET_MODE (ix86_compare_op0) == DImode
9548 && !TARGET_64BIT)
9549 return 0; /* FAIL */
9551 if (GET_MODE (dest) != QImode)
9552 abort ();
9554 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9555 PUT_MODE (ret, QImode);
9557 tmp = dest;
9558 tmpreg = dest;
9560 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9561 if (bypass_test || second_test)
9563 rtx test = second_test;
9564 int bypass = 0;
9565 rtx tmp2 = gen_reg_rtx (QImode);
9566 if (bypass_test)
9568 if (second_test)
9569 abort ();
9570 test = bypass_test;
9571 bypass = 1;
9572 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9574 PUT_MODE (test, QImode);
9575 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9577 if (bypass)
9578 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9579 else
9580 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9583 /* Attach a REG_EQUAL note describing the comparison result. */
9584 equiv = simplify_gen_relational (code, QImode,
9585 GET_MODE (ix86_compare_op0),
9586 ix86_compare_op0, ix86_compare_op1);
9587 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9589 return 1; /* DONE */
9592 /* Expand a comparison that sets or clears the carry flag. Return true when
9593 successful, and set *pop to the comparison operation. */
9594 static bool
9595 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9597 enum machine_mode mode =
9598 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9600 /* Do not handle DImode compares that go through the special path. Also we
9601 can't deal with FP compares yet. It would be possible to add this. */
9602 if ((mode == DImode && !TARGET_64BIT))
9603 return false;
9604 if (FLOAT_MODE_P (mode))
9606 rtx second_test = NULL, bypass_test = NULL;
9607 rtx compare_op, compare_seq;
9609 /* Shortcut: the following common codes never translate into carry-flag compares. */
9610 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9611 || code == ORDERED || code == UNORDERED)
9612 return false;
9614 /* These comparisons require the zero flag; swap the operands so that they won't. */
9615 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9616 && !TARGET_IEEE_FP)
9618 rtx tmp = op0;
9619 op0 = op1;
9620 op1 = tmp;
9621 code = swap_condition (code);
9624 /* Try to expand the comparison and verify that we end up with a carry-flag
9625 based comparison. This fails to be true only when we decide to expand the
9626 comparison using arithmetic, which is not a common scenario. */
9627 start_sequence ();
9628 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9629 &second_test, &bypass_test);
9630 compare_seq = get_insns ();
9631 end_sequence ();
9633 if (second_test || bypass_test)
9634 return false;
9635 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9636 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9637 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9638 else
9639 code = GET_CODE (compare_op);
9640 if (code != LTU && code != GEU)
9641 return false;
9642 emit_insn (compare_seq);
9643 *pop = compare_op;
9644 return true;
9646 if (!INTEGRAL_MODE_P (mode))
9647 return false;
9648 switch (code)
9650 case LTU:
9651 case GEU:
9652 break;
9654 /* Convert a==0 into (unsigned)a<1. */
9655 case EQ:
9656 case NE:
9657 if (op1 != const0_rtx)
9658 return false;
9659 op1 = const1_rtx;
9660 code = (code == EQ ? LTU : GEU);
9661 break;
9663 /* Convert a>b into b<a or a>=b+1. */
9664 case GTU:
9665 case LEU:
9666 if (GET_CODE (op1) == CONST_INT)
9668 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9669 /* Bail out on overflow. We could still swap the operands, but that
9670 would force loading the constant into a register. */
9671 if (op1 == const0_rtx
9672 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9673 return false;
9674 code = (code == GTU ? GEU : LTU);
9676 else
9678 rtx tmp = op1;
9679 op1 = op0;
9680 op0 = tmp;
9681 code = (code == GTU ? LTU : GEU);
9683 break;
9685 /* Convert a>=0 into (unsigned)a<0x80000000. */
9686 case LT:
9687 case GE:
9688 if (mode == DImode || op1 != const0_rtx)
9689 return false;
9690 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9691 code = (code == LT ? GEU : LTU);
9692 break;
9693 case LE:
9694 case GT:
9695 if (mode == DImode || op1 != constm1_rtx)
9696 return false;
9697 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9698 code = (code == LE ? GEU : LTU);
9699 break;
9701 default:
9702 return false;
9704 /* Swapping operands may cause a constant to appear as the first operand. */
9705 if (!nonimmediate_operand (op0, VOIDmode))
9707 if (no_new_pseudos)
9708 return false;
9709 op0 = force_reg (mode, op0);
9711 ix86_compare_op0 = op0;
9712 ix86_compare_op1 = op1;
9713 *pop = ix86_expand_compare (code, NULL, NULL);
9714 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9715 abort ();
9716 return true;
9720 ix86_expand_int_movcc (rtx operands[])
9722 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9723 rtx compare_seq, compare_op;
9724 rtx second_test, bypass_test;
9725 enum machine_mode mode = GET_MODE (operands[0]);
9726 bool sign_bit_compare_p = false;
9728 start_sequence ();
9729 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9730 compare_seq = get_insns ();
9731 end_sequence ();
9733 compare_code = GET_CODE (compare_op);
9735 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9736 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9737 sign_bit_compare_p = true;
9739 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9740 HImode insns, we'd be swallowed in word prefix ops. */
9742 if ((mode != HImode || TARGET_FAST_PREFIX)
9743 && (mode != DImode || TARGET_64BIT)
9744 && GET_CODE (operands[2]) == CONST_INT
9745 && GET_CODE (operands[3]) == CONST_INT)
9747 rtx out = operands[0];
9748 HOST_WIDE_INT ct = INTVAL (operands[2]);
9749 HOST_WIDE_INT cf = INTVAL (operands[3]);
9750 HOST_WIDE_INT diff;
9752 diff = ct - cf;
9753 /* Sign-bit compares are better done using shifts than by using
9754 sbb. */
9755 if (sign_bit_compare_p
9756 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9757 ix86_compare_op1, &compare_op))
9759 /* Detect overlap between destination and compare sources. */
9760 rtx tmp = out;
9762 if (!sign_bit_compare_p)
9764 bool fpcmp = false;
9766 compare_code = GET_CODE (compare_op);
9768 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9769 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9771 fpcmp = true;
9772 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9775 /* To simplify the rest of the code, restrict to the GEU case. */
9776 if (compare_code == LTU)
9778 HOST_WIDE_INT tmp = ct;
9779 ct = cf;
9780 cf = tmp;
9781 compare_code = reverse_condition (compare_code);
9782 code = reverse_condition (code);
9784 else
9786 if (fpcmp)
9787 PUT_CODE (compare_op,
9788 reverse_condition_maybe_unordered
9789 (GET_CODE (compare_op)));
9790 else
9791 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9793 diff = ct - cf;
9795 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9796 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9797 tmp = gen_reg_rtx (mode);
9799 if (mode == DImode)
9800 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9801 else
9802 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9804 else
9806 if (code == GT || code == GE)
9807 code = reverse_condition (code);
9808 else
9810 HOST_WIDE_INT tmp = ct;
9811 ct = cf;
9812 cf = tmp;
9813 diff = ct - cf;
9815 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9816 ix86_compare_op1, VOIDmode, 0, -1);
9819 if (diff == 1)
9822 * cmpl op0,op1
9823 * sbbl dest,dest
9824 * [addl dest, ct]
9826 * Size 5 - 8.
9828 if (ct)
9829 tmp = expand_simple_binop (mode, PLUS,
9830 tmp, GEN_INT (ct),
9831 copy_rtx (tmp), 1, OPTAB_DIRECT);
9833 else if (cf == -1)
9836 * cmpl op0,op1
9837 * sbbl dest,dest
9838 * orl $ct, dest
9840 * Size 8.
9842 tmp = expand_simple_binop (mode, IOR,
9843 tmp, GEN_INT (ct),
9844 copy_rtx (tmp), 1, OPTAB_DIRECT);
9846 else if (diff == -1 && ct)
9849 * cmpl op0,op1
9850 * sbbl dest,dest
9851 * notl dest
9852 * [addl dest, cf]
9854 * Size 8 - 11.
9856 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9857 if (cf)
9858 tmp = expand_simple_binop (mode, PLUS,
9859 copy_rtx (tmp), GEN_INT (cf),
9860 copy_rtx (tmp), 1, OPTAB_DIRECT);
9862 else
9865 * cmpl op0,op1
9866 * sbbl dest,dest
9867 * [notl dest]
9868 * andl cf - ct, dest
9869 * [addl dest, ct]
9871 * Size 8 - 11.
9874 if (cf == 0)
9876 cf = ct;
9877 ct = 0;
9878 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9881 tmp = expand_simple_binop (mode, AND,
9882 copy_rtx (tmp),
9883 gen_int_mode (cf - ct, mode),
9884 copy_rtx (tmp), 1, OPTAB_DIRECT);
9885 if (ct)
9886 tmp = expand_simple_binop (mode, PLUS,
9887 copy_rtx (tmp), GEN_INT (ct),
9888 copy_rtx (tmp), 1, OPTAB_DIRECT);
9891 if (!rtx_equal_p (tmp, out))
9892 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9894 return 1; /* DONE */
9897 if (diff < 0)
9899 HOST_WIDE_INT tmp;
9900 tmp = ct, ct = cf, cf = tmp;
9901 diff = -diff;
9902 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9904 /* We may be reversing an unordered compare to a normal compare, which
9905 is not valid in general (we may convert a non-trapping condition
9906 to a trapping one); however, on i386 we currently emit all
9907 comparisons unordered. */
9908 compare_code = reverse_condition_maybe_unordered (compare_code);
9909 code = reverse_condition_maybe_unordered (code);
9911 else
9913 compare_code = reverse_condition (compare_code);
9914 code = reverse_condition (code);
9918 compare_code = NIL;
9919 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9920 && GET_CODE (ix86_compare_op1) == CONST_INT)
9922 if (ix86_compare_op1 == const0_rtx
9923 && (code == LT || code == GE))
9924 compare_code = code;
9925 else if (ix86_compare_op1 == constm1_rtx)
9927 if (code == LE)
9928 compare_code = LT;
9929 else if (code == GT)
9930 compare_code = GE;
9934 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9935 if (compare_code != NIL
9936 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9937 && (cf == -1 || ct == -1))
9939 /* If the lea code below could be used, only optimize
9940 if it results in a 2-insn sequence. */
9942 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9943 || diff == 3 || diff == 5 || diff == 9)
9944 || (compare_code == LT && ct == -1)
9945 || (compare_code == GE && cf == -1))
9948 * notl op1 (if necessary)
9949 * sarl $31, op1
9950 * orl cf, op1
9952 if (ct != -1)
9954 cf = ct;
9955 ct = -1;
9956 code = reverse_condition (code);
9959 out = emit_store_flag (out, code, ix86_compare_op0,
9960 ix86_compare_op1, VOIDmode, 0, -1);
9962 out = expand_simple_binop (mode, IOR,
9963 out, GEN_INT (cf),
9964 out, 1, OPTAB_DIRECT);
9965 if (out != operands[0])
9966 emit_move_insn (operands[0], out);
9968 return 1; /* DONE */
9973 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9974 || diff == 3 || diff == 5 || diff == 9)
9975 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9976 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9979 * xorl dest,dest
9980 * cmpl op1,op2
9981 * setcc dest
9982 * lea cf(dest*(ct-cf)),dest
9984 * Size 14.
9986 * This also catches the degenerate setcc-only case.
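      /* Added illustrative note: setcc leaves 0 or 1 in dest, so the result
	 is cf + dest * (ct - cf).  For ct - cf in {1, 2, 3, 4, 5, 8, 9} a
	 single lea can usually compute this (scale 1, 2, 4 or 8 plus an
	 optional base and displacement).  E.g. ct = 7, cf = 3, diff = 4:
	 a true condition gives 3 + 1*4 = 7 = ct, a false one 3 + 0*4 = 3 = cf.  */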
9989 rtx tmp;
9990 int nops;
9992 out = emit_store_flag (out, code, ix86_compare_op0,
9993 ix86_compare_op1, VOIDmode, 0, 1);
9995 nops = 0;
 9996 	  /* On x86_64 the lea instruction operates on Pmode, so we need
 9997 	     to do the arithmetic in the proper mode to match.  */
9998 if (diff == 1)
9999 tmp = copy_rtx (out);
10000 else
10002 rtx out1;
10003 out1 = copy_rtx (out);
10004 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10005 nops++;
10006 if (diff & 1)
10008 tmp = gen_rtx_PLUS (mode, tmp, out1);
10009 nops++;
10012 if (cf != 0)
10014 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10015 nops++;
10017 if (!rtx_equal_p (tmp, out))
10019 if (nops == 1)
10020 out = force_operand (tmp, copy_rtx (out));
10021 else
10022 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10024 if (!rtx_equal_p (out, operands[0]))
10025 emit_move_insn (operands[0], copy_rtx (out));
10027 return 1; /* DONE */
10031 * General case: Jumpful:
10032 * xorl dest,dest cmpl op1, op2
10033 * cmpl op1, op2 movl ct, dest
10034 * setcc dest jcc 1f
10035 * decl dest movl cf, dest
10036 * andl (cf-ct),dest 1:
10037 * addl ct,dest
10039 * Size 20. Size 14.
10041 * This is reasonably steep, but branch mispredict costs are
10042 * high on modern cpus, so consider failing only if optimizing
10043 * for space.
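      /* Added illustrative note on the jumpless sequence: setcc leaves 0/1,
	 decl turns that into -1/0, andl masks in (cf - ct), and the final
	 addl produces the selected constant:
	   condition true:  (0 & (cf - ct)) + ct = ct
	   condition false: (-1 & (cf - ct)) + ct = cf  */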
10046 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10047 && BRANCH_COST >= 2)
10049 if (cf == 0)
10051 cf = ct;
10052 ct = 0;
10053 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10054 	    /* We may be reversing an unordered compare to a normal compare,
10055 	       which is not valid in general (we may convert a non-trapping
10056 	       condition to a trapping one); however, on i386 we currently
10057 	       emit all comparisons unordered.  */
10058 code = reverse_condition_maybe_unordered (code);
10059 else
10061 code = reverse_condition (code);
10062 if (compare_code != NIL)
10063 compare_code = reverse_condition (compare_code);
10067 if (compare_code != NIL)
10069 /* notl op1 (if needed)
10070 sarl $31, op1
10071 andl (cf-ct), op1
10072 addl ct, op1
10074 For x < 0 (resp. x <= -1) there will be no notl,
10075 so if possible swap the constants to get rid of the
10076 complement.
10077 	     True/false will be -1/0, while the code below (store flag
10078 	     followed by decrement) gives 0/-1, so the constants need
10079 	     to be exchanged once more.  */
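	  /* Added illustrative note: for a plain sign test "sarl $31, op1"
	     replicates the sign bit through the register and so yields the
	     0/-1 mask directly, e.g. 0x80000005 >> 31 (arithmetic) is
	     0xffffffff while 0x00000005 >> 31 is 0.  */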
10081 if (compare_code == GE || !cf)
10083 code = reverse_condition (code);
10084 compare_code = LT;
10086 else
10088 HOST_WIDE_INT tmp = cf;
10089 cf = ct;
10090 ct = tmp;
10093 out = emit_store_flag (out, code, ix86_compare_op0,
10094 ix86_compare_op1, VOIDmode, 0, -1);
10096 else
10098 out = emit_store_flag (out, code, ix86_compare_op0,
10099 ix86_compare_op1, VOIDmode, 0, 1);
10101 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10102 copy_rtx (out), 1, OPTAB_DIRECT);
10105 out = expand_simple_binop (mode, AND, copy_rtx (out),
10106 gen_int_mode (cf - ct, mode),
10107 copy_rtx (out), 1, OPTAB_DIRECT);
10108 if (ct)
10109 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10110 copy_rtx (out), 1, OPTAB_DIRECT);
10111 if (!rtx_equal_p (out, operands[0]))
10112 emit_move_insn (operands[0], copy_rtx (out));
10114 return 1; /* DONE */
10118 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10120 /* Try a few things more with specific constants and a variable. */
10122 optab op;
10123 rtx var, orig_out, out, tmp;
10125 if (BRANCH_COST <= 2)
10126 return 0; /* FAIL */
10128 /* If one of the two operands is an interesting constant, load a
10129 constant with the above and mask it in with a logical operation. */
10131 if (GET_CODE (operands[2]) == CONST_INT)
10133 var = operands[3];
10134 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10135 operands[3] = constm1_rtx, op = and_optab;
10136 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10137 operands[3] = const0_rtx, op = ior_optab;
10138 else
10139 return 0; /* FAIL */
10141 else if (GET_CODE (operands[3]) == CONST_INT)
10143 var = operands[2];
10144 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10145 operands[2] = constm1_rtx, op = and_optab;
10146 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10147 operands[2] = const0_rtx, op = ior_optab;
10148 else
10149 return 0; /* FAIL */
10151 else
10152 return 0; /* FAIL */
10154 orig_out = operands[0];
10155 tmp = gen_reg_rtx (mode);
10156 operands[0] = tmp;
10158 /* Recurse to get the constant loaded. */
10159 if (ix86_expand_int_movcc (operands) == 0)
10160 return 0; /* FAIL */
10162 /* Mask in the interesting variable. */
10163 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10164 OPTAB_WIDEN);
10165 if (!rtx_equal_p (out, orig_out))
10166 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10168 return 1; /* DONE */
10172 * For comparison with above,
10174 * movl cf,dest
10175 * movl ct,tmp
10176 * cmpl op1,op2
10177 * cmovcc tmp,dest
10179 * Size 15.
10182 if (! nonimmediate_operand (operands[2], mode))
10183 operands[2] = force_reg (mode, operands[2]);
10184 if (! nonimmediate_operand (operands[3], mode))
10185 operands[3] = force_reg (mode, operands[3]);
10187 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10189 rtx tmp = gen_reg_rtx (mode);
10190 emit_move_insn (tmp, operands[3]);
10191 operands[3] = tmp;
10193 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10195 rtx tmp = gen_reg_rtx (mode);
10196 emit_move_insn (tmp, operands[2]);
10197 operands[2] = tmp;
10200 if (! register_operand (operands[2], VOIDmode)
10201 && (mode == QImode
10202 || ! register_operand (operands[3], VOIDmode)))
10203 operands[2] = force_reg (mode, operands[2]);
10205 if (mode == QImode
10206 && ! register_operand (operands[3], VOIDmode))
10207 operands[3] = force_reg (mode, operands[3]);
10209 emit_insn (compare_seq);
10210 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10211 gen_rtx_IF_THEN_ELSE (mode,
10212 compare_op, operands[2],
10213 operands[3])));
10214 if (bypass_test)
10215 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10216 gen_rtx_IF_THEN_ELSE (mode,
10217 bypass_test,
10218 copy_rtx (operands[3]),
10219 copy_rtx (operands[0]))));
10220 if (second_test)
10221 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10222 gen_rtx_IF_THEN_ELSE (mode,
10223 second_test,
10224 copy_rtx (operands[2]),
10225 copy_rtx (operands[0]))));
10227 return 1; /* DONE */
10231 ix86_expand_fp_movcc (rtx operands[])
10233 enum rtx_code code;
10234 rtx tmp;
10235 rtx compare_op, second_test, bypass_test;
10237 /* For SF/DFmode conditional moves based on comparisons
10238      in the same mode, we may want to use SSE min/max instructions.  */
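  /* Added illustrative note: a conditional move such as "x = a < b ? a : b",
     where the comparison operands and the two arms are the same values, can
     often be implemented by a single minss/minsd (or maxss/maxsd for the
     GT/UNGE forms); the checks below take care of operand order and the
     NaN-related caveats.  */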
10239 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10240 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10241 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10242       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10243 && (!TARGET_IEEE_FP
10244 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10245 /* We may be called from the post-reload splitter. */
10246 && (!REG_P (operands[0])
10247 || SSE_REG_P (operands[0])
10248 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10250 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10251 code = GET_CODE (operands[1]);
10253       /* See if we have a (cross) match between the comparison operands and
10254 	 the conditional move operands.  */
10255 if (rtx_equal_p (operands[2], op1))
10257 rtx tmp = op0;
10258 op0 = op1;
10259 op1 = tmp;
10260 code = reverse_condition_maybe_unordered (code);
10262 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10264 /* Check for min operation. */
10265 if (code == LT || code == UNLE)
10267 if (code == UNLE)
10269 rtx tmp = op0;
10270 op0 = op1;
10271 op1 = tmp;
10273 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10274 if (memory_operand (op0, VOIDmode))
10275 op0 = force_reg (GET_MODE (operands[0]), op0);
10276 if (GET_MODE (operands[0]) == SFmode)
10277 emit_insn (gen_minsf3 (operands[0], op0, op1));
10278 else
10279 emit_insn (gen_mindf3 (operands[0], op0, op1));
10280 return 1;
10282 /* Check for max operation. */
10283 if (code == GT || code == UNGE)
10285 if (code == UNGE)
10287 rtx tmp = op0;
10288 op0 = op1;
10289 op1 = tmp;
10291 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10292 if (memory_operand (op0, VOIDmode))
10293 op0 = force_reg (GET_MODE (operands[0]), op0);
10294 if (GET_MODE (operands[0]) == SFmode)
10295 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10296 else
10297 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10298 return 1;
10301       /* Arrange for the condition to be a sse_comparison_operator.  In case
10302 	 we are in non-IEEE mode, try to canonicalize the destination operand
10303 	 to be first in the comparison - this helps reload to avoid extra
10304 	 moves.  */
10305 if (!sse_comparison_operator (operands[1], VOIDmode)
10306 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10308 rtx tmp = ix86_compare_op0;
10309 ix86_compare_op0 = ix86_compare_op1;
10310 ix86_compare_op1 = tmp;
10311 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10312 VOIDmode, ix86_compare_op0,
10313 ix86_compare_op1);
10315       /* Similarly, try to arrange for the result to be the first operand of
10316 	 the conditional move.  We also don't support the NE comparison on SSE,
10317 	 so try to avoid it.  */
10318 if ((rtx_equal_p (operands[0], operands[3])
10319 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10320 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10322 rtx tmp = operands[2];
10323 operands[2] = operands[3];
10324 operands[3] = tmp;
10325 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10326 (GET_CODE (operands[1])),
10327 VOIDmode, ix86_compare_op0,
10328 ix86_compare_op1);
10330 if (GET_MODE (operands[0]) == SFmode)
10331 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10332 operands[2], operands[3],
10333 ix86_compare_op0, ix86_compare_op1));
10334 else
10335 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10336 operands[2], operands[3],
10337 ix86_compare_op0, ix86_compare_op1));
10338 return 1;
10341 /* The floating point conditional move instructions don't directly
10342 support conditions resulting from a signed integer comparison. */
10344 code = GET_CODE (operands[1]);
10345 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10350 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10352 if (second_test != NULL || bypass_test != NULL)
10353 abort ();
10354 tmp = gen_reg_rtx (QImode);
10355 ix86_expand_setcc (code, tmp);
10356 code = NE;
10357 ix86_compare_op0 = tmp;
10358 ix86_compare_op1 = const0_rtx;
10359 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10361 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10363 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10364 emit_move_insn (tmp, operands[3]);
10365 operands[3] = tmp;
10367 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10369 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10370 emit_move_insn (tmp, operands[2]);
10371 operands[2] = tmp;
10374 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10375 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10376 compare_op,
10377 operands[2],
10378 operands[3])));
10379 if (bypass_test)
10380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10381 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10382 bypass_test,
10383 operands[3],
10384 operands[0])));
10385 if (second_test)
10386 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10387 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10388 second_test,
10389 operands[2],
10390 operands[0])));
10392 return 1;
10395 /* Expand conditional increment or decrement using adc/sbb instructions.
10396 The default case using setcc followed by the conditional move can be
10397 done by generic code. */
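/* Added illustrative note: for example "if (a < b) x++;" with unsigned
   operands can be compiled (hypothetical register assignment: a in %eax,
   b in %ebx, x in %ecx) as
       cmpl %ebx, %eax    # computes a - b, CF set iff a < b
       adcl $0, %ecx      # x += carry
   which is the kind of branch-free adc/sbb sequence this expander emits.  */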
10399 ix86_expand_int_addcc (rtx operands[])
10401 enum rtx_code code = GET_CODE (operands[1]);
10402 rtx compare_op;
10403 rtx val = const0_rtx;
10404 bool fpcmp = false;
10405 enum machine_mode mode = GET_MODE (operands[0]);
10407 if (operands[3] != const1_rtx
10408 && operands[3] != constm1_rtx)
10409 return 0;
10410 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10411 ix86_compare_op1, &compare_op))
10412 return 0;
10413 code = GET_CODE (compare_op);
10415 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10416 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10418 fpcmp = true;
10419 code = ix86_fp_compare_code_to_integer (code);
10422 if (code != LTU)
10424 val = constm1_rtx;
10425 if (fpcmp)
10426 PUT_CODE (compare_op,
10427 reverse_condition_maybe_unordered
10428 (GET_CODE (compare_op)));
10429 else
10430 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10432 PUT_MODE (compare_op, mode);
10434 /* Construct either adc or sbb insn. */
10435 if ((code == LTU) == (operands[3] == constm1_rtx))
10437 switch (GET_MODE (operands[0]))
10439 case QImode:
10440 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10441 break;
10442 case HImode:
10443 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10444 break;
10445 case SImode:
10446 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10447 break;
10448 case DImode:
10449 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10450 break;
10451 default:
10452 abort ();
10455 else
10457 switch (GET_MODE (operands[0]))
10459 case QImode:
10460 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10461 break;
10462 case HImode:
10463 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10464 break;
10465 case SImode:
10466 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10467 break;
10468 case DImode:
10469 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10470 break;
10471 default:
10472 abort ();
10475 return 1; /* DONE */
10479 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10480    works for floating point parameters and non-offsettable memories.
10481    For pushes, it returns just stack offsets; the values will be saved
10482    in the right order.  At most three parts are generated.  */
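/* Added illustrative note: on a 32-bit target a DImode or DFmode value
   splits into two SImode parts and XFmode into three; on a 64-bit target
   TImode splits into two DImode parts, and XFmode/TFmode into a DImode
   part plus an SImode or DImode upper part.  */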
10484 static int
10485 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10487 int size;
10489 if (!TARGET_64BIT)
10490 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10491 else
10492 size = (GET_MODE_SIZE (mode) + 4) / 8;
10494 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10495 abort ();
10496 if (size < 2 || size > 3)
10497 abort ();
10499   /* Optimize constant pool references to immediates.  This is used by fp
10500      moves, which force all constants to memory to allow combining.  */
10501 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10503 rtx tmp = maybe_get_pool_constant (operand);
10504 if (tmp)
10505 operand = tmp;
10508 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10510       /* The only non-offsettable memories we handle are pushes.  */
10511 if (! push_operand (operand, VOIDmode))
10512 abort ();
10514 operand = copy_rtx (operand);
10515 PUT_MODE (operand, Pmode);
10516 parts[0] = parts[1] = parts[2] = operand;
10518 else if (!TARGET_64BIT)
10520 if (mode == DImode)
10521 split_di (&operand, 1, &parts[0], &parts[1]);
10522 else
10524 if (REG_P (operand))
10526 if (!reload_completed)
10527 abort ();
10528 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10529 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10530 if (size == 3)
10531 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10533 else if (offsettable_memref_p (operand))
10535 operand = adjust_address (operand, SImode, 0);
10536 parts[0] = operand;
10537 parts[1] = adjust_address (operand, SImode, 4);
10538 if (size == 3)
10539 parts[2] = adjust_address (operand, SImode, 8);
10541 else if (GET_CODE (operand) == CONST_DOUBLE)
10543 REAL_VALUE_TYPE r;
10544 long l[4];
10546 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10547 switch (mode)
10549 case XFmode:
10550 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10551 parts[2] = gen_int_mode (l[2], SImode);
10552 break;
10553 case DFmode:
10554 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10555 break;
10556 default:
10557 abort ();
10559 parts[1] = gen_int_mode (l[1], SImode);
10560 parts[0] = gen_int_mode (l[0], SImode);
10562 else
10563 abort ();
10566 else
10568 if (mode == TImode)
10569 split_ti (&operand, 1, &parts[0], &parts[1]);
10570 if (mode == XFmode || mode == TFmode)
10572 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10573 if (REG_P (operand))
10575 if (!reload_completed)
10576 abort ();
10577 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10578 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10580 else if (offsettable_memref_p (operand))
10582 operand = adjust_address (operand, DImode, 0);
10583 parts[0] = operand;
10584 parts[1] = adjust_address (operand, upper_mode, 8);
10586 else if (GET_CODE (operand) == CONST_DOUBLE)
10588 REAL_VALUE_TYPE r;
10589 long l[3];
10591 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10592 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10593 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10594 if (HOST_BITS_PER_WIDE_INT >= 64)
10595 parts[0]
10596 = gen_int_mode
10597 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10598 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10599 DImode);
10600 else
10601 parts[0] = immed_double_const (l[0], l[1], DImode);
10602 if (upper_mode == SImode)
10603 parts[1] = gen_int_mode (l[2], SImode);
10604 else if (HOST_BITS_PER_WIDE_INT >= 64)
10605 parts[1]
10606 = gen_int_mode
10607 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10608 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10609 DImode);
10610 else
10611 parts[1] = immed_double_const (l[2], l[3], DImode);
10613 else
10614 abort ();
10618 return size;
10621 /* Emit insns to perform a move or push of DI, DF, and XF values.
10623    Operands 2-4 contain the input values in the correct order;
10624    operands 5-7 contain the output values.  */
10626 void
10627 ix86_split_long_move (rtx operands[])
10629 rtx part[2][3];
10630 int nparts;
10631 int push = 0;
10632 int collisions = 0;
10633 enum machine_mode mode = GET_MODE (operands[0]);
10635   /* The DFmode expanders may ask us to move a double.
10636      For a 64-bit target this is a single move.  By hiding that fact
10637      here we simplify the i386.md splitters.  */
10638 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10640       /* Optimize constant pool references to immediates.  This is used by
10641 	 fp moves, which force all constants to memory to allow combining.  */
10643 if (GET_CODE (operands[1]) == MEM
10644 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10645 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10646 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10647 if (push_operand (operands[0], VOIDmode))
10649 operands[0] = copy_rtx (operands[0]);
10650 PUT_MODE (operands[0], Pmode);
10652 else
10653 operands[0] = gen_lowpart (DImode, operands[0]);
10654 operands[1] = gen_lowpart (DImode, operands[1]);
10655 emit_move_insn (operands[0], operands[1]);
10656 return;
10659 /* The only non-offsettable memory we handle is push. */
10660 if (push_operand (operands[0], VOIDmode))
10661 push = 1;
10662 else if (GET_CODE (operands[0]) == MEM
10663 && ! offsettable_memref_p (operands[0]))
10664 abort ();
10666 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10667 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10669   /* When emitting a push, take care with source operands on the stack.  */
10670 if (push && GET_CODE (operands[1]) == MEM
10671 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10673 if (nparts == 3)
10674 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10675 XEXP (part[1][2], 0));
10676 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10677 XEXP (part[1][1], 0));
10680   /* We need to do the copy in the right order in case an address register
10681      of the source overlaps the destination.  */
10682 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10684 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10685 collisions++;
10686 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10687 collisions++;
10688 if (nparts == 3
10689 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10690 collisions++;
10692 /* Collision in the middle part can be handled by reordering. */
10693 if (collisions == 1 && nparts == 3
10694 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10696 rtx tmp;
10697 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10698 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10701 /* If there are more collisions, we can't handle it by reordering.
10702 Do an lea to the last part and use only one colliding move. */
10703 else if (collisions > 1)
10705 rtx base;
10707 collisions = 1;
10709 base = part[0][nparts - 1];
10711 /* Handle the case when the last part isn't valid for lea.
10712 Happens in 64-bit mode storing the 12-byte XFmode. */
10713 if (GET_MODE (base) != Pmode)
10714 base = gen_rtx_REG (Pmode, REGNO (base));
10716 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10717 part[1][0] = replace_equiv_address (part[1][0], base);
10718 part[1][1] = replace_equiv_address (part[1][1],
10719 plus_constant (base, UNITS_PER_WORD));
10720 if (nparts == 3)
10721 part[1][2] = replace_equiv_address (part[1][2],
10722 plus_constant (base, 8));
10726 if (push)
10728 if (!TARGET_64BIT)
10730 if (nparts == 3)
10732 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10733 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10734 emit_move_insn (part[0][2], part[1][2]);
10737 else
10739 	  /* In 64-bit mode we don't have a 32-bit push available.  In case this is
10740 	     a register, it is OK - we will just use the larger counterpart.  We also
10741 	     retype the memory - these come from an attempt to avoid a REX prefix on
10742 	     the move of the second half of a TFmode value.  */
10743 if (GET_MODE (part[1][1]) == SImode)
10745 if (GET_CODE (part[1][1]) == MEM)
10746 part[1][1] = adjust_address (part[1][1], DImode, 0);
10747 else if (REG_P (part[1][1]))
10748 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10749 else
10750 abort ();
10751 if (GET_MODE (part[1][0]) == SImode)
10752 part[1][0] = part[1][1];
10755 emit_move_insn (part[0][1], part[1][1]);
10756 emit_move_insn (part[0][0], part[1][0]);
10757 return;
10760 /* Choose correct order to not overwrite the source before it is copied. */
10761 if ((REG_P (part[0][0])
10762 && REG_P (part[1][1])
10763 && (REGNO (part[0][0]) == REGNO (part[1][1])
10764 || (nparts == 3
10765 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10766 || (collisions > 0
10767 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10769 if (nparts == 3)
10771 operands[2] = part[0][2];
10772 operands[3] = part[0][1];
10773 operands[4] = part[0][0];
10774 operands[5] = part[1][2];
10775 operands[6] = part[1][1];
10776 operands[7] = part[1][0];
10778 else
10780 operands[2] = part[0][1];
10781 operands[3] = part[0][0];
10782 operands[5] = part[1][1];
10783 operands[6] = part[1][0];
10786 else
10788 if (nparts == 3)
10790 operands[2] = part[0][0];
10791 operands[3] = part[0][1];
10792 operands[4] = part[0][2];
10793 operands[5] = part[1][0];
10794 operands[6] = part[1][1];
10795 operands[7] = part[1][2];
10797 else
10799 operands[2] = part[0][0];
10800 operands[3] = part[0][1];
10801 operands[5] = part[1][0];
10802 operands[6] = part[1][1];
10805 emit_move_insn (operands[2], operands[5]);
10806 emit_move_insn (operands[3], operands[6]);
10807 if (nparts == 3)
10808 emit_move_insn (operands[4], operands[7]);
10810 return;
10813 void
10814 ix86_split_ashldi (rtx *operands, rtx scratch)
10816 rtx low[2], high[2];
10817 int count;
10819 if (GET_CODE (operands[2]) == CONST_INT)
10821 split_di (operands, 2, low, high);
10822 count = INTVAL (operands[2]) & 63;
10824 if (count >= 32)
10826 emit_move_insn (high[0], low[1]);
10827 emit_move_insn (low[0], const0_rtx);
10829 if (count > 32)
10830 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10832 else
10834 if (!rtx_equal_p (operands[0], operands[1]))
10835 emit_move_insn (operands[0], operands[1]);
10836 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10837 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10840 else
10842 if (!rtx_equal_p (operands[0], operands[1]))
10843 emit_move_insn (operands[0], operands[1]);
10845 split_di (operands, 1, low, high);
10847 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10848 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10850 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10852 if (! no_new_pseudos)
10853 scratch = force_reg (SImode, const0_rtx);
10854 else
10855 emit_move_insn (scratch, const0_rtx);
10857 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10858 scratch));
10860 else
10861 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10865 void
10866 ix86_split_ashrdi (rtx *operands, rtx scratch)
10868 rtx low[2], high[2];
10869 int count;
10871 if (GET_CODE (operands[2]) == CONST_INT)
10873 split_di (operands, 2, low, high);
10874 count = INTVAL (operands[2]) & 63;
10876 if (count == 63)
10878 emit_move_insn (high[0], high[1]);
10879 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10880 emit_move_insn (low[0], high[0]);
10883 else if (count >= 32)
10885 emit_move_insn (low[0], high[1]);
10887 if (! reload_completed)
10888 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10889 else
10891 emit_move_insn (high[0], low[0]);
10892 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10895 if (count > 32)
10896 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10898 else
10900 if (!rtx_equal_p (operands[0], operands[1]))
10901 emit_move_insn (operands[0], operands[1]);
10902 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10903 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10906 else
10908 if (!rtx_equal_p (operands[0], operands[1]))
10909 emit_move_insn (operands[0], operands[1]);
10911 split_di (operands, 1, low, high);
10913 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10914 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10916 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10918 if (! no_new_pseudos)
10919 scratch = gen_reg_rtx (SImode);
10920 emit_move_insn (scratch, high[0]);
10921 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10922 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10923 scratch));
10925 else
10926 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10930 void
10931 ix86_split_lshrdi (rtx *operands, rtx scratch)
10933 rtx low[2], high[2];
10934 int count;
10936 if (GET_CODE (operands[2]) == CONST_INT)
10938 split_di (operands, 2, low, high);
10939 count = INTVAL (operands[2]) & 63;
10941 if (count >= 32)
10943 emit_move_insn (low[0], high[1]);
10944 emit_move_insn (high[0], const0_rtx);
10946 if (count > 32)
10947 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10949 else
10951 if (!rtx_equal_p (operands[0], operands[1]))
10952 emit_move_insn (operands[0], operands[1]);
10953 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10954 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10957 else
10959 if (!rtx_equal_p (operands[0], operands[1]))
10960 emit_move_insn (operands[0], operands[1]);
10962 split_di (operands, 1, low, high);
10964 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10965 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10967 /* Heh. By reversing the arguments, we can reuse this pattern. */
10968 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10970 if (! no_new_pseudos)
10971 scratch = force_reg (SImode, const0_rtx);
10972 else
10973 emit_move_insn (scratch, const0_rtx);
10975 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10976 scratch));
10978 else
10979 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10983 /* Helper function for the string operations below.  Test whether VARIABLE
10984    is aligned to VALUE bytes.  If so, jump to the label.  */
10985 static rtx
10986 ix86_expand_aligntest (rtx variable, int value)
10988 rtx label = gen_label_rtx ();
10989 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10990 if (GET_MODE (variable) == DImode)
10991 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10992 else
10993 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10994 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10995 1, label);
10996 return label;
10999 /* Adjust COUNTER by the VALUE. */
11000 static void
11001 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11003 if (GET_MODE (countreg) == DImode)
11004 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11005 else
11006 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11009 /* Zero extend possibly SImode EXP to Pmode register. */
11011 ix86_zero_extend_to_Pmode (rtx exp)
11013 rtx r;
11014 if (GET_MODE (exp) == VOIDmode)
11015 return force_reg (Pmode, exp);
11016 if (GET_MODE (exp) == Pmode)
11017 return copy_to_mode_reg (Pmode, exp);
11018 r = gen_reg_rtx (Pmode);
11019 emit_insn (gen_zero_extendsidi2 (r, exp));
11020 return r;
11023 /* Expand string move (memcpy) operation. Use i386 string operations when
11024 profitable. expand_clrstr contains similar code. */
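/* Added illustrative note: the strategy below is, roughly, the following.
   When not optimizing (or optimizing for size) and the count is unknown or
   not a multiple of 4, emit a plain "rep movsb".  For a known count with
   sufficient alignment, emit "rep movsl" (or "rep movsq" on 64-bit) followed
   by explicit moves for the remaining 1-3 (or 1-7) bytes.  Otherwise align
   the destination first, copy the bulk with "rep movsl"/"rep movsq" and
   handle the tail byte by byte.  */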
11026 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11028 rtx srcreg, destreg, countreg, srcexp, destexp;
11029 enum machine_mode counter_mode;
11030 HOST_WIDE_INT align = 0;
11031 unsigned HOST_WIDE_INT count = 0;
11033 if (GET_CODE (align_exp) == CONST_INT)
11034 align = INTVAL (align_exp);
11036 /* Can't use any of this if the user has appropriated esi or edi. */
11037 if (global_regs[4] || global_regs[5])
11038 return 0;
11040 /* This simple hack avoids all inlining code and simplifies code below. */
11041 if (!TARGET_ALIGN_STRINGOPS)
11042 align = 64;
11044 if (GET_CODE (count_exp) == CONST_INT)
11046 count = INTVAL (count_exp);
11047 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11048 return 0;
11051   /* Figure out the proper mode for the counter.  For 32 bits it is always
11052      SImode; for 64 bits use SImode when possible, otherwise DImode.
11053      Set count to the number of bytes copied when known at compile time.  */
11054 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11055 || x86_64_zero_extended_value (count_exp))
11056 counter_mode = SImode;
11057 else
11058 counter_mode = DImode;
11060 if (counter_mode != SImode && counter_mode != DImode)
11061 abort ();
11063 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11064 if (destreg != XEXP (dst, 0))
11065 dst = replace_equiv_address_nv (dst, destreg);
11066 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11067 if (srcreg != XEXP (src, 0))
11068 src = replace_equiv_address_nv (src, srcreg);
11070   /* When optimizing for size, emit a simple rep ; movsb instruction for
11071      counts not divisible by 4.  */
11073 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11075 emit_insn (gen_cld ());
11076 countreg = ix86_zero_extend_to_Pmode (count_exp);
11077 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11078 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11079 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11080 destexp, srcexp));
11083 /* For constant aligned (or small unaligned) copies use rep movsl
11084 followed by code copying the rest. For PentiumPro ensure 8 byte
11085 alignment to allow rep movsl acceleration. */
11087 else if (count != 0
11088 && (align >= 8
11089 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11090 || optimize_size || count < (unsigned int) 64))
11092 unsigned HOST_WIDE_INT offset = 0;
11093 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11094 rtx srcmem, dstmem;
11096 emit_insn (gen_cld ());
11097 if (count & ~(size - 1))
11099 countreg = copy_to_mode_reg (counter_mode,
11100 GEN_INT ((count >> (size == 4 ? 2 : 3))
11101 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11102 countreg = ix86_zero_extend_to_Pmode (countreg);
11104 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11105 GEN_INT (size == 4 ? 2 : 3));
11106 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11107 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11109 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11110 countreg, destexp, srcexp));
11111 offset = count & ~(size - 1);
11113 if (size == 8 && (count & 0x04))
11115 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11116 offset);
11117 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11118 offset);
11119 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11120 offset += 4;
11122 if (count & 0x02)
11124 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11125 offset);
11126 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11127 offset);
11128 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11129 offset += 2;
11131 if (count & 0x01)
11133 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11134 offset);
11135 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11136 offset);
11137 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11140 /* The generic code based on the glibc implementation:
11141 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11142 allowing accelerated copying there)
11143 - copy the data using rep movsl
11144 - copy the rest. */
11145 else
11147 rtx countreg2;
11148 rtx label = NULL;
11149 rtx srcmem, dstmem;
11150 int desired_alignment = (TARGET_PENTIUMPRO
11151 && (count == 0 || count >= (unsigned int) 260)
11152 ? 8 : UNITS_PER_WORD);
11153 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11154 dst = change_address (dst, BLKmode, destreg);
11155 src = change_address (src, BLKmode, srcreg);
11157       /* In case we don't know anything about the alignment, default to the
11158 	 library version, since it is usually equally fast and results in
11159 	 shorter code.
11161 	 Also emit a call when we know that the count is large and the call
11162 	 overhead will not be important.  */
11163 if (!TARGET_INLINE_ALL_STRINGOPS
11164 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11165 return 0;
11167 if (TARGET_SINGLE_STRINGOP)
11168 emit_insn (gen_cld ());
11170 countreg2 = gen_reg_rtx (Pmode);
11171 countreg = copy_to_mode_reg (counter_mode, count_exp);
11173 /* We don't use loops to align destination and to copy parts smaller
11174 than 4 bytes, because gcc is able to optimize such code better (in
11175 the case the destination or the count really is aligned, gcc is often
11176 able to predict the branches) and also it is friendlier to the
11177 hardware branch prediction.
11179 Using loops is beneficial for generic case, because we can
11180 handle small counts using the loops. Many CPUs (such as Athlon)
11181 have large REP prefix setup costs.
11183 This is quite costly. Maybe we can revisit this decision later or
11184 add some customizability to this code. */
11186 if (count == 0 && align < desired_alignment)
11188 label = gen_label_rtx ();
11189 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11190 LEU, 0, counter_mode, 1, label);
11192 if (align <= 1)
11194 rtx label = ix86_expand_aligntest (destreg, 1);
11195 srcmem = change_address (src, QImode, srcreg);
11196 dstmem = change_address (dst, QImode, destreg);
11197 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11198 ix86_adjust_counter (countreg, 1);
11199 emit_label (label);
11200 LABEL_NUSES (label) = 1;
11202 if (align <= 2)
11204 rtx label = ix86_expand_aligntest (destreg, 2);
11205 srcmem = change_address (src, HImode, srcreg);
11206 dstmem = change_address (dst, HImode, destreg);
11207 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11208 ix86_adjust_counter (countreg, 2);
11209 emit_label (label);
11210 LABEL_NUSES (label) = 1;
11212 if (align <= 4 && desired_alignment > 4)
11214 rtx label = ix86_expand_aligntest (destreg, 4);
11215 srcmem = change_address (src, SImode, srcreg);
11216 dstmem = change_address (dst, SImode, destreg);
11217 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11218 ix86_adjust_counter (countreg, 4);
11219 emit_label (label);
11220 LABEL_NUSES (label) = 1;
11223 if (label && desired_alignment > 4 && !TARGET_64BIT)
11225 emit_label (label);
11226 LABEL_NUSES (label) = 1;
11227 label = NULL_RTX;
11229 if (!TARGET_SINGLE_STRINGOP)
11230 emit_insn (gen_cld ());
11231 if (TARGET_64BIT)
11233 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11234 GEN_INT (3)));
11235 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11237 else
11239 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11240 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11242 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11243 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11244 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11245 countreg2, destexp, srcexp));
11247 if (label)
11249 emit_label (label);
11250 LABEL_NUSES (label) = 1;
11252 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11254 srcmem = change_address (src, SImode, srcreg);
11255 dstmem = change_address (dst, SImode, destreg);
11256 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11258 if ((align <= 4 || count == 0) && TARGET_64BIT)
11260 rtx label = ix86_expand_aligntest (countreg, 4);
11261 srcmem = change_address (src, SImode, srcreg);
11262 dstmem = change_address (dst, SImode, destreg);
11263 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11264 emit_label (label);
11265 LABEL_NUSES (label) = 1;
11267 if (align > 2 && count != 0 && (count & 2))
11269 srcmem = change_address (src, HImode, srcreg);
11270 dstmem = change_address (dst, HImode, destreg);
11271 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11273 if (align <= 2 || count == 0)
11275 rtx label = ix86_expand_aligntest (countreg, 2);
11276 srcmem = change_address (src, HImode, srcreg);
11277 dstmem = change_address (dst, HImode, destreg);
11278 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11279 emit_label (label);
11280 LABEL_NUSES (label) = 1;
11282 if (align > 1 && count != 0 && (count & 1))
11284 srcmem = change_address (src, QImode, srcreg);
11285 dstmem = change_address (dst, QImode, destreg);
11286 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11288 if (align <= 1 || count == 0)
11290 rtx label = ix86_expand_aligntest (countreg, 1);
11291 srcmem = change_address (src, QImode, srcreg);
11292 dstmem = change_address (dst, QImode, destreg);
11293 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11294 emit_label (label);
11295 LABEL_NUSES (label) = 1;
11299 return 1;
11302 /* Expand string clear operation (bzero). Use i386 string operations when
11303 profitable. expand_movstr contains similar code. */
11305 ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11307 rtx destreg, zeroreg, countreg, destexp;
11308 enum machine_mode counter_mode;
11309 HOST_WIDE_INT align = 0;
11310 unsigned HOST_WIDE_INT count = 0;
11312 if (GET_CODE (align_exp) == CONST_INT)
11313 align = INTVAL (align_exp);
11315 /* Can't use any of this if the user has appropriated esi. */
11316 if (global_regs[4])
11317 return 0;
11319 /* This simple hack avoids all inlining code and simplifies code below. */
11320 if (!TARGET_ALIGN_STRINGOPS)
11321 align = 32;
11323 if (GET_CODE (count_exp) == CONST_INT)
11325 count = INTVAL (count_exp);
11326 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11327 return 0;
11329   /* Figure out the proper mode for the counter.  For 32 bits it is always
11330      SImode; for 64 bits use SImode when possible, otherwise DImode.
11331      Set count to the number of bytes cleared when known at compile time.  */
11332 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11333 || x86_64_zero_extended_value (count_exp))
11334 counter_mode = SImode;
11335 else
11336 counter_mode = DImode;
11338 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11339 if (destreg != XEXP (dst, 0))
11340 dst = replace_equiv_address_nv (dst, destreg);
11342 emit_insn (gen_cld ());
11344   /* When optimizing for size, emit a simple rep ; stosb instruction for
11345      counts not divisible by 4.  */
11347 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11349 countreg = ix86_zero_extend_to_Pmode (count_exp);
11350 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11351 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11352 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11354 else if (count != 0
11355 && (align >= 8
11356 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11357 || optimize_size || count < (unsigned int) 64))
11359 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11360 unsigned HOST_WIDE_INT offset = 0;
11362 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11363 if (count & ~(size - 1))
11365 countreg = copy_to_mode_reg (counter_mode,
11366 GEN_INT ((count >> (size == 4 ? 2 : 3))
11367 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11368 countreg = ix86_zero_extend_to_Pmode (countreg);
11369 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11370 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11371 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11372 offset = count & ~(size - 1);
11374 if (size == 8 && (count & 0x04))
11376 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11377 offset);
11378 emit_insn (gen_strset (destreg, mem,
11379 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11380 offset += 4;
11382 if (count & 0x02)
11384 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11385 offset);
11386 emit_insn (gen_strset (destreg, mem,
11387 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11388 offset += 2;
11390 if (count & 0x01)
11392 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11393 offset);
11394 emit_insn (gen_strset (destreg, mem,
11395 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11398 else
11400 rtx countreg2;
11401 rtx label = NULL;
11402 /* Compute desired alignment of the string operation. */
11403 int desired_alignment = (TARGET_PENTIUMPRO
11404 && (count == 0 || count >= (unsigned int) 260)
11405 ? 8 : UNITS_PER_WORD);
11407       /* In case we don't know anything about the alignment, default to the
11408 	 library version, since it is usually equally fast and results in
11409 	 shorter code.
11411 	 Also emit a call when we know that the count is large and the call
11412 	 overhead will not be important.  */
11413 if (!TARGET_INLINE_ALL_STRINGOPS
11414 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11415 return 0;
11417 if (TARGET_SINGLE_STRINGOP)
11418 emit_insn (gen_cld ());
11420 countreg2 = gen_reg_rtx (Pmode);
11421 countreg = copy_to_mode_reg (counter_mode, count_exp);
11422 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11423 /* Get rid of MEM_OFFSET, it won't be accurate. */
11424 dst = change_address (dst, BLKmode, destreg);
11426 if (count == 0 && align < desired_alignment)
11428 label = gen_label_rtx ();
11429 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11430 LEU, 0, counter_mode, 1, label);
11432 if (align <= 1)
11434 rtx label = ix86_expand_aligntest (destreg, 1);
11435 emit_insn (gen_strset (destreg, dst,
11436 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11437 ix86_adjust_counter (countreg, 1);
11438 emit_label (label);
11439 LABEL_NUSES (label) = 1;
11441 if (align <= 2)
11443 rtx label = ix86_expand_aligntest (destreg, 2);
11444 emit_insn (gen_strset (destreg, dst,
11445 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11446 ix86_adjust_counter (countreg, 2);
11447 emit_label (label);
11448 LABEL_NUSES (label) = 1;
11450 if (align <= 4 && desired_alignment > 4)
11452 rtx label = ix86_expand_aligntest (destreg, 4);
11453 emit_insn (gen_strset (destreg, dst,
11454 (TARGET_64BIT
11455 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11456 : zeroreg)));
11457 ix86_adjust_counter (countreg, 4);
11458 emit_label (label);
11459 LABEL_NUSES (label) = 1;
11462 if (label && desired_alignment > 4 && !TARGET_64BIT)
11464 emit_label (label);
11465 LABEL_NUSES (label) = 1;
11466 label = NULL_RTX;
11469 if (!TARGET_SINGLE_STRINGOP)
11470 emit_insn (gen_cld ());
11471 if (TARGET_64BIT)
11473 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11474 GEN_INT (3)));
11475 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11477 else
11479 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11480 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11482 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11483 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11485 if (label)
11487 emit_label (label);
11488 LABEL_NUSES (label) = 1;
11491 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11492 emit_insn (gen_strset (destreg, dst,
11493 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11494 if (TARGET_64BIT && (align <= 4 || count == 0))
11496 rtx label = ix86_expand_aligntest (countreg, 4);
11497 emit_insn (gen_strset (destreg, dst,
11498 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11499 emit_label (label);
11500 LABEL_NUSES (label) = 1;
11502 if (align > 2 && count != 0 && (count & 2))
11503 emit_insn (gen_strset (destreg, dst,
11504 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11505 if (align <= 2 || count == 0)
11507 rtx label = ix86_expand_aligntest (countreg, 2);
11508 emit_insn (gen_strset (destreg, dst,
11509 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11510 emit_label (label);
11511 LABEL_NUSES (label) = 1;
11513 if (align > 1 && count != 0 && (count & 1))
11514 emit_insn (gen_strset (destreg, dst,
11515 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11516 if (align <= 1 || count == 0)
11518 rtx label = ix86_expand_aligntest (countreg, 1);
11519 emit_insn (gen_strset (destreg, dst,
11520 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11521 emit_label (label);
11522 LABEL_NUSES (label) = 1;
11525 return 1;
11528 /* Expand strlen. */
11530 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11532 rtx addr, scratch1, scratch2, scratch3, scratch4;
11534   /* The generic case of the strlen expander is long.  Avoid expanding it
11535      unless TARGET_INLINE_ALL_STRINGOPS.  */
11537 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11538 && !TARGET_INLINE_ALL_STRINGOPS
11539 && !optimize_size
11540 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11541 return 0;
11543 addr = force_reg (Pmode, XEXP (src, 0));
11544 scratch1 = gen_reg_rtx (Pmode);
11546 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11547 && !optimize_size)
11549 /* Well it seems that some optimizer does not combine a call like
11550 foo(strlen(bar), strlen(bar));
11551 	 when the move and the subtraction are done here.  It does calculate
11552 the length just once when these instructions are done inside of
11553 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11554 often used and I use one fewer register for the lifetime of
11555 output_strlen_unroll() this is better. */
11557 emit_move_insn (out, addr);
11559 ix86_expand_strlensi_unroll_1 (out, src, align);
11561 /* strlensi_unroll_1 returns the address of the zero at the end of
11562 the string, like memchr(), so compute the length by subtracting
11563 the start address. */
11564 if (TARGET_64BIT)
11565 emit_insn (gen_subdi3 (out, out, addr));
11566 else
11567 emit_insn (gen_subsi3 (out, out, addr));
11569 else
11571 rtx unspec;
11572 scratch2 = gen_reg_rtx (Pmode);
11573 scratch3 = gen_reg_rtx (Pmode);
11574 scratch4 = force_reg (Pmode, constm1_rtx);
11576 emit_move_insn (scratch3, addr);
11577 eoschar = force_reg (QImode, eoschar);
11579 emit_insn (gen_cld ());
11580 src = replace_equiv_address_nv (src, scratch3);
11582 /* If .md starts supporting :P, this can be done in .md. */
11583 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11584 scratch4), UNSPEC_SCAS);
11585 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11586 if (TARGET_64BIT)
11588 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11589 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11591 else
11593 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11594 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11597 return 1;
11600 /* Expand the appropriate insns for doing strlen if not just doing
11601 repnz; scasb
11603 out = result, initialized with the start address
11604 align_rtx = alignment of the address.
11605    scratch = scratch register, initialized with the start address when
11606 	     not aligned, otherwise undefined
11608 This is just the body. It needs the initializations mentioned above and
11609 some address computing at the end. These things are done in i386.md. */
11611 static void
11612 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11614 int align;
11615 rtx tmp;
11616 rtx align_2_label = NULL_RTX;
11617 rtx align_3_label = NULL_RTX;
11618 rtx align_4_label = gen_label_rtx ();
11619 rtx end_0_label = gen_label_rtx ();
11620 rtx mem;
11621 rtx tmpreg = gen_reg_rtx (SImode);
11622 rtx scratch = gen_reg_rtx (SImode);
11623 rtx cmp;
11625 align = 0;
11626 if (GET_CODE (align_rtx) == CONST_INT)
11627 align = INTVAL (align_rtx);
11629 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11631 /* Is there a known alignment and is it less than 4? */
11632 if (align < 4)
11634 rtx scratch1 = gen_reg_rtx (Pmode);
11635 emit_move_insn (scratch1, out);
11636 /* Is there a known alignment and is it not 2? */
11637 if (align != 2)
11639 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11640 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11642 /* Leave just the 3 lower bits. */
11643 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11644 NULL_RTX, 0, OPTAB_WIDEN);
11646 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11647 Pmode, 1, align_4_label);
11648 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11649 Pmode, 1, align_2_label);
11650 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11651 Pmode, 1, align_3_label);
11653 else
11655 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
11656 	     check whether it is aligned to a 4-byte boundary.  */
11658 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11659 NULL_RTX, 0, OPTAB_WIDEN);
11661 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11662 Pmode, 1, align_4_label);
11665 mem = change_address (src, QImode, out);
11667 /* Now compare the bytes. */
11669       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11670 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11671 QImode, 1, end_0_label);
11673 /* Increment the address. */
11674 if (TARGET_64BIT)
11675 emit_insn (gen_adddi3 (out, out, const1_rtx));
11676 else
11677 emit_insn (gen_addsi3 (out, out, const1_rtx));
11679 /* Not needed with an alignment of 2 */
11680 if (align != 2)
11682 emit_label (align_2_label);
11684 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11685 end_0_label);
11687 if (TARGET_64BIT)
11688 emit_insn (gen_adddi3 (out, out, const1_rtx));
11689 else
11690 emit_insn (gen_addsi3 (out, out, const1_rtx));
11692 emit_label (align_3_label);
11695 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11696 end_0_label);
11698 if (TARGET_64BIT)
11699 emit_insn (gen_adddi3 (out, out, const1_rtx));
11700 else
11701 emit_insn (gen_addsi3 (out, out, const1_rtx));
11704   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11705      align this loop: doing so only enlarges the program and does not
11706      help speed it up.  */
11707 emit_label (align_4_label);
11709 mem = change_address (src, SImode, out);
11710 emit_move_insn (scratch, mem);
11711 if (TARGET_64BIT)
11712 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11713 else
11714 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11716   /* This formula yields a nonzero result iff one of the bytes is zero.
11717      This saves three branches inside the loop and many cycles.  */
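  /* Added worked example for the zero-byte test
     (x - 0x01010101) & ~x & 0x80808080:
       x = 0x12003456 (has a zero byte): x - 0x01010101 = 0x10ff3355,
	 ~x = 0xedffcba9, their AND is 0x00ff0301, and masking with
	 0x80808080 leaves 0x00800000, which is nonzero, so the loop exits.
       x = 0x12343456 (no zero byte) gives 0x01030301 before the mask and
	 0 after it, so the loop continues.  */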
11719 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11720 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11721 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11722 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11723 gen_int_mode (0x80808080, SImode)));
11724 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11725 align_4_label);
11727 if (TARGET_CMOVE)
11729 rtx reg = gen_reg_rtx (SImode);
11730 rtx reg2 = gen_reg_rtx (Pmode);
11731 emit_move_insn (reg, tmpreg);
11732 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11734 /* If zero is not in the first two bytes, move two bytes forward. */
11735 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11736 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11737 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11738 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11739 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11740 reg,
11741 tmpreg)));
11742 /* Emit lea manually to avoid clobbering of flags. */
11743 emit_insn (gen_rtx_SET (SImode, reg2,
11744 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11746 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11747 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11748 emit_insn (gen_rtx_SET (VOIDmode, out,
11749 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11750 reg2,
11751 out)));
11754 else
11756 rtx end_2_label = gen_label_rtx ();
11757 /* Is zero in the first two bytes? */
11759 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11760 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11761 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11762 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11763 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11764 pc_rtx);
11765 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11766 JUMP_LABEL (tmp) = end_2_label;
11768 /* Not in the first two. Move two bytes forward. */
11769 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11770 if (TARGET_64BIT)
11771 emit_insn (gen_adddi3 (out, out, const2_rtx));
11772 else
11773 emit_insn (gen_addsi3 (out, out, const2_rtx));
11775 emit_label (end_2_label);
11779 /* Avoid branch in fixing the byte. */
11780 tmpreg = gen_lowpart (QImode, tmpreg);
11781 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11782 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11783 if (TARGET_64BIT)
11784 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11785 else
11786 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11788 emit_label (end_0_label);
11791 void
11792 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11793 rtx callarg2 ATTRIBUTE_UNUSED,
11794 rtx pop, int sibcall)
11796 rtx use = NULL, call;
11798 if (pop == const0_rtx)
11799 pop = NULL;
11800 if (TARGET_64BIT && pop)
11801 abort ();
11803 #if TARGET_MACHO
11804 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11805 fnaddr = machopic_indirect_call_target (fnaddr);
11806 #else
11807 /* Static functions and indirect calls don't need the pic register. */
11808 if (! TARGET_64BIT && flag_pic
11809 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11810 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11811 use_reg (&use, pic_offset_table_rtx);
11813 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11815 rtx al = gen_rtx_REG (QImode, 0);
11816 emit_move_insn (al, callarg2);
11817 use_reg (&use, al);
11819 #endif /* TARGET_MACHO */
11821 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11823 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11824 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11826 if (sibcall && TARGET_64BIT
11827 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11829 rtx addr;
11830 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11831 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11832 emit_move_insn (fnaddr, addr);
11833 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11836 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11837 if (retval)
11838 call = gen_rtx_SET (VOIDmode, retval, call);
11839 if (pop)
11841 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11842 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11843 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11846 call = emit_call_insn (call);
11847 if (use)
11848 CALL_INSN_FUNCTION_USAGE (call) = use;
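/* Illustrative sketch: for a 32-bit callee-pop call (POP nonzero, e.g. a
   stdcall function popping 16 bytes of arguments), the PARALLEL built
   above has roughly this shape -- the symbol name, register numbers and
   constants are hypothetical, not emitted verbatim:

     (parallel [(set (reg:SI 0 eax)
                     (call (mem:QI (symbol_ref:SI ("foo"))) (const_int 16)))
                (set (reg:SI 7 esp)
                     (plus:SI (reg:SI 7 esp) (const_int 16)))])

   so the stack adjustment is recorded as part of the call insn itself.  */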
11852 /* Clear stack slot assignments remembered from previous functions.
11853 This is called from INIT_EXPANDERS once before RTL is emitted for each
11854 function. */
11856 static struct machine_function *
11857 ix86_init_machine_status (void)
11859 struct machine_function *f;
11861 f = ggc_alloc_cleared (sizeof (struct machine_function));
11862 f->use_fast_prologue_epilogue_nregs = -1;
11864 return f;
11867 /* Return a MEM corresponding to a stack slot with mode MODE.
11868 Allocate a new slot if necessary.
11870 The RTL for a function can have several slots available: N is
11871 which slot to use. */
11874 assign_386_stack_local (enum machine_mode mode, int n)
11876 struct stack_local_entry *s;
11878 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11879 abort ();
11881 for (s = ix86_stack_locals; s; s = s->next)
11882 if (s->mode == mode && s->n == n)
11883 return s->rtl;
11885 s = (struct stack_local_entry *)
11886 ggc_alloc (sizeof (struct stack_local_entry));
11887 s->n = n;
11888 s->mode = mode;
11889 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11891 s->next = ix86_stack_locals;
11892 ix86_stack_locals = s;
11893 return s->rtl;
11896 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11898 static GTY(()) rtx ix86_tls_symbol;
11900 ix86_tls_get_addr (void)
11903 if (!ix86_tls_symbol)
11905 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11906 (TARGET_GNU_TLS && !TARGET_64BIT)
11907 ? "___tls_get_addr"
11908 : "__tls_get_addr");
11911 return ix86_tls_symbol;
11914 /* Calculate the length of the memory address in the instruction
11915 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11917 static int
11918 memory_address_length (rtx addr)
11920 struct ix86_address parts;
11921 rtx base, index, disp;
11922 int len;
11924 if (GET_CODE (addr) == PRE_DEC
11925 || GET_CODE (addr) == POST_INC
11926 || GET_CODE (addr) == PRE_MODIFY
11927 || GET_CODE (addr) == POST_MODIFY)
11928 return 0;
11930 if (! ix86_decompose_address (addr, &parts))
11931 abort ();
11933 base = parts.base;
11934 index = parts.index;
11935 disp = parts.disp;
11936 len = 0;
11938 /* Rule of thumb:
11939 - esp as the base always wants an index,
11940 - ebp as the base always wants a displacement. */
11942 /* Register Indirect. */
11943 if (base && !index && !disp)
11945 /* esp (for its index) and ebp (for its displacement) need
11946 the two-byte modrm form. */
11947 if (addr == stack_pointer_rtx
11948 || addr == arg_pointer_rtx
11949 || addr == frame_pointer_rtx
11950 || addr == hard_frame_pointer_rtx)
11951 len = 1;
11954 /* Direct Addressing. */
11955 else if (disp && !base && !index)
11956 len = 4;
11958 else
11960 /* Find the length of the displacement constant. */
11961 if (disp)
11963 if (GET_CODE (disp) == CONST_INT
11964 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11965 && base)
11966 len = 1;
11967 else
11968 len = 4;
11970 /* ebp always wants a displacement. */
11971 else if (base == hard_frame_pointer_rtx)
11972 len = 1;
11974 /* An index requires the two-byte modrm form.... */
11975 if (index
11976 /* ...like esp, which always wants an index. */
11977 || base == stack_pointer_rtx
11978 || base == arg_pointer_rtx
11979 || base == frame_pointer_rtx)
11980 len += 1;
11983 return len;
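/* Worked examples of the length computed above (extra address bytes beyond
   the opcode and modrm byte; illustrative, 32-bit addressing assumed):

     (%eax)         -> 0   plain register indirect
     (%esp)         -> 1   esp always needs the SIB byte
     8(%ebp)        -> 1   disp8 fits the 'K' constraint
     8(%ebp,%esi)   -> 2   disp8 plus the SIB byte for the index
     foo            -> 4   absolute disp32, no base or index
     4096(%eax)     -> 4   displacement too large for 8 bits  */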
11986 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11987 is set, expect that the insn has an 8-bit immediate alternative. */
11989 ix86_attr_length_immediate_default (rtx insn, int shortform)
11991 int len = 0;
11992 int i;
11993 extract_insn_cached (insn);
11994 for (i = recog_data.n_operands - 1; i >= 0; --i)
11995 if (CONSTANT_P (recog_data.operand[i]))
11997 if (len)
11998 abort ();
11999 if (shortform
12000 && GET_CODE (recog_data.operand[i]) == CONST_INT
12001 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12002 len = 1;
12003 else
12005 switch (get_attr_mode (insn))
12007 case MODE_QI:
12008 len += 1;
12009 break;
12010 case MODE_HI:
12011 len += 2;
12012 break;
12013 case MODE_SI:
12014 len += 4;
12015 break;
12016 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12017 case MODE_DI:
12018 len += 4;
12019 break;
12020 default:
12021 fatal_insn ("unknown insn mode", insn);
12025 return len;
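/* For example, with SHORTFORM set, "addl $3, %eax" counts 1 byte for the
   immediate because 3 satisfies the 'K' (signed 8-bit) constraint, while
   "addl $1000, %eax" counts 4; a DImode "addq $1000, %rax" also counts 4,
   since such immediates are sign-extended from 32 bits.  (Illustrative
   examples, not taken from the insn patterns.)  */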
12027 /* Compute default value for "length_address" attribute. */
12029 ix86_attr_length_address_default (rtx insn)
12031 int i;
12033 if (get_attr_type (insn) == TYPE_LEA)
12035 rtx set = PATTERN (insn);
12036 if (GET_CODE (set) == SET)
12038 else if (GET_CODE (set) == PARALLEL
12039 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12040 set = XVECEXP (set, 0, 0);
12041 else
12043 #ifdef ENABLE_CHECKING
12044 abort ();
12045 #endif
12046 return 0;
12049 return memory_address_length (SET_SRC (set));
12052 extract_insn_cached (insn);
12053 for (i = recog_data.n_operands - 1; i >= 0; --i)
12054 if (GET_CODE (recog_data.operand[i]) == MEM)
12056 return memory_address_length (XEXP (recog_data.operand[i], 0));
12057 break;
12059 return 0;
12062 /* Return the maximum number of instructions a cpu can issue. */
12064 static int
12065 ix86_issue_rate (void)
12067 switch (ix86_tune)
12069 case PROCESSOR_PENTIUM:
12070 case PROCESSOR_K6:
12071 return 2;
12073 case PROCESSOR_PENTIUMPRO:
12074 case PROCESSOR_PENTIUM4:
12075 case PROCESSOR_ATHLON:
12076 case PROCESSOR_K8:
12077 return 3;
12079 default:
12080 return 1;
12084 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12085 set by DEP_INSN and nothing else set by DEP_INSN. */
12087 static int
12088 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12090 rtx set, set2;
12092 /* Simplify the test for uninteresting insns. */
12093 if (insn_type != TYPE_SETCC
12094 && insn_type != TYPE_ICMOV
12095 && insn_type != TYPE_FCMOV
12096 && insn_type != TYPE_IBR)
12097 return 0;
12099 if ((set = single_set (dep_insn)) != 0)
12101 set = SET_DEST (set);
12102 set2 = NULL_RTX;
12104 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12105 && XVECLEN (PATTERN (dep_insn), 0) == 2
12106 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12107 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12109 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12110 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12112 else
12113 return 0;
12115 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12116 return 0;
12118 /* This test is true if the dependent insn reads the flags but
12119 not any other potentially set register. */
12120 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12121 return 0;
12123 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12124 return 0;
12126 return 1;
12129 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12130 address with operands set by DEP_INSN. */
12132 static int
12133 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12135 rtx addr;
12137 if (insn_type == TYPE_LEA
12138 && TARGET_PENTIUM)
12140 addr = PATTERN (insn);
12141 if (GET_CODE (addr) == SET)
12143 else if (GET_CODE (addr) == PARALLEL
12144 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12145 addr = XVECEXP (addr, 0, 0);
12146 else
12147 abort ();
12148 addr = SET_SRC (addr);
12150 else
12152 int i;
12153 extract_insn_cached (insn);
12154 for (i = recog_data.n_operands - 1; i >= 0; --i)
12155 if (GET_CODE (recog_data.operand[i]) == MEM)
12157 addr = XEXP (recog_data.operand[i], 0);
12158 goto found;
12160 return 0;
12161 found:;
12164 return modified_in_p (addr, dep_insn);
12167 static int
12168 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12170 enum attr_type insn_type, dep_insn_type;
12171 enum attr_memory memory, dep_memory;
12172 rtx set, set2;
12173 int dep_insn_code_number;
12175 /* Anti and output dependencies have zero cost on all CPUs. */
12176 if (REG_NOTE_KIND (link) != 0)
12177 return 0;
12179 dep_insn_code_number = recog_memoized (dep_insn);
12181 /* If we can't recognize the insns, we can't really do anything. */
12182 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12183 return cost;
12185 insn_type = get_attr_type (insn);
12186 dep_insn_type = get_attr_type (dep_insn);
12188 switch (ix86_tune)
12190 case PROCESSOR_PENTIUM:
12191 /* Address Generation Interlock adds a cycle of latency. */
12192 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12193 cost += 1;
12195 /* ??? Compares pair with jump/setcc. */
12196 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12197 cost = 0;
12199 /* Floating point stores require value to be ready one cycle earlier. */
12200 if (insn_type == TYPE_FMOV
12201 && get_attr_memory (insn) == MEMORY_STORE
12202 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12203 cost += 1;
12204 break;
12206 case PROCESSOR_PENTIUMPRO:
12207 memory = get_attr_memory (insn);
12208 dep_memory = get_attr_memory (dep_insn);
12210 /* Since we can't represent delayed latencies of load+operation,
12211 increase the cost here for non-imov insns. */
12212 if (dep_insn_type != TYPE_IMOV
12213 && dep_insn_type != TYPE_FMOV
12214 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12215 cost += 1;
12217 /* INT->FP conversion is expensive. */
12218 if (get_attr_fp_int_src (dep_insn))
12219 cost += 5;
12221 /* There is one cycle extra latency between an FP op and a store. */
12222 if (insn_type == TYPE_FMOV
12223 && (set = single_set (dep_insn)) != NULL_RTX
12224 && (set2 = single_set (insn)) != NULL_RTX
12225 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12226 && GET_CODE (SET_DEST (set2)) == MEM)
12227 cost += 1;
12229 /* Show the ability of the reorder buffer to hide the latency of a load by
12230 executing it in parallel with the previous instruction when the previous
12231 instruction is not needed to compute the address. */
12232 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12233 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12235 /* Claim moves to take one cycle, as the core can issue one load
12236 at a time and the next load can start a cycle later. */
12237 if (dep_insn_type == TYPE_IMOV
12238 || dep_insn_type == TYPE_FMOV)
12239 cost = 1;
12240 else if (cost > 1)
12241 cost--;
12243 break;
12245 case PROCESSOR_K6:
12246 memory = get_attr_memory (insn);
12247 dep_memory = get_attr_memory (dep_insn);
12248 /* The esp dependency is resolved before the instruction is really
12249 finished. */
12250 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12251 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12252 return 1;
12254 /* Since we can't represent delayed latencies of load+operation,
12255 increase the cost here for non-imov insns. */
12256 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12257 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12259 /* INT->FP conversion is expensive. */
12260 if (get_attr_fp_int_src (dep_insn))
12261 cost += 5;
12263 /* Show the ability of the reorder buffer to hide the latency of a load by
12264 executing it in parallel with the previous instruction when the previous
12265 instruction is not needed to compute the address. */
12266 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12267 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12269 /* Claim moves to take one cycle, as the core can issue one load
12270 at a time and the next load can start a cycle later. */
12271 if (dep_insn_type == TYPE_IMOV
12272 || dep_insn_type == TYPE_FMOV)
12273 cost = 1;
12274 else if (cost > 2)
12275 cost -= 2;
12276 else
12277 cost = 1;
12279 break;
12281 case PROCESSOR_ATHLON:
12282 case PROCESSOR_K8:
12283 memory = get_attr_memory (insn);
12284 dep_memory = get_attr_memory (dep_insn);
12286 /* Show the ability of the reorder buffer to hide the latency of a load by
12287 executing it in parallel with the previous instruction when the previous
12288 instruction is not needed to compute the address. */
12289 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12290 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12292 enum attr_unit unit = get_attr_unit (insn);
12293 int loadcost = 3;
12295 /* Because of the difference between the lengths of the integer and
12296 floating-point unit pipeline preparation stages, the memory operands
12297 for floating point are cheaper.
12299 ??? For Athlon the difference is most probably 2. */
12300 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12301 loadcost = 3;
12302 else
12303 loadcost = TARGET_ATHLON ? 2 : 0;
12305 if (cost >= loadcost)
12306 cost -= loadcost;
12307 else
12308 cost = 0;
12311 default:
12312 break;
12315 return cost;
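/* Illustrative example of the Pentium AGI adjustment above:

     addl  $4, %ebx
     movl  (%ebx), %eax

   The load's address is computed by the immediately preceding insn, so
   ix86_agi_dependant returns nonzero and one extra cycle of latency is
   charged to the dependence.  */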
12318 static int
12319 ia32_use_dfa_pipeline_interface (void)
12321 if (TARGET_PENTIUM
12322 || TARGET_PENTIUMPRO
12323 || TARGET_ATHLON_K8)
12324 return 1;
12325 return 0;
12328 /* How many alternative schedules to try. This should be as wide as the
12329 scheduling freedom in the DFA, but no wider. Making this value too
12330 large results in extra work for the scheduler. */
12332 static int
12333 ia32_multipass_dfa_lookahead (void)
12335 if (ix86_tune == PROCESSOR_PENTIUM)
12336 return 2;
12338 if (ix86_tune == PROCESSOR_PENTIUMPRO)
12339 return 1;
12341 else
12342 return 0;
12346 /* Compute the alignment given to a constant that is being placed in memory.
12347 EXP is the constant and ALIGN is the alignment that the object would
12348 ordinarily have.
12349 The value of this function is used instead of that alignment to align
12350 the object. */
12353 ix86_constant_alignment (tree exp, int align)
12355 if (TREE_CODE (exp) == REAL_CST)
12357 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12358 return 64;
12359 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12360 return 128;
12362 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12363 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12364 return BITS_PER_WORD;
12366 return align;
12369 /* Compute the alignment for a static variable.
12370 TYPE is the data type, and ALIGN is the alignment that
12371 the object would ordinarily have. The value of this function is used
12372 instead of that alignment to align the object. */
12375 ix86_data_alignment (tree type, int align)
12377 if (AGGREGATE_TYPE_P (type)
12378 && TYPE_SIZE (type)
12379 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12380 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12381 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12382 return 256;
12384 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12385 to a 16-byte boundary. */
12386 if (TARGET_64BIT)
12388 if (AGGREGATE_TYPE_P (type)
12389 && TYPE_SIZE (type)
12390 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12391 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12392 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12393 return 128;
12396 if (TREE_CODE (type) == ARRAY_TYPE)
12398 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12399 return 64;
12400 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12401 return 128;
12403 else if (TREE_CODE (type) == COMPLEX_TYPE)
12406 if (TYPE_MODE (type) == DCmode && align < 64)
12407 return 64;
12408 if (TYPE_MODE (type) == XCmode && align < 128)
12409 return 128;
12411 else if ((TREE_CODE (type) == RECORD_TYPE
12412 || TREE_CODE (type) == UNION_TYPE
12413 || TREE_CODE (type) == QUAL_UNION_TYPE)
12414 && TYPE_FIELDS (type))
12416 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12417 return 64;
12418 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12419 return 128;
12421 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12422 || TREE_CODE (type) == INTEGER_TYPE)
12424 if (TYPE_MODE (type) == DFmode && align < 64)
12425 return 64;
12426 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12427 return 128;
12430 return align;
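/* For instance, a file-scope "static double d[100]" is an aggregate of
   6400 bits, so the first aggregate check above raises its alignment to
   256 bits (32 bytes), assuming it would otherwise be aligned to less
   than that.  (Illustrative example, not taken from a testcase.)  */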
12433 /* Compute the alignment for a local variable.
12434 TYPE is the data type, and ALIGN is the alignment that
12435 the object would ordinarily have. The value of this macro is used
12436 instead of that alignment to align the object. */
12439 ix86_local_alignment (tree type, int align)
12441 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12442 to a 16-byte boundary. */
12443 if (TARGET_64BIT)
12445 if (AGGREGATE_TYPE_P (type)
12446 && TYPE_SIZE (type)
12447 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12448 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12449 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12450 return 128;
12452 if (TREE_CODE (type) == ARRAY_TYPE)
12454 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12455 return 64;
12456 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12457 return 128;
12459 else if (TREE_CODE (type) == COMPLEX_TYPE)
12461 if (TYPE_MODE (type) == DCmode && align < 64)
12462 return 64;
12463 if (TYPE_MODE (type) == XCmode && align < 128)
12464 return 128;
12466 else if ((TREE_CODE (type) == RECORD_TYPE
12467 || TREE_CODE (type) == UNION_TYPE
12468 || TREE_CODE (type) == QUAL_UNION_TYPE)
12469 && TYPE_FIELDS (type))
12471 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12472 return 64;
12473 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12474 return 128;
12476 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12477 || TREE_CODE (type) == INTEGER_TYPE)
12480 if (TYPE_MODE (type) == DFmode && align < 64)
12481 return 64;
12482 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12483 return 128;
12485 return align;
12488 /* Emit RTL insns to initialize the variable parts of a trampoline.
12489 FNADDR is an RTX for the address of the function's pure code.
12490 CXT is an RTX for the static chain value for the function. */
12491 void
12492 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12494 if (!TARGET_64BIT)
12496 /* Compute offset from the end of the jmp to the target function. */
12497 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12498 plus_constant (tramp, 10),
12499 NULL_RTX, 1, OPTAB_DIRECT);
12500 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12501 gen_int_mode (0xb9, QImode));
12502 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12503 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12504 gen_int_mode (0xe9, QImode));
12505 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12507 else
12509 int offset = 0;
12510 /* Try to load the address using the shorter movl instead of movabs.
12511 We may want to support movq for kernel mode, but the kernel does not use
12512 trampolines at the moment. */
12513 if (x86_64_zero_extended_value (fnaddr))
12515 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12516 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12517 gen_int_mode (0xbb41, HImode));
12518 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12519 gen_lowpart (SImode, fnaddr));
12520 offset += 6;
12522 else
12524 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12525 gen_int_mode (0xbb49, HImode));
12526 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12527 fnaddr);
12528 offset += 10;
12530 /* Load static chain using movabs to r10. */
12531 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12532 gen_int_mode (0xba49, HImode));
12533 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12534 cxt);
12535 offset += 10;
12536 /* Jump to r11. */
12537 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12538 gen_int_mode (0xff49, HImode));
12539 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12540 gen_int_mode (0xe3, QImode));
12541 offset += 3;
12542 if (offset > TRAMPOLINE_SIZE)
12543 abort ();
12546 #ifdef TRANSFER_FROM_TRAMPOLINE
12547 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12548 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12549 #endif
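/* Byte layout of the trampolines built above (illustrative decode of the
   little-endian constants stored by the emit_move_insn calls):

   32-bit, 10 bytes:
     b9 <cxt:4>         movl   $cxt, %ecx
     e9 <disp:4>        jmp    fnaddr        ; disp is relative to tramp+10

   64-bit:
     49 bb <fnaddr:8>   movabs $fnaddr, %r11 ; or 41 bb <fnaddr:4> (movl)
     49 ba <cxt:8>      movabs $cxt, %r10
     49 ff e3           jmp    *%r11  */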
12552 #define def_builtin(MASK, NAME, TYPE, CODE) \
12553 do { \
12554 if ((MASK) & target_flags \
12555 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12556 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12557 NULL, NULL_TREE); \
12558 } while (0)
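/* Typical use of def_builtin, as done from ix86_init_mmx_sse_builtins
   below (illustrative -- most builtins are actually registered by looping
   over the bdesc_* tables rather than by literal calls like this one):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The MASK argument keeps the builtin from being created unless the
   corresponding -m option is enabled (and, for MASK_64BIT, unless the
   target is 64-bit).  */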
12560 struct builtin_description
12562 const unsigned int mask;
12563 const enum insn_code icode;
12564 const char *const name;
12565 const enum ix86_builtins code;
12566 const enum rtx_code comparison;
12567 const unsigned int flag;
12570 static const struct builtin_description bdesc_comi[] =
12572 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12573 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12574 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12575 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12576 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12577 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12578 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12579 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12580 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12581 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12582 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12583 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12584 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12585 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12586 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12587 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12588 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12589 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12590 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12591 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12592 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12593 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12594 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12595 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12598 static const struct builtin_description bdesc_2arg[] =
12600 /* SSE */
12601 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12602 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12603 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12604 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12605 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12606 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12607 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12608 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12610 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12611 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12612 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12613 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12614 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12615 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12616 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12617 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12618 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12619 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12620 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12621 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12622 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12623 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12624 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12625 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12626 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12627 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12628 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12629 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12631 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12632 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12633 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12634 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12636 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12637 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12638 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12639 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12641 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12642 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12643 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12644 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12645 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12647 /* MMX */
12648 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12649 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12650 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12651 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12652 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12653 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12654 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12655 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12657 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12658 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12659 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12660 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12661 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12662 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12663 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12664 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12666 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12667 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12668 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12670 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12671 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12672 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12673 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12675 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12676 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12678 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12679 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12680 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12681 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12682 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12683 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12685 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12686 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12687 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12688 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12690 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12691 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12692 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12693 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12694 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12695 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12697 /* Special. */
12698 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12699 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12700 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12702 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12703 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12704 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12706 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12707 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12708 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12709 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12710 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12711 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12713 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12714 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12715 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12716 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12717 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12718 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12720 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12721 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12722 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12723 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12725 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12726 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12728 /* SSE2 */
12729 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12734 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12739 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12740 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12741 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12742 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12743 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12744 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12745 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12746 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12747 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12748 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12749 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12750 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12751 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12752 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12753 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12754 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12755 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12756 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12757 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12759 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12762 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12764 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12765 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12769 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12773 /* SSE2 MMX */
12774 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12776 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12783 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12784 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12785 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12786 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12787 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12788 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12789 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12790 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12793 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12837 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12855 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12859 /* SSE3 MMX */
12860 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12861 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12862 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12863 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12864 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12865 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
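/* In the tables above, the COMPARISON and FLAG fields drive the SSE
   compare expanders: "cmpgtps", for example, is listed as LT with FLAG
   set to 1, presumably so the expander can emit the LT pattern with the
   operands swapped (a > b computed as b < a).  This reading is an
   assumption from the table layout, not something spelled out here.  */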
12868 static const struct builtin_description bdesc_1arg[] =
12870 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12871 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12873 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12874 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12875 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12877 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12878 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12879 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12880 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12881 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12882 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12904 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12905 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12913 /* SSE3 */
12914 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12915 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12916 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12919 void
12920 ix86_init_builtins (void)
12922 if (TARGET_MMX)
12923 ix86_init_mmx_sse_builtins ();
12926 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12927 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12928 builtins. */
12929 static void
12930 ix86_init_mmx_sse_builtins (void)
12932 const struct builtin_description * d;
12933 size_t i;
12935 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12936 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12937 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12938 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12939 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12940 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12941 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12942 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
12943 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
12944 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
12946 tree pchar_type_node = build_pointer_type (char_type_node);
12947 tree pcchar_type_node = build_pointer_type (
12948 build_type_variant (char_type_node, 1, 0));
12949 tree pfloat_type_node = build_pointer_type (float_type_node);
12950 tree pcfloat_type_node = build_pointer_type (
12951 build_type_variant (float_type_node, 1, 0));
12952 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12953 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12954 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12956 /* Comparisons. */
12957 tree int_ftype_v4sf_v4sf
12958 = build_function_type_list (integer_type_node,
12959 V4SF_type_node, V4SF_type_node, NULL_TREE);
12960 tree v4si_ftype_v4sf_v4sf
12961 = build_function_type_list (V4SI_type_node,
12962 V4SF_type_node, V4SF_type_node, NULL_TREE);
12963 /* MMX/SSE/integer conversions. */
12964 tree int_ftype_v4sf
12965 = build_function_type_list (integer_type_node,
12966 V4SF_type_node, NULL_TREE);
12967 tree int64_ftype_v4sf
12968 = build_function_type_list (long_long_integer_type_node,
12969 V4SF_type_node, NULL_TREE);
12970 tree int_ftype_v8qi
12971 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12972 tree v4sf_ftype_v4sf_int
12973 = build_function_type_list (V4SF_type_node,
12974 V4SF_type_node, integer_type_node, NULL_TREE);
12975 tree v4sf_ftype_v4sf_int64
12976 = build_function_type_list (V4SF_type_node,
12977 V4SF_type_node, long_long_integer_type_node,
12978 NULL_TREE);
12979 tree v4sf_ftype_v4sf_v2si
12980 = build_function_type_list (V4SF_type_node,
12981 V4SF_type_node, V2SI_type_node, NULL_TREE);
12982 tree int_ftype_v4hi_int
12983 = build_function_type_list (integer_type_node,
12984 V4HI_type_node, integer_type_node, NULL_TREE);
12985 tree v4hi_ftype_v4hi_int_int
12986 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12987 integer_type_node, integer_type_node,
12988 NULL_TREE);
12989 /* Miscellaneous. */
12990 tree v8qi_ftype_v4hi_v4hi
12991 = build_function_type_list (V8QI_type_node,
12992 V4HI_type_node, V4HI_type_node, NULL_TREE);
12993 tree v4hi_ftype_v2si_v2si
12994 = build_function_type_list (V4HI_type_node,
12995 V2SI_type_node, V2SI_type_node, NULL_TREE);
12996 tree v4sf_ftype_v4sf_v4sf_int
12997 = build_function_type_list (V4SF_type_node,
12998 V4SF_type_node, V4SF_type_node,
12999 integer_type_node, NULL_TREE);
13000 tree v2si_ftype_v4hi_v4hi
13001 = build_function_type_list (V2SI_type_node,
13002 V4HI_type_node, V4HI_type_node, NULL_TREE);
13003 tree v4hi_ftype_v4hi_int
13004 = build_function_type_list (V4HI_type_node,
13005 V4HI_type_node, integer_type_node, NULL_TREE);
13006 tree v4hi_ftype_v4hi_di
13007 = build_function_type_list (V4HI_type_node,
13008 V4HI_type_node, long_long_unsigned_type_node,
13009 NULL_TREE);
13010 tree v2si_ftype_v2si_di
13011 = build_function_type_list (V2SI_type_node,
13012 V2SI_type_node, long_long_unsigned_type_node,
13013 NULL_TREE);
13014 tree void_ftype_void
13015 = build_function_type (void_type_node, void_list_node);
13016 tree void_ftype_unsigned
13017 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13018 tree void_ftype_unsigned_unsigned
13019 = build_function_type_list (void_type_node, unsigned_type_node,
13020 unsigned_type_node, NULL_TREE);
13021 tree void_ftype_pcvoid_unsigned_unsigned
13022 = build_function_type_list (void_type_node, const_ptr_type_node,
13023 unsigned_type_node, unsigned_type_node,
13024 NULL_TREE);
13025 tree unsigned_ftype_void
13026 = build_function_type (unsigned_type_node, void_list_node);
13027 tree di_ftype_void
13028 = build_function_type (long_long_unsigned_type_node, void_list_node);
13029 tree v4sf_ftype_void
13030 = build_function_type (V4SF_type_node, void_list_node);
13031 tree v2si_ftype_v4sf
13032 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13033 /* Loads/stores. */
13034 tree void_ftype_v8qi_v8qi_pchar
13035 = build_function_type_list (void_type_node,
13036 V8QI_type_node, V8QI_type_node,
13037 pchar_type_node, NULL_TREE);
13038 tree v4sf_ftype_pcfloat
13039 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13040 /* @@@ the type is bogus */
13041 tree v4sf_ftype_v4sf_pv2si
13042 = build_function_type_list (V4SF_type_node,
13043 V4SF_type_node, pv2si_type_node, NULL_TREE);
13044 tree void_ftype_pv2si_v4sf
13045 = build_function_type_list (void_type_node,
13046 pv2si_type_node, V4SF_type_node, NULL_TREE);
13047 tree void_ftype_pfloat_v4sf
13048 = build_function_type_list (void_type_node,
13049 pfloat_type_node, V4SF_type_node, NULL_TREE);
13050 tree void_ftype_pdi_di
13051 = build_function_type_list (void_type_node,
13052 pdi_type_node, long_long_unsigned_type_node,
13053 NULL_TREE);
13054 tree void_ftype_pv2di_v2di
13055 = build_function_type_list (void_type_node,
13056 pv2di_type_node, V2DI_type_node, NULL_TREE);
13057 /* Normal vector unops. */
13058 tree v4sf_ftype_v4sf
13059 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13061 /* Normal vector binops. */
13062 tree v4sf_ftype_v4sf_v4sf
13063 = build_function_type_list (V4SF_type_node,
13064 V4SF_type_node, V4SF_type_node, NULL_TREE);
13065 tree v8qi_ftype_v8qi_v8qi
13066 = build_function_type_list (V8QI_type_node,
13067 V8QI_type_node, V8QI_type_node, NULL_TREE);
13068 tree v4hi_ftype_v4hi_v4hi
13069 = build_function_type_list (V4HI_type_node,
13070 V4HI_type_node, V4HI_type_node, NULL_TREE);
13071 tree v2si_ftype_v2si_v2si
13072 = build_function_type_list (V2SI_type_node,
13073 V2SI_type_node, V2SI_type_node, NULL_TREE);
13074 tree di_ftype_di_di
13075 = build_function_type_list (long_long_unsigned_type_node,
13076 long_long_unsigned_type_node,
13077 long_long_unsigned_type_node, NULL_TREE);
13079 tree v2si_ftype_v2sf
13080 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13081 tree v2sf_ftype_v2si
13082 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13083 tree v2si_ftype_v2si
13084 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13085 tree v2sf_ftype_v2sf
13086 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13087 tree v2sf_ftype_v2sf_v2sf
13088 = build_function_type_list (V2SF_type_node,
13089 V2SF_type_node, V2SF_type_node, NULL_TREE);
13090 tree v2si_ftype_v2sf_v2sf
13091 = build_function_type_list (V2SI_type_node,
13092 V2SF_type_node, V2SF_type_node, NULL_TREE);
13093 tree pint_type_node = build_pointer_type (integer_type_node);
13094 tree pcint_type_node = build_pointer_type (
13095 build_type_variant (integer_type_node, 1, 0));
13096 tree pdouble_type_node = build_pointer_type (double_type_node);
13097 tree pcdouble_type_node = build_pointer_type (
13098 build_type_variant (double_type_node, 1, 0));
13099 tree int_ftype_v2df_v2df
13100 = build_function_type_list (integer_type_node,
13101 V2DF_type_node, V2DF_type_node, NULL_TREE);
13103 tree ti_ftype_void
13104 = build_function_type (intTI_type_node, void_list_node);
13105 tree v2di_ftype_void
13106 = build_function_type (V2DI_type_node, void_list_node);
13107 tree ti_ftype_ti_ti
13108 = build_function_type_list (intTI_type_node,
13109 intTI_type_node, intTI_type_node, NULL_TREE);
13110 tree void_ftype_pcvoid
13111 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13112 tree v2di_ftype_di
13113 = build_function_type_list (V2DI_type_node,
13114 long_long_unsigned_type_node, NULL_TREE);
13115 tree di_ftype_v2di
13116 = build_function_type_list (long_long_unsigned_type_node,
13117 V2DI_type_node, NULL_TREE);
13118 tree v4sf_ftype_v4si
13119 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13120 tree v4si_ftype_v4sf
13121 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13122 tree v2df_ftype_v4si
13123 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13124 tree v4si_ftype_v2df
13125 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13126 tree v2si_ftype_v2df
13127 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13128 tree v4sf_ftype_v2df
13129 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13130 tree v2df_ftype_v2si
13131 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13132 tree v2df_ftype_v4sf
13133 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13134 tree int_ftype_v2df
13135 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13136 tree int64_ftype_v2df
13137 = build_function_type_list (long_long_integer_type_node,
13138 V2DF_type_node, NULL_TREE);
13139 tree v2df_ftype_v2df_int
13140 = build_function_type_list (V2DF_type_node,
13141 V2DF_type_node, integer_type_node, NULL_TREE);
13142 tree v2df_ftype_v2df_int64
13143 = build_function_type_list (V2DF_type_node,
13144 V2DF_type_node, long_long_integer_type_node,
13145 NULL_TREE);
13146 tree v4sf_ftype_v4sf_v2df
13147 = build_function_type_list (V4SF_type_node,
13148 V4SF_type_node, V2DF_type_node, NULL_TREE);
13149 tree v2df_ftype_v2df_v4sf
13150 = build_function_type_list (V2DF_type_node,
13151 V2DF_type_node, V4SF_type_node, NULL_TREE);
13152 tree v2df_ftype_v2df_v2df_int
13153 = build_function_type_list (V2DF_type_node,
13154 V2DF_type_node, V2DF_type_node,
13155 integer_type_node,
13156 NULL_TREE);
13157 tree v2df_ftype_v2df_pv2si
13158 = build_function_type_list (V2DF_type_node,
13159 V2DF_type_node, pv2si_type_node, NULL_TREE);
13160 tree void_ftype_pv2si_v2df
13161 = build_function_type_list (void_type_node,
13162 pv2si_type_node, V2DF_type_node, NULL_TREE);
13163 tree void_ftype_pdouble_v2df
13164 = build_function_type_list (void_type_node,
13165 pdouble_type_node, V2DF_type_node, NULL_TREE);
13166 tree void_ftype_pint_int
13167 = build_function_type_list (void_type_node,
13168 pint_type_node, integer_type_node, NULL_TREE);
13169 tree void_ftype_v16qi_v16qi_pchar
13170 = build_function_type_list (void_type_node,
13171 V16QI_type_node, V16QI_type_node,
13172 pchar_type_node, NULL_TREE);
13173 tree v2df_ftype_pcdouble
13174 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13175 tree v2df_ftype_v2df_v2df
13176 = build_function_type_list (V2DF_type_node,
13177 V2DF_type_node, V2DF_type_node, NULL_TREE);
13178 tree v16qi_ftype_v16qi_v16qi
13179 = build_function_type_list (V16QI_type_node,
13180 V16QI_type_node, V16QI_type_node, NULL_TREE);
13181 tree v8hi_ftype_v8hi_v8hi
13182 = build_function_type_list (V8HI_type_node,
13183 V8HI_type_node, V8HI_type_node, NULL_TREE);
13184 tree v4si_ftype_v4si_v4si
13185 = build_function_type_list (V4SI_type_node,
13186 V4SI_type_node, V4SI_type_node, NULL_TREE);
13187 tree v2di_ftype_v2di_v2di
13188 = build_function_type_list (V2DI_type_node,
13189 V2DI_type_node, V2DI_type_node, NULL_TREE);
13190 tree v2di_ftype_v2df_v2df
13191 = build_function_type_list (V2DI_type_node,
13192 V2DF_type_node, V2DF_type_node, NULL_TREE);
13193 tree v2df_ftype_v2df
13194 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13195 tree v2df_ftype_double
13196 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13197 tree v2df_ftype_double_double
13198 = build_function_type_list (V2DF_type_node,
13199 double_type_node, double_type_node, NULL_TREE);
13200 tree int_ftype_v8hi_int
13201 = build_function_type_list (integer_type_node,
13202 V8HI_type_node, integer_type_node, NULL_TREE);
13203 tree v8hi_ftype_v8hi_int_int
13204 = build_function_type_list (V8HI_type_node,
13205 V8HI_type_node, integer_type_node,
13206 integer_type_node, NULL_TREE);
13207 tree v2di_ftype_v2di_int
13208 = build_function_type_list (V2DI_type_node,
13209 V2DI_type_node, integer_type_node, NULL_TREE);
13210 tree v4si_ftype_v4si_int
13211 = build_function_type_list (V4SI_type_node,
13212 V4SI_type_node, integer_type_node, NULL_TREE);
13213 tree v8hi_ftype_v8hi_int
13214 = build_function_type_list (V8HI_type_node,
13215 V8HI_type_node, integer_type_node, NULL_TREE);
13216 tree v8hi_ftype_v8hi_v2di
13217 = build_function_type_list (V8HI_type_node,
13218 V8HI_type_node, V2DI_type_node, NULL_TREE);
13219 tree v4si_ftype_v4si_v2di
13220 = build_function_type_list (V4SI_type_node,
13221 V4SI_type_node, V2DI_type_node, NULL_TREE);
13222 tree v4si_ftype_v8hi_v8hi
13223 = build_function_type_list (V4SI_type_node,
13224 V8HI_type_node, V8HI_type_node, NULL_TREE);
13225 tree di_ftype_v8qi_v8qi
13226 = build_function_type_list (long_long_unsigned_type_node,
13227 V8QI_type_node, V8QI_type_node, NULL_TREE);
13228 tree v2di_ftype_v16qi_v16qi
13229 = build_function_type_list (V2DI_type_node,
13230 V16QI_type_node, V16QI_type_node, NULL_TREE);
13231 tree int_ftype_v16qi
13232 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13233 tree v16qi_ftype_pcchar
13234 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13235 tree void_ftype_pchar_v16qi
13236 = build_function_type_list (void_type_node,
13237 pchar_type_node, V16QI_type_node, NULL_TREE);
13238 tree v4si_ftype_pcint
13239 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13240 tree void_ftype_pcint_v4si
13241 = build_function_type_list (void_type_node,
13242 pcint_type_node, V4SI_type_node, NULL_TREE);
13243 tree v2di_ftype_v2di
13244 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13246 tree float80_type;
13247 tree float128_type;
13249 /* The __float80 type. */
13250 if (TYPE_MODE (long_double_type_node) == XFmode)
13251 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13252 "__float80");
13253 else
13255 /* The __float80 type. */
13256 float80_type = make_node (REAL_TYPE);
13257 TYPE_PRECISION (float80_type) = 96;
13258 layout_type (float80_type);
13259 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13262 float128_type = make_node (REAL_TYPE);
13263 TYPE_PRECISION (float128_type) = 128;
13264 layout_type (float128_type);
13265 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
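/* A rough usage sketch (assuming a C-family front end that honors
   lang_hooks.types.register_builtin_type): after the registrations above,
   the new type names can be used directly in source code, e.g.

     __float80  ext = 1.0L;
     __float128 wide;

   with __float80 naming the 80-bit extended format and __float128 the
   128-bit format laid out just above.  */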
13267 /* Add all builtins that are more or less simple operations on two
13268 operands. */
13269 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13271 /* Use one of the operands; the target can have a different mode for
13272 mask-generating compares. */
13273 enum machine_mode mode;
13274 tree type;
13276 if (d->name == 0)
13277 continue;
13278 mode = insn_data[d->icode].operand[1].mode;
13280 switch (mode)
13282 case V16QImode:
13283 type = v16qi_ftype_v16qi_v16qi;
13284 break;
13285 case V8HImode:
13286 type = v8hi_ftype_v8hi_v8hi;
13287 break;
13288 case V4SImode:
13289 type = v4si_ftype_v4si_v4si;
13290 break;
13291 case V2DImode:
13292 type = v2di_ftype_v2di_v2di;
13293 break;
13294 case V2DFmode:
13295 type = v2df_ftype_v2df_v2df;
13296 break;
13297 case TImode:
13298 type = ti_ftype_ti_ti;
13299 break;
13300 case V4SFmode:
13301 type = v4sf_ftype_v4sf_v4sf;
13302 break;
13303 case V8QImode:
13304 type = v8qi_ftype_v8qi_v8qi;
13305 break;
13306 case V4HImode:
13307 type = v4hi_ftype_v4hi_v4hi;
13308 break;
13309 case V2SImode:
13310 type = v2si_ftype_v2si_v2si;
13311 break;
13312 case DImode:
13313 type = di_ftype_di_di;
13314 break;
13316 default:
13317 abort ();
13320 /* Override for comparisons. */
13321 if (d->icode == CODE_FOR_maskcmpv4sf3
13322 || d->icode == CODE_FOR_maskncmpv4sf3
13323 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13324 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13325 type = v4si_ftype_v4sf_v4sf;
13327 if (d->icode == CODE_FOR_maskcmpv2df3
13328 || d->icode == CODE_FOR_maskncmpv2df3
13329 || d->icode == CODE_FOR_vmmaskcmpv2df3
13330 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13331 type = v2di_ftype_v2df_v2df;
13333 def_builtin (d->mask, d->name, type, d->code);
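/* A rough usage sketch: the loop above gives each bdesc_2arg entry a
   signature matching its operand mode.  Assuming the table carries the
   usual MMX paddw entry, user code such as

     typedef short __v4hi __attribute__ ((__vector_size__ (8)));
     __v4hi add4 (__v4hi a, __v4hi b)
     {
       return __builtin_ia32_paddw (a, b);
     }

   performs an element-wise 16-bit add and type-checks against the
   v4hi_ftype_v4hi_v4hi node built earlier.  */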
13336 /* Add the remaining MMX insns with somewhat more complicated types. */
13337 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13338 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13339 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13340 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13341 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13343 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13344 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13345 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13347 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13348 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13350 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13351 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13353 /* comi/ucomi insns. */
13354 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13355 if (d->mask == MASK_SSE2)
13356 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13357 else
13358 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13360 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13361 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13362 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13364 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13365 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13366 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13367 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13368 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13369 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13370 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13371 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13372 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13373 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13374 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13376 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13377 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13379 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13381 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13382 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13383 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13384 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13385 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13386 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13388 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13389 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13390 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13391 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13393 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13394 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13395 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13396 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13398 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13400 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13402 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13403 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13404 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13405 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13406 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13407 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13409 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13411 /* Original 3DNow! */
13412 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13413 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13414 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13415 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13416 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13417 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13418 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13419 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13420 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13421 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13422 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13423 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13424 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13425 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13426 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13427 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13428 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13429 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13430 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13431 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
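/* A rough usage sketch: the 3DNow! builtins above operate on pairs of
   floats.  Assuming the __v2sf typedef used by the 3DNow! intrinsic header,

     typedef float __v2sf __attribute__ ((__vector_size__ (8)));
     __v2sf scale (__v2sf a, __v2sf b)
     {
       return __builtin_ia32_pfmul (a, b);
     }

   multiplies both lanes with a single PFMUL, matching the
   v2sf_ftype_v2sf_v2sf signature registered here.  */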
13433 /* 3DNow! extension as used in the Athlon CPU. */
13434 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13435 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13436 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13437 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13438 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13439 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13441 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13443 /* SSE2 */
13444 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13445 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13447 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13448 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13449 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13451 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13452 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13453 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13454 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13455 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13456 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13458 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13459 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13460 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13461 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13463 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13464 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13465 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13466 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13467 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13469 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13470 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13471 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13472 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13474 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13475 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13477 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13479 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13480 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13482 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13483 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13484 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13485 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13486 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13488 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13490 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13491 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13492 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13493 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13495 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13500 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13505 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13516 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13521 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13524 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13526 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13527 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13535 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13552 /* Prescott New Instructions. */
13553 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13554 void_ftype_pcvoid_unsigned_unsigned,
13555 IX86_BUILTIN_MONITOR);
13556 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13557 void_ftype_unsigned_unsigned,
13558 IX86_BUILTIN_MWAIT);
13559 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13560 v4sf_ftype_v4sf,
13561 IX86_BUILTIN_MOVSHDUP);
13562 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13563 v4sf_ftype_v4sf,
13564 IX86_BUILTIN_MOVSLDUP);
13565 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13566 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13567 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13568 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13569 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13570 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
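/* A rough usage sketch: the SSE3 monitor/mwait pair registered above takes
   an address plus two hint words, so a simple wait on a flag byte can be
   written roughly as

     extern char flag[64];
     void wait_for_flag (void)
     {
       __builtin_ia32_monitor (flag, 0, 0);
       if (!flag[0])
         __builtin_ia32_mwait (0, 0);
     }

   which matches void_ftype_pcvoid_unsigned_unsigned and
   void_ftype_unsigned_unsigned.  */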
13573 /* Errors in the source file can cause expand_expr to return const0_rtx
13574 where we expect a vector. To avoid crashing, use one of the vector
13575 clear instructions. */
13576 static rtx
13577 safe_vector_operand (rtx x, enum machine_mode mode)
13579 if (x != const0_rtx)
13580 return x;
13581 x = gen_reg_rtx (mode);
13583 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13584 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13585 : gen_rtx_SUBREG (DImode, x, 0)));
13586 else
13587 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13588 : gen_rtx_SUBREG (V4SFmode, x, 0),
13589 CONST0_RTX (V4SFmode)));
13590 return x;
13593 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13595 static rtx
13596 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13598 rtx pat;
13599 tree arg0 = TREE_VALUE (arglist);
13600 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13601 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13602 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13603 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13604 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13605 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13607 if (VECTOR_MODE_P (mode0))
13608 op0 = safe_vector_operand (op0, mode0);
13609 if (VECTOR_MODE_P (mode1))
13610 op1 = safe_vector_operand (op1, mode1);
13612 if (! target
13613 || GET_MODE (target) != tmode
13614 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13615 target = gen_reg_rtx (tmode);
13617 if (GET_MODE (op1) == SImode && mode1 == TImode)
13619 rtx x = gen_reg_rtx (V4SImode);
13620 emit_insn (gen_sse2_loadd (x, op1));
13621 op1 = gen_lowpart (TImode, x);
13624 /* In case the insn wants input operands in modes different from
13625 the result, abort. */
13626 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13627 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13628 abort ();
13630 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13631 op0 = copy_to_mode_reg (mode0, op0);
13632 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13633 op1 = copy_to_mode_reg (mode1, op1);
13635 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13636 yet one of the two must not be a memory. This is normally enforced
13637 by expanders, but we didn't bother to create one here. */
13638 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13639 op0 = copy_to_mode_reg (mode0, op0);
13641 pat = GEN_FCN (icode) (target, op0, op1);
13642 if (! pat)
13643 return 0;
13644 emit_insn (pat);
13645 return target;
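/* Sketch of the call path: this helper is reached from the bdesc_2arg
   fallback loop at the end of ix86_expand_builtin, roughly as

     ix86_expand_binop_builtin (d->icode, arglist, target);

   and emits the single two-operand insn named by d->icode, copying
   operands into registers (or allocating a fresh target) only where the
   insn's predicates demand it.  */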
13648 /* Subroutine of ix86_expand_builtin to take care of stores. */
13650 static rtx
13651 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13653 rtx pat;
13654 tree arg0 = TREE_VALUE (arglist);
13655 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13656 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13657 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13658 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13659 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13661 if (VECTOR_MODE_P (mode1))
13662 op1 = safe_vector_operand (op1, mode1);
13664 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13665 op1 = copy_to_mode_reg (mode1, op1);
13667 pat = GEN_FCN (icode) (op0, op1);
13668 if (pat)
13669 emit_insn (pat);
13670 return 0;
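/* Sketch of the call path: a store builtin such as
   __builtin_ia32_storeups (p, v) reaches this helper as

     ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

   which wraps the first argument as a MEM in the insn's destination mode
   and emits, roughly, (set (mem:V4SF p) (reg:V4SF v)).  */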
13673 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13675 static rtx
13676 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13677 rtx target, int do_load)
13679 rtx pat;
13680 tree arg0 = TREE_VALUE (arglist);
13681 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13682 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13683 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13685 if (! target
13686 || GET_MODE (target) != tmode
13687 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13688 target = gen_reg_rtx (tmode);
13689 if (do_load)
13690 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13691 else
13693 if (VECTOR_MODE_P (mode0))
13694 op0 = safe_vector_operand (op0, mode0);
13696 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13697 op0 = copy_to_mode_reg (mode0, op0);
13700 pat = GEN_FCN (icode) (target, op0);
13701 if (! pat)
13702 return 0;
13703 emit_insn (pat);
13704 return target;
13707 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13708 sqrtss, rsqrtss, rcpss. */
13710 static rtx
13711 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13713 rtx pat;
13714 tree arg0 = TREE_VALUE (arglist);
13715 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13716 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13717 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13719 if (! target
13720 || GET_MODE (target) != tmode
13721 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13722 target = gen_reg_rtx (tmode);
13724 if (VECTOR_MODE_P (mode0))
13725 op0 = safe_vector_operand (op0, mode0);
13727 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13728 op0 = copy_to_mode_reg (mode0, op0);
13730 op1 = op0;
13731 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13732 op1 = copy_to_mode_reg (mode0, op1);
13734 pat = GEN_FCN (icode) (target, op0, op1);
13735 if (! pat)
13736 return 0;
13737 emit_insn (pat);
13738 return target;
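/* Sketch of why the operand is duplicated: the scalar patterns used by
   these three builtins (e.g. vmsqrtv4sf2) take a second vector whose upper
   elements carry through to the result, so

     __builtin_ia32_sqrtss (x)

   is expanded with X supplied for both inputs: lane 0 becomes the square
   root of x[0] and the remaining lanes are copied from X unchanged.  */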
13741 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13743 static rtx
13744 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13745 rtx target)
13747 rtx pat;
13748 tree arg0 = TREE_VALUE (arglist);
13749 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13750 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13751 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13752 rtx op2;
13753 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13754 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13755 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13756 enum rtx_code comparison = d->comparison;
13758 if (VECTOR_MODE_P (mode0))
13759 op0 = safe_vector_operand (op0, mode0);
13760 if (VECTOR_MODE_P (mode1))
13761 op1 = safe_vector_operand (op1, mode1);
13763 /* Swap operands if we have a comparison that isn't available in
13764 hardware. */
13765 if (d->flag)
13767 rtx tmp = gen_reg_rtx (mode1);
13768 emit_move_insn (tmp, op1);
13769 op1 = op0;
13770 op0 = tmp;
13773 if (! target
13774 || GET_MODE (target) != tmode
13775 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13776 target = gen_reg_rtx (tmode);
13778 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13779 op0 = copy_to_mode_reg (mode0, op0);
13780 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13781 op1 = copy_to_mode_reg (mode1, op1);
13783 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13784 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13785 if (! pat)
13786 return 0;
13787 emit_insn (pat);
13788 return target;
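/* Sketch of the operand swap: it serves comparisons with no direct
   hardware encoding.  A greater-than entry in bdesc_2arg can be listed
   with d->comparison set to LT and d->flag set, so that

     __builtin_ia32_cmpgtps (a, b)

   is emitted as the available less-than compare with A and B exchanged,
   yielding the same all-ones/all-zeros mask.  */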
13791 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13793 static rtx
13794 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13795 rtx target)
13797 rtx pat;
13798 tree arg0 = TREE_VALUE (arglist);
13799 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13800 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13801 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13802 rtx op2;
13803 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13804 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13805 enum rtx_code comparison = d->comparison;
13807 if (VECTOR_MODE_P (mode0))
13808 op0 = safe_vector_operand (op0, mode0);
13809 if (VECTOR_MODE_P (mode1))
13810 op1 = safe_vector_operand (op1, mode1);
13812 /* Swap operands if we have a comparison that isn't available in
13813 hardware. */
13814 if (d->flag)
13816 rtx tmp = op1;
13817 op1 = op0;
13818 op0 = tmp;
13821 target = gen_reg_rtx (SImode);
13822 emit_move_insn (target, const0_rtx);
13823 target = gen_rtx_SUBREG (QImode, target, 0);
13825 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13826 op0 = copy_to_mode_reg (mode0, op0);
13827 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13828 op1 = copy_to_mode_reg (mode1, op1);
13830 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13831 pat = GEN_FCN (d->icode) (op0, op1);
13832 if (! pat)
13833 return 0;
13834 emit_insn (pat);
13835 emit_insn (gen_rtx_SET (VOIDmode,
13836 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13837 gen_rtx_fmt_ee (comparison, QImode,
13838 SET_DEST (pat),
13839 const0_rtx)));
13841 return SUBREG_REG (target);
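/* Sketch of the expansion: a comi builtin such as __builtin_ia32_comieq
   returns an int, so the code above emits the COMISS/COMISD compare and
   then materializes the flag with a setcc into the low byte of a zeroed
   SImode register, roughly

     comiss  %xmm1, %xmm0
     sete    %al

   before handing back the SImode SUBREG_REG.  */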
13844 /* Expand an expression EXP that calls a built-in function,
13845 with result going to TARGET if that's convenient
13846 (and in mode MODE if that's convenient).
13847 SUBTARGET may be used as the target for computing one of EXP's operands.
13848 IGNORE is nonzero if the value is to be ignored. */
13851 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13852 enum machine_mode mode ATTRIBUTE_UNUSED,
13853 int ignore ATTRIBUTE_UNUSED)
13855 const struct builtin_description *d;
13856 size_t i;
13857 enum insn_code icode;
13858 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13859 tree arglist = TREE_OPERAND (exp, 1);
13860 tree arg0, arg1, arg2;
13861 rtx op0, op1, op2, pat;
13862 enum machine_mode tmode, mode0, mode1, mode2;
13863 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13865 switch (fcode)
13867 case IX86_BUILTIN_EMMS:
13868 emit_insn (gen_emms ());
13869 return 0;
13871 case IX86_BUILTIN_SFENCE:
13872 emit_insn (gen_sfence ());
13873 return 0;
13875 case IX86_BUILTIN_PEXTRW:
13876 case IX86_BUILTIN_PEXTRW128:
13877 icode = (fcode == IX86_BUILTIN_PEXTRW
13878 ? CODE_FOR_mmx_pextrw
13879 : CODE_FOR_sse2_pextrw);
13880 arg0 = TREE_VALUE (arglist);
13881 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13882 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13883 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13884 tmode = insn_data[icode].operand[0].mode;
13885 mode0 = insn_data[icode].operand[1].mode;
13886 mode1 = insn_data[icode].operand[2].mode;
13888 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13889 op0 = copy_to_mode_reg (mode0, op0);
13890 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13892 error ("selector must be an integer constant in the range 0..%i",
13893 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13894 return gen_reg_rtx (tmode);
13896 if (target == 0
13897 || GET_MODE (target) != tmode
13898 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13899 target = gen_reg_rtx (tmode);
13900 pat = GEN_FCN (icode) (target, op0, op1);
13901 if (! pat)
13902 return 0;
13903 emit_insn (pat);
13904 return target;
13906 case IX86_BUILTIN_PINSRW:
13907 case IX86_BUILTIN_PINSRW128:
13908 icode = (fcode == IX86_BUILTIN_PINSRW
13909 ? CODE_FOR_mmx_pinsrw
13910 : CODE_FOR_sse2_pinsrw);
13911 arg0 = TREE_VALUE (arglist);
13912 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13913 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13914 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13915 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13916 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13917 tmode = insn_data[icode].operand[0].mode;
13918 mode0 = insn_data[icode].operand[1].mode;
13919 mode1 = insn_data[icode].operand[2].mode;
13920 mode2 = insn_data[icode].operand[3].mode;
13922 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13923 op0 = copy_to_mode_reg (mode0, op0);
13924 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13925 op1 = copy_to_mode_reg (mode1, op1);
13926 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13928 error ("selector must be an integer constant in the range 0..%i",
13929 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13930 return const0_rtx;
13932 if (target == 0
13933 || GET_MODE (target) != tmode
13934 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13935 target = gen_reg_rtx (tmode);
13936 pat = GEN_FCN (icode) (target, op0, op1, op2);
13937 if (! pat)
13938 return 0;
13939 emit_insn (pat);
13940 return target;
13942 case IX86_BUILTIN_MASKMOVQ:
13943 case IX86_BUILTIN_MASKMOVDQU:
13944 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13945 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13946 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13947 : CODE_FOR_sse2_maskmovdqu));
13948 /* Note the arg order is different from the operand order. */
13949 arg1 = TREE_VALUE (arglist);
13950 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13951 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13952 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13953 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13954 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13955 mode0 = insn_data[icode].operand[0].mode;
13956 mode1 = insn_data[icode].operand[1].mode;
13957 mode2 = insn_data[icode].operand[2].mode;
13959 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13960 op0 = copy_to_mode_reg (mode0, op0);
13961 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13962 op1 = copy_to_mode_reg (mode1, op1);
13963 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13964 op2 = copy_to_mode_reg (mode2, op2);
13965 pat = GEN_FCN (icode) (op0, op1, op2);
13966 if (! pat)
13967 return 0;
13968 emit_insn (pat);
13969 return 0;
13971 case IX86_BUILTIN_SQRTSS:
13972 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13973 case IX86_BUILTIN_RSQRTSS:
13974 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13975 case IX86_BUILTIN_RCPSS:
13976 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13978 case IX86_BUILTIN_LOADAPS:
13979 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13981 case IX86_BUILTIN_LOADUPS:
13982 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13984 case IX86_BUILTIN_STOREAPS:
13985 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13987 case IX86_BUILTIN_STOREUPS:
13988 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13990 case IX86_BUILTIN_LOADSS:
13991 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13993 case IX86_BUILTIN_STORESS:
13994 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13996 case IX86_BUILTIN_LOADHPS:
13997 case IX86_BUILTIN_LOADLPS:
13998 case IX86_BUILTIN_LOADHPD:
13999 case IX86_BUILTIN_LOADLPD:
14000 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14001 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14002 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14003 : CODE_FOR_sse2_movsd);
14004 arg0 = TREE_VALUE (arglist);
14005 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14006 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14007 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14008 tmode = insn_data[icode].operand[0].mode;
14009 mode0 = insn_data[icode].operand[1].mode;
14010 mode1 = insn_data[icode].operand[2].mode;
14012 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14013 op0 = copy_to_mode_reg (mode0, op0);
14014 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14015 if (target == 0
14016 || GET_MODE (target) != tmode
14017 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14018 target = gen_reg_rtx (tmode);
14019 pat = GEN_FCN (icode) (target, op0, op1);
14020 if (! pat)
14021 return 0;
14022 emit_insn (pat);
14023 return target;
14025 case IX86_BUILTIN_STOREHPS:
14026 case IX86_BUILTIN_STORELPS:
14027 case IX86_BUILTIN_STOREHPD:
14028 case IX86_BUILTIN_STORELPD:
14029 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14030 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14031 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14032 : CODE_FOR_sse2_movsd);
14033 arg0 = TREE_VALUE (arglist);
14034 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14035 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14036 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14037 mode0 = insn_data[icode].operand[1].mode;
14038 mode1 = insn_data[icode].operand[2].mode;
14040 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14041 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14042 op1 = copy_to_mode_reg (mode1, op1);
14044 pat = GEN_FCN (icode) (op0, op0, op1);
14045 if (! pat)
14046 return 0;
14047 emit_insn (pat);
14048 return 0;
14050 case IX86_BUILTIN_MOVNTPS:
14051 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14052 case IX86_BUILTIN_MOVNTQ:
14053 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14055 case IX86_BUILTIN_LDMXCSR:
14056 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14057 target = assign_386_stack_local (SImode, 0);
14058 emit_move_insn (target, op0);
14059 emit_insn (gen_ldmxcsr (target));
14060 return 0;
14062 case IX86_BUILTIN_STMXCSR:
14063 target = assign_386_stack_local (SImode, 0);
14064 emit_insn (gen_stmxcsr (target));
14065 return copy_to_mode_reg (SImode, target);
14067 case IX86_BUILTIN_SHUFPS:
14068 case IX86_BUILTIN_SHUFPD:
14069 icode = (fcode == IX86_BUILTIN_SHUFPS
14070 ? CODE_FOR_sse_shufps
14071 : CODE_FOR_sse2_shufpd);
14072 arg0 = TREE_VALUE (arglist);
14073 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14074 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14075 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14076 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14077 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14078 tmode = insn_data[icode].operand[0].mode;
14079 mode0 = insn_data[icode].operand[1].mode;
14080 mode1 = insn_data[icode].operand[2].mode;
14081 mode2 = insn_data[icode].operand[3].mode;
14083 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14084 op0 = copy_to_mode_reg (mode0, op0);
14085 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14086 op1 = copy_to_mode_reg (mode1, op1);
14087 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14089 /* @@@ better error message */
14090 error ("mask must be an immediate");
14091 return gen_reg_rtx (tmode);
14093 if (target == 0
14094 || GET_MODE (target) != tmode
14095 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14096 target = gen_reg_rtx (tmode);
14097 pat = GEN_FCN (icode) (target, op0, op1, op2);
14098 if (! pat)
14099 return 0;
14100 emit_insn (pat);
14101 return target;
14103 case IX86_BUILTIN_PSHUFW:
14104 case IX86_BUILTIN_PSHUFD:
14105 case IX86_BUILTIN_PSHUFHW:
14106 case IX86_BUILTIN_PSHUFLW:
14107 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14108 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14109 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14110 : CODE_FOR_mmx_pshufw);
14111 arg0 = TREE_VALUE (arglist);
14112 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14113 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14114 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14115 tmode = insn_data[icode].operand[0].mode;
14116 mode1 = insn_data[icode].operand[1].mode;
14117 mode2 = insn_data[icode].operand[2].mode;
14119 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14120 op0 = copy_to_mode_reg (mode1, op0);
14121 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14123 /* @@@ better error message */
14124 error ("mask must be an immediate");
14125 return const0_rtx;
14127 if (target == 0
14128 || GET_MODE (target) != tmode
14129 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14130 target = gen_reg_rtx (tmode);
14131 pat = GEN_FCN (icode) (target, op0, op1);
14132 if (! pat)
14133 return 0;
14134 emit_insn (pat);
14135 return target;
14137 case IX86_BUILTIN_PSLLDQI128:
14138 case IX86_BUILTIN_PSRLDQI128:
14139 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14140 : CODE_FOR_sse2_lshrti3);
14141 arg0 = TREE_VALUE (arglist);
14142 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14143 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14144 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14145 tmode = insn_data[icode].operand[0].mode;
14146 mode1 = insn_data[icode].operand[1].mode;
14147 mode2 = insn_data[icode].operand[2].mode;
14149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14151 op0 = copy_to_reg (op0);
14152 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14154 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14156 error ("shift must be an immediate");
14157 return const0_rtx;
14159 target = gen_reg_rtx (V2DImode);
14160 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14161 if (! pat)
14162 return 0;
14163 emit_insn (pat);
14164 return target;
14166 case IX86_BUILTIN_FEMMS:
14167 emit_insn (gen_femms ());
14168 return NULL_RTX;
14170 case IX86_BUILTIN_PAVGUSB:
14171 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14173 case IX86_BUILTIN_PF2ID:
14174 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14176 case IX86_BUILTIN_PFACC:
14177 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14179 case IX86_BUILTIN_PFADD:
14180 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14182 case IX86_BUILTIN_PFCMPEQ:
14183 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14185 case IX86_BUILTIN_PFCMPGE:
14186 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14188 case IX86_BUILTIN_PFCMPGT:
14189 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14191 case IX86_BUILTIN_PFMAX:
14192 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14194 case IX86_BUILTIN_PFMIN:
14195 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14197 case IX86_BUILTIN_PFMUL:
14198 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14200 case IX86_BUILTIN_PFRCP:
14201 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14203 case IX86_BUILTIN_PFRCPIT1:
14204 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14206 case IX86_BUILTIN_PFRCPIT2:
14207 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14209 case IX86_BUILTIN_PFRSQIT1:
14210 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14212 case IX86_BUILTIN_PFRSQRT:
14213 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14215 case IX86_BUILTIN_PFSUB:
14216 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14218 case IX86_BUILTIN_PFSUBR:
14219 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14221 case IX86_BUILTIN_PI2FD:
14222 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14224 case IX86_BUILTIN_PMULHRW:
14225 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14227 case IX86_BUILTIN_PF2IW:
14228 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14230 case IX86_BUILTIN_PFNACC:
14231 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14233 case IX86_BUILTIN_PFPNACC:
14234 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14236 case IX86_BUILTIN_PI2FW:
14237 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14239 case IX86_BUILTIN_PSWAPDSI:
14240 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14242 case IX86_BUILTIN_PSWAPDSF:
14243 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14245 case IX86_BUILTIN_SSE_ZERO:
14246 target = gen_reg_rtx (V4SFmode);
14247 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14248 return target;
14250 case IX86_BUILTIN_MMX_ZERO:
14251 target = gen_reg_rtx (DImode);
14252 emit_insn (gen_mmx_clrdi (target));
14253 return target;
14255 case IX86_BUILTIN_CLRTI:
14256 target = gen_reg_rtx (V2DImode);
14257 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14258 return target;
14261 case IX86_BUILTIN_SQRTSD:
14262 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14263 case IX86_BUILTIN_LOADAPD:
14264 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14265 case IX86_BUILTIN_LOADUPD:
14266 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14268 case IX86_BUILTIN_STOREAPD:
14269 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14270 case IX86_BUILTIN_STOREUPD:
14271 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14273 case IX86_BUILTIN_LOADSD:
14274 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14276 case IX86_BUILTIN_STORESD:
14277 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14279 case IX86_BUILTIN_SETPD1:
14280 target = assign_386_stack_local (DFmode, 0);
14281 arg0 = TREE_VALUE (arglist);
14282 emit_move_insn (adjust_address (target, DFmode, 0),
14283 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14284 op0 = gen_reg_rtx (V2DFmode);
14285 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14286 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14287 return op0;
14289 case IX86_BUILTIN_SETPD:
14290 target = assign_386_stack_local (V2DFmode, 0);
14291 arg0 = TREE_VALUE (arglist);
14292 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14293 emit_move_insn (adjust_address (target, DFmode, 0),
14294 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14295 emit_move_insn (adjust_address (target, DFmode, 8),
14296 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14297 op0 = gen_reg_rtx (V2DFmode);
14298 emit_insn (gen_sse2_movapd (op0, target));
14299 return op0;
14301 case IX86_BUILTIN_LOADRPD:
14302 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14303 gen_reg_rtx (V2DFmode), 1);
14304 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14305 return target;
14307 case IX86_BUILTIN_LOADPD1:
14308 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14309 gen_reg_rtx (V2DFmode), 1);
14310 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14311 return target;
14313 case IX86_BUILTIN_STOREPD1:
14314 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14315 case IX86_BUILTIN_STORERPD:
14316 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14318 case IX86_BUILTIN_CLRPD:
14319 target = gen_reg_rtx (V2DFmode);
14320 emit_insn (gen_sse_clrv2df (target));
14321 return target;
14323 case IX86_BUILTIN_MFENCE:
14324 emit_insn (gen_sse2_mfence ());
14325 return 0;
14326 case IX86_BUILTIN_LFENCE:
14327 emit_insn (gen_sse2_lfence ());
14328 return 0;
14330 case IX86_BUILTIN_CLFLUSH:
14331 arg0 = TREE_VALUE (arglist);
14332 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14333 icode = CODE_FOR_sse2_clflush;
14334 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14335 op0 = copy_to_mode_reg (Pmode, op0);
14337 emit_insn (gen_sse2_clflush (op0));
14338 return 0;
14340 case IX86_BUILTIN_MOVNTPD:
14341 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14342 case IX86_BUILTIN_MOVNTDQ:
14343 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14344 case IX86_BUILTIN_MOVNTI:
14345 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14347 case IX86_BUILTIN_LOADDQA:
14348 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14349 case IX86_BUILTIN_LOADDQU:
14350 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14351 case IX86_BUILTIN_LOADD:
14352 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14354 case IX86_BUILTIN_STOREDQA:
14355 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14356 case IX86_BUILTIN_STOREDQU:
14357 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14358 case IX86_BUILTIN_STORED:
14359 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14361 case IX86_BUILTIN_MONITOR:
14362 arg0 = TREE_VALUE (arglist);
14363 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14364 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14365 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14366 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14367 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14368 if (!REG_P (op0))
14369 op0 = copy_to_mode_reg (SImode, op0);
14370 if (!REG_P (op1))
14371 op1 = copy_to_mode_reg (SImode, op1);
14372 if (!REG_P (op2))
14373 op2 = copy_to_mode_reg (SImode, op2);
14374 emit_insn (gen_monitor (op0, op1, op2));
14375 return 0;
14377 case IX86_BUILTIN_MWAIT:
14378 arg0 = TREE_VALUE (arglist);
14379 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14380 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14381 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14382 if (!REG_P (op0))
14383 op0 = copy_to_mode_reg (SImode, op0);
14384 if (!REG_P (op1))
14385 op1 = copy_to_mode_reg (SImode, op1);
14386 emit_insn (gen_mwait (op0, op1));
14387 return 0;
14389 case IX86_BUILTIN_LOADDDUP:
14390 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14392 case IX86_BUILTIN_LDDQU:
14393 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14396 default:
14397 break;
14400 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14401 if (d->code == fcode)
14403 /* Compares are treated specially. */
14404 if (d->icode == CODE_FOR_maskcmpv4sf3
14405 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14406 || d->icode == CODE_FOR_maskncmpv4sf3
14407 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14408 || d->icode == CODE_FOR_maskcmpv2df3
14409 || d->icode == CODE_FOR_vmmaskcmpv2df3
14410 || d->icode == CODE_FOR_maskncmpv2df3
14411 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14412 return ix86_expand_sse_compare (d, arglist, target);
14414 return ix86_expand_binop_builtin (d->icode, arglist, target);
14417 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14418 if (d->code == fcode)
14419 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14421 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14422 if (d->code == fcode)
14423 return ix86_expand_sse_comi (d, arglist, target);
14425 /* @@@ Should really do something sensible here. */
14426 return 0;
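/* Sketch of the overall path (assuming the usual intrinsic wrappers):
   xmmintrin.h defines _mm_add_ps in terms of __builtin_ia32_addps; the
   front end leaves such calls as CALL_EXPRs, and ix86_expand_builtin maps
   their DECL_FUNCTION_CODE either to one of the special cases above or to
   the generic one-/two-operand and comi helpers via the bdesc_* tables.  */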
14429 /* Store OPERAND to memory after reload is completed. This means
14430 that we can't easily use assign_stack_local. */
14432 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14434 rtx result;
14435 if (!reload_completed)
14436 abort ();
14437 if (TARGET_RED_ZONE)
14439 result = gen_rtx_MEM (mode,
14440 gen_rtx_PLUS (Pmode,
14441 stack_pointer_rtx,
14442 GEN_INT (-RED_ZONE_SIZE)));
14443 emit_move_insn (result, operand);
14445 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14447 switch (mode)
14449 case HImode:
14450 case SImode:
14451 operand = gen_lowpart (DImode, operand);
14452 /* FALLTHRU */
14453 case DImode:
14454 emit_insn (
14455 gen_rtx_SET (VOIDmode,
14456 gen_rtx_MEM (DImode,
14457 gen_rtx_PRE_DEC (DImode,
14458 stack_pointer_rtx)),
14459 operand));
14460 break;
14461 default:
14462 abort ();
14464 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14466 else
14468 switch (mode)
14470 case DImode:
14472 rtx operands[2];
14473 split_di (&operand, 1, operands, operands + 1);
14474 emit_insn (
14475 gen_rtx_SET (VOIDmode,
14476 gen_rtx_MEM (SImode,
14477 gen_rtx_PRE_DEC (Pmode,
14478 stack_pointer_rtx)),
14479 operands[1]));
14480 emit_insn (
14481 gen_rtx_SET (VOIDmode,
14482 gen_rtx_MEM (SImode,
14483 gen_rtx_PRE_DEC (Pmode,
14484 stack_pointer_rtx)),
14485 operands[0]));
14487 break;
14488 case HImode:
14489 /* It is better to store HImodes as SImodes. */
14490 if (!TARGET_PARTIAL_REG_STALL)
14491 operand = gen_lowpart (SImode, operand);
14492 /* FALLTHRU */
14493 case SImode:
14494 emit_insn (
14495 gen_rtx_SET (VOIDmode,
14496 gen_rtx_MEM (GET_MODE (operand),
14497 gen_rtx_PRE_DEC (SImode,
14498 stack_pointer_rtx)),
14499 operand));
14500 break;
14501 default:
14502 abort ();
14504 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14506 return result;
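/* Sketch of the emitted code (assuming the usual 128-byte red zone on
   x86-64): with a red zone the operand is simply stored below the stack
   pointer, roughly

     movq  %rax, -128(%rsp)

   whereas without one the operand is pushed, and the returned MEM then
   addresses the slot through the decremented stack pointer.  */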
14509 /* Free the operand from memory. */
14510 void
14511 ix86_free_from_memory (enum machine_mode mode)
14513 if (!TARGET_RED_ZONE)
14515 int size;
14517 if (mode == DImode || TARGET_64BIT)
14518 size = 8;
14519 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14520 size = 2;
14521 else
14522 size = 4;
14523 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14524 to a pop or add instruction if registers are available. */
14525 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14526 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14527 GEN_INT (size))));
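/* For illustration (a sketch, not part of the original flow): on a 32-bit
   target without a red zone the two helpers pair up roughly as

     rtx slot = ix86_force_to_memory (SImode, op);    -- pushl op
     ... use (mem:SI (reg:SI esp)) ...
     ix86_free_from_memory (SImode);                  -- leal 4(%esp), %esp

   so a force/free pair always allocates and releases the same number of
   bytes for a given mode and target.  */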
14531 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14532 QImode must go into class Q_REGS.
14533 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14534 movdf to do mem-to-mem moves through integer regs. */
14535 enum reg_class
14536 ix86_preferred_reload_class (rtx x, enum reg_class class)
14538 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14539 return NO_REGS;
14540 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14542 /* SSE can't load any constant directly yet. */
14543 if (SSE_CLASS_P (class))
14544 return NO_REGS;
14545 /* Floats can load 0 and 1. */
14546 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14548 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14549 if (MAYBE_SSE_CLASS_P (class))
14550 return (reg_class_subset_p (class, GENERAL_REGS)
14551 ? GENERAL_REGS : FLOAT_REGS);
14552 else
14553 return class;
14555 /* General regs can load everything. */
14556 if (reg_class_subset_p (class, GENERAL_REGS))
14557 return GENERAL_REGS;
14558 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14559 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14560 return NO_REGS;
14562 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14563 return NO_REGS;
14564 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14565 return Q_REGS;
14566 return class;
14569 /* If we are copying between general and FP registers, we need a memory
14570 location. The same is true for SSE and MMX registers.
14572 The macro can't work reliably when one of the CLASSES is a class containing
14573 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14574 combining those units in a single alternative in the machine description.
14575 Ensure that this constraint holds to avoid unexpected surprises.
14577 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14578 enforce these sanity checks. */
14580 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14581 enum machine_mode mode, int strict)
14583 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14584 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14585 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14586 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14587 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14588 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14590 if (strict)
14591 abort ();
14592 else
14593 return 1;
14595 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14596 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14597 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14598 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14599 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14601 /* Return the cost of moving data from a register in class CLASS1 to
14602 one in class CLASS2.
14604 It is not required that the cost always equal 2 when FROM is the same as TO;
14605 on some machines it is expensive to move between registers if they are not
14606 general registers. */
14608 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14609 enum reg_class class2)
14611 /* In case we require secondary memory, compute the cost of the store followed
14612 by the load.  In order to avoid bad register allocation choices, we need
14613 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14615 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14617 int cost = 1;
14619 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14620 MEMORY_MOVE_COST (mode, class1, 1));
14621 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14622 MEMORY_MOVE_COST (mode, class2, 1));
14624 /* In case of copying from a general purpose register we may emit multiple
14625 stores followed by a single load, causing a memory size mismatch stall.
14626 Count this as an arbitrarily high cost of 20. */
14627 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14628 cost += 20;
14630 /* In the case of FP/MMX moves, the registers actually overlap, and we
14631 have to switch modes in order to treat them differently. */
14632 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14633 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14634 cost += 20;
14636 return cost;
14639 /* Moves between SSE/MMX and integer unit are expensive. */
14640 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14641 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14642 return ix86_cost->mmxsse_to_integer;
14643 if (MAYBE_FLOAT_CLASS_P (class1))
14644 return ix86_cost->fp_move;
14645 if (MAYBE_SSE_CLASS_P (class1))
14646 return ix86_cost->sse_move;
14647 if (MAYBE_MMX_CLASS_P (class1))
14648 return ix86_cost->mmx_move;
14649 return 2;
14652 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14654 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14656 /* Flags can hold only CCmode values, and only flags can hold them. */
14657 if (CC_REGNO_P (regno))
14658 return GET_MODE_CLASS (mode) == MODE_CC;
14659 if (GET_MODE_CLASS (mode) == MODE_CC
14660 || GET_MODE_CLASS (mode) == MODE_RANDOM
14661 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14662 return 0;
14663 if (FP_REGNO_P (regno))
14664 return VALID_FP_MODE_P (mode);
14665 if (SSE_REGNO_P (regno))
14666 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14667 if (MMX_REGNO_P (regno))
14668 return (TARGET_MMX
14669 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14670 /* We handle both integers and floats in the general purpose registers.
14671 In the future we should be able to handle vector modes as well. */
14672 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14673 return 0;
14674 /* Take care with QImode values: they can be in non-QI regs, but then
14675 they do cause partial register stalls. */
14676 if (regno < 4 || mode != QImode || TARGET_64BIT)
14677 return 1;
14678 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14681 /* Return the cost of moving data of mode M between a
14682 register and memory. A value of 2 is the default; this cost is
14683 relative to those in `REGISTER_MOVE_COST'.
14685 If moving between registers and memory is more expensive than
14686 between two registers, you should define this macro to express the
14687 relative cost.
14689 Also model the increased cost of moving QImode registers in non
14690 Q_REGS classes.
14693 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14695 if (FLOAT_CLASS_P (class))
14697 int index;
14698 switch (mode)
14700 case SFmode:
14701 index = 0;
14702 break;
14703 case DFmode:
14704 index = 1;
14705 break;
14706 case XFmode:
14707 index = 2;
14708 break;
14709 default:
14710 return 100;
14712 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14714 if (SSE_CLASS_P (class))
14716 int index;
14717 switch (GET_MODE_SIZE (mode))
14719 case 4:
14720 index = 0;
14721 break;
14722 case 8:
14723 index = 1;
14724 break;
14725 case 16:
14726 index = 2;
14727 break;
14728 default:
14729 return 100;
14731 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14733 if (MMX_CLASS_P (class))
14735 int index;
14736 switch (GET_MODE_SIZE (mode))
14738 case 4:
14739 index = 0;
14740 break;
14741 case 8:
14742 index = 1;
14743 break;
14744 default:
14745 return 100;
14747 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14749 switch (GET_MODE_SIZE (mode))
14751 case 1:
14752 if (in)
14753 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14754 : ix86_cost->movzbl_load);
14755 else
14756 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14757 : ix86_cost->int_store[0] + 4);
14758 break;
14759 case 2:
14760 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14761 default:
14762 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14763 if (mode == TFmode)
14764 mode = XFmode;
14765 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14766 * (((int) GET_MODE_SIZE (mode)
14767 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
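/* Worked example (illustrative only): for an integer DImode value on a
   32-bit target, GET_MODE_SIZE is 8 and UNITS_PER_WORD is 4, so the
   default case above charges two 32-bit moves, i.e. 2 * int_load[2]
   for a load or 2 * int_store[2] for a store.  */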
14771 /* Compute a (partial) cost for rtx X. Return true if the complete
14772 cost has been computed, and false if subexpressions should be
14773 scanned. In either case, *TOTAL contains the cost result. */
14775 static bool
14776 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14778 enum machine_mode mode = GET_MODE (x);
14780 switch (code)
14782 case CONST_INT:
14783 case CONST:
14784 case LABEL_REF:
14785 case SYMBOL_REF:
14786 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14787 *total = 3;
14788 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14789 *total = 2;
14790 else if (flag_pic && SYMBOLIC_CONST (x)
14791 && (!TARGET_64BIT
14792 || (GET_CODE (x) != LABEL_REF
14793 && (GET_CODE (x) != SYMBOL_REF
14794 || !SYMBOL_REF_LOCAL_P (x)))))
14795 *total = 1;
14796 else
14797 *total = 0;
14798 return true;
14800 case CONST_DOUBLE:
14801 if (mode == VOIDmode)
14802 *total = 0;
14803 else
14804 switch (standard_80387_constant_p (x))
14806 case 1: /* 0.0 */
14807 *total = 1;
14808 break;
14809 default: /* Other constants */
14810 *total = 2;
14811 break;
14812 case 0:
14813 case -1:
14814 /* Start with (MEM (SYMBOL_REF)), since that's where
14815 it'll probably end up. Add a penalty for size. */
14816 *total = (COSTS_N_INSNS (1)
14817 + (flag_pic != 0 && !TARGET_64BIT)
14818 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14819 break;
14821 return true;
14823 case ZERO_EXTEND:
14824 /* The zero extension is often completely free on x86_64, so make
14825 it as cheap as possible. */
14826 if (TARGET_64BIT && mode == DImode
14827 && GET_MODE (XEXP (x, 0)) == SImode)
14828 *total = 1;
14829 else if (TARGET_ZERO_EXTEND_WITH_AND)
14830 *total = COSTS_N_INSNS (ix86_cost->add);
14831 else
14832 *total = COSTS_N_INSNS (ix86_cost->movzx);
14833 return false;
14835 case SIGN_EXTEND:
14836 *total = COSTS_N_INSNS (ix86_cost->movsx);
14837 return false;
14839 case ASHIFT:
14840 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14841 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14843 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14844 if (value == 1)
14846 *total = COSTS_N_INSNS (ix86_cost->add);
14847 return false;
14849 if ((value == 2 || value == 3)
14850 && !TARGET_DECOMPOSE_LEA
14851 && ix86_cost->lea <= ix86_cost->shift_const)
14853 *total = COSTS_N_INSNS (ix86_cost->lea);
14854 return false;
14857 /* FALLTHRU */
14859 case ROTATE:
14860 case ASHIFTRT:
14861 case LSHIFTRT:
14862 case ROTATERT:
14863 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14865 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14867 if (INTVAL (XEXP (x, 1)) > 32)
14868 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
14869 else
14870 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
14872 else
14874 if (GET_CODE (XEXP (x, 1)) == AND)
14875 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
14876 else
14877 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
14880 else
14882 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14883 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14884 else
14885 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14887 return false;
14889 case MULT:
14890 if (FLOAT_MODE_P (mode))
14892 *total = COSTS_N_INSNS (ix86_cost->fmul);
14893 return false;
14895 else
14897 rtx op0 = XEXP (x, 0);
14898 rtx op1 = XEXP (x, 1);
14899 int nbits;
14900 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14902 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14903 for (nbits = 0; value != 0; value &= value - 1)
14904 nbits++;
14906 else
14907 /* This is arbitrary. */
14908 nbits = 7;
14910 /* Compute costs correctly for widening multiplication. */
14911 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14912 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14913 == GET_MODE_SIZE (mode))
14915 int is_mulwiden = 0;
14916 enum machine_mode inner_mode = GET_MODE (op0);
14918 if (GET_CODE (op0) == GET_CODE (op1))
14919 is_mulwiden = 1, op1 = XEXP (op1, 0);
14920 else if (GET_CODE (op1) == CONST_INT)
14922 if (GET_CODE (op0) == SIGN_EXTEND)
14923 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14924 == INTVAL (op1);
14925 else
14926 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14929 if (is_mulwiden)
14930 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14933 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14934 + nbits * ix86_cost->mult_bit)
14935 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14937 return true;
14940 case DIV:
14941 case UDIV:
14942 case MOD:
14943 case UMOD:
14944 if (FLOAT_MODE_P (mode))
14945 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14946 else
14947 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14948 return false;
14950 case PLUS:
14951 if (FLOAT_MODE_P (mode))
14952 *total = COSTS_N_INSNS (ix86_cost->fadd);
14953 else if (!TARGET_DECOMPOSE_LEA
14954 && GET_MODE_CLASS (mode) == MODE_INT
14955 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14957 if (GET_CODE (XEXP (x, 0)) == PLUS
14958 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14959 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14960 && CONSTANT_P (XEXP (x, 1)))
14962 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14963 if (val == 2 || val == 4 || val == 8)
14965 *total = COSTS_N_INSNS (ix86_cost->lea);
14966 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14967 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14968 outer_code);
14969 *total += rtx_cost (XEXP (x, 1), outer_code);
14970 return true;
14973 else if (GET_CODE (XEXP (x, 0)) == MULT
14974 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14976 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14977 if (val == 2 || val == 4 || val == 8)
14979 *total = COSTS_N_INSNS (ix86_cost->lea);
14980 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14981 *total += rtx_cost (XEXP (x, 1), outer_code);
14982 return true;
14985 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14987 *total = COSTS_N_INSNS (ix86_cost->lea);
14988 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14989 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14990 *total += rtx_cost (XEXP (x, 1), outer_code);
14991 return true;
14994 /* FALLTHRU */
14996 case MINUS:
14997 if (FLOAT_MODE_P (mode))
14999 *total = COSTS_N_INSNS (ix86_cost->fadd);
15000 return false;
15002 /* FALLTHRU */
15004 case AND:
15005 case IOR:
15006 case XOR:
15007 if (!TARGET_64BIT && mode == DImode)
15009 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15010 + (rtx_cost (XEXP (x, 0), outer_code)
15011 << (GET_MODE (XEXP (x, 0)) != DImode))
15012 + (rtx_cost (XEXP (x, 1), outer_code)
15013 << (GET_MODE (XEXP (x, 1)) != DImode)));
15014 return true;
15016 /* FALLTHRU */
15018 case NEG:
15019 if (FLOAT_MODE_P (mode))
15021 *total = COSTS_N_INSNS (ix86_cost->fchs);
15022 return false;
15024 /* FALLTHRU */
15026 case NOT:
15027 if (!TARGET_64BIT && mode == DImode)
15028 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15029 else
15030 *total = COSTS_N_INSNS (ix86_cost->add);
15031 return false;
15033 case FLOAT_EXTEND:
15034 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15035 *total = 0;
15036 return false;
15038 case ABS:
15039 if (FLOAT_MODE_P (mode))
15040 *total = COSTS_N_INSNS (ix86_cost->fabs);
15041 return false;
15043 case SQRT:
15044 if (FLOAT_MODE_P (mode))
15045 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15046 return false;
15048 case UNSPEC:
15049 if (XINT (x, 1) == UNSPEC_TP)
15050 *total = 0;
15051 return false;
15053 default:
15054 return false;
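/* Illustrative note on the PLUS handling above (a sketch): an address-like
   expression such as

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))

   is priced as a single lea plus the costs of its operands, provided the
   scale is 2, 4 or 8, so complete base + index*scale + displacement
   calculations stay cheap relative to a chain of separate additions.  */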
15058 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15059 static void
15060 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15062 init_section ();
15063 fputs ("\tpushl $", asm_out_file);
15064 assemble_name (asm_out_file, XSTR (symbol, 0));
15065 fputc ('\n', asm_out_file);
15067 #endif
15069 #if TARGET_MACHO
15071 static int current_machopic_label_num;
15073 /* Given a symbol name and its associated stub, write out the
15074 definition of the stub. */
15076 void
15077 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15079 unsigned int length;
15080 char *binder_name, *symbol_name, lazy_ptr_name[32];
15081 int label = ++current_machopic_label_num;
15083 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15084 symb = (*targetm.strip_name_encoding) (symb);
15086 length = strlen (stub);
15087 binder_name = alloca (length + 32);
15088 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15090 length = strlen (symb);
15091 symbol_name = alloca (length + 32);
15092 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15094 sprintf (lazy_ptr_name, "L%d$lz", label);
15096 if (MACHOPIC_PURE)
15097 machopic_picsymbol_stub_section ();
15098 else
15099 machopic_symbol_stub_section ();
15101 fprintf (file, "%s:\n", stub);
15102 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15104 if (MACHOPIC_PURE)
15106 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15107 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15108 fprintf (file, "\tjmp %%edx\n");
15110 else
15111 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15113 fprintf (file, "%s:\n", binder_name);
15115 if (MACHOPIC_PURE)
15117 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15118 fprintf (file, "\tpushl %%eax\n");
15120 else
15121 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15123 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15125 machopic_lazy_symbol_ptr_section ();
15126 fprintf (file, "%s:\n", lazy_ptr_name);
15127 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15128 fprintf (file, "\t.long %s\n", binder_name);
15130 #endif /* TARGET_MACHO */
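/* Sketch of the emitted stub for the non-pure (non-PIC) case, using
   hypothetical names STUB, SYMB, Ln$lz and BINDER for the arguments and
   generated labels:

     STUB:
             .indirect_symbol SYMB
             jmp     *Ln$lz
     BINDER:
             pushl   $Ln$lz
             jmp     dyld_stub_binding_helper
     Ln$lz:
             .indirect_symbol SYMB
             .long   BINDER

   The lazy pointer initially resolves through the binder and is intended
   to be patched by dyld to point at SYMB after the first call.  */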
15132 /* Order the registers for the register allocator. */
15134 void
15135 x86_order_regs_for_local_alloc (void)
15137 int pos = 0;
15138 int i;
15140 /* First allocate the local general purpose registers. */
15141 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15142 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15143 reg_alloc_order [pos++] = i;
15145 /* Global general purpose registers. */
15146 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15147 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15148 reg_alloc_order [pos++] = i;
15150 /* x87 registers come first in case we are doing FP math
15151 using them. */
15152 if (!TARGET_SSE_MATH)
15153 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15154 reg_alloc_order [pos++] = i;
15156 /* SSE registers. */
15157 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15158 reg_alloc_order [pos++] = i;
15159 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15160 reg_alloc_order [pos++] = i;
15162 /* x87 registers. */
15163 if (TARGET_SSE_MATH)
15164 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15165 reg_alloc_order [pos++] = i;
15167 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15168 reg_alloc_order [pos++] = i;
15170 /* Initialize the rest of the array, as we do not allocate some registers
15171 at all. */
15172 while (pos < FIRST_PSEUDO_REGISTER)
15173 reg_alloc_order [pos++] = 0;
15176 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15177 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15178 #endif
15180 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15181 struct attribute_spec.handler. */
15182 static tree
15183 ix86_handle_struct_attribute (tree *node, tree name,
15184 tree args ATTRIBUTE_UNUSED,
15185 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15187 tree *type = NULL;
15188 if (DECL_P (*node))
15190 if (TREE_CODE (*node) == TYPE_DECL)
15191 type = &TREE_TYPE (*node);
15193 else
15194 type = node;
15196 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15197 || TREE_CODE (*type) == UNION_TYPE)))
15199 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15200 *no_add_attrs = true;
15203 else if ((is_attribute_p ("ms_struct", name)
15204 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15205 || ((is_attribute_p ("gcc_struct", name)
15206 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15208 warning ("`%s' incompatible attribute ignored",
15209 IDENTIFIER_POINTER (name));
15210 *no_add_attrs = true;
15213 return NULL_TREE;
15216 static bool
15217 ix86_ms_bitfield_layout_p (tree record_type)
15219 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15220 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15221 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15224 /* Returns an expression indicating where the this parameter is
15225 located on entry to the FUNCTION. */
15227 static rtx
15228 x86_this_parameter (tree function)
15230 tree type = TREE_TYPE (function);
15232 if (TARGET_64BIT)
15234 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15235 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15238 if (ix86_function_regparm (type, function) > 0)
15240 tree parm;
15242 parm = TYPE_ARG_TYPES (type);
15243 /* Figure out whether or not the function has a variable number of
15244 arguments. */
15245 for (; parm; parm = TREE_CHAIN (parm))
15246 if (TREE_VALUE (parm) == void_type_node)
15247 break;
15248 /* If not, the this parameter is in the first argument. */
15249 if (parm)
15251 int regno = 0;
15252 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15253 regno = 2;
15254 return gen_rtx_REG (SImode, regno);
15258 if (aggregate_value_p (TREE_TYPE (type), type))
15259 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15260 else
15261 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
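/* Illustration (not exhaustive): on 64-bit targets the this pointer arrives
   in %rdi, or in %rsi when the return value is an aggregate returned in
   memory.  With the default 32-bit ABI it is read from the stack, at
   4(%esp), or at 8(%esp) for an aggregate return; with regparm it is in
   %eax and with fastcall in %ecx, matching the regno 0 / regno 2 choices
   above.  */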
15264 /* Determine whether x86_output_mi_thunk can succeed. */
15266 static bool
15267 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15268 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15269 HOST_WIDE_INT vcall_offset, tree function)
15271 /* 64-bit can handle anything. */
15272 if (TARGET_64BIT)
15273 return true;
15275 /* For 32-bit, everything's fine if we have one free register. */
15276 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15277 return true;
15279 /* Need a free register for vcall_offset. */
15280 if (vcall_offset)
15281 return false;
15283 /* Need a free register for GOT references. */
15284 if (flag_pic && !(*targetm.binds_local_p) (function))
15285 return false;
15287 /* Otherwise ok. */
15288 return true;
15291 /* Output the assembler code for a thunk function. THUNK_DECL is the
15292 declaration for the thunk function itself, FUNCTION is the decl for
15293 the target function. DELTA is an immediate constant offset to be
15294 added to THIS. If VCALL_OFFSET is nonzero, the word at
15295 *(*this + vcall_offset) should be added to THIS. */
15297 static void
15298 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15299 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15300 HOST_WIDE_INT vcall_offset, tree function)
15302 rtx xops[3];
15303 rtx this = x86_this_parameter (function);
15304 rtx this_reg, tmp;
15306 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15307 pull it in now and let DELTA benefit. */
15308 if (REG_P (this))
15309 this_reg = this;
15310 else if (vcall_offset)
15312 /* Put the this parameter into %eax. */
15313 xops[0] = this;
15314 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15315 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15317 else
15318 this_reg = NULL_RTX;
15320 /* Adjust the this parameter by a fixed constant. */
15321 if (delta)
15323 xops[0] = GEN_INT (delta);
15324 xops[1] = this_reg ? this_reg : this;
15325 if (TARGET_64BIT)
15327 if (!x86_64_general_operand (xops[0], DImode))
15329 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15330 xops[1] = tmp;
15331 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15332 xops[0] = tmp;
15333 xops[1] = this;
15335 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15337 else
15338 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15341 /* Adjust the this parameter by a value stored in the vtable. */
15342 if (vcall_offset)
15344 if (TARGET_64BIT)
15345 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15346 else
15348 int tmp_regno = 2 /* ECX */;
15349 if (lookup_attribute ("fastcall",
15350 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15351 tmp_regno = 0 /* EAX */;
15352 tmp = gen_rtx_REG (SImode, tmp_regno);
15355 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15356 xops[1] = tmp;
15357 if (TARGET_64BIT)
15358 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15359 else
15360 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15362 /* Adjust the this parameter. */
15363 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15364 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15366 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15367 xops[0] = GEN_INT (vcall_offset);
15368 xops[1] = tmp2;
15369 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15370 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15372 xops[1] = this_reg;
15373 if (TARGET_64BIT)
15374 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15375 else
15376 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15379 /* If necessary, drop THIS back to its stack slot. */
15380 if (this_reg && this_reg != this)
15382 xops[0] = this_reg;
15383 xops[1] = this;
15384 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15387 xops[0] = XEXP (DECL_RTL (function), 0);
15388 if (TARGET_64BIT)
15390 if (!flag_pic || (*targetm.binds_local_p) (function))
15391 output_asm_insn ("jmp\t%P0", xops);
15392 else
15394 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15395 tmp = gen_rtx_CONST (Pmode, tmp);
15396 tmp = gen_rtx_MEM (QImode, tmp);
15397 xops[0] = tmp;
15398 output_asm_insn ("jmp\t%A0", xops);
15401 else
15403 if (!flag_pic || (*targetm.binds_local_p) (function))
15404 output_asm_insn ("jmp\t%P0", xops);
15405 else
15406 #if TARGET_MACHO
15407 if (TARGET_MACHO)
15409 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15410 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15411 tmp = gen_rtx_MEM (QImode, tmp);
15412 xops[0] = tmp;
15413 output_asm_insn ("jmp\t%0", xops);
15415 else
15416 #endif /* TARGET_MACHO */
15418 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15419 output_set_got (tmp);
15421 xops[1] = tmp;
15422 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15423 output_asm_insn ("jmp\t{*}%1", xops);
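/* Example of the generated code (a sketch, for a hypothetical 32-bit
   non-PIC thunk with DELTA = 8, no VCALL_OFFSET, and this passed on the
   stack; target_function is a placeholder name):

         addl    $8, 4(%esp)
         jmp     target_function

   i.e. the this pointer is adjusted in place and control transfers to the
   real method with the original return address untouched.  */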
15428 static void
15429 x86_file_start (void)
15431 default_file_start ();
15432 if (X86_FILE_START_VERSION_DIRECTIVE)
15433 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15434 if (X86_FILE_START_FLTUSED)
15435 fputs ("\t.global\t__fltused\n", asm_out_file);
15436 if (ix86_asm_dialect == ASM_INTEL)
15437 fputs ("\t.intel_syntax\n", asm_out_file);
15441 x86_field_alignment (tree field, int computed)
15443 enum machine_mode mode;
15444 tree type = TREE_TYPE (field);
15446 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15447 return computed;
15448 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15449 ? get_inner_array_type (type) : type);
15450 if (mode == DFmode || mode == DCmode
15451 || GET_MODE_CLASS (mode) == MODE_INT
15452 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15453 return MIN (32, computed);
15454 return computed;
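/* Example (illustrative): on ia32 without -malign-double a double (DFmode)
   field whose natural alignment was computed as 64 bits is capped to
   32-bit alignment by the MIN above, preserving the traditional i386
   struct layout.  */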
15457 /* Output assembler code to FILE to increment profiler label # LABELNO
15458 for profiling a function entry. */
15459 void
15460 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15462 if (TARGET_64BIT)
15463 if (flag_pic)
15465 #ifndef NO_PROFILE_COUNTERS
15466 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15467 #endif
15468 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15470 else
15472 #ifndef NO_PROFILE_COUNTERS
15473 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15474 #endif
15475 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15477 else if (flag_pic)
15479 #ifndef NO_PROFILE_COUNTERS
15480 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15481 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15482 #endif
15483 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15485 else
15487 #ifndef NO_PROFILE_COUNTERS
15488 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15489 PROFILE_COUNT_REGISTER);
15490 #endif
15491 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15495 /* We don't have exact information about the insn sizes, but we may assume
15496 quite safely that we are informed about all 1 byte insns and memory
15497 address sizes. This is enough to eliminate unnecessary padding in
15498 99% of cases. */
15500 static int
15501 min_insn_size (rtx insn)
15503 int l = 0;
15505 if (!INSN_P (insn) || !active_insn_p (insn))
15506 return 0;
15508 /* Discard alignments we've emitted and jump table insns. */
15509 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15510 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15511 return 0;
15512 if (GET_CODE (insn) == JUMP_INSN
15513 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15514 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15515 return 0;
15517 /* Important case - calls are always 5 bytes.
15518 It is common to have many calls in a row. */
15519 if (GET_CODE (insn) == CALL_INSN
15520 && symbolic_reference_mentioned_p (PATTERN (insn))
15521 && !SIBLING_CALL_P (insn))
15522 return 5;
15523 if (get_attr_length (insn) <= 1)
15524 return 1;
15526 /* For normal instructions we may rely on the sizes of addresses
15527 and the presence of a symbol to require 4 bytes of encoding.
15528 This is not the case for jumps, where references are PC relative. */
15529 if (GET_CODE (insn) != JUMP_INSN)
15531 l = get_attr_length_address (insn);
15532 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15533 l = 4;
15535 if (l)
15536 return 1+l;
15537 else
15538 return 2;
15541 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15542 16 byte window. */
15544 static void
15545 ix86_avoid_jump_misspredicts (void)
15547 rtx insn, start = get_insns ();
15548 int nbytes = 0, njumps = 0;
15549 int isjump = 0;
15551 /* Look for all minimal intervals of instructions containing 4 jumps.
15552 The intervals are bounded by START and INSN. NBYTES is the total
15553 size of instructions in the interval including INSN and not including
15554 START.  When NBYTES is smaller than 16 bytes, it is possible
15555 that the end of START and INSN ends up in the same 16byte page.
15557 The smallest offset in the page INSN can start is the case where START
15558 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15559 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15561 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15564 nbytes += min_insn_size (insn);
15565 if (dump_file)
15566 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15567 INSN_UID (insn), min_insn_size (insn));
15568 if ((GET_CODE (insn) == JUMP_INSN
15569 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15570 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15571 || GET_CODE (insn) == CALL_INSN)
15572 njumps++;
15573 else
15574 continue;
15576 while (njumps > 3)
15578 start = NEXT_INSN (start);
15579 if ((GET_CODE (start) == JUMP_INSN
15580 && GET_CODE (PATTERN (start)) != ADDR_VEC
15581 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15582 || GET_CODE (start) == CALL_INSN)
15583 njumps--, isjump = 1;
15584 else
15585 isjump = 0;
15586 nbytes -= min_insn_size (start);
15588 if (njumps < 0)
15589 abort ();
15590 if (dump_file)
15591 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15592 INSN_UID (start), INSN_UID (insn), nbytes);
15594 if (njumps == 3 && isjump && nbytes < 16)
15596 int padsize = 15 - nbytes + min_insn_size (insn);
15598 if (dump_file)
15599 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15600 INSN_UID (insn), padsize);
15601 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
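/* Worked example (illustrative): if the three preceding jumps and the
   instructions between them add up to nbytes = 12 and INSN itself is
   estimated at 2 bytes, padsize = 15 - 12 + 2 = 5, and the align insn
   emitted above may insert up to 5 bytes of padding before INSN so that
   four jumps cannot end up in the same 16-byte window.  */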
15606 /* The AMD Athlon works faster
15607 when RET is not the destination of a conditional jump or directly preceded
15608 by another jump instruction.  We avoid the penalty by inserting a NOP just
15609 before the RET instructions in such cases. */
15610 static void
15611 ix86_pad_returns (void)
15613 edge e;
15615 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15617 basic_block bb = e->src;
15618 rtx ret = BB_END (bb);
15619 rtx prev;
15620 bool replace = false;
15622 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15623 || !maybe_hot_bb_p (bb))
15624 continue;
15625 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15626 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15627 break;
15628 if (prev && GET_CODE (prev) == CODE_LABEL)
15630 edge e;
15631 for (e = bb->pred; e; e = e->pred_next)
15632 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15633 && !(e->flags & EDGE_FALLTHRU))
15634 replace = true;
15636 if (!replace)
15638 prev = prev_active_insn (ret);
15639 if (prev
15640 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15641 || GET_CODE (prev) == CALL_INSN))
15642 replace = true;
15643 /* Empty functions get a branch mispredict even when the jump destination
15644 is not visible to us. */
15645 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15646 replace = true;
15648 if (replace)
15650 emit_insn_before (gen_return_internal_long (), ret);
15651 delete_insn (ret);
15656 /* Implement machine specific optimizations.  We implement padding of returns
15657 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
15658 static void
15659 ix86_reorg (void)
15661 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15662 ix86_pad_returns ();
15663 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15664 ix86_avoid_jump_misspredicts ();
15667 /* Return nonzero when a QImode register that must be represented via a REX
15668 prefix is used. */
15669 bool
15670 x86_extended_QIreg_mentioned_p (rtx insn)
15672 int i;
15673 extract_insn_cached (insn);
15674 for (i = 0; i < recog_data.n_operands; i++)
15675 if (REG_P (recog_data.operand[i])
15676 && REGNO (recog_data.operand[i]) >= 4)
15677 return true;
15678 return false;
15681 /* Return nonzero when P points to a register encoded via a REX prefix.
15682 Called via for_each_rtx. */
15683 static int
15684 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15686 unsigned int regno;
15687 if (!REG_P (*p))
15688 return 0;
15689 regno = REGNO (*p);
15690 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15693 /* Return true when INSN mentions a register that must be encoded using a
15694 REX prefix. */
15695 bool
15696 x86_extended_reg_mentioned_p (rtx insn)
15698 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15701 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15702 optabs would emit if we didn't have TFmode patterns. */
15704 void
15705 x86_emit_floatuns (rtx operands[2])
15707 rtx neglab, donelab, i0, i1, f0, in, out;
15708 enum machine_mode mode, inmode;
15710 inmode = GET_MODE (operands[1]);
15711 if (inmode != SImode
15712 && inmode != DImode)
15713 abort ();
15715 out = operands[0];
15716 in = force_reg (inmode, operands[1]);
15717 mode = GET_MODE (out);
15718 neglab = gen_label_rtx ();
15719 donelab = gen_label_rtx ();
15720 i1 = gen_reg_rtx (Pmode);
15721 f0 = gen_reg_rtx (mode);
15723 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15725 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15726 emit_jump_insn (gen_jump (donelab));
15727 emit_barrier ();
15729 emit_label (neglab);
15731 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15732 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15733 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15734 expand_float (f0, i0, 0);
15735 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15737 emit_label (donelab);
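/* In C terms the sequence above is roughly (a sketch, assuming a 64-bit
   input and a double result):

     if ((int64_t) in >= 0)
       out = (double) in;
     else
       {
         uint64_t half = (in >> 1) | (in & 1);
         out = (double) half;
         out = out + out;
       }

   The low bit is ORed back into the halved value so that the final
   doubling rounds the same way a direct unsigned conversion would.  */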
15740 /* Return true if we do not know how to pass TYPE solely in registers. */
15741 bool
15742 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15744 if (default_must_pass_in_stack (mode, type))
15745 return true;
15746 return (!TARGET_64BIT && type && mode == TImode);
15749 /* Initialize vector TARGET via VALS. */
15750 void
15751 ix86_expand_vector_init (rtx target, rtx vals)
15753 enum machine_mode mode = GET_MODE (target);
15754 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15755 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15756 int i;
15758 for (i = n_elts - 1; i >= 0; i--)
15759 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15760 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15761 break;
15763 /* A few special cases first...
15764 ... constants are best loaded from the constant pool. */
15765 if (i < 0)
15767 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15768 return;
15771 /* ... values where only the first field is non-constant are best loaded
15772 from the pool and overwritten via a move later. */
15773 if (!i)
15775 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15776 GET_MODE_INNER (mode), 0);
15778 op = force_reg (mode, op);
15779 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15780 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15781 switch (GET_MODE (target))
15783 case V2DFmode:
15784 emit_insn (gen_sse2_movsd (target, target, op));
15785 break;
15786 case V4SFmode:
15787 emit_insn (gen_sse_movss (target, target, op));
15788 break;
15789 default:
15790 break;
15792 return;
15795 /* Otherwise, the general sequence built from unpack (interleave) operations. */
15796 switch (GET_MODE (target))
15798 case V2DFmode:
15800 rtx vecop0 =
15801 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15802 rtx vecop1 =
15803 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15805 vecop0 = force_reg (V2DFmode, vecop0);
15806 vecop1 = force_reg (V2DFmode, vecop1);
15807 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15809 break;
15810 case V4SFmode:
15812 rtx vecop0 =
15813 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15814 rtx vecop1 =
15815 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15816 rtx vecop2 =
15817 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15818 rtx vecop3 =
15819 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15820 rtx tmp1 = gen_reg_rtx (V4SFmode);
15821 rtx tmp2 = gen_reg_rtx (V4SFmode);
15823 vecop0 = force_reg (V4SFmode, vecop0);
15824 vecop1 = force_reg (V4SFmode, vecop1);
15825 vecop2 = force_reg (V4SFmode, vecop2);
15826 vecop3 = force_reg (V4SFmode, vecop3);
15827 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15828 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15829 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15831 break;
15832 default:
15833 abort ();
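/* Illustrative trace of the V4SF path above (a sketch): with scalar
   elements e0..e3,

     unpcklps tmp1, vecop1, vecop3   ->  { e1, e3, x, x }
     unpcklps tmp2, vecop0, vecop2   ->  { e0, e2, x, x }
     unpcklps target, tmp2, tmp1     ->  { e0, e1, e2, e3 }

   where x denotes a don't-care lane, so three interleaves assemble the
   vector in element order.  */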
15837 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15839 We do this in the new i386 backend to maintain source compatibility
15840 with the old cc0-based compiler. */
15842 static tree
15843 ix86_md_asm_clobbers (tree clobbers)
15845 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15846 clobbers);
15847 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15848 clobbers);
15849 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15850 clobbers);
15851 return clobbers;
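/* Effectively, every asm statement on this target behaves as if it had
   been written with an explicit clobber list such as

     asm volatile ("..." : : : "flags", "fpsr", "dirflag");

   in addition to whatever clobbers the user supplied.  */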
15854 /* Worker function for REVERSE_CONDITION. */
15856 enum rtx_code
15857 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15859 return (mode != CCFPmode && mode != CCFPUmode
15860 ? reverse_condition (code)
15861 : reverse_condition_maybe_unordered (code));
15864 #include "gt-i386.h"