1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
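/* Illustration (not part of the original file): the per-mode cost arrays
   below are indexed with MODE_INDEX, so a multiply-cost lookup for SImode
   selects slot 2, along the lines of

       int c = ix86_cost->mult_init[MODE_INDEX (SImode)];

   where the member name mult_init is an assumption.  Any mode other than
   QImode, HImode, SImode or DImode (for example TImode) falls into the
   last slot, index 4.  */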
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of loading integer registers */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of loading integer registers */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of loading integer registers */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of loading integer registers */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of loading integer registers */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of loading integer registers */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of loading integer registers */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of loading integer registers */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of loading integer registers */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just the lower part of
563 scalar values in the proper format, leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
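/* Illustration (not part of the original file): the m_* masks and the
   const int tables above are consumed through per-tuning TARGET_* macros
   in i386.h, roughly of this shape:

       #define TUNEMASK (1 << ix86_tune)
       #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so whichever processor was selected with -mtune decides, for instance,
   whether epilogues use the "leave" instruction.  The exact macro names
   here are quoted from memory and should be checked against i386.h.  */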
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
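/* Illustration (not part of the original file): debug output selects one of
   the register maps above through the DBX_REGISTER_NUMBER target macro,
   roughly

       #define DBX_REGISTER_NUMBER(n) \
         (TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n])

   with SVR4/ELF configurations substituting svr4_dbx_register_map in the
   32-bit case.  The exact conditional lives in the target headers and is
   only paraphrased here.  */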
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
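/* Worked example (not part of the original file): in 64-bit mode
   REGPARM_MAX is 6 integer registers, SSE_REGPARM_MAX is 8 SSE registers
   and UNITS_PER_WORD is 8, so

       X86_64_VARARGS_SIZE = 6 * 8 + 8 * 16 = 176 bytes,

   which matches the va_list register save area laid down by the x86-64
   ABI.  */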
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which CPU we are scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* True if the SSE prefetch instruction is not a NOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
839 int, int, FILE *);
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
845 rtx *);
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
848 enum machine_mode);
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
883 struct ix86_address
885 rtx base, index, disp;
886 HOST_WIDE_INT scale;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
890 static int ix86_decompose_address (rtx, struct ix86_address *);
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 tree, rtx);
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 tree, rtx);
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
932 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
933 static void ix86_svr3_asm_out_constructor (rtx, int);
934 #endif
936 /* Register class used for passing a given 64bit part of the argument.
937 These represent classes as documented by the PS ABI, with the exception
938 of the SSESF and SSEDF classes, which are basically the SSE class, except that
939 gcc will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
941 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
942 whenever possible (the upper half does contain padding).
944 enum x86_64_reg_class
946 X86_64_NO_CLASS,
947 X86_64_INTEGER_CLASS,
948 X86_64_INTEGERSI_CLASS,
949 X86_64_SSE_CLASS,
950 X86_64_SSESF_CLASS,
951 X86_64_SSEDF_CLASS,
952 X86_64_SSEUP_CLASS,
953 X86_64_X87_CLASS,
954 X86_64_X87UP_CLASS,
955 X86_64_MEMORY_CLASS
957 static const char * const x86_64_reg_class_name[] =
958 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
960 #define MAX_CLASSES 4
961 static int classify_argument (enum machine_mode, tree,
962 enum x86_64_reg_class [MAX_CLASSES], int);
963 static int examine_argument (enum machine_mode, tree, int, int *, int *);
964 static rtx construct_container (enum machine_mode, tree, int, int, int,
965 const int *, int);
966 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
967 enum x86_64_reg_class);
969 /* Table of constants used by fldpi, fldln2, etc.... */
970 static REAL_VALUE_TYPE ext_80387_constants_table [5];
971 static bool ext_80387_constants_init = 0;
972 static void init_ext_80387_constants (void);
974 /* Initialize the GCC target structure. */
975 #undef TARGET_ATTRIBUTE_TABLE
976 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
977 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
978 # undef TARGET_MERGE_DECL_ATTRIBUTES
979 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
980 #endif
982 #undef TARGET_COMP_TYPE_ATTRIBUTES
983 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
985 #undef TARGET_INIT_BUILTINS
986 #define TARGET_INIT_BUILTINS ix86_init_builtins
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1003 #ifdef ASM_QUAD
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1006 #endif
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1020 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
1021 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1022 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1023 ia32_multipass_dfa_lookahead
1025 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1026 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1028 #ifdef HAVE_AS_TLS
1029 #undef TARGET_HAVE_TLS
1030 #define TARGET_HAVE_TLS true
1031 #endif
1032 #undef TARGET_CANNOT_FORCE_CONST_MEM
1033 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1035 #undef TARGET_DELEGITIMIZE_ADDRESS
1036 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1038 #undef TARGET_MS_BITFIELD_LAYOUT_P
1039 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1041 #undef TARGET_ASM_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1046 #undef TARGET_ASM_FILE_START
1047 #define TARGET_ASM_FILE_START x86_file_start
1049 #undef TARGET_RTX_COSTS
1050 #define TARGET_RTX_COSTS ix86_rtx_costs
1051 #undef TARGET_ADDRESS_COST
1052 #define TARGET_ADDRESS_COST ix86_address_cost
1054 #undef TARGET_FIXED_CONDITION_CODE_REGS
1055 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1056 #undef TARGET_CC_MODES_COMPATIBLE
1057 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1059 #undef TARGET_MACHINE_DEPENDENT_REORG
1060 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1062 #undef TARGET_BUILD_BUILTIN_VA_LIST
1063 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1065 #undef TARGET_MD_ASM_CLOBBERS
1066 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1068 #undef TARGET_PROMOTE_PROTOTYPES
1069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1070 #undef TARGET_STRUCT_VALUE_RTX
1071 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1072 #undef TARGET_SETUP_INCOMING_VARARGS
1073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1078 struct gcc_target targetm = TARGET_INITIALIZER;
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1082 in memory. */
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1085 #endif
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1091 been parsed.
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1096 void
1097 override_options (void)
1099 int i;
1100 /* Comes from final.c -- no real reason to change it. */
1101 #define MAX_CODE_ALIGN 16
1103 static struct ptt
1105 const struct processor_costs *cost; /* Processor costs */
1106 const int target_enable; /* Target flags to enable. */
1107 const int target_disable; /* Target flags to disable. */
1108 const int align_loop; /* Default alignments. */
1109 const int align_loop_max_skip;
1110 const int align_jump;
1111 const int align_jump_max_skip;
1112 const int align_func;
1114 const processor_target_table[PROCESSOR_max] =
1116 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1117 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1118 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1120 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1121 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1122 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1123 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1127 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1128 static struct pta
1130 const char *const name; /* processor name or nickname. */
1131 const enum processor_type processor;
1132 const enum pta_flags
1134 PTA_SSE = 1,
1135 PTA_SSE2 = 2,
1136 PTA_SSE3 = 4,
1137 PTA_MMX = 8,
1138 PTA_PREFETCH_SSE = 16,
1139 PTA_3DNOW = 32,
1140 PTA_3DNOW_A = 64,
1141 PTA_64BIT = 128
1142 } flags;
1144 const processor_alias_table[] =
1146 {"i386", PROCESSOR_I386, 0},
1147 {"i486", PROCESSOR_I486, 0},
1148 {"i586", PROCESSOR_PENTIUM, 0},
1149 {"pentium", PROCESSOR_PENTIUM, 0},
1150 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1151 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1152 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1153 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1155 {"i686", PROCESSOR_PENTIUMPRO, 0},
1156 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1158 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1159 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1161 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1162 | PTA_MMX | PTA_PREFETCH_SSE},
1163 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"k6", PROCESSOR_K6, PTA_MMX},
1170 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1171 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1173 | PTA_3DNOW_A},
1174 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1175 | PTA_3DNOW | PTA_3DNOW_A},
1176 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1177 | PTA_3DNOW_A | PTA_SSE},
1178 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1183 | PTA_SSE | PTA_SSE2 },
1184 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1186 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1194 int const pta_size = ARRAY_SIZE (processor_alias_table);
1196 /* Set the default values for switches whose default depends on TARGET_64BIT
1197 in case they weren't overwritten by command line options. */
1198 if (TARGET_64BIT)
1200 if (flag_omit_frame_pointer == 2)
1201 flag_omit_frame_pointer = 1;
1202 if (flag_asynchronous_unwind_tables == 2)
1203 flag_asynchronous_unwind_tables = 1;
1204 if (flag_pcc_struct_return == 2)
1205 flag_pcc_struct_return = 0;
1207 else
1209 if (flag_omit_frame_pointer == 2)
1210 flag_omit_frame_pointer = 0;
1211 if (flag_asynchronous_unwind_tables == 2)
1212 flag_asynchronous_unwind_tables = 0;
1213 if (flag_pcc_struct_return == 2)
1214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1217 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1218 SUBTARGET_OVERRIDE_OPTIONS;
1219 #endif
1221 if (!ix86_tune_string && ix86_arch_string)
1222 ix86_tune_string = ix86_arch_string;
1223 if (!ix86_tune_string)
1224 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1225 if (!ix86_arch_string)
1226 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1228 if (ix86_cmodel_string != 0)
1230 if (!strcmp (ix86_cmodel_string, "small"))
1231 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1232 else if (flag_pic)
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1234 else if (!strcmp (ix86_cmodel_string, "32"))
1235 ix86_cmodel = CM_32;
1236 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1237 ix86_cmodel = CM_KERNEL;
1238 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1239 ix86_cmodel = CM_MEDIUM;
1240 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1241 ix86_cmodel = CM_LARGE;
1242 else
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1245 else
1247 ix86_cmodel = CM_32;
1248 if (TARGET_64BIT)
1249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1251 if (ix86_asm_string != 0)
1253 if (!strcmp (ix86_asm_string, "intel"))
1254 ix86_asm_dialect = ASM_INTEL;
1255 else if (!strcmp (ix86_asm_string, "att"))
1256 ix86_asm_dialect = ASM_ATT;
1257 else
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1260 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1263 if (ix86_cmodel == CM_LARGE)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags & MASK_64BIT) ? 64 : 32);
1269 for (i = 0; i < pta_size; i++)
1270 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1272 ix86_arch = processor_alias_table[i].processor;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune = ix86_arch;
1275 if (processor_alias_table[i].flags & PTA_MMX
1276 && !(target_flags_explicit & MASK_MMX))
1277 target_flags |= MASK_MMX;
1278 if (processor_alias_table[i].flags & PTA_3DNOW
1279 && !(target_flags_explicit & MASK_3DNOW))
1280 target_flags |= MASK_3DNOW;
1281 if (processor_alias_table[i].flags & PTA_3DNOW_A
1282 && !(target_flags_explicit & MASK_3DNOW_A))
1283 target_flags |= MASK_3DNOW_A;
1284 if (processor_alias_table[i].flags & PTA_SSE
1285 && !(target_flags_explicit & MASK_SSE))
1286 target_flags |= MASK_SSE;
1287 if (processor_alias_table[i].flags & PTA_SSE2
1288 && !(target_flags_explicit & MASK_SSE2))
1289 target_flags |= MASK_SSE2;
1290 if (processor_alias_table[i].flags & PTA_SSE3
1291 && !(target_flags_explicit & MASK_SSE3))
1292 target_flags |= MASK_SSE3;
1293 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1294 x86_prefetch_sse = true;
1295 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1296 error ("CPU you selected does not support x86-64 instruction set");
1297 break;
1300 if (i == pta_size)
1301 error ("bad value (%s) for -march= switch", ix86_arch_string);
1303 for (i = 0; i < pta_size; i++)
1304 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1306 ix86_tune = processor_alias_table[i].processor;
1307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1308 error ("CPU you selected does not support x86-64 instruction set");
1309 break;
1311 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1312 x86_prefetch_sse = true;
1313 if (i == pta_size)
1314 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1316 if (optimize_size)
1317 ix86_cost = &size_cost;
1318 else
1319 ix86_cost = processor_target_table[ix86_tune].cost;
1320 target_flags |= processor_target_table[ix86_tune].target_enable;
1321 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1323 /* Arrange to set up i386_stack_locals for all functions. */
1324 init_machine_status = ix86_init_machine_status;
1326 /* Validate -mregparm= value. */
1327 if (ix86_regparm_string)
1329 i = atoi (ix86_regparm_string);
1330 if (i < 0 || i > REGPARM_MAX)
1331 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1332 else
1333 ix86_regparm = i;
1335 else
1336 if (TARGET_64BIT)
1337 ix86_regparm = REGPARM_MAX;
1339 /* If the user has provided any of the -malign-* options,
1340 warn and use that value only if -falign-* is not set.
1341 Remove this code in GCC 3.2 or later. */
1342 if (ix86_align_loops_string)
1344 warning ("-malign-loops is obsolete, use -falign-loops");
1345 if (align_loops == 0)
1347 i = atoi (ix86_align_loops_string);
1348 if (i < 0 || i > MAX_CODE_ALIGN)
1349 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1350 else
1351 align_loops = 1 << i;
1355 if (ix86_align_jumps_string)
1357 warning ("-malign-jumps is obsolete, use -falign-jumps");
1358 if (align_jumps == 0)
1360 i = atoi (ix86_align_jumps_string);
1361 if (i < 0 || i > MAX_CODE_ALIGN)
1362 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1363 else
1364 align_jumps = 1 << i;
1368 if (ix86_align_funcs_string)
1370 warning ("-malign-functions is obsolete, use -falign-functions");
1371 if (align_functions == 0)
1373 i = atoi (ix86_align_funcs_string);
1374 if (i < 0 || i > MAX_CODE_ALIGN)
1375 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1376 else
1377 align_functions = 1 << i;
1381 /* Default align_* from the processor table. */
1382 if (align_loops == 0)
1384 align_loops = processor_target_table[ix86_tune].align_loop;
1385 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1387 if (align_jumps == 0)
1389 align_jumps = processor_target_table[ix86_tune].align_jump;
1390 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1392 if (align_functions == 0)
1394 align_functions = processor_target_table[ix86_tune].align_func;
1397 /* Validate -mpreferred-stack-boundary= value, or provide default.
1398 The default of 128 bits is for Pentium III's SSE __m128, but we
1399 don't want additional code to keep the stack aligned when
1400 optimizing for code size. */
1401 ix86_preferred_stack_boundary = (optimize_size
1402 ? TARGET_64BIT ? 128 : 32
1403 : 128);
1404 if (ix86_preferred_stack_boundary_string)
1406 i = atoi (ix86_preferred_stack_boundary_string);
1407 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1408 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1409 TARGET_64BIT ? 4 : 2);
1410 else
1411 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
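  /* Worked example (not part of the original file): the option value is a
     power-of-two exponent in bytes, so -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment wanted for SSE __m128 spills.  */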
1414 /* Validate -mbranch-cost= value, or provide default. */
1415 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1416 if (ix86_branch_cost_string)
1418 i = atoi (ix86_branch_cost_string);
1419 if (i < 0 || i > 5)
1420 error ("-mbranch-cost=%d is not between 0 and 5", i);
1421 else
1422 ix86_branch_cost = i;
1425 if (ix86_tls_dialect_string)
1427 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1428 ix86_tls_dialect = TLS_DIALECT_GNU;
1429 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1430 ix86_tls_dialect = TLS_DIALECT_SUN;
1431 else
1432 error ("bad value (%s) for -mtls-dialect= switch",
1433 ix86_tls_dialect_string);
1436 /* Keep nonleaf frame pointers. */
1437 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1438 flag_omit_frame_pointer = 1;
1440 /* If we're doing fast math, we don't care about comparison order
1441 wrt NaNs. This lets us use a shorter comparison sequence. */
1442 if (flag_unsafe_math_optimizations)
1443 target_flags &= ~MASK_IEEE_FP;
1445 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1446 since the insns won't need emulation. */
1447 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1448 target_flags &= ~MASK_NO_FANCY_MATH_387;
1450 /* Turn on SSE2 builtins for -msse3. */
1451 if (TARGET_SSE3)
1452 target_flags |= MASK_SSE2;
1454 /* Turn on SSE builtins for -msse2. */
1455 if (TARGET_SSE2)
1456 target_flags |= MASK_SSE;
1458 if (TARGET_64BIT)
1460 if (TARGET_ALIGN_DOUBLE)
1461 error ("-malign-double makes no sense in the 64bit mode");
1462 if (TARGET_RTD)
1463 error ("-mrtd calling convention not supported in the 64bit mode");
1464 /* Enable by default the SSE and MMX builtins. */
1465 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1466 ix86_fpmath = FPMATH_SSE;
1468 else
1470 ix86_fpmath = FPMATH_387;
1471 /* The i386 ABI does not specify a red zone. It still makes sense to use one
1472 when the programmer takes care to keep the stack from being destroyed. */
1473 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1474 target_flags |= MASK_NO_RED_ZONE;
1477 if (ix86_fpmath_string != 0)
1479 if (! strcmp (ix86_fpmath_string, "387"))
1480 ix86_fpmath = FPMATH_387;
1481 else if (! strcmp (ix86_fpmath_string, "sse"))
1483 if (!TARGET_SSE)
1485 warning ("SSE instruction set disabled, using 387 arithmetics");
1486 ix86_fpmath = FPMATH_387;
1488 else
1489 ix86_fpmath = FPMATH_SSE;
1491 else if (! strcmp (ix86_fpmath_string, "387,sse")
1492 || ! strcmp (ix86_fpmath_string, "sse,387"))
1494 if (!TARGET_SSE)
1496 warning ("SSE instruction set disabled, using 387 arithmetics");
1497 ix86_fpmath = FPMATH_387;
1499 else if (!TARGET_80387)
1501 warning ("387 instruction set disabled, using SSE arithmetics");
1502 ix86_fpmath = FPMATH_SSE;
1504 else
1505 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1507 else
1508 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1511 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1512 on by -msse. */
1513 if (TARGET_SSE)
1515 target_flags |= MASK_MMX;
1516 x86_prefetch_sse = true;
1519 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1520 if (TARGET_3DNOW)
1522 target_flags |= MASK_MMX;
1523 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1524 extensions it adds. */
1525 if (x86_3dnow_a & (1 << ix86_arch))
1526 target_flags |= MASK_3DNOW_A;
1528 if ((x86_accumulate_outgoing_args & TUNEMASK)
1529 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1530 && !optimize_size)
1531 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1533 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1535 char *p;
1536 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1537 p = strchr (internal_label_prefix, 'X');
1538 internal_label_prefix_len = p - internal_label_prefix;
1539 *p = '\0';
1543 void
1544 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1546 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1547 make the problem with not enough registers even worse. */
1548 #ifdef INSN_SCHEDULING
1549 if (level > 1)
1550 flag_schedule_insns = 0;
1551 #endif
1553 /* The default values of these switches depend on TARGET_64BIT,
1554 which is not known at this moment. Mark these values with 2 and
1555 let the user override them. In case there is no command line option
1556 specifying them, we will set the defaults in override_options. */
1557 if (optimize >= 1)
1558 flag_omit_frame_pointer = 2;
1559 flag_pcc_struct_return = 2;
1560 flag_asynchronous_unwind_tables = 2;
1563 /* Table of valid machine attributes. */
1564 const struct attribute_spec ix86_attribute_table[] =
1566 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1567 /* Stdcall attribute says callee is responsible for popping arguments
1568 if they are not variable. */
1569 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1570 /* Fastcall attribute says callee is responsible for popping arguments
1571 if they are not variable. */
1572 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1573 /* Cdecl attribute says the callee is a normal C declaration */
1574 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1575 /* Regparm attribute specifies how many integer arguments are to be
1576 passed in registers. */
1577 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1578 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1579 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1580 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1581 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1582 #endif
1583 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1584 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1585 { NULL, 0, 0, false, false, false, NULL }
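/* As a sketch of how the attributes in this table appear in user code
   (the register details reflect the 32-bit handling later in this file):

       int __attribute__ ((stdcall))  f (int a, int b);   -- callee pops args
       int __attribute__ ((fastcall)) g (int a, int b);   -- a in %ecx, b in %edx
       int __attribute__ ((regparm (3))) h (int a, int b, int c);  */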
1588 /* Decide whether we can make a sibling call to a function. DECL is the
1589 declaration of the function being targeted by the call and EXP is the
1590 CALL_EXPR representing the call. */
1592 static bool
1593 ix86_function_ok_for_sibcall (tree decl, tree exp)
1595 /* If we are generating position-independent code, we cannot sibcall
1596 optimize any indirect call, or a direct call to a global function,
1597 as the PLT requires %ebx be live. */
1598 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1599 return false;
1601 /* If we are returning floats on the 80387 register stack, we cannot
1602 make a sibcall from a function that doesn't return a float to a
1603 function that does or, conversely, from a function that does return
1604 a float to a function that doesn't; the necessary stack adjustment
1605 would not be executed. */
1606 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1607 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1608 return false;
1610 /* If this call is indirect, we'll need to be able to use a call-clobbered
1611 register for the address of the target function. Make sure that all
1612 such registers are not used for passing parameters. */
1613 if (!decl && !TARGET_64BIT)
1615 tree type;
1617 /* We're looking at the CALL_EXPR, we need the type of the function. */
1618 type = TREE_OPERAND (exp, 0); /* pointer expression */
1619 type = TREE_TYPE (type); /* pointer type */
1620 type = TREE_TYPE (type); /* function type */
1622 if (ix86_function_regparm (type, NULL) >= 3)
1624 /* ??? Need to count the actual number of registers to be used,
1625 not the possible number of registers. Fix later. */
1626 return false;
1630 /* Otherwise okay. That also includes certain types of indirect calls. */
1631 return true;
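/* For example, in 32-bit PIC code a tail call to a global function is not
   turned into a sibcall, because the PLT requires %ebx to be live:

       extern int target_fn (int);
       int caller (int x) { return target_fn (x); }   -- stays a normal call

   whereas a call to a file-local (static) function may still qualify.
   A sketch assuming flag_pic and !TARGET_64BIT, as tested above.  */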
1634 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1635 arguments as in struct attribute_spec.handler. */
1636 static tree
1637 ix86_handle_cdecl_attribute (tree *node, tree name,
1638 tree args ATTRIBUTE_UNUSED,
1639 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1641 if (TREE_CODE (*node) != FUNCTION_TYPE
1642 && TREE_CODE (*node) != METHOD_TYPE
1643 && TREE_CODE (*node) != FIELD_DECL
1644 && TREE_CODE (*node) != TYPE_DECL)
1646 warning ("`%s' attribute only applies to functions",
1647 IDENTIFIER_POINTER (name));
1648 *no_add_attrs = true;
1650 else
1652 if (is_attribute_p ("fastcall", name))
1654 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1656 error ("fastcall and stdcall attributes are not compatible");
1658 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1660 error ("fastcall and regparm attributes are not compatible");
1663 else if (is_attribute_p ("stdcall", name))
1665 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1667 error ("fastcall and stdcall attributes are not compatible");
1672 if (TARGET_64BIT)
1674 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1675 *no_add_attrs = true;
1678 return NULL_TREE;
1681 /* Handle a "regparm" attribute;
1682 arguments as in struct attribute_spec.handler. */
1683 static tree
1684 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1685 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1687 if (TREE_CODE (*node) != FUNCTION_TYPE
1688 && TREE_CODE (*node) != METHOD_TYPE
1689 && TREE_CODE (*node) != FIELD_DECL
1690 && TREE_CODE (*node) != TYPE_DECL)
1692 warning ("`%s' attribute only applies to functions",
1693 IDENTIFIER_POINTER (name));
1694 *no_add_attrs = true;
1696 else
1698 tree cst;
1700 cst = TREE_VALUE (args);
1701 if (TREE_CODE (cst) != INTEGER_CST)
1703 warning ("`%s' attribute requires an integer constant argument",
1704 IDENTIFIER_POINTER (name));
1705 *no_add_attrs = true;
1707 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1709 warning ("argument to `%s' attribute larger than %d",
1710 IDENTIFIER_POINTER (name), REGPARM_MAX);
1711 *no_add_attrs = true;
1714 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1716 error ("fastcall and regparm attributes are not compatible");
1720 return NULL_TREE;
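/* Typical uses of the attribute validated above (a sketch; the warning
   threshold is REGPARM_MAX, normally 3 for the 32-bit target):

       int __attribute__ ((regparm (2))) add2 (int a, int b);   -- accepted
       int __attribute__ ((regparm (9))) bad (int a);           -- warned, ignored  */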
1723 /* Return 0 if the attributes for two types are incompatible, 1 if they
1724 are compatible, and 2 if they are nearly compatible (which causes a
1725 warning to be generated). */
1727 static int
1728 ix86_comp_type_attributes (tree type1, tree type2)
1730 /* Check for mismatch of non-default calling convention. */
1731 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1733 if (TREE_CODE (type1) != FUNCTION_TYPE)
1734 return 1;
1736 /* Check for mismatched fastcall types */
1737 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1738 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1739 return 0;
1741 /* Check for mismatched return types (cdecl vs stdcall). */
1742 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1743 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1744 return 0;
1745 if (ix86_function_regparm (type1, NULL)
1746 != ix86_function_regparm (type2, NULL))
1747 return 0;
1748 return 1;
1751 /* Return the regparm value for a function with the indicated TYPE and DECL.
1752 DECL may be NULL when calling function indirectly
1753 or considering a libcall. */
1755 static int
1756 ix86_function_regparm (tree type, tree decl)
1758 tree attr;
1759 int regparm = ix86_regparm;
1760 bool user_convention = false;
1762 if (!TARGET_64BIT)
1764 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1765 if (attr)
1767 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1768 user_convention = true;
1771 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1773 regparm = 2;
1774 user_convention = true;
1777 /* Use register calling convention for local functions when possible. */
1778 if (!TARGET_64BIT && !user_convention && decl
1779 && flag_unit_at_a_time && !profile_flag)
1781 struct cgraph_local_info *i = cgraph_local_info (decl);
1782 if (i && i->local)
1784 /* We can't use regparm(3) for nested functions as these use
1785 static chain pointer in third argument. */
1786 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1787 regparm = 2;
1788 else
1789 regparm = 3;
1793 return regparm;
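/* For instance, with -funit-at-a-time a file-local, non-nested function such
   as

       static int square (int x) { return x * x; }

   that cgraph marks local is given regparm 3 by the code above, while a
   nested local function keeps regparm 2 because the static chain pointer
   occupies the third register (a sketch of the decision just made).  */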
1796 /* Return true if EAX is live at the start of the function. Used by
1797 ix86_expand_prologue to determine if we need special help before
1798 calling allocate_stack_worker. */
1800 static bool
1801 ix86_eax_live_at_start_p (void)
1803 /* Cheat. Don't bother working forward from ix86_function_regparm
1804 to the function type to whether an actual argument is located in
1805 eax. Instead just look at cfg info, which is still close enough
1806 to correct at this point. This gives false positives for broken
1807 functions that might use uninitialized data that happens to be
1808 allocated in eax, but who cares? */
1809 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1812 /* Value is the number of bytes of arguments automatically
1813 popped when returning from a subroutine call.
1814 FUNDECL is the declaration node of the function (as a tree),
1815 FUNTYPE is the data type of the function (as a tree),
1816 or for a library call it is an identifier node for the subroutine name.
1817 SIZE is the number of bytes of arguments passed on the stack.
1819 On the 80386, the RTD insn may be used to pop them if the number
1820 of args is fixed, but if the number is variable then the caller
1821 must pop them all. RTD can't be used for library calls now
1822 because the library is compiled with the Unix compiler.
1823 Use of RTD is a selectable option, since it is incompatible with
1824 standard Unix calling sequences. If the option is not selected,
1825 the caller must always pop the args.
1827 The attribute stdcall is equivalent to RTD on a per module basis. */
1830 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1832 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1834 /* Cdecl functions override -mrtd, and never pop the stack. */
1835 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1837 /* Stdcall and fastcall functions will pop the stack if not
1838 variable args. */
1839 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1840 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1841 rtd = 1;
1843 if (rtd
1844 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1845 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1846 == void_type_node)))
1847 return size;
1850 /* Lose any fake structure return argument if it is passed on the stack. */
1851 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1852 && !TARGET_64BIT)
1854 int nregs = ix86_function_regparm (funtype, fundecl);
1856 if (!nregs)
1857 return GET_MODE_SIZE (Pmode);
1860 return 0;
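/* Worked example of the rule above: a fixed-argument stdcall function

       void __attribute__ ((stdcall)) f (int a, int b, int c);

   has SIZE == 12 bytes of stack arguments, so the callee pops them
   (effectively a "ret $12"), while a cdecl or varargs function returns 0
   here and the caller pops.  */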
1863 /* Argument support functions. */
1865 /* Return true when the register may be used to pass function parameters. */
1866 bool
1867 ix86_function_arg_regno_p (int regno)
1869 int i;
1870 if (!TARGET_64BIT)
1871 return (regno < REGPARM_MAX
1872 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1873 if (SSE_REGNO_P (regno) && TARGET_SSE)
1874 return true;
1875 /* RAX is used as hidden argument to va_arg functions. */
1876 if (!regno)
1877 return true;
1878 for (i = 0; i < REGPARM_MAX; i++)
1879 if (regno == x86_64_int_parameter_registers[i])
1880 return true;
1881 return false;
1884 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1885 for a call to a function whose data type is FNTYPE.
1886 For a library call, FNTYPE is 0. */
1888 void
1889 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1890 tree fntype, /* tree ptr for function decl */
1891 rtx libname, /* SYMBOL_REF of library name or 0 */
1892 tree fndecl)
1894 static CUMULATIVE_ARGS zero_cum;
1895 tree param, next_param;
1897 if (TARGET_DEBUG_ARG)
1899 fprintf (stderr, "\ninit_cumulative_args (");
1900 if (fntype)
1901 fprintf (stderr, "fntype code = %s, ret code = %s",
1902 tree_code_name[(int) TREE_CODE (fntype)],
1903 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1904 else
1905 fprintf (stderr, "no fntype");
1907 if (libname)
1908 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1911 *cum = zero_cum;
1913 /* Set up the number of registers to use for passing arguments. */
1914 if (fntype)
1915 cum->nregs = ix86_function_regparm (fntype, fndecl);
1916 else
1917 cum->nregs = ix86_regparm;
1918 cum->sse_nregs = SSE_REGPARM_MAX;
1919 cum->mmx_nregs = MMX_REGPARM_MAX;
1920 cum->warn_sse = true;
1921 cum->warn_mmx = true;
1922 cum->maybe_vaarg = false;
1924 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1925 if (fntype && !TARGET_64BIT)
1927 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1929 cum->nregs = 2;
1930 cum->fastcall = 1;
1935 /* Determine if this function has variable arguments. This is
1936 indicated by the last argument being 'void_type_node' if there
1937 are no variable arguments. If there are variable arguments, then
1938 we won't pass anything in registers. */
1940 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1942 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1943 param != 0; param = next_param)
1945 next_param = TREE_CHAIN (param);
1946 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1948 if (!TARGET_64BIT)
1950 cum->nregs = 0;
1951 cum->sse_nregs = 0;
1952 cum->mmx_nregs = 0;
1953 cum->warn_sse = 0;
1954 cum->warn_mmx = 0;
1955 cum->fastcall = 0;
1957 cum->maybe_vaarg = true;
1961 if ((!fntype && !libname)
1962 || (fntype && !TYPE_ARG_TYPES (fntype)))
1963 cum->maybe_vaarg = 1;
1965 if (TARGET_DEBUG_ARG)
1966 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1968 return;
1971 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1972 of this code is to classify each eightbyte of an incoming argument by register
1973 class and assign registers accordingly. */
1975 /* Return the union class of CLASS1 and CLASS2.
1976 See the x86-64 PS ABI for details. */
1978 static enum x86_64_reg_class
1979 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1981 /* Rule #1: If both classes are equal, this is the resulting class. */
1982 if (class1 == class2)
1983 return class1;
1985 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1986 the other class. */
1987 if (class1 == X86_64_NO_CLASS)
1988 return class2;
1989 if (class2 == X86_64_NO_CLASS)
1990 return class1;
1992 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1993 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1994 return X86_64_MEMORY_CLASS;
1996 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1997 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1998 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1999 return X86_64_INTEGERSI_CLASS;
2000 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2001 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2002 return X86_64_INTEGER_CLASS;
2004 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2005 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2006 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2007 return X86_64_MEMORY_CLASS;
2009 /* Rule #6: Otherwise class SSE is used. */
2010 return X86_64_SSE_CLASS;
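/* A few worked merges under the rules above:
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       -> X86_64_INTEGERSI_CLASS   (rule #4)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       -> X86_64_MEMORY_CLASS      (rule #5)
     merge_classes (X86_64_NO_CLASS, X86_64_SSEDF_CLASS)
       -> X86_64_SSEDF_CLASS       (rule #2)  */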
2013 /* Classify the argument of type TYPE and mode MODE.
2014 CLASSES will be filled by the register class used to pass each word
2015 of the operand. The number of words is returned. In case the parameter
2016 should be passed in memory, 0 is returned. As a special case for zero
2017 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2019 BIT_OFFSET is used internally for handling records and specifies the
2020 offset in bits modulo 256 to avoid overflow cases.
2022 See the x86-64 PS ABI for details.
2025 static int
2026 classify_argument (enum machine_mode mode, tree type,
2027 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2029 HOST_WIDE_INT bytes =
2030 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2031 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2033 /* Variable sized entities are always passed/returned in memory. */
2034 if (bytes < 0)
2035 return 0;
2037 if (mode != VOIDmode
2038 && MUST_PASS_IN_STACK (mode, type))
2039 return 0;
2041 if (type && AGGREGATE_TYPE_P (type))
2043 int i;
2044 tree field;
2045 enum x86_64_reg_class subclasses[MAX_CLASSES];
2047 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2048 if (bytes > 16)
2049 return 0;
2051 for (i = 0; i < words; i++)
2052 classes[i] = X86_64_NO_CLASS;
2054 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2055 signal the memory class, so handle this as a special case. */
2056 if (!words)
2058 classes[0] = X86_64_NO_CLASS;
2059 return 1;
2062 /* Classify each field of record and merge classes. */
2063 if (TREE_CODE (type) == RECORD_TYPE)
2065 /* For classes, first merge in the fields of the base classes. */
2066 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2068 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2069 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2070 int i;
2072 for (i = 0; i < n_bases; ++i)
2074 tree binfo = TREE_VEC_ELT (bases, i);
2075 int num;
2076 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2077 tree type = BINFO_TYPE (binfo);
2079 num = classify_argument (TYPE_MODE (type),
2080 type, subclasses,
2081 (offset + bit_offset) % 256);
2082 if (!num)
2083 return 0;
2084 for (i = 0; i < num; i++)
2086 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2087 classes[i + pos] =
2088 merge_classes (subclasses[i], classes[i + pos]);
2092 /* And now merge the fields of the structure. */
2093 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2095 if (TREE_CODE (field) == FIELD_DECL)
2097 int num;
2099 /* Bitfields are always classified as integer. Handle them
2100 early, since later code would consider them to be
2101 misaligned integers. */
2102 if (DECL_BIT_FIELD (field))
2104 for (i = int_bit_position (field) / 8 / 8;
2105 i < (int_bit_position (field)
2106 + tree_low_cst (DECL_SIZE (field), 0)
2107 + 63) / 8 / 8; i++)
2108 classes[i] =
2109 merge_classes (X86_64_INTEGER_CLASS,
2110 classes[i]);
2112 else
2114 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2115 TREE_TYPE (field), subclasses,
2116 (int_bit_position (field)
2117 + bit_offset) % 256);
2118 if (!num)
2119 return 0;
2120 for (i = 0; i < num; i++)
2122 int pos =
2123 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2124 classes[i + pos] =
2125 merge_classes (subclasses[i], classes[i + pos]);
2131 /* Arrays are handled as small records. */
2132 else if (TREE_CODE (type) == ARRAY_TYPE)
2134 int num;
2135 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2136 TREE_TYPE (type), subclasses, bit_offset);
2137 if (!num)
2138 return 0;
2140 /* The partial classes are now full classes. */
2141 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2142 subclasses[0] = X86_64_SSE_CLASS;
2143 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2144 subclasses[0] = X86_64_INTEGER_CLASS;
2146 for (i = 0; i < words; i++)
2147 classes[i] = subclasses[i % num];
2149 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2150 else if (TREE_CODE (type) == UNION_TYPE
2151 || TREE_CODE (type) == QUAL_UNION_TYPE)
2154 /* For classes, first merge in the fields of the base classes. */
2154 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2156 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2157 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2158 int i;
2160 for (i = 0; i < n_bases; ++i)
2162 tree binfo = TREE_VEC_ELT (bases, i);
2163 int num;
2164 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2165 tree type = BINFO_TYPE (binfo);
2167 num = classify_argument (TYPE_MODE (type),
2168 type, subclasses,
2169 (offset + (bit_offset % 64)) % 256);
2170 if (!num)
2171 return 0;
2172 for (i = 0; i < num; i++)
2174 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2175 classes[i + pos] =
2176 merge_classes (subclasses[i], classes[i + pos]);
2180 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2182 if (TREE_CODE (field) == FIELD_DECL)
2184 int num;
2185 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2186 TREE_TYPE (field), subclasses,
2187 bit_offset);
2188 if (!num)
2189 return 0;
2190 for (i = 0; i < num; i++)
2191 classes[i] = merge_classes (subclasses[i], classes[i]);
2195 else if (TREE_CODE (type) == SET_TYPE)
2197 if (bytes <= 4)
2199 classes[0] = X86_64_INTEGERSI_CLASS;
2200 return 1;
2202 else if (bytes <= 8)
2204 classes[0] = X86_64_INTEGER_CLASS;
2205 return 1;
2207 else if (bytes <= 12)
2209 classes[0] = X86_64_INTEGER_CLASS;
2210 classes[1] = X86_64_INTEGERSI_CLASS;
2211 return 2;
2213 else
2215 classes[0] = X86_64_INTEGER_CLASS;
2216 classes[1] = X86_64_INTEGER_CLASS;
2217 return 2;
2220 else
2221 abort ();
2223 /* Final merger cleanup. */
2224 for (i = 0; i < words; i++)
2226 /* If one class is MEMORY, everything should be passed in
2227 memory. */
2228 if (classes[i] == X86_64_MEMORY_CLASS)
2229 return 0;
2231 /* The X86_64_SSEUP_CLASS should be always preceded by
2232 X86_64_SSE_CLASS. */
2233 if (classes[i] == X86_64_SSEUP_CLASS
2234 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2235 classes[i] = X86_64_SSE_CLASS;
2237 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2238 if (classes[i] == X86_64_X87UP_CLASS
2239 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2240 classes[i] = X86_64_SSE_CLASS;
2242 return words;
2245 /* Compute the alignment needed. We align all types to natural boundaries with
2246 the exception of XFmode, which is aligned to 64 bits. */
2247 if (mode != VOIDmode && mode != BLKmode)
2249 int mode_alignment = GET_MODE_BITSIZE (mode);
2251 if (mode == XFmode)
2252 mode_alignment = 128;
2253 else if (mode == XCmode)
2254 mode_alignment = 256;
2255 if (COMPLEX_MODE_P (mode))
2256 mode_alignment /= 2;
2257 /* Misaligned fields are always returned in memory. */
2258 if (bit_offset % mode_alignment)
2259 return 0;
2262 /* Classification of atomic types. */
2263 switch (mode)
2265 case DImode:
2266 case SImode:
2267 case HImode:
2268 case QImode:
2269 case CSImode:
2270 case CHImode:
2271 case CQImode:
2272 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2273 classes[0] = X86_64_INTEGERSI_CLASS;
2274 else
2275 classes[0] = X86_64_INTEGER_CLASS;
2276 return 1;
2277 case CDImode:
2278 case TImode:
2279 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2280 return 2;
2281 case CTImode:
2282 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2283 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2284 return 4;
2285 case SFmode:
2286 if (!(bit_offset % 64))
2287 classes[0] = X86_64_SSESF_CLASS;
2288 else
2289 classes[0] = X86_64_SSE_CLASS;
2290 return 1;
2291 case DFmode:
2292 classes[0] = X86_64_SSEDF_CLASS;
2293 return 1;
2294 case XFmode:
2295 classes[0] = X86_64_X87_CLASS;
2296 classes[1] = X86_64_X87UP_CLASS;
2297 return 2;
2298 case TFmode:
2299 case TCmode:
2300 return 0;
2301 case XCmode:
2302 classes[0] = X86_64_X87_CLASS;
2303 classes[1] = X86_64_X87UP_CLASS;
2304 classes[2] = X86_64_X87_CLASS;
2305 classes[3] = X86_64_X87UP_CLASS;
2306 return 4;
2307 case DCmode:
2308 classes[0] = X86_64_SSEDF_CLASS;
2309 classes[1] = X86_64_SSEDF_CLASS;
2310 return 2;
2311 case SCmode:
2312 classes[0] = X86_64_SSE_CLASS;
2313 return 1;
2314 case V4SFmode:
2315 case V4SImode:
2316 case V16QImode:
2317 case V8HImode:
2318 case V2DFmode:
2319 case V2DImode:
2320 classes[0] = X86_64_SSE_CLASS;
2321 classes[1] = X86_64_SSEUP_CLASS;
2322 return 2;
2323 case V2SFmode:
2324 case V2SImode:
2325 case V4HImode:
2326 case V8QImode:
2327 return 0;
2328 case BLKmode:
2329 case VOIDmode:
2330 return 0;
2331 default:
2332 abort ();
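/* For illustration, classifying

       struct s { double d; long l; };

   on x86-64 merges the two fields into two eightbytes: eightbyte 0 becomes
   X86_64_SSEDF_CLASS (from DFmode) and eightbyte 1 X86_64_INTEGER_CLASS
   (from DImode), so the function returns 2.  A struct larger than 16 bytes
   is rejected early and returns 0 (memory).  */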
2336 /* Examine the argument and return the number of registers required in each
2337 class. Return 0 iff the parameter should be passed in memory. */
2338 static int
2339 examine_argument (enum machine_mode mode, tree type, int in_return,
2340 int *int_nregs, int *sse_nregs)
2342 enum x86_64_reg_class class[MAX_CLASSES];
2343 int n = classify_argument (mode, type, class, 0);
2345 *int_nregs = 0;
2346 *sse_nregs = 0;
2347 if (!n)
2348 return 0;
2349 for (n--; n >= 0; n--)
2350 switch (class[n])
2352 case X86_64_INTEGER_CLASS:
2353 case X86_64_INTEGERSI_CLASS:
2354 (*int_nregs)++;
2355 break;
2356 case X86_64_SSE_CLASS:
2357 case X86_64_SSESF_CLASS:
2358 case X86_64_SSEDF_CLASS:
2359 (*sse_nregs)++;
2360 break;
2361 case X86_64_NO_CLASS:
2362 case X86_64_SSEUP_CLASS:
2363 break;
2364 case X86_64_X87_CLASS:
2365 case X86_64_X87UP_CLASS:
2366 if (!in_return)
2367 return 0;
2368 break;
2369 case X86_64_MEMORY_CLASS:
2370 abort ();
2372 return 1;
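/* Continuing the struct { double d; long l; } example, examine_argument
   reports *int_nregs == 1 and *sse_nregs == 1 and returns 1, so the value
   stays in registers as long as both kinds of registers remain free.  */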
2374 /* Construct container for the argument used by GCC interface. See
2375 FUNCTION_ARG for the detailed description. */
2376 static rtx
2377 construct_container (enum machine_mode mode, tree type, int in_return,
2378 int nintregs, int nsseregs, const int * intreg,
2379 int sse_regno)
2381 enum machine_mode tmpmode;
2382 int bytes =
2383 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2384 enum x86_64_reg_class class[MAX_CLASSES];
2385 int n;
2386 int i;
2387 int nexps = 0;
2388 int needed_sseregs, needed_intregs;
2389 rtx exp[MAX_CLASSES];
2390 rtx ret;
2392 n = classify_argument (mode, type, class, 0);
2393 if (TARGET_DEBUG_ARG)
2395 if (!n)
2396 fprintf (stderr, "Memory class\n");
2397 else
2399 fprintf (stderr, "Classes:");
2400 for (i = 0; i < n; i++)
2402 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2404 fprintf (stderr, "\n");
2407 if (!n)
2408 return NULL;
2409 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2410 return NULL;
2411 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2412 return NULL;
2414 /* First construct simple cases. Avoid SCmode, since we want to use a
2415 single register to pass this type. */
2416 if (n == 1 && mode != SCmode)
2417 switch (class[0])
2419 case X86_64_INTEGER_CLASS:
2420 case X86_64_INTEGERSI_CLASS:
2421 return gen_rtx_REG (mode, intreg[0]);
2422 case X86_64_SSE_CLASS:
2423 case X86_64_SSESF_CLASS:
2424 case X86_64_SSEDF_CLASS:
2425 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2426 case X86_64_X87_CLASS:
2427 return gen_rtx_REG (mode, FIRST_STACK_REG);
2428 case X86_64_NO_CLASS:
2429 /* Zero sized array, struct or class. */
2430 return NULL;
2431 default:
2432 abort ();
2434 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2435 && mode != BLKmode)
2436 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2437 if (n == 2
2438 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2439 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2440 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2441 && class[1] == X86_64_INTEGER_CLASS
2442 && (mode == CDImode || mode == TImode || mode == TFmode)
2443 && intreg[0] + 1 == intreg[1])
2444 return gen_rtx_REG (mode, intreg[0]);
2445 if (n == 4
2446 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2447 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2448 && mode != BLKmode)
2449 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2451 /* Otherwise figure out the entries of the PARALLEL. */
2452 for (i = 0; i < n; i++)
2454 switch (class[i])
2456 case X86_64_NO_CLASS:
2457 break;
2458 case X86_64_INTEGER_CLASS:
2459 case X86_64_INTEGERSI_CLASS:
2460 /* Merge TImodes on aligned occasions here too. */
2461 if (i * 8 + 8 > bytes)
2462 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2463 else if (class[i] == X86_64_INTEGERSI_CLASS)
2464 tmpmode = SImode;
2465 else
2466 tmpmode = DImode;
2467 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2468 if (tmpmode == BLKmode)
2469 tmpmode = DImode;
2470 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2471 gen_rtx_REG (tmpmode, *intreg),
2472 GEN_INT (i*8));
2473 intreg++;
2474 break;
2475 case X86_64_SSESF_CLASS:
2476 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2477 gen_rtx_REG (SFmode,
2478 SSE_REGNO (sse_regno)),
2479 GEN_INT (i*8));
2480 sse_regno++;
2481 break;
2482 case X86_64_SSEDF_CLASS:
2483 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2484 gen_rtx_REG (DFmode,
2485 SSE_REGNO (sse_regno)),
2486 GEN_INT (i*8));
2487 sse_regno++;
2488 break;
2489 case X86_64_SSE_CLASS:
2490 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2491 tmpmode = TImode;
2492 else
2493 tmpmode = DImode;
2494 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2495 gen_rtx_REG (tmpmode,
2496 SSE_REGNO (sse_regno)),
2497 GEN_INT (i*8));
2498 if (tmpmode == TImode)
2499 i++;
2500 sse_regno++;
2501 break;
2502 default:
2503 abort ();
2506 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2507 for (i = 0; i < nexps; i++)
2508 XVECEXP (ret, 0, i) = exp [i];
2509 return ret;
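/* For the same two-eightbyte struct, the PARALLEL built above looks roughly
   like this (a sketch; the actual registers depend on the INTREG array and
   SSE_REGNO passed in):

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI rdi)  (const_int 8))])

   i.e. the double travels in an SSE register and the long in an integer
   register, with each piece's byte offset inside the struct recorded.  */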
2512 /* Update the data in CUM to advance over an argument
2513 of mode MODE and data type TYPE.
2514 (TYPE is null for libcalls where that information may not be available.) */
2516 void
2517 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2518 enum machine_mode mode, /* current arg mode */
2519 tree type, /* type of the argument or 0 if lib support */
2520 int named) /* whether or not the argument was named */
2522 int bytes =
2523 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2524 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2526 if (TARGET_DEBUG_ARG)
2527 fprintf (stderr,
2528 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2529 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2530 if (TARGET_64BIT)
2532 int int_nregs, sse_nregs;
2533 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2534 cum->words += words;
2535 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2537 cum->nregs -= int_nregs;
2538 cum->sse_nregs -= sse_nregs;
2539 cum->regno += int_nregs;
2540 cum->sse_regno += sse_nregs;
2542 else
2543 cum->words += words;
2545 else
2547 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2548 && (!type || !AGGREGATE_TYPE_P (type)))
2550 cum->sse_words += words;
2551 cum->sse_nregs -= 1;
2552 cum->sse_regno += 1;
2553 if (cum->sse_nregs <= 0)
2555 cum->sse_nregs = 0;
2556 cum->sse_regno = 0;
2559 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2560 && (!type || !AGGREGATE_TYPE_P (type)))
2562 cum->mmx_words += words;
2563 cum->mmx_nregs -= 1;
2564 cum->mmx_regno += 1;
2565 if (cum->mmx_nregs <= 0)
2567 cum->mmx_nregs = 0;
2568 cum->mmx_regno = 0;
2571 else
2573 cum->words += words;
2574 cum->nregs -= words;
2575 cum->regno += words;
2577 if (cum->nregs <= 0)
2579 cum->nregs = 0;
2580 cum->regno = 0;
2584 return;
2587 /* Define where to put the arguments to a function.
2588 Value is zero to push the argument on the stack,
2589 or a hard register in which to store the argument.
2591 MODE is the argument's machine mode.
2592 TYPE is the data type of the argument (as a tree).
2593 This is null for libcalls where that information may
2594 not be available.
2595 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2596 the preceding args and about the function being called.
2597 NAMED is nonzero if this argument is a named parameter
2598 (otherwise it is an extra parameter matching an ellipsis). */
2601 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2602 enum machine_mode mode, /* current arg mode */
2603 tree type, /* type of the argument or 0 if lib support */
2604 int named) /* != 0 for normal args, == 0 for ... args */
2606 rtx ret = NULL_RTX;
2607 int bytes =
2608 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2609 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2610 static bool warnedsse, warnedmmx;
2612 /* Handle a hidden AL argument containing the number of registers for varargs
2613 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2614 any AL settings. */
2615 if (mode == VOIDmode)
2617 if (TARGET_64BIT)
2618 return GEN_INT (cum->maybe_vaarg
2619 ? (cum->sse_nregs < 0
2620 ? SSE_REGPARM_MAX
2621 : cum->sse_regno)
2622 : -1);
2623 else
2624 return constm1_rtx;
2626 if (TARGET_64BIT)
2627 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2628 &x86_64_int_parameter_registers [cum->regno],
2629 cum->sse_regno);
2630 else
2631 switch (mode)
2633 /* For now, pass fp/complex values on the stack. */
2634 default:
2635 break;
2637 case BLKmode:
2638 if (bytes < 0)
2639 break;
2640 /* FALLTHRU */
2641 case DImode:
2642 case SImode:
2643 case HImode:
2644 case QImode:
2645 if (words <= cum->nregs)
2647 int regno = cum->regno;
2649 /* Fastcall allocates the first two DWORD (SImode) or
2650 smaller arguments to ECX and EDX. */
2651 if (cum->fastcall)
2653 if (mode == BLKmode || mode == DImode)
2654 break;
2656 /* ECX, not EAX, is the first allocated register. */
2657 if (regno == 0)
2658 regno = 2;
2660 ret = gen_rtx_REG (mode, regno);
2662 break;
2663 case TImode:
2664 case V16QImode:
2665 case V8HImode:
2666 case V4SImode:
2667 case V2DImode:
2668 case V4SFmode:
2669 case V2DFmode:
2670 if (!type || !AGGREGATE_TYPE_P (type))
2672 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2674 warnedsse = true;
2675 warning ("SSE vector argument without SSE enabled "
2676 "changes the ABI");
2678 if (cum->sse_nregs)
2679 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2681 break;
2682 case V8QImode:
2683 case V4HImode:
2684 case V2SImode:
2685 case V2SFmode:
2686 if (!type || !AGGREGATE_TYPE_P (type))
2688 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2690 warnedmmx = true;
2691 warning ("MMX vector argument without MMX enabled "
2692 "changes the ABI");
2694 if (cum->mmx_nregs)
2695 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2697 break;
2700 if (TARGET_DEBUG_ARG)
2702 fprintf (stderr,
2703 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2704 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2706 if (ret)
2707 print_simple_rtl (stderr, ret);
2708 else
2709 fprintf (stderr, ", stack");
2711 fprintf (stderr, " )\n");
2714 return ret;
2717 /* A C expression that indicates when an argument must be passed by
2718 reference. If nonzero for an argument, a copy of that argument is
2719 made in memory and a pointer to the argument is passed instead of
2720 the argument itself. The pointer is passed in whatever way is
2721 appropriate for passing a pointer to that type. */
2724 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2725 enum machine_mode mode ATTRIBUTE_UNUSED,
2726 tree type, int named ATTRIBUTE_UNUSED)
2728 if (!TARGET_64BIT)
2729 return 0;
2731 if (type && int_size_in_bytes (type) == -1)
2733 if (TARGET_DEBUG_ARG)
2734 fprintf (stderr, "function_arg_pass_by_reference\n");
2735 return 1;
2738 return 0;
2741 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2742 passing ABI. */
2743 static bool
2744 contains_128bit_aligned_vector_p (tree type)
2746 enum machine_mode mode = TYPE_MODE (type);
2747 if (SSE_REG_MODE_P (mode)
2748 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2749 return true;
2750 if (TYPE_ALIGN (type) < 128)
2751 return false;
2753 if (AGGREGATE_TYPE_P (type))
2755 /* Walk the aggregates recursively. */
2756 if (TREE_CODE (type) == RECORD_TYPE
2757 || TREE_CODE (type) == UNION_TYPE
2758 || TREE_CODE (type) == QUAL_UNION_TYPE)
2760 tree field;
2762 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2764 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2765 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2766 int i;
2768 for (i = 0; i < n_bases; ++i)
2770 tree binfo = TREE_VEC_ELT (bases, i);
2771 tree type = BINFO_TYPE (binfo);
2773 if (contains_128bit_aligned_vector_p (type))
2774 return true;
2777 /* And now merge the fields of the structure. */
2778 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2780 if (TREE_CODE (field) == FIELD_DECL
2781 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2782 return true;
2785 /* Just for use if some language passes arrays by value. */
2786 else if (TREE_CODE (type) == ARRAY_TYPE)
2788 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2789 return true;
2791 else
2792 abort ();
2794 return false;
2797 /* Gives the alignment boundary, in bits, of an argument with the
2798 specified mode and type. */
2801 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2803 int align;
2804 if (type)
2805 align = TYPE_ALIGN (type);
2806 else
2807 align = GET_MODE_ALIGNMENT (mode);
2808 if (align < PARM_BOUNDARY)
2809 align = PARM_BOUNDARY;
2810 if (!TARGET_64BIT)
2812 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2813 make an exception for SSE modes since these require 128bit
2814 alignment.
2816 The handling here differs from field_alignment. ICC aligns MMX
2817 arguments to 4 byte boundaries, while structure fields are aligned
2818 to 8 byte boundaries. */
2819 if (!type)
2821 if (!SSE_REG_MODE_P (mode))
2822 align = PARM_BOUNDARY;
2824 else
2826 if (!contains_128bit_aligned_vector_p (type))
2827 align = PARM_BOUNDARY;
2830 if (align > 128)
2831 align = 128;
2832 return align;
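/* Rough 32-bit examples of the result, assuming the usual PARM_BOUNDARY of
   32 bits: a plain int or double argument gets 32, while an __m128 argument
   or a struct containing such a vector gets 128, i.e. a 16-byte aligned
   stack slot.  */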
2835 /* Return true if N is a possible register number of function value. */
2836 bool
2837 ix86_function_value_regno_p (int regno)
2839 if (!TARGET_64BIT)
2841 return ((regno) == 0
2842 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2843 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2845 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2846 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2847 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2850 /* Define how to find the value returned by a function.
2851 VALTYPE is the data type of the value (as a tree).
2852 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2853 otherwise, FUNC is 0. */
2855 ix86_function_value (tree valtype)
2857 if (TARGET_64BIT)
2859 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2860 REGPARM_MAX, SSE_REGPARM_MAX,
2861 x86_64_int_return_registers, 0);
2862 /* For zero sized structures, construct_container returns NULL, but we need
2863 to keep the rest of the compiler happy by returning a meaningful value. */
2864 if (!ret)
2865 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2866 return ret;
2868 else
2869 return gen_rtx_REG (TYPE_MODE (valtype),
2870 ix86_value_regno (TYPE_MODE (valtype)));
2873 /* Return nonzero iff TYPE is returned in memory. */
2875 ix86_return_in_memory (tree type)
2877 int needed_intregs, needed_sseregs, size;
2878 enum machine_mode mode = TYPE_MODE (type);
2880 if (TARGET_64BIT)
2881 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2883 if (mode == BLKmode)
2884 return 1;
2886 size = int_size_in_bytes (type);
2888 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2889 return 0;
2891 if (VECTOR_MODE_P (mode) || mode == TImode)
2893 /* User-created vectors small enough to fit in EAX. */
2894 if (size < 8)
2895 return 0;
2897 /* MMX/3dNow values are returned on the stack, since we've
2898 got to EMMS/FEMMS before returning. */
2899 if (size == 8)
2900 return 1;
2902 /* SSE values are returned in XMM0, except when it doesn't exist. */
2903 if (size == 16)
2904 return (TARGET_SSE ? 0 : 1);
2907 if (mode == XFmode)
2908 return 0;
2910 if (size > 12)
2911 return 1;
2912 return 0;
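/* Quick 32-bit examples of the rules above: an 8-byte MMX-sized vector is
   returned in memory (we must EMMS before returning), a 16-byte vector is
   returned in %xmm0 only when SSE is enabled, and an XFmode long double is
   returned in a register (the 387 stack) rather than in memory.  */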
2915 /* When returning SSE vector types, we have a choice of either
2916 (1) being abi incompatible with a -march switch, or
2917 (2) generating an error.
2918 Given no good solution, I think the safest thing is one warning.
2919 The user won't be able to use -Werror, but....
2921 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2922 called in response to actually generating a caller or callee that
2923 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2924 via aggregate_value_p for general type probing from tree-ssa. */
2926 static rtx
2927 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2929 static bool warned;
2931 if (!TARGET_SSE && type && !warned)
2933 /* Look at the return type of the function, not the function type. */
2934 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2936 if (mode == TImode
2937 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2939 warned = true;
2940 warning ("SSE vector return without SSE enabled changes the ABI");
2944 return NULL;
2947 /* Define how to find the value returned by a library function
2948 assuming the value has mode MODE. */
2950 ix86_libcall_value (enum machine_mode mode)
2952 if (TARGET_64BIT)
2954 switch (mode)
2956 case SFmode:
2957 case SCmode:
2958 case DFmode:
2959 case DCmode:
2960 return gen_rtx_REG (mode, FIRST_SSE_REG);
2961 case XFmode:
2962 case XCmode:
2963 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2964 case TFmode:
2965 case TCmode:
2966 return NULL;
2967 default:
2968 return gen_rtx_REG (mode, 0);
2971 else
2972 return gen_rtx_REG (mode, ix86_value_regno (mode));
2975 /* Given a mode, return the register to use for a return value. */
2977 static int
2978 ix86_value_regno (enum machine_mode mode)
2980 /* Floating point return values in %st(0). */
2981 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2982 return FIRST_FLOAT_REG;
2983 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2984 we prevent this case when sse is not available. */
2985 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2986 return FIRST_SSE_REG;
2987 /* Everything else in %eax. */
2988 return 0;
2991 /* Create the va_list data type. */
2993 static tree
2994 ix86_build_builtin_va_list (void)
2996 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2998 /* For i386 we use a plain pointer to the argument area. */
2999 if (!TARGET_64BIT)
3000 return build_pointer_type (char_type_node);
3002 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3003 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3005 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3006 unsigned_type_node);
3007 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3008 unsigned_type_node);
3009 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3010 ptr_type_node);
3011 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3012 ptr_type_node);
3014 DECL_FIELD_CONTEXT (f_gpr) = record;
3015 DECL_FIELD_CONTEXT (f_fpr) = record;
3016 DECL_FIELD_CONTEXT (f_ovf) = record;
3017 DECL_FIELD_CONTEXT (f_sav) = record;
3019 TREE_CHAIN (record) = type_decl;
3020 TYPE_NAME (record) = type_decl;
3021 TYPE_FIELDS (record) = f_gpr;
3022 TREE_CHAIN (f_gpr) = f_fpr;
3023 TREE_CHAIN (f_fpr) = f_ovf;
3024 TREE_CHAIN (f_ovf) = f_sav;
3026 layout_type (record);
3028 /* The correct type is an array type of one element. */
3029 return build_array_type (record, build_index_type (size_zero_node));
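/* The record built above corresponds to the familiar x86-64 va_list layout,
   roughly equivalent to declaring:

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   (a sketch of the layout, not the code used to construct it).  */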
3032 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3034 static void
3035 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3036 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3037 int no_rtl)
3039 CUMULATIVE_ARGS next_cum;
3040 rtx save_area = NULL_RTX, mem;
3041 rtx label;
3042 rtx label_ref;
3043 rtx tmp_reg;
3044 rtx nsse_reg;
3045 int set;
3046 tree fntype;
3047 int stdarg_p;
3048 int i;
3050 if (!TARGET_64BIT)
3051 return;
3053 /* Indicate that we need to allocate space on the stack for the varargs save area. */
3054 ix86_save_varrargs_registers = 1;
3056 cfun->stack_alignment_needed = 128;
3058 fntype = TREE_TYPE (current_function_decl);
3059 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3060 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3061 != void_type_node));
3063 /* For varargs, we do not want to skip the dummy va_dcl argument.
3064 For stdargs, we do want to skip the last named argument. */
3065 next_cum = *cum;
3066 if (stdarg_p)
3067 function_arg_advance (&next_cum, mode, type, 1);
3069 if (!no_rtl)
3070 save_area = frame_pointer_rtx;
3072 set = get_varargs_alias_set ();
3074 for (i = next_cum.regno; i < ix86_regparm; i++)
3076 mem = gen_rtx_MEM (Pmode,
3077 plus_constant (save_area, i * UNITS_PER_WORD));
3078 set_mem_alias_set (mem, set);
3079 emit_move_insn (mem, gen_rtx_REG (Pmode,
3080 x86_64_int_parameter_registers[i]));
3083 if (next_cum.sse_nregs)
3085 /* Now emit code to save SSE registers. The AX parameter contains the number
3086 of SSE parameter registers used to call this function. We use the
3087 sse_prologue_save insn template, which produces a computed jump across
3088 the SSE saves. We need some preparation work to get this working. */
3090 label = gen_label_rtx ();
3091 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3093 /* Compute the address to jump to:
3094 label - 4*eax + nnamed_sse_arguments*4 */
3095 tmp_reg = gen_reg_rtx (Pmode);
3096 nsse_reg = gen_reg_rtx (Pmode);
3097 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3098 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3099 gen_rtx_MULT (Pmode, nsse_reg,
3100 GEN_INT (4))));
3101 if (next_cum.sse_regno)
3102 emit_move_insn
3103 (nsse_reg,
3104 gen_rtx_CONST (DImode,
3105 gen_rtx_PLUS (DImode,
3106 label_ref,
3107 GEN_INT (next_cum.sse_regno * 4))));
3108 else
3109 emit_move_insn (nsse_reg, label_ref);
3110 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3112 /* Compute the address of the memory block we save into. We always use a
3113 pointer pointing 127 bytes after the first byte to store; this is needed
3114 to keep the instruction size limited to 4 bytes. */
3115 tmp_reg = gen_reg_rtx (Pmode);
3116 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3117 plus_constant (save_area,
3118 8 * REGPARM_MAX + 127)));
3119 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3120 set_mem_alias_set (mem, set);
3121 set_mem_align (mem, BITS_PER_WORD);
3123 /* And finally do the dirty job! */
3124 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3125 GEN_INT (next_cum.sse_regno), label));
3130 /* Implement va_start. */
3132 void
3133 ix86_va_start (tree valist, rtx nextarg)
3135 HOST_WIDE_INT words, n_gpr, n_fpr;
3136 tree f_gpr, f_fpr, f_ovf, f_sav;
3137 tree gpr, fpr, ovf, sav, t;
3139 /* Only 64bit target needs something special. */
3140 if (!TARGET_64BIT)
3142 std_expand_builtin_va_start (valist, nextarg);
3143 return;
3146 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3147 f_fpr = TREE_CHAIN (f_gpr);
3148 f_ovf = TREE_CHAIN (f_fpr);
3149 f_sav = TREE_CHAIN (f_ovf);
3151 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3152 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3153 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3154 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3155 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3157 /* Count number of gp and fp argument registers used. */
3158 words = current_function_args_info.words;
3159 n_gpr = current_function_args_info.regno;
3160 n_fpr = current_function_args_info.sse_regno;
3162 if (TARGET_DEBUG_ARG)
3163 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3164 (int) words, (int) n_gpr, (int) n_fpr);
3166 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3167 build_int_2 (n_gpr * 8, 0));
3168 TREE_SIDE_EFFECTS (t) = 1;
3169 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3171 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3172 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3173 TREE_SIDE_EFFECTS (t) = 1;
3174 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3176 /* Find the overflow area. */
3177 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3178 if (words != 0)
3179 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3180 build_int_2 (words * UNITS_PER_WORD, 0));
3181 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3182 TREE_SIDE_EFFECTS (t) = 1;
3183 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3185 /* Find the register save area.
3186 The function prologue saves it right above the stack frame. */
3187 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3188 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3189 TREE_SIDE_EFFECTS (t) = 1;
3190 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
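/* Worked example of the stores above, assuming the usual REGPARM_MAX of 6:
   for

       int f (int a, double x, ...)

   the named arguments consumed one GP and one SSE register, so va_start
   records gp_offset = 1 * 8 = 8 and fp_offset = 1 * 16 + 8 * 6 = 64, with
   overflow_arg_area pointing just past any named stack words.  */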
3193 /* Implement va_arg. */
3195 tree
3196 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3198 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3199 tree f_gpr, f_fpr, f_ovf, f_sav;
3200 tree gpr, fpr, ovf, sav, t;
3201 int size, rsize;
3202 tree lab_false, lab_over = NULL_TREE;
3203 tree addr, t2;
3204 rtx container;
3205 int indirect_p = 0;
3206 tree ptrtype;
3208 /* Only 64bit target needs something special. */
3209 if (!TARGET_64BIT)
3210 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3212 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3213 f_fpr = TREE_CHAIN (f_gpr);
3214 f_ovf = TREE_CHAIN (f_fpr);
3215 f_sav = TREE_CHAIN (f_ovf);
3217 valist = build_fold_indirect_ref (valist);
3218 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3219 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3220 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3221 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3223 size = int_size_in_bytes (type);
3224 if (size == -1)
3226 /* Variable-size types are passed by reference. */
3227 indirect_p = 1;
3228 type = build_pointer_type (type);
3229 size = int_size_in_bytes (type);
3231 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3233 container = construct_container (TYPE_MODE (type), type, 0,
3234 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3236 /* Pull the value out of the saved registers ...  */
3239 addr = create_tmp_var (ptr_type_node, "addr");
3240 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3242 if (container)
3244 int needed_intregs, needed_sseregs;
3245 int need_temp;
3246 tree int_addr, sse_addr;
3248 lab_false = create_artificial_label ();
3249 lab_over = create_artificial_label ();
3251 examine_argument (TYPE_MODE (type), type, 0,
3252 &needed_intregs, &needed_sseregs);
3255 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3256 || TYPE_ALIGN (type) > 128);
3258 /* In case we are passing a structure, verify that it is a consecutive block
3259 on the register save area. If not, we need to do moves. */
3260 if (!need_temp && !REG_P (container))
3262 /* Verify that all registers are strictly consecutive */
3263 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3265 int i;
3267 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3269 rtx slot = XVECEXP (container, 0, i);
3270 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3271 || INTVAL (XEXP (slot, 1)) != i * 16)
3272 need_temp = 1;
3275 else
3277 int i;
3279 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3281 rtx slot = XVECEXP (container, 0, i);
3282 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3283 || INTVAL (XEXP (slot, 1)) != i * 8)
3284 need_temp = 1;
3288 if (!need_temp)
3290 int_addr = addr;
3291 sse_addr = addr;
3293 else
3295 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3296 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3297 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3298 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3300 /* First ensure that we fit completely in registers. */
3301 if (needed_intregs)
3303 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3304 TREE_TYPE (t) = TREE_TYPE (gpr);
3305 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3306 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3307 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3308 gimplify_and_add (t, pre_p);
3310 if (needed_sseregs)
3312 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3313 + REGPARM_MAX * 8, 0);
3314 TREE_TYPE (t) = TREE_TYPE (fpr);
3315 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3316 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3317 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3318 gimplify_and_add (t, pre_p);
3321 /* Compute index to start of area used for integer regs. */
3322 if (needed_intregs)
3324 /* int_addr = gpr + sav; */
3325 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3326 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3327 gimplify_and_add (t, pre_p);
3329 if (needed_sseregs)
3331 /* sse_addr = fpr + sav; */
3332 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3333 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3334 gimplify_and_add (t, pre_p);
3336 if (need_temp)
3338 int i;
3339 tree temp = create_tmp_var (type, "va_arg_tmp");
3341 /* addr = &temp; */
3342 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3343 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3344 gimplify_and_add (t, pre_p);
3346 for (i = 0; i < XVECLEN (container, 0); i++)
3348 rtx slot = XVECEXP (container, 0, i);
3349 rtx reg = XEXP (slot, 0);
3350 enum machine_mode mode = GET_MODE (reg);
3351 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3352 tree addr_type = build_pointer_type (piece_type);
3353 tree src_addr, src;
3354 int src_offset;
3355 tree dest_addr, dest;
3357 if (SSE_REGNO_P (REGNO (reg)))
3359 src_addr = sse_addr;
3360 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3362 else
3364 src_addr = int_addr;
3365 src_offset = REGNO (reg) * 8;
3367 src_addr = fold_convert (addr_type, src_addr);
3368 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3369 size_int (src_offset)));
3370 src = build_fold_indirect_ref (src_addr);
3372 dest_addr = fold_convert (addr_type, addr);
3373 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3374 size_int (INTVAL (XEXP (slot, 1)))));
3375 dest = build_fold_indirect_ref (dest_addr);
3377 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3378 gimplify_and_add (t, pre_p);
3382 if (needed_intregs)
3384 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3385 build_int_2 (needed_intregs * 8, 0));
3386 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3387 gimplify_and_add (t, pre_p);
3389 if (needed_sseregs)
3392 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3393 build_int_2 (needed_sseregs * 16, 0));
3394 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3395 gimplify_and_add (t, pre_p);
3398 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3399 gimplify_and_add (t, pre_p);
3401 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3402 append_to_statement_list (t, pre_p);
3405 /* ... otherwise out of the overflow area. */
3407 /* Care for on-stack alignment if needed. */
3408 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3409 t = ovf;
3410 else
3412 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3413 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3414 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3416 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3418 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3419 gimplify_and_add (t2, pre_p);
3421 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3422 build_int_2 (rsize * UNITS_PER_WORD, 0));
3423 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3424 gimplify_and_add (t, pre_p);
3426 if (container)
3428 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3429 append_to_statement_list (t, pre_p);
3432 ptrtype = build_pointer_type (type);
3433 addr = fold_convert (ptrtype, addr);
3435 if (indirect_p)
3436 addr = build_fold_indirect_ref (addr);
3437 return build_fold_indirect_ref (addr);
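/* Illustrative sketch, not part of GCC: the tree expressions built above for
   the overflow area implement the usual align-up idiom.  Assuming ALIGN is a
   power of two, a plain C equivalent would be:  */
#if 0
static char *
align_up (char *addr, unsigned long align)
{
  /* Round ADDR up to the next multiple of ALIGN (align must be a power of 2).  */
  return (char *) (((unsigned long) addr + align - 1) & -align);
}
#endif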
3440 /* Return nonzero if OP is either an i387 or SSE fp register. */
3442 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3444 return ANY_FP_REG_P (op);
3447 /* Return nonzero if OP is an i387 fp register. */
3449 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3451 return FP_REG_P (op);
3454 /* Return nonzero if OP is a non-fp register_operand. */
3456 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3458 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3461 /* Return nonzero if OP is a register operand other than an
3462 i387 fp register. */
3464 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3466 return register_operand (op, mode) && !FP_REG_P (op);
3469 /* Return nonzero if OP is a general operand representable on x86_64. */
3472 x86_64_general_operand (rtx op, enum machine_mode mode)
3474 if (!TARGET_64BIT)
3475 return general_operand (op, mode);
3476 if (nonimmediate_operand (op, mode))
3477 return 1;
3478 return x86_64_sign_extended_value (op);
3481 /* Return nonzero if OP is a general operand representable on x86_64
3482 as either a sign extended or zero extended constant. */
3485 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3487 if (!TARGET_64BIT)
3488 return general_operand (op, mode);
3489 if (nonimmediate_operand (op, mode))
3490 return 1;
3491 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3494 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3497 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3499 if (!TARGET_64BIT)
3500 return nonmemory_operand (op, mode);
3501 if (register_operand (op, mode))
3502 return 1;
3503 return x86_64_sign_extended_value (op);
3506 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3509 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3511 if (!TARGET_64BIT || !flag_pic)
3512 return nonmemory_operand (op, mode);
3513 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3514 return 1;
3515 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3516 return 1;
3517 return 0;
3520 /* Return nonzero if OPNUM's MEM should be matched
3521 in movabs* patterns. */
3524 ix86_check_movabs (rtx insn, int opnum)
3526 rtx set, mem;
3528 set = PATTERN (insn);
3529 if (GET_CODE (set) == PARALLEL)
3530 set = XVECEXP (set, 0, 0);
3531 if (GET_CODE (set) != SET)
3532 abort ();
3533 mem = XEXP (set, opnum);
3534 while (GET_CODE (mem) == SUBREG)
3535 mem = SUBREG_REG (mem);
3536 if (GET_CODE (mem) != MEM)
3537 abort ();
3538 return (volatile_ok || !MEM_VOLATILE_P (mem));
3541 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3544 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3546 if (!TARGET_64BIT)
3547 return nonmemory_operand (op, mode);
3548 if (register_operand (op, mode))
3549 return 1;
3550 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3553 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3556 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3558 if (!TARGET_64BIT)
3559 return immediate_operand (op, mode);
3560 return x86_64_sign_extended_value (op);
3563 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value. */
3566 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3568 return x86_64_zero_extended_value (op);
3571 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3572 for shift & compare patterns, as shifting by 0 does not change flags),
3573 else return zero. */
3576 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3578 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3581 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3582 reference and a constant. */
3585 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3587 switch (GET_CODE (op))
3589 case SYMBOL_REF:
3590 case LABEL_REF:
3591 return 1;
3593 case CONST:
3594 op = XEXP (op, 0);
3595 if (GET_CODE (op) == SYMBOL_REF
3596 || GET_CODE (op) == LABEL_REF
3597 || (GET_CODE (op) == UNSPEC
3598 && (XINT (op, 1) == UNSPEC_GOT
3599 || XINT (op, 1) == UNSPEC_GOTOFF
3600 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3601 return 1;
3602 if (GET_CODE (op) != PLUS
3603 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3604 return 0;
3606 op = XEXP (op, 0);
3607 if (GET_CODE (op) == SYMBOL_REF
3608 || GET_CODE (op) == LABEL_REF)
3609 return 1;
3610 /* Only @GOTOFF gets offsets. */
3611 if (GET_CODE (op) != UNSPEC
3612 || XINT (op, 1) != UNSPEC_GOTOFF)
3613 return 0;
3615 op = XVECEXP (op, 0, 0);
3616 if (GET_CODE (op) == SYMBOL_REF
3617 || GET_CODE (op) == LABEL_REF)
3618 return 1;
3619 return 0;
3621 default:
3622 return 0;
3626 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3629 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3631 if (GET_CODE (op) != CONST)
3632 return 0;
3633 op = XEXP (op, 0);
3634 if (TARGET_64BIT)
3636 if (GET_CODE (op) == UNSPEC
3637 && XINT (op, 1) == UNSPEC_GOTPCREL)
3638 return 1;
3639 if (GET_CODE (op) == PLUS
3640 && GET_CODE (XEXP (op, 0)) == UNSPEC
3641 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3642 return 1;
3644 else
3646 if (GET_CODE (op) == UNSPEC)
3647 return 1;
3648 if (GET_CODE (op) != PLUS
3649 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3650 return 0;
3651 op = XEXP (op, 0);
3652 if (GET_CODE (op) == UNSPEC)
3653 return 1;
3655 return 0;
3658 /* Return true if OP is a symbolic operand that resolves locally. */
3660 static int
3661 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3663 if (GET_CODE (op) == CONST
3664 && GET_CODE (XEXP (op, 0)) == PLUS
3665 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3666 op = XEXP (XEXP (op, 0), 0);
3668 if (GET_CODE (op) == LABEL_REF)
3669 return 1;
3671 if (GET_CODE (op) != SYMBOL_REF)
3672 return 0;
3674 if (SYMBOL_REF_LOCAL_P (op))
3675 return 1;
3677 /* There is, however, a not insubstantial body of code in the rest of
3678 the compiler that assumes it can just stick the results of
3679 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3680 /* ??? This is a hack. Should update the body of the compiler to
3681 always create a DECL and invoke targetm.encode_section_info. */
3682 if (strncmp (XSTR (op, 0), internal_label_prefix,
3683 internal_label_prefix_len) == 0)
3684 return 1;
3686 return 0;
3689 /* Test for various thread-local symbols. */
3692 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3694 if (GET_CODE (op) != SYMBOL_REF)
3695 return 0;
3696 return SYMBOL_REF_TLS_MODEL (op);
3699 static inline int
3700 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3702 if (GET_CODE (op) != SYMBOL_REF)
3703 return 0;
3704 return SYMBOL_REF_TLS_MODEL (op) == kind;
3708 global_dynamic_symbolic_operand (rtx op,
3709 enum machine_mode mode ATTRIBUTE_UNUSED)
3711 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3715 local_dynamic_symbolic_operand (rtx op,
3716 enum machine_mode mode ATTRIBUTE_UNUSED)
3718 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3722 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3724 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3728 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3730 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3733 /* Test for a valid operand for a call instruction. Don't allow the
3734 arg pointer register or virtual regs since they may decay into
3735 reg + const, which the patterns can't handle. */
3738 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3740 /* Disallow indirect through a virtual register. This leads to
3741 compiler aborts when trying to eliminate them. */
3742 if (GET_CODE (op) == REG
3743 && (op == arg_pointer_rtx
3744 || op == frame_pointer_rtx
3745 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3746 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3747 return 0;
3749 /* Disallow `call 1234'. Due to varying assembler lameness this
3750 gets either rejected or translated to `call .+1234'. */
3751 if (GET_CODE (op) == CONST_INT)
3752 return 0;
3754 /* Explicitly allow SYMBOL_REF even if pic. */
3755 if (GET_CODE (op) == SYMBOL_REF)
3756 return 1;
3758 /* Otherwise we can allow any general_operand in the address. */
3759 return general_operand (op, Pmode);
3762 /* Test for a valid operand for a call instruction. Don't allow the
3763 arg pointer register or virtual regs since they may decay into
3764 reg + const, which the patterns can't handle. */
3767 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3769 /* Disallow indirect through a virtual register. This leads to
3770 compiler aborts when trying to eliminate them. */
3771 if (GET_CODE (op) == REG
3772 && (op == arg_pointer_rtx
3773 || op == frame_pointer_rtx
3774 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3775 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3776 return 0;
3778 /* Explicitly allow SYMBOL_REF even if pic. */
3779 if (GET_CODE (op) == SYMBOL_REF)
3780 return 1;
3782 /* Otherwise we can only allow register operands. */
3783 return register_operand (op, Pmode);
3787 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3789 if (GET_CODE (op) == CONST
3790 && GET_CODE (XEXP (op, 0)) == PLUS
3791 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3792 op = XEXP (XEXP (op, 0), 0);
3793 return GET_CODE (op) == SYMBOL_REF;
3796 /* Match exactly zero and one. */
3799 const0_operand (rtx op, enum machine_mode mode)
3801 return op == CONST0_RTX (mode);
3805 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3807 return op == const1_rtx;
3810 /* Match 2, 4, or 8. Used for leal multiplicands. */
3813 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3815 return (GET_CODE (op) == CONST_INT
3816 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3820 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3822 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3826 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3828 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3832 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3834 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3838 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3840 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3844 /* True if this is a constant appropriate for an increment or decrement. */
3847 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3849 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
3850 register, since the carry flag is not set. */
3851 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3852 return 0;
3853 return op == const1_rtx || op == constm1_rtx;
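/* Illustrative note, not part of GCC: inc/dec leave the carry flag untouched,
   so on Pentium 4 / Nocona the flags register is only partially written and
   later flag readers can stall; "addl $1, %reg" is preferred there even
   though it is one byte longer.  */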
3856 /* Return nonzero if OP is acceptable as operand of DImode shift
3857 expander. */
3860 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3862 if (TARGET_64BIT)
3863 return nonimmediate_operand (op, mode);
3864 else
3865 return register_operand (op, mode);
3868 /* Return false if this is the stack pointer, or any other fake
3869 register eliminable to the stack pointer. Otherwise, this is
3870 a register operand.
3872 This is used to prevent esp from being used as an index reg,
3873 which would only happen in pathological cases. */
3876 reg_no_sp_operand (rtx op, enum machine_mode mode)
3878 rtx t = op;
3879 if (GET_CODE (t) == SUBREG)
3880 t = SUBREG_REG (t);
3881 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3882 return 0;
3884 return register_operand (op, mode);
3888 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3890 return MMX_REG_P (op);
3893 /* Return false if this is any eliminable register. Otherwise
3894 general_operand. */
3897 general_no_elim_operand (rtx op, enum machine_mode mode)
3899 rtx t = op;
3900 if (GET_CODE (t) == SUBREG)
3901 t = SUBREG_REG (t);
3902 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3903 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3904 || t == virtual_stack_dynamic_rtx)
3905 return 0;
3906 if (REG_P (t)
3907 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3908 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3909 return 0;
3911 return general_operand (op, mode);
3914 /* Return false if this is any eliminable register. Otherwise
3915 register_operand or const_int. */
3918 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3920 rtx t = op;
3921 if (GET_CODE (t) == SUBREG)
3922 t = SUBREG_REG (t);
3923 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3924 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3925 || t == virtual_stack_dynamic_rtx)
3926 return 0;
3928 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3931 /* Return false if this is any eliminable register or stack register,
3932 otherwise work like register_operand. */
3935 index_register_operand (rtx op, enum machine_mode mode)
3937 rtx t = op;
3938 if (GET_CODE (t) == SUBREG)
3939 t = SUBREG_REG (t);
3940 if (!REG_P (t))
3941 return 0;
3942 if (t == arg_pointer_rtx
3943 || t == frame_pointer_rtx
3944 || t == virtual_incoming_args_rtx
3945 || t == virtual_stack_vars_rtx
3946 || t == virtual_stack_dynamic_rtx
3947 || REGNO (t) == STACK_POINTER_REGNUM)
3948 return 0;
3950 return general_operand (op, mode);
3953 /* Return true if op is a Q_REGS class register. */
3956 q_regs_operand (rtx op, enum machine_mode mode)
3958 if (mode != VOIDmode && GET_MODE (op) != mode)
3959 return 0;
3960 if (GET_CODE (op) == SUBREG)
3961 op = SUBREG_REG (op);
3962 return ANY_QI_REG_P (op);
3965 /* Return true if op is a flags register. */
3968 flags_reg_operand (rtx op, enum machine_mode mode)
3970 if (mode != VOIDmode && GET_MODE (op) != mode)
3971 return 0;
3972 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3975 /* Return true if op is a NON_Q_REGS class register. */
3978 non_q_regs_operand (rtx op, enum machine_mode mode)
3980 if (mode != VOIDmode && GET_MODE (op) != mode)
3981 return 0;
3982 if (GET_CODE (op) == SUBREG)
3983 op = SUBREG_REG (op);
3984 return NON_QI_REG_P (op);
3988 zero_extended_scalar_load_operand (rtx op,
3989 enum machine_mode mode ATTRIBUTE_UNUSED)
3991 unsigned n_elts;
3992 if (GET_CODE (op) != MEM)
3993 return 0;
3994 op = maybe_get_pool_constant (op);
3995 if (!op)
3996 return 0;
3997 if (GET_CODE (op) != CONST_VECTOR)
3998 return 0;
3999 n_elts =
4000 (GET_MODE_SIZE (GET_MODE (op)) /
4001 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4002 for (n_elts--; n_elts > 0; n_elts--)
4004 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4005 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4006 return 0;
4008 return 1;
4011 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
4013 vector_move_operand (rtx op, enum machine_mode mode)
4015 if (nonimmediate_operand (op, mode))
4016 return 1;
4017 if (GET_MODE (op) != mode && mode != VOIDmode)
4018 return 0;
4019 return (op == CONST0_RTX (GET_MODE (op)));
4022 /* Return true if op is a valid address that does not contain
4023 a segment override. */
4026 no_seg_address_operand (rtx op, enum machine_mode mode)
4028 struct ix86_address parts;
4030 if (! address_operand (op, mode))
4031 return 0;
4033 if (! ix86_decompose_address (op, &parts))
4034 abort ();
4036 return parts.seg == SEG_DEFAULT;
4039 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4040 insns. */
4042 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4044 enum rtx_code code = GET_CODE (op);
4045 switch (code)
4047 /* Operations supported directly. */
4048 case EQ:
4049 case LT:
4050 case LE:
4051 case UNORDERED:
4052 case NE:
4053 case UNGE:
4054 case UNGT:
4055 case ORDERED:
4056 return 1;
4057 /* These are equivalent to ones above in non-IEEE comparisons. */
4058 case UNEQ:
4059 case UNLT:
4060 case UNLE:
4061 case LTGT:
4062 case GE:
4063 case GT:
4064 return !TARGET_IEEE_FP;
4065 default:
4066 return 0;
4069 /* Return 1 if OP is a valid comparison operator in valid mode. */
4071 ix86_comparison_operator (rtx op, enum machine_mode mode)
4073 enum machine_mode inmode;
4074 enum rtx_code code = GET_CODE (op);
4075 if (mode != VOIDmode && GET_MODE (op) != mode)
4076 return 0;
4077 if (!COMPARISON_P (op))
4078 return 0;
4079 inmode = GET_MODE (XEXP (op, 0));
4081 if (inmode == CCFPmode || inmode == CCFPUmode)
4083 enum rtx_code second_code, bypass_code;
4084 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4085 return (bypass_code == NIL && second_code == NIL);
4087 switch (code)
4089 case EQ: case NE:
4090 return 1;
4091 case LT: case GE:
4092 if (inmode == CCmode || inmode == CCGCmode
4093 || inmode == CCGOCmode || inmode == CCNOmode)
4094 return 1;
4095 return 0;
4096 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4097 if (inmode == CCmode)
4098 return 1;
4099 return 0;
4100 case GT: case LE:
4101 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4102 return 1;
4103 return 0;
4104 default:
4105 return 0;
4109 /* Return 1 if OP is a valid comparison operator testing that the carry
4110 flag is set. */
4112 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4114 enum machine_mode inmode;
4115 enum rtx_code code = GET_CODE (op);
4117 if (mode != VOIDmode && GET_MODE (op) != mode)
4118 return 0;
4119 if (!COMPARISON_P (op))
4120 return 0;
4121 inmode = GET_MODE (XEXP (op, 0));
4122 if (GET_CODE (XEXP (op, 0)) != REG
4123 || REGNO (XEXP (op, 0)) != 17
4124 || XEXP (op, 1) != const0_rtx)
4125 return 0;
4127 if (inmode == CCFPmode || inmode == CCFPUmode)
4129 enum rtx_code second_code, bypass_code;
4131 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4132 if (bypass_code != NIL || second_code != NIL)
4133 return 0;
4134 code = ix86_fp_compare_code_to_integer (code);
4136 else if (inmode != CCmode)
4137 return 0;
4138 return code == LTU;
4141 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4144 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4146 enum machine_mode inmode;
4147 enum rtx_code code = GET_CODE (op);
4149 if (mode != VOIDmode && GET_MODE (op) != mode)
4150 return 0;
4151 if (!COMPARISON_P (op))
4152 return 0;
4153 inmode = GET_MODE (XEXP (op, 0));
4154 if (inmode == CCFPmode || inmode == CCFPUmode)
4156 enum rtx_code second_code, bypass_code;
4158 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4159 if (bypass_code != NIL || second_code != NIL)
4160 return 0;
4161 code = ix86_fp_compare_code_to_integer (code);
4163 /* i387 supports just a limited set of condition codes. */
4164 switch (code)
4166 case LTU: case GTU: case LEU: case GEU:
4167 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4168 return 1;
4169 return 0;
4170 case ORDERED: case UNORDERED:
4171 case EQ: case NE:
4172 return 1;
4173 default:
4174 return 0;
4178 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4181 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4183 switch (GET_CODE (op))
4185 case MULT:
4186 /* Modern CPUs have the same latency for HImode and SImode multiply,
4187 but 386 and 486 do HImode multiply faster. */
4188 return ix86_tune > PROCESSOR_I486;
4189 case PLUS:
4190 case AND:
4191 case IOR:
4192 case XOR:
4193 case ASHIFT:
4194 return 1;
4195 default:
4196 return 0;
4200 /* Nearly general operand, but accept any const_double, since we wish
4201 to be able to drop them into memory rather than have them get pulled
4202 into registers. */
4205 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4207 if (mode != VOIDmode && mode != GET_MODE (op))
4208 return 0;
4209 if (GET_CODE (op) == CONST_DOUBLE)
4210 return 1;
4211 return general_operand (op, mode);
4214 /* Match an SI or HImode register for a zero_extract. */
4217 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4219 int regno;
4220 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4221 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4222 return 0;
4224 if (!register_operand (op, VOIDmode))
4225 return 0;
4227 /* Be careful to accept only registers having upper parts. */
4228 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4229 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4232 /* Return 1 if this is a valid binary floating-point operation.
4233 OP is the expression matched, and MODE is its mode. */
4236 binary_fp_operator (rtx op, enum machine_mode mode)
4238 if (mode != VOIDmode && mode != GET_MODE (op))
4239 return 0;
4241 switch (GET_CODE (op))
4243 case PLUS:
4244 case MINUS:
4245 case MULT:
4246 case DIV:
4247 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4249 default:
4250 return 0;
4255 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4257 return GET_CODE (op) == MULT;
4261 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4263 return GET_CODE (op) == DIV;
4267 arith_or_logical_operator (rtx op, enum machine_mode mode)
4269 return ((mode == VOIDmode || GET_MODE (op) == mode)
4270 && ARITHMETIC_P (op));
4273 /* Returns 1 if OP is a memory operand with a displacement. */
4276 memory_displacement_operand (rtx op, enum machine_mode mode)
4278 struct ix86_address parts;
4280 if (! memory_operand (op, mode))
4281 return 0;
4283 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4284 abort ();
4286 return parts.disp != NULL_RTX;
4289 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4290 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4292 ??? It seems likely that this will only work because cmpsi is an
4293 expander, and no actual insns use this. */
4296 cmpsi_operand (rtx op, enum machine_mode mode)
4298 if (nonimmediate_operand (op, mode))
4299 return 1;
4301 if (GET_CODE (op) == AND
4302 && GET_MODE (op) == SImode
4303 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4304 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4305 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4306 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4307 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4308 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4309 return 1;
4311 return 0;
4314 /* Returns 1 if OP is a memory operand that cannot be represented by the
4315 modRM array. */
4318 long_memory_operand (rtx op, enum machine_mode mode)
4320 if (! memory_operand (op, mode))
4321 return 0;
4323 return memory_address_length (op) != 0;
4326 /* Return nonzero if the rtx is known aligned. */
4329 aligned_operand (rtx op, enum machine_mode mode)
4331 struct ix86_address parts;
4333 if (!general_operand (op, mode))
4334 return 0;
4336 /* Registers and immediate operands are always "aligned". */
4337 if (GET_CODE (op) != MEM)
4338 return 1;
4340 /* Don't even try to do any aligned optimizations with volatiles. */
4341 if (MEM_VOLATILE_P (op))
4342 return 0;
4344 op = XEXP (op, 0);
4346 /* Pushes and pops are only valid on the stack pointer. */
4347 if (GET_CODE (op) == PRE_DEC
4348 || GET_CODE (op) == POST_INC)
4349 return 1;
4351 /* Decode the address. */
4352 if (! ix86_decompose_address (op, &parts))
4353 abort ();
4355 /* Look for some component that isn't known to be aligned. */
4356 if (parts.index)
4358 if (parts.scale < 4
4359 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4360 return 0;
4362 if (parts.base)
4364 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4365 return 0;
4367 if (parts.disp)
4369 if (GET_CODE (parts.disp) != CONST_INT
4370 || (INTVAL (parts.disp) & 3) != 0)
4371 return 0;
4374 /* Didn't find one -- this must be an aligned address. */
4375 return 1;
4378 /* Initialize the table of extra 80387 mathematical constants. */
4380 static void
4381 init_ext_80387_constants (void)
4383 static const char * cst[5] =
4385 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4386 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4387 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4388 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4389 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4391 int i;
4393 for (i = 0; i < 5; i++)
4395 real_from_string (&ext_80387_constants_table[i], cst[i]);
4396 /* Ensure each constant is rounded to XFmode precision. */
4397 real_convert (&ext_80387_constants_table[i],
4398 XFmode, &ext_80387_constants_table[i]);
4401 ext_80387_constants_init = 1;
4404 /* Return true if the constant is something that can be loaded with
4405 a special instruction. */
4408 standard_80387_constant_p (rtx x)
4410 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4411 return -1;
4413 if (x == CONST0_RTX (GET_MODE (x)))
4414 return 1;
4415 if (x == CONST1_RTX (GET_MODE (x)))
4416 return 2;
4418 /* For XFmode constants, try to find a special 80387 instruction when
4419 optimizing for size or on those CPUs that benefit from them. */
4420 if (GET_MODE (x) == XFmode
4421 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4423 REAL_VALUE_TYPE r;
4424 int i;
4426 if (! ext_80387_constants_init)
4427 init_ext_80387_constants ();
4429 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4430 for (i = 0; i < 5; i++)
4431 if (real_identical (&r, &ext_80387_constants_table[i]))
4432 return i + 3;
4435 return 0;
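/* Illustrative mapping, not part of GCC: the return values of
   standard_80387_constant_p decode as 1 -> fldz, 2 -> fld1, and
   3..7 -> fldlg2, fldln2, fldl2e, fldl2t, fldpi, matching
   standard_80387_constant_opcode below; 0 means "no special instruction"
   and -1 means "not a floating-point CONST_DOUBLE at all".  */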
4438 /* Return the opcode of the special instruction to be used to load
4439 the constant X. */
4441 const char *
4442 standard_80387_constant_opcode (rtx x)
4444 switch (standard_80387_constant_p (x))
4446 case 1:
4447 return "fldz";
4448 case 2:
4449 return "fld1";
4450 case 3:
4451 return "fldlg2";
4452 case 4:
4453 return "fldln2";
4454 case 5:
4455 return "fldl2e";
4456 case 6:
4457 return "fldl2t";
4458 case 7:
4459 return "fldpi";
4461 abort ();
4464 /* Return the CONST_DOUBLE representing the 80387 constant that is
4465 loaded by the specified special instruction. The argument IDX
4466 matches the return value from standard_80387_constant_p. */
4469 standard_80387_constant_rtx (int idx)
4471 int i;
4473 if (! ext_80387_constants_init)
4474 init_ext_80387_constants ();
4476 switch (idx)
4478 case 3:
4479 case 4:
4480 case 5:
4481 case 6:
4482 case 7:
4483 i = idx - 3;
4484 break;
4486 default:
4487 abort ();
4490 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4491 XFmode);
4494 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4497 standard_sse_constant_p (rtx x)
4499 if (x == const0_rtx)
4500 return 1;
4501 return (x == CONST0_RTX (GET_MODE (x)));
4504 /* Returns 1 if OP contains a symbol reference */
4507 symbolic_reference_mentioned_p (rtx op)
4509 const char *fmt;
4510 int i;
4512 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4513 return 1;
4515 fmt = GET_RTX_FORMAT (GET_CODE (op));
4516 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4518 if (fmt[i] == 'E')
4520 int j;
4522 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4523 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4524 return 1;
4527 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4528 return 1;
4531 return 0;
4534 /* Return 1 if it is appropriate to emit `ret' instructions in the
4535 body of a function. Do this only if the epilogue is simple, needing a
4536 couple of insns. Prior to reloading, we can't tell how many registers
4537 must be saved, so return 0 then. Return 0 if there is no frame
4538 marker to de-allocate.
4540 If NON_SAVING_SETJMP is defined and true, then it is not possible
4541 for the epilogue to be simple, so return 0. This is a special case
4542 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4543 until final, but jump_optimize may need to know sooner if a
4544 `return' is OK. */
4547 ix86_can_use_return_insn_p (void)
4549 struct ix86_frame frame;
4551 #ifdef NON_SAVING_SETJMP
4552 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4553 return 0;
4554 #endif
4556 if (! reload_completed || frame_pointer_needed)
4557 return 0;
4559 /* Don't allow more than 32768 bytes to be popped, since that's all we can do
4560 with one instruction. */
4561 if (current_function_pops_args
4562 && current_function_args_size >= 32768)
4563 return 0;
4565 ix86_compute_frame_layout (&frame);
4566 return frame.to_allocate == 0 && frame.nregs == 0;
4569 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4571 x86_64_sign_extended_value (rtx value)
4573 switch (GET_CODE (value))
4575 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4576 to be at least 32 and thus all acceptable constants are
4577 represented as CONST_INT. */
4578 case CONST_INT:
4579 if (HOST_BITS_PER_WIDE_INT == 32)
4580 return 1;
4581 else
4583 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4584 return trunc_int_for_mode (val, SImode) == val;
4586 break;
4588 /* For certain code models, the symbolic references are known to fit:
4589 in the CM_SMALL_PIC model we know it fits if it is local to the shared
4590 library. Don't count TLS SYMBOL_REFs here, since they should fit
4591 only when inside a UNSPEC handled below. */
4592 case SYMBOL_REF:
4593 /* TLS symbols are not constant. */
4594 if (tls_symbolic_operand (value, Pmode))
4595 return false;
4596 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4598 /* For certain code models, the code is near as well. */
4599 case LABEL_REF:
4600 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4601 || ix86_cmodel == CM_KERNEL);
4603 /* We also may accept the offsetted memory references in certain special
4604 cases. */
4605 case CONST:
4606 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4607 switch (XINT (XEXP (value, 0), 1))
4609 case UNSPEC_GOTPCREL:
4610 case UNSPEC_DTPOFF:
4611 case UNSPEC_GOTNTPOFF:
4612 case UNSPEC_NTPOFF:
4613 return 1;
4614 default:
4615 break;
4617 if (GET_CODE (XEXP (value, 0)) == PLUS)
4619 rtx op1 = XEXP (XEXP (value, 0), 0);
4620 rtx op2 = XEXP (XEXP (value, 0), 1);
4621 HOST_WIDE_INT offset;
4623 if (ix86_cmodel == CM_LARGE)
4624 return 0;
4625 if (GET_CODE (op2) != CONST_INT)
4626 return 0;
4627 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4628 switch (GET_CODE (op1))
4630 case SYMBOL_REF:
4631 /* For CM_SMALL assume that the latest object is 16MB before the
4632 end of the 31-bit boundary. We may also accept pretty
4633 large negative constants knowing that all objects are
4634 in the positive half of the address space. */
4635 if (ix86_cmodel == CM_SMALL
4636 && offset < 16*1024*1024
4637 && trunc_int_for_mode (offset, SImode) == offset)
4638 return 1;
4639 /* For CM_KERNEL we know that all objects reside in the
4640 negative half of the 32-bit address space. We may not
4641 accept negative offsets, since they may be just off,
4642 and we may accept pretty large positive ones. */
4643 if (ix86_cmodel == CM_KERNEL
4644 && offset > 0
4645 && trunc_int_for_mode (offset, SImode) == offset)
4646 return 1;
4647 break;
4648 case LABEL_REF:
4649 /* These conditions are similar to SYMBOL_REF ones, just the
4650 constraints for code models differ. */
4651 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4652 && offset < 16*1024*1024
4653 && trunc_int_for_mode (offset, SImode) == offset)
4654 return 1;
4655 if (ix86_cmodel == CM_KERNEL
4656 && offset > 0
4657 && trunc_int_for_mode (offset, SImode) == offset)
4658 return 1;
4659 break;
4660 case UNSPEC:
4661 switch (XINT (op1, 1))
4663 case UNSPEC_DTPOFF:
4664 case UNSPEC_NTPOFF:
4665 if (offset > 0
4666 && trunc_int_for_mode (offset, SImode) == offset)
4667 return 1;
4669 break;
4670 default:
4671 return 0;
4674 return 0;
4675 default:
4676 return 0;
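/* Illustrative example, not part of GCC: a CONST_INT is acceptable here
   exactly when it survives a round trip through a signed 32-bit value,
   since such immediates are sign extended to 64 bits by the hardware.
   For instance 0x7fffffff and -1 fit, while 0x80000000 does not.  */
#if 0
static int
fits_signed_32 (long long val)
{
  /* True when VAL is representable as a sign-extended 32-bit immediate.  */
  return val == (long long) (int) val;
}
#endif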
4680 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4682 x86_64_zero_extended_value (rtx value)
4684 switch (GET_CODE (value))
4686 case CONST_DOUBLE:
4687 if (HOST_BITS_PER_WIDE_INT == 32)
4688 return (GET_MODE (value) == VOIDmode
4689 && !CONST_DOUBLE_HIGH (value));
4690 else
4691 return 0;
4692 case CONST_INT:
4693 if (HOST_BITS_PER_WIDE_INT == 32)
4694 return INTVAL (value) >= 0;
4695 else
4696 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4697 break;
4699 /* For certain code models, the symbolic references are known to fit. */
4700 case SYMBOL_REF:
4701 /* TLS symbols are not constant. */
4702 if (tls_symbolic_operand (value, Pmode))
4703 return false;
4704 return ix86_cmodel == CM_SMALL;
4706 /* For certain code models, the code is near as well. */
4707 case LABEL_REF:
4708 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4710 /* We also may accept the offsetted memory references in certain special
4711 cases. */
4712 case CONST:
4713 if (GET_CODE (XEXP (value, 0)) == PLUS)
4715 rtx op1 = XEXP (XEXP (value, 0), 0);
4716 rtx op2 = XEXP (XEXP (value, 0), 1);
4718 if (ix86_cmodel == CM_LARGE)
4719 return 0;
4720 switch (GET_CODE (op1))
4722 case SYMBOL_REF:
4723 return 0;
4724 /* For small code model we may accept pretty large positive
4725 offsets, since one bit is available for free. Negative
4726 offsets are limited by the size of NULL pointer area
4727 specified by the ABI. */
4728 if (ix86_cmodel == CM_SMALL
4729 && GET_CODE (op2) == CONST_INT
4730 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4731 && (trunc_int_for_mode (INTVAL (op2), SImode)
4732 == INTVAL (op2)))
4733 return 1;
4734 /* ??? For the kernel, we may accept adjustment of
4735 -0x10000000, since we know that it will just convert
4736 negative address space to positive, but perhaps this
4737 is not worthwhile. */
4738 break;
4739 case LABEL_REF:
4740 /* These conditions are similar to SYMBOL_REF ones, just the
4741 constraints for code models differ. */
4742 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4743 && GET_CODE (op2) == CONST_INT
4744 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4745 && (trunc_int_for_mode (INTVAL (op2), SImode)
4746 == INTVAL (op2)))
4747 return 1;
4748 break;
4749 default:
4750 return 0;
4753 return 0;
4754 default:
4755 return 0;
4759 /* Value should be nonzero if functions must have frame pointers.
4760 Zero means the frame pointer need not be set up (and parms may
4761 be accessed via the stack pointer) in functions that seem suitable. */
4764 ix86_frame_pointer_required (void)
4766 /* If we accessed previous frames, then the generated code expects
4767 to be able to access the saved ebp value in our frame. */
4768 if (cfun->machine->accesses_prev_frame)
4769 return 1;
4771 /* Several x86 OSes need a frame pointer for other reasons,
4772 usually pertaining to setjmp. */
4773 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4774 return 1;
4776 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4777 the frame pointer by default. Turn it back on now if we've not
4778 got a leaf function. */
4779 if (TARGET_OMIT_LEAF_FRAME_POINTER
4780 && (!current_function_is_leaf))
4781 return 1;
4783 if (current_function_profile)
4784 return 1;
4786 return 0;
4789 /* Record that the current function accesses previous call frames. */
4791 void
4792 ix86_setup_frame_addresses (void)
4794 cfun->machine->accesses_prev_frame = 1;
4797 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4798 # define USE_HIDDEN_LINKONCE 1
4799 #else
4800 # define USE_HIDDEN_LINKONCE 0
4801 #endif
4803 static int pic_labels_used;
4805 /* Fills in the label name that should be used for a pc thunk for
4806 the given register. */
4808 static void
4809 get_pc_thunk_name (char name[32], unsigned int regno)
4811 if (USE_HIDDEN_LINKONCE)
4812 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4813 else
4814 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
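/* Illustrative example, not part of GCC: with USE_HIDDEN_LINKONCE the thunk
   for %ebx is named "__i686.get_pc_thunk.bx"; otherwise an internal label
   (whose exact spelling depends on ASM_GENERATE_INTERNAL_LABEL for the
   target) is used instead.  */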
4818 /* This function generates code for -fpic that loads %ebx with
4819 the return address of the caller and then returns. */
4821 void
4822 ix86_file_end (void)
4824 rtx xops[2];
4825 int regno;
4827 for (regno = 0; regno < 8; ++regno)
4829 char name[32];
4831 if (! ((pic_labels_used >> regno) & 1))
4832 continue;
4834 get_pc_thunk_name (name, regno);
4836 if (USE_HIDDEN_LINKONCE)
4838 tree decl;
4840 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4841 error_mark_node);
4842 TREE_PUBLIC (decl) = 1;
4843 TREE_STATIC (decl) = 1;
4844 DECL_ONE_ONLY (decl) = 1;
4846 (*targetm.asm_out.unique_section) (decl, 0);
4847 named_section (decl, NULL, 0);
4849 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4850 fputs ("\t.hidden\t", asm_out_file);
4851 assemble_name (asm_out_file, name);
4852 fputc ('\n', asm_out_file);
4853 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4855 else
4857 text_section ();
4858 ASM_OUTPUT_LABEL (asm_out_file, name);
4861 xops[0] = gen_rtx_REG (SImode, regno);
4862 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4863 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4864 output_asm_insn ("ret", xops);
4867 if (NEED_INDICATE_EXEC_STACK)
4868 file_end_indicate_exec_stack ();
4871 /* Emit code for the SET_GOT patterns. */
4873 const char *
4874 output_set_got (rtx dest)
4876 rtx xops[3];
4878 xops[0] = dest;
4879 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4881 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4883 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4885 if (!flag_pic)
4886 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4887 else
4888 output_asm_insn ("call\t%a2", xops);
4890 #if TARGET_MACHO
4891 /* Output the "canonical" label name ("Lxx$pb") here too. This
4892 is what will be referred to by the Mach-O PIC subsystem. */
4893 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4894 #endif
4895 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4896 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4898 if (flag_pic)
4899 output_asm_insn ("pop{l}\t%0", xops);
4901 else
4903 char name[32];
4904 get_pc_thunk_name (name, REGNO (dest));
4905 pic_labels_used |= 1 << REGNO (dest);
4907 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4908 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4909 output_asm_insn ("call\t%X2", xops);
4912 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4913 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4914 else if (!TARGET_MACHO)
4915 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4917 return "";
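/* Illustrative example, not part of GCC: for -fpic without deep branch
   prediction the sequence emitted above looks roughly like

	call	.L2
   .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while the thunk variant replaces the call/pop pair with a call to a
   thunk such as __i686.get_pc_thunk.bx followed by a plain add of
   $_GLOBAL_OFFSET_TABLE_.  */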
4920 /* Generate a "push" pattern for input ARG. */
4922 static rtx
4923 gen_push (rtx arg)
4925 return gen_rtx_SET (VOIDmode,
4926 gen_rtx_MEM (Pmode,
4927 gen_rtx_PRE_DEC (Pmode,
4928 stack_pointer_rtx)),
4929 arg);
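/* Illustrative example, not part of GCC: on a 32-bit target, for
   ARG == (reg:SI bp) the RTL produced above is

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   i.e. the canonical form of a push instruction.  */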
4932 /* Return >= 0 if there is an unused call-clobbered register available
4933 for the entire function. */
4935 static unsigned int
4936 ix86_select_alt_pic_regnum (void)
4938 if (current_function_is_leaf && !current_function_profile)
4940 int i;
4941 for (i = 2; i >= 0; --i)
4942 if (!regs_ever_live[i])
4943 return i;
4946 return INVALID_REGNUM;
4949 /* Return 1 if we need to save REGNO. */
4950 static int
4951 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4953 if (pic_offset_table_rtx
4954 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4955 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4956 || current_function_profile
4957 || current_function_calls_eh_return
4958 || current_function_uses_const_pool))
4960 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4961 return 0;
4962 return 1;
4965 if (current_function_calls_eh_return && maybe_eh_return)
4967 unsigned i;
4968 for (i = 0; ; i++)
4970 unsigned test = EH_RETURN_DATA_REGNO (i);
4971 if (test == INVALID_REGNUM)
4972 break;
4973 if (test == regno)
4974 return 1;
4978 return (regs_ever_live[regno]
4979 && !call_used_regs[regno]
4980 && !fixed_regs[regno]
4981 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4984 /* Return number of registers to be saved on the stack. */
4986 static int
4987 ix86_nsaved_regs (void)
4989 int nregs = 0;
4990 int regno;
4992 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4993 if (ix86_save_reg (regno, true))
4994 nregs++;
4995 return nregs;
4998 /* Return the offset between two registers, one to be eliminated, and the other
4999 its replacement, at the start of a routine. */
5001 HOST_WIDE_INT
5002 ix86_initial_elimination_offset (int from, int to)
5004 struct ix86_frame frame;
5005 ix86_compute_frame_layout (&frame);
5007 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5008 return frame.hard_frame_pointer_offset;
5009 else if (from == FRAME_POINTER_REGNUM
5010 && to == HARD_FRAME_POINTER_REGNUM)
5011 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5012 else
5014 if (to != STACK_POINTER_REGNUM)
5015 abort ();
5016 else if (from == ARG_POINTER_REGNUM)
5017 return frame.stack_pointer_offset;
5018 else if (from != FRAME_POINTER_REGNUM)
5019 abort ();
5020 else
5021 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5025 /* Fill the ix86_frame structure describing the frame of the function currently being compiled. */
5027 static void
5028 ix86_compute_frame_layout (struct ix86_frame *frame)
5030 HOST_WIDE_INT total_size;
5031 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5032 HOST_WIDE_INT offset;
5033 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5034 HOST_WIDE_INT size = get_frame_size ();
5036 frame->nregs = ix86_nsaved_regs ();
5037 total_size = size;
5039 /* During reload iterations the number of registers saved can change.
5040 Recompute the value as needed. Do not recompute when the number of registers
5041 didn't change, as reload makes multiple calls to the function and does not
5042 expect the decision to change within a single iteration. */
5043 if (!optimize_size
5044 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5046 int count = frame->nregs;
5048 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5049 /* The fast prologue uses move instead of push to save registers. This
5050 is significantly longer, but also executes faster, as modern hardware
5051 can execute the moves in parallel but can't do that for push/pop.
5053 Be careful about choosing which prologue to emit: when the function takes
5054 many instructions to execute, we may use the slow version, as well as
5055 when the function is known to be outside a hot spot (known only with
5056 feedback). Weight the size of the function by the number of registers
5057 to save, as it is cheap to use one or two push instructions but very
5058 slow to use many of them. */
5059 if (count)
5060 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5061 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5062 || (flag_branch_probabilities
5063 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5064 cfun->machine->use_fast_prologue_epilogue = false;
5065 else
5066 cfun->machine->use_fast_prologue_epilogue
5067 = !expensive_function_p (count);
5069 if (TARGET_PROLOGUE_USING_MOVE
5070 && cfun->machine->use_fast_prologue_epilogue)
5071 frame->save_regs_using_mov = true;
5072 else
5073 frame->save_regs_using_mov = false;
5076 /* Skip return address and saved base pointer. */
5077 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5079 frame->hard_frame_pointer_offset = offset;
5081 /* Do some sanity checking of stack_alignment_needed and
5082 preferred_alignment, since the i386 port is the only one using these
5083 features, and they may break easily. */
5085 if (size && !stack_alignment_needed)
5086 abort ();
5087 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5088 abort ();
5089 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5090 abort ();
5091 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5092 abort ();
5094 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5095 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5097 /* Register save area */
5098 offset += frame->nregs * UNITS_PER_WORD;
5100 /* Va-arg area */
5101 if (ix86_save_varrargs_registers)
5103 offset += X86_64_VARARGS_SIZE;
5104 frame->va_arg_size = X86_64_VARARGS_SIZE;
5106 else
5107 frame->va_arg_size = 0;
5109 /* Align start of frame for local function. */
5110 frame->padding1 = ((offset + stack_alignment_needed - 1)
5111 & -stack_alignment_needed) - offset;
5113 offset += frame->padding1;
5115 /* Frame pointer points here. */
5116 frame->frame_pointer_offset = offset;
5118 offset += size;
5120 /* Add the outgoing arguments area. It can be skipped if we eliminated
5121 all the function calls as dead code.
5122 Skipping is however impossible when the function calls alloca, as the alloca
5123 expander assumes that the last current_function_outgoing_args_size bytes
5124 of the stack frame are unused. */
5125 if (ACCUMULATE_OUTGOING_ARGS
5126 && (!current_function_is_leaf || current_function_calls_alloca))
5128 offset += current_function_outgoing_args_size;
5129 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5131 else
5132 frame->outgoing_arguments_size = 0;
5134 /* Align stack boundary. Only needed if we're calling another function
5135 or using alloca. */
5136 if (!current_function_is_leaf || current_function_calls_alloca)
5137 frame->padding2 = ((offset + preferred_alignment - 1)
5138 & -preferred_alignment) - offset;
5139 else
5140 frame->padding2 = 0;
5142 offset += frame->padding2;
5144 /* We've reached end of stack frame. */
5145 frame->stack_pointer_offset = offset;
5147 /* Size prologue needs to allocate. */
5148 frame->to_allocate =
5149 (size + frame->padding1 + frame->padding2
5150 + frame->outgoing_arguments_size + frame->va_arg_size);
5152 if ((!frame->to_allocate && frame->nregs <= 1)
5153 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5154 frame->save_regs_using_mov = false;
5156 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5157 && current_function_is_leaf)
5159 frame->red_zone_size = frame->to_allocate;
5160 if (frame->save_regs_using_mov)
5161 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5162 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5163 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5165 else
5166 frame->red_zone_size = 0;
5167 frame->to_allocate -= frame->red_zone_size;
5168 frame->stack_pointer_offset -= frame->red_zone_size;
5169 #if 0
5170 fprintf (stderr, "nregs: %i\n", frame->nregs);
5171 fprintf (stderr, "size: %i\n", size);
5172 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5173 fprintf (stderr, "padding1: %i\n", frame->padding1);
5174 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5175 fprintf (stderr, "padding2: %i\n", frame->padding2);
5176 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5177 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5178 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5179 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5180 frame->hard_frame_pointer_offset);
5181 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5182 #endif
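/* Illustrative summary, not part of GCC: the offsets computed above describe
   a frame laid out (from higher to lower addresses) roughly as

	return address
	saved frame pointer		<- hard_frame_pointer_offset
	saved registers (nregs words)
	va-arg save area (if any)
	padding1
	local variables			<- frame_pointer_offset
	outgoing arguments
	padding2			<- stack_pointer_offset

   with to_allocate covering whatever the prologue still has to subtract
   from the stack pointer, minus any portion covered by the red zone.  */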
5185 /* Emit code to save registers in the prologue. */
5187 static void
5188 ix86_emit_save_regs (void)
5190 int regno;
5191 rtx insn;
5193 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5194 if (ix86_save_reg (regno, true))
5196 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5197 RTX_FRAME_RELATED_P (insn) = 1;
5201 /* Emit code to save registers using MOV insns. The first register
5202 is saved at POINTER + OFFSET. */
5203 static void
5204 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5206 int regno;
5207 rtx insn;
5209 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5210 if (ix86_save_reg (regno, true))
5212 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5213 Pmode, offset),
5214 gen_rtx_REG (Pmode, regno));
5215 RTX_FRAME_RELATED_P (insn) = 1;
5216 offset += UNITS_PER_WORD;
5220 /* Expand prologue or epilogue stack adjustment.
5221 The pattern exists to put a dependency on all ebp-based memory accesses.
5222 STYLE should be negative if instructions should be marked as frame related,
5223 zero if %r11 register is live and cannot be freely used and positive
5224 otherwise. */
5226 static void
5227 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5229 rtx insn;
5231 if (! TARGET_64BIT)
5232 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5233 else if (x86_64_immediate_operand (offset, DImode))
5234 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5235 else
5237 rtx r11;
5238 /* r11 is used by indirect sibcall return as well, set before the
5239 epilogue and used after the epilogue. ATM indirect sibcall
5240 shouldn't be used together with huge frame sizes in one
5241 function because of the frame_size check in sibcall.c. */
5242 if (style == 0)
5243 abort ();
5244 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5245 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5246 if (style < 0)
5247 RTX_FRAME_RELATED_P (insn) = 1;
5248 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5249 offset));
5251 if (style < 0)
5252 RTX_FRAME_RELATED_P (insn) = 1;
5255 /* Expand the prologue into a bunch of separate insns. */
5257 void
5258 ix86_expand_prologue (void)
5260 rtx insn;
5261 bool pic_reg_used;
5262 struct ix86_frame frame;
5263 HOST_WIDE_INT allocate;
5265 ix86_compute_frame_layout (&frame);
5267 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5268 slower on all targets. Also sdb doesn't like it. */
5270 if (frame_pointer_needed)
5272 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5273 RTX_FRAME_RELATED_P (insn) = 1;
5275 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5276 RTX_FRAME_RELATED_P (insn) = 1;
5279 allocate = frame.to_allocate;
5281 if (!frame.save_regs_using_mov)
5282 ix86_emit_save_regs ();
5283 else
5284 allocate += frame.nregs * UNITS_PER_WORD;
5286 /* When using the red zone we may start saving registers before allocating
5287 the stack frame, saving one cycle of the prologue. */
5288 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5289 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5290 : stack_pointer_rtx,
5291 -frame.nregs * UNITS_PER_WORD);
5293 if (allocate == 0)
5295 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5296 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5297 GEN_INT (-allocate), -1);
5298 else
5300 /* Only valid for Win32. */
5301 rtx eax = gen_rtx_REG (SImode, 0);
5302 bool eax_live = ix86_eax_live_at_start_p ();
5304 if (TARGET_64BIT)
5305 abort ();
5307 if (eax_live)
5309 emit_insn (gen_push (eax));
5310 allocate -= 4;
5313 insn = emit_move_insn (eax, GEN_INT (allocate));
5314 RTX_FRAME_RELATED_P (insn) = 1;
5316 insn = emit_insn (gen_allocate_stack_worker (eax));
5317 RTX_FRAME_RELATED_P (insn) = 1;
5319 if (eax_live)
5321 rtx t = plus_constant (stack_pointer_rtx, allocate);
5322 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5326 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5328 if (!frame_pointer_needed || !frame.to_allocate)
5329 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5330 else
5331 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5332 -frame.nregs * UNITS_PER_WORD);
5335 pic_reg_used = false;
5336 if (pic_offset_table_rtx
5337 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5338 || current_function_profile))
5340 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5342 if (alt_pic_reg_used != INVALID_REGNUM)
5343 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5345 pic_reg_used = true;
5348 if (pic_reg_used)
5350 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5352 /* Even with accurate pre-reload life analysis, we can wind up
5353 deleting all references to the pic register after reload.
5354 Consider if cross-jumping unifies two sides of a branch
5355 controlled by a comparison vs the only read from a global.
5356 In which case, allow the set_got to be deleted, though we're
5357 too late to do anything about the ebx save in the prologue. */
5358 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5361 /* Prevent function calls from being scheduled before the call to mcount.
5362 In the pic_reg_used case, make sure that the got load isn't deleted. */
5363 if (current_function_profile)
5364 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
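/* Illustrative example, not part of GCC: for a simple 32-bit function that
   needs a frame pointer and saves %ebx with pushes, the insns emitted above
   correspond to assembly along the lines of

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp

   where N is frame.to_allocate; the move-based variant instead adjusts
   %esp first and then stores the registers with movl.  */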
5367 /* Emit code to restore saved registers using MOV insns. First register
5368 is restored from POINTER + OFFSET. */
5369 static void
5370 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5371 int maybe_eh_return)
5373 int regno;
5374 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5376 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5377 if (ix86_save_reg (regno, maybe_eh_return))
5379 /* Ensure that adjust_address won't be forced to produce a pointer
5380 out of the range allowed by the x86-64 instruction set. */
5381 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5383 rtx r11;
5385 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5386 emit_move_insn (r11, GEN_INT (offset));
5387 emit_insn (gen_adddi3 (r11, r11, pointer));
5388 base_address = gen_rtx_MEM (Pmode, r11);
5389 offset = 0;
5391 emit_move_insn (gen_rtx_REG (Pmode, regno),
5392 adjust_address (base_address, Pmode, offset));
5393 offset += UNITS_PER_WORD;
5397 /* Restore function stack, frame, and registers. */
5399 void
5400 ix86_expand_epilogue (int style)
5402 int regno;
5403 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5404 struct ix86_frame frame;
5405 HOST_WIDE_INT offset;
5407 ix86_compute_frame_layout (&frame);
5409 /* Calculate start of saved registers relative to ebp. Special care
5410 must be taken for the normal return case of a function using
5411 eh_return: the eax and edx registers are marked as saved, but not
5412 restored along this path. */
5413 offset = frame.nregs;
5414 if (current_function_calls_eh_return && style != 2)
5415 offset -= 2;
5416 offset *= -UNITS_PER_WORD;
5418 /* If we're only restoring one register and sp is not valid, then
5419 use a move instruction to restore the register, since it's
5420 less work than reloading sp and popping the register.
5422 The default code results in a stack adjustment using an add/lea instruction,
5423 while this code results in a LEAVE instruction (or discrete equivalent),
5424 so it is profitable in some other cases as well, especially when there
5425 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5426 and there is exactly one register to pop. This heuristic may need some
5427 tuning in the future. */
5428 if ((!sp_valid && frame.nregs <= 1)
5429 || (TARGET_EPILOGUE_USING_MOVE
5430 && cfun->machine->use_fast_prologue_epilogue
5431 && (frame.nregs > 1 || frame.to_allocate))
5432 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5433 || (frame_pointer_needed && TARGET_USE_LEAVE
5434 && cfun->machine->use_fast_prologue_epilogue
5435 && frame.nregs == 1)
5436 || current_function_calls_eh_return)
5438 /* Restore registers. We can use ebp or esp to address the memory
5439 locations. If both are available, default to ebp, since offsets
5440 are known to be small. The only exception is esp pointing directly to the
5441 end of the block of saved registers, where we may simplify the addressing
5442 mode. */
5444 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5445 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5446 frame.to_allocate, style == 2);
5447 else
5448 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5449 offset, style == 2);
5451 /* eh_return epilogues need %ecx added to the stack pointer. */
5452 if (style == 2)
5454 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5456 if (frame_pointer_needed)
5458 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5459 tmp = plus_constant (tmp, UNITS_PER_WORD);
5460 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5462 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5463 emit_move_insn (hard_frame_pointer_rtx, tmp);
5465 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5466 const0_rtx, style);
5468 else
5470 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5471 tmp = plus_constant (tmp, (frame.to_allocate
5472 + frame.nregs * UNITS_PER_WORD));
5473 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5476 else if (!frame_pointer_needed)
5477 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5478 GEN_INT (frame.to_allocate
5479 + frame.nregs * UNITS_PER_WORD),
5480 style);
5481 /* If not an i386, mov & pop is faster than "leave". */
5482 else if (TARGET_USE_LEAVE || optimize_size
5483 || !cfun->machine->use_fast_prologue_epilogue)
5484 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5485 else
5487 pro_epilogue_adjust_stack (stack_pointer_rtx,
5488 hard_frame_pointer_rtx,
5489 const0_rtx, style);
5490 if (TARGET_64BIT)
5491 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5492 else
5493 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5496 else
5498 /* First step is to deallocate the stack frame so that we can
5499 pop the registers. */
5500 if (!sp_valid)
5502 if (!frame_pointer_needed)
5503 abort ();
5504 pro_epilogue_adjust_stack (stack_pointer_rtx,
5505 hard_frame_pointer_rtx,
5506 GEN_INT (offset), style);
5508 else if (frame.to_allocate)
5509 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5510 GEN_INT (frame.to_allocate), style);
5512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5513 if (ix86_save_reg (regno, false))
5515 if (TARGET_64BIT)
5516 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5517 else
5518 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5520 if (frame_pointer_needed)
5522 /* Leave results in shorter dependency chains on CPUs that are
5523 able to grok it fast. */
5524 if (TARGET_USE_LEAVE)
5525 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5526 else if (TARGET_64BIT)
5527 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5528 else
5529 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5533 /* Sibcall epilogues don't want a return instruction. */
5534 if (style == 0)
5535 return;
5537 if (current_function_pops_args && current_function_args_size)
5539 rtx popc = GEN_INT (current_function_pops_args);
5541 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5542 return address, do an explicit add, and jump indirectly to the
5543 caller. */
5545 if (current_function_pops_args >= 65536)
5547 rtx ecx = gen_rtx_REG (SImode, 2);
5549 /* There is no "pascal" calling convention in the 64-bit ABI. */
5550 if (TARGET_64BIT)
5551 abort ();
5553 emit_insn (gen_popsi1 (ecx));
5554 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5555 emit_jump_insn (gen_return_indirect_internal (ecx));
5557 else
5558 emit_jump_insn (gen_return_pop_internal (popc));
5560 else
5561 emit_jump_insn (gen_return_internal ());
5564 /* Reset from the function's potential modifications. */
5566 static void
5567 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5568 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5570 if (pic_offset_table_rtx)
5571 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5574 /* Extract the parts of an RTL expression that is a valid memory address
5575 for an instruction. Return 0 if the structure of the address is
5576 grossly off. Return -1 if the address contains ASHIFT, so it is not
5577 strictly valid, but still used for computing length of lea instruction. */
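/* For illustration: an i386 effective address has the general form
   base + index*scale + disp (possibly with a segment override), so the
   canonical RTL (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx))
   (const_int 12)) decomposes into base = %ebx, index = %ecx, scale = 4,
   disp = 12, i.e. the AT&T operand 12(%ebx,%ecx,4). */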
5579 static int
5580 ix86_decompose_address (rtx addr, struct ix86_address *out)
5582 rtx base = NULL_RTX;
5583 rtx index = NULL_RTX;
5584 rtx disp = NULL_RTX;
5585 HOST_WIDE_INT scale = 1;
5586 rtx scale_rtx = NULL_RTX;
5587 int retval = 1;
5588 enum ix86_address_seg seg = SEG_DEFAULT;
5590 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5591 base = addr;
5592 else if (GET_CODE (addr) == PLUS)
5594 rtx addends[4], op;
5595 int n = 0, i;
5597 op = addr;
5600 if (n >= 4)
5601 return 0;
5602 addends[n++] = XEXP (op, 1);
5603 op = XEXP (op, 0);
5605 while (GET_CODE (op) == PLUS);
5606 if (n >= 4)
5607 return 0;
5608 addends[n] = op;
5610 for (i = n; i >= 0; --i)
5612 op = addends[i];
5613 switch (GET_CODE (op))
5615 case MULT:
5616 if (index)
5617 return 0;
5618 index = XEXP (op, 0);
5619 scale_rtx = XEXP (op, 1);
5620 break;
5622 case UNSPEC:
5623 if (XINT (op, 1) == UNSPEC_TP
5624 && TARGET_TLS_DIRECT_SEG_REFS
5625 && seg == SEG_DEFAULT)
5626 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5627 else
5628 return 0;
5629 break;
5631 case REG:
5632 case SUBREG:
5633 if (!base)
5634 base = op;
5635 else if (!index)
5636 index = op;
5637 else
5638 return 0;
5639 break;
5641 case CONST:
5642 case CONST_INT:
5643 case SYMBOL_REF:
5644 case LABEL_REF:
5645 if (disp)
5646 return 0;
5647 disp = op;
5648 break;
5650 default:
5651 return 0;
5655 else if (GET_CODE (addr) == MULT)
5657 index = XEXP (addr, 0); /* index*scale */
5658 scale_rtx = XEXP (addr, 1);
5660 else if (GET_CODE (addr) == ASHIFT)
5662 rtx tmp;
5664 /* We're called for lea too, which implements ashift on occasion. */
5665 index = XEXP (addr, 0);
5666 tmp = XEXP (addr, 1);
5667 if (GET_CODE (tmp) != CONST_INT)
5668 return 0;
5669 scale = INTVAL (tmp);
5670 if ((unsigned HOST_WIDE_INT) scale > 3)
5671 return 0;
5672 scale = 1 << scale;
5673 retval = -1;
5675 else
5676 disp = addr; /* displacement */
5678 /* Extract the integral value of scale. */
5679 if (scale_rtx)
5681 if (GET_CODE (scale_rtx) != CONST_INT)
5682 return 0;
5683 scale = INTVAL (scale_rtx);
5686 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
5687 if (base && index && scale == 1
5688 && (index == arg_pointer_rtx
5689 || index == frame_pointer_rtx
5690 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5692 rtx tmp = base;
5693 base = index;
5694 index = tmp;
5697 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5698 if ((base == hard_frame_pointer_rtx
5699 || base == frame_pointer_rtx
5700 || base == arg_pointer_rtx) && !disp)
5701 disp = const0_rtx;
5703 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5704 Avoid this by transforming it to [%esi+0]. */
5705 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5706 && base && !index && !disp
5707 && REG_P (base)
5708 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5709 disp = const0_rtx;
5711 /* Special case: encode reg+reg instead of reg*2. */
5712 if (!base && index && scale && scale == 2)
5713 base = index, scale = 1;
5715 /* Special case: scaling cannot be encoded without base or displacement. */
5716 if (!base && !disp && index && scale != 1)
5717 disp = const0_rtx;
5719 out->base = base;
5720 out->index = index;
5721 out->disp = disp;
5722 out->scale = scale;
5723 out->seg = seg;
5725 return retval;
5728 /* Return the cost of the memory address x.
5729 For i386, it is better to use a complex address than to let gcc copy
5730 the address into a reg and make a new pseudo. But not if the address
5731 requires two regs - that would mean more pseudos with longer
5732 lifetimes. */
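/* Roughly: a symbolic or nonzero displacement makes an address cheaper,
   while base or index registers that are still pseudos make it more
   expensive; e.g. sym(%ebx) works out to cost 0, (%ebx,%ecx) to cost 1,
   and an address built from two distinct pseudos to cost 3. */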
5733 static int
5734 ix86_address_cost (rtx x)
5736 struct ix86_address parts;
5737 int cost = 1;
5739 if (!ix86_decompose_address (x, &parts))
5740 abort ();
5742 /* More complex memory references are better. */
5743 if (parts.disp && parts.disp != const0_rtx)
5744 cost--;
5745 if (parts.seg != SEG_DEFAULT)
5746 cost--;
5748 /* Attempt to minimize number of registers in the address. */
5749 if ((parts.base
5750 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5751 || (parts.index
5752 && (!REG_P (parts.index)
5753 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5754 cost++;
5756 if (parts.base
5757 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5758 && parts.index
5759 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5760 && parts.base != parts.index)
5761 cost++;
5763 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5764 since its predecode logic can't detect the length of such instructions
5765 and they degenerate to vector decoding. Increase the cost of such
5766 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5767 to split such addresses or even refuse them altogether.
5769 The following addressing modes are affected:
5770 [base+scale*index]
5771 [scale*index+disp]
5772 [base+index]
5774 The first and last case may be avoidable by explicitly coding a zero
5775 displacement into the memory address, but I don't have an AMD K6 machine
5776 handy to check this theory. */
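/* For instance, (%eax,%ecx,2) is encoded with mod=00 and r/m=100 (a SIB
   byte and no displacement byte); writing it as 0(%eax,%ecx,2) would use
   mod=01 with a disp8 instead, which is the rewrite speculated about
   above. */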
5778 if (TARGET_K6
5779 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5780 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5781 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5782 cost += 10;
5784 return cost;
5787 /* If X is a machine specific address (i.e. a symbol or label being
5788 referenced as a displacement from the GOT implemented using an
5789 UNSPEC), then return the base term. Otherwise return X. */
5792 ix86_find_base_term (rtx x)
5794 rtx term;
5796 if (TARGET_64BIT)
5798 if (GET_CODE (x) != CONST)
5799 return x;
5800 term = XEXP (x, 0);
5801 if (GET_CODE (term) == PLUS
5802 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5803 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5804 term = XEXP (term, 0);
5805 if (GET_CODE (term) != UNSPEC
5806 || XINT (term, 1) != UNSPEC_GOTPCREL)
5807 return x;
5809 term = XVECEXP (term, 0, 0);
5811 if (GET_CODE (term) != SYMBOL_REF
5812 && GET_CODE (term) != LABEL_REF)
5813 return x;
5815 return term;
5818 term = ix86_delegitimize_address (x);
5820 if (GET_CODE (term) != SYMBOL_REF
5821 && GET_CODE (term) != LABEL_REF)
5822 return x;
5824 return term;
5827 /* Determine if a given RTX is a valid constant. We already know this
5828 satisfies CONSTANT_P. */
5830 bool
5831 legitimate_constant_p (rtx x)
5833 rtx inner;
5835 switch (GET_CODE (x))
5837 case SYMBOL_REF:
5838 /* TLS symbols are not constant. */
5839 if (tls_symbolic_operand (x, Pmode))
5840 return false;
5841 break;
5843 case CONST:
5844 inner = XEXP (x, 0);
5846 /* Offsets of TLS symbols are never valid.
5847 Discourage CSE from creating them. */
5848 if (GET_CODE (inner) == PLUS
5849 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5850 return false;
5852 if (GET_CODE (inner) == PLUS
5853 || GET_CODE (inner) == MINUS)
5855 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5856 return false;
5857 inner = XEXP (inner, 0);
5860 /* Only some unspecs are valid as "constants". */
5861 if (GET_CODE (inner) == UNSPEC)
5862 switch (XINT (inner, 1))
5864 case UNSPEC_TPOFF:
5865 case UNSPEC_NTPOFF:
5866 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5867 case UNSPEC_DTPOFF:
5868 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5869 default:
5870 return false;
5872 break;
5874 default:
5875 break;
5878 /* Otherwise we handle everything else in the move patterns. */
5879 return true;
5882 /* Determine if it's legal to put X into the constant pool. This
5883 is not possible for the address of thread-local symbols, which
5884 is checked above. */
5886 static bool
5887 ix86_cannot_force_const_mem (rtx x)
5889 return !legitimate_constant_p (x);
5892 /* Determine if a given RTX is a valid constant address. */
5894 bool
5895 constant_address_p (rtx x)
5897 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5900 /* Nonzero if the constant value X is a legitimate general operand
5901 when generating PIC code. It is given that flag_pic is on and
5902 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5904 bool
5905 legitimate_pic_operand_p (rtx x)
5907 rtx inner;
5909 switch (GET_CODE (x))
5911 case CONST:
5912 inner = XEXP (x, 0);
5914 /* Only some unspecs are valid as "constants". */
5915 if (GET_CODE (inner) == UNSPEC)
5916 switch (XINT (inner, 1))
5918 case UNSPEC_TPOFF:
5919 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5920 default:
5921 return false;
5923 /* FALLTHRU */
5925 case SYMBOL_REF:
5926 case LABEL_REF:
5927 return legitimate_pic_address_disp_p (x);
5929 default:
5930 return true;
5934 /* Determine if a given CONST RTX is a valid memory displacement
5935 in PIC mode. */
5938 legitimate_pic_address_disp_p (rtx disp)
5940 bool saw_plus;
5942 /* In 64bit mode we can allow direct addresses of symbols and labels
5943 when they are not dynamic symbols. */
5944 if (TARGET_64BIT)
5946 /* TLS references should always be enclosed in UNSPEC. */
5947 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5948 return 0;
5949 if (GET_CODE (disp) == SYMBOL_REF
5950 && ix86_cmodel == CM_SMALL_PIC
5951 && SYMBOL_REF_LOCAL_P (disp))
5952 return 1;
5953 if (GET_CODE (disp) == LABEL_REF)
5954 return 1;
5955 if (GET_CODE (disp) == CONST
5956 && GET_CODE (XEXP (disp, 0)) == PLUS)
5958 rtx op0 = XEXP (XEXP (disp, 0), 0);
5959 rtx op1 = XEXP (XEXP (disp, 0), 1);
5961 /* TLS references should always be enclosed in UNSPEC. */
5962 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5963 return 0;
5964 if (((GET_CODE (op0) == SYMBOL_REF
5965 && ix86_cmodel == CM_SMALL_PIC
5966 && SYMBOL_REF_LOCAL_P (op0))
5967 || GET_CODE (op0) == LABEL_REF)
5968 && GET_CODE (op1) == CONST_INT
5969 && INTVAL (op1) < 16*1024*1024
5970 && INTVAL (op1) >= -16*1024*1024)
5971 return 1;
5974 if (GET_CODE (disp) != CONST)
5975 return 0;
5976 disp = XEXP (disp, 0);
5978 if (TARGET_64BIT)
5980 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
5981 of GOT tables. We should not need these anyway. */
5982 if (GET_CODE (disp) != UNSPEC
5983 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5984 return 0;
5986 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5987 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5988 return 0;
5989 return 1;
5992 saw_plus = false;
5993 if (GET_CODE (disp) == PLUS)
5995 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5996 return 0;
5997 disp = XEXP (disp, 0);
5998 saw_plus = true;
6001 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6002 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6004 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6005 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6006 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6008 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6009 if (! strcmp (sym_name, "<pic base>"))
6010 return 1;
6014 if (GET_CODE (disp) != UNSPEC)
6015 return 0;
6017 switch (XINT (disp, 1))
6019 case UNSPEC_GOT:
6020 if (saw_plus)
6021 return false;
6022 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6023 case UNSPEC_GOTOFF:
6024 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6025 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6026 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6027 return false;
6028 case UNSPEC_GOTTPOFF:
6029 case UNSPEC_GOTNTPOFF:
6030 case UNSPEC_INDNTPOFF:
6031 if (saw_plus)
6032 return false;
6033 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6034 case UNSPEC_NTPOFF:
6035 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6036 case UNSPEC_DTPOFF:
6037 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6040 return 0;
6043 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6044 memory address for an instruction. The MODE argument is the machine mode
6045 for the MEM expression that wants to use this address.
6047 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6048 convert common non-canonical forms to canonical form so that they will
6049 be recognized. */
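/* For example, the scale factor must be 1, 2, 4 or 8 and a scaled index
   must be expressed as a MULT; the equivalent ASHIFT form is rejected
   here (ix86_decompose_address returns -1 for it) and is only tolerated
   when computing lea lengths. */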
6052 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6054 struct ix86_address parts;
6055 rtx base, index, disp;
6056 HOST_WIDE_INT scale;
6057 const char *reason = NULL;
6058 rtx reason_rtx = NULL_RTX;
6060 if (TARGET_DEBUG_ADDR)
6062 fprintf (stderr,
6063 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6064 GET_MODE_NAME (mode), strict);
6065 debug_rtx (addr);
6068 if (ix86_decompose_address (addr, &parts) <= 0)
6070 reason = "decomposition failed";
6071 goto report_error;
6074 base = parts.base;
6075 index = parts.index;
6076 disp = parts.disp;
6077 scale = parts.scale;
6079 /* Validate base register.
6081 Don't allow SUBREGs here; they can lead to spill failures when the base
6082 is one word out of a two-word structure, which is represented internally
6083 as a DImode int. */
6085 if (base)
6087 reason_rtx = base;
6089 if (GET_CODE (base) != REG)
6091 reason = "base is not a register";
6092 goto report_error;
6095 if (GET_MODE (base) != Pmode)
6097 reason = "base is not in Pmode";
6098 goto report_error;
6101 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6102 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6104 reason = "base is not valid";
6105 goto report_error;
6109 /* Validate index register.
6111 Don't allow SUBREGs here; they can lead to spill failures when the index
6112 is one word out of a two-word structure, which is represented internally
6113 as a DImode int. */
6115 if (index)
6117 reason_rtx = index;
6119 if (GET_CODE (index) != REG)
6121 reason = "index is not a register";
6122 goto report_error;
6125 if (GET_MODE (index) != Pmode)
6127 reason = "index is not in Pmode";
6128 goto report_error;
6131 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6132 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6134 reason = "index is not valid";
6135 goto report_error;
6139 /* Validate scale factor. */
6140 if (scale != 1)
6142 reason_rtx = GEN_INT (scale);
6143 if (!index)
6145 reason = "scale without index";
6146 goto report_error;
6149 if (scale != 2 && scale != 4 && scale != 8)
6151 reason = "scale is not a valid multiplier";
6152 goto report_error;
6156 /* Validate displacement. */
6157 if (disp)
6159 reason_rtx = disp;
6161 if (GET_CODE (disp) == CONST
6162 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6163 switch (XINT (XEXP (disp, 0), 1))
6165 case UNSPEC_GOT:
6166 case UNSPEC_GOTOFF:
6167 case UNSPEC_GOTPCREL:
6168 if (!flag_pic)
6169 abort ();
6170 goto is_legitimate_pic;
6172 case UNSPEC_GOTTPOFF:
6173 case UNSPEC_GOTNTPOFF:
6174 case UNSPEC_INDNTPOFF:
6175 case UNSPEC_NTPOFF:
6176 case UNSPEC_DTPOFF:
6177 break;
6179 default:
6180 reason = "invalid address unspec";
6181 goto report_error;
6184 else if (flag_pic && (SYMBOLIC_CONST (disp)
6185 #if TARGET_MACHO
6186 && !machopic_operand_p (disp)
6187 #endif
6190 is_legitimate_pic:
6191 if (TARGET_64BIT && (index || base))
6193 /* foo@dtpoff(%rX) is ok. */
6194 if (GET_CODE (disp) != CONST
6195 || GET_CODE (XEXP (disp, 0)) != PLUS
6196 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6197 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6198 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6199 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6201 reason = "non-constant pic memory reference";
6202 goto report_error;
6205 else if (! legitimate_pic_address_disp_p (disp))
6207 reason = "displacement is an invalid pic construct";
6208 goto report_error;
6211 /* This code used to verify that a symbolic pic displacement
6212 includes the pic_offset_table_rtx register.
6214 While this is a good idea, unfortunately these constructs may
6215 be created by the "adds using lea" optimization for incorrect
6216 code like:
6218 int a;
6219 int foo(int i)
6221 return *(&a+i);
6224 This code is nonsensical, but it results in addressing the
6225 GOT table with a pic_offset_table_rtx base. We can't
6226 just refuse it easily, since it gets matched by the
6227 "addsi3" pattern, which later gets split to lea in the
6228 case where the output register differs from the input. While this
6229 could be handled by a separate addsi pattern for this case
6230 that never results in lea, disabling this test seems to be the
6231 easier and correct fix for the crash. */
6233 else if (GET_CODE (disp) != LABEL_REF
6234 && GET_CODE (disp) != CONST_INT
6235 && (GET_CODE (disp) != CONST
6236 || !legitimate_constant_p (disp))
6237 && (GET_CODE (disp) != SYMBOL_REF
6238 || !legitimate_constant_p (disp)))
6240 reason = "displacement is not constant";
6241 goto report_error;
6243 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6245 reason = "displacement is out of range";
6246 goto report_error;
6250 /* Everything looks valid. */
6251 if (TARGET_DEBUG_ADDR)
6252 fprintf (stderr, "Success.\n");
6253 return TRUE;
6255 report_error:
6256 if (TARGET_DEBUG_ADDR)
6258 fprintf (stderr, "Error: %s\n", reason);
6259 debug_rtx (reason_rtx);
6261 return FALSE;
6264 /* Return a unique alias set for the GOT. */
6266 static HOST_WIDE_INT
6267 ix86_GOT_alias_set (void)
6269 static HOST_WIDE_INT set = -1;
6270 if (set == -1)
6271 set = new_alias_set ();
6272 return set;
6275 /* Return a legitimate reference for ORIG (an address) using the
6276 register REG. If REG is 0, a new pseudo is generated.
6278 There are two types of references that must be handled:
6280 1. Global data references must load the address from the GOT, via
6281 the PIC reg. An insn is emitted to do this load, and the reg is
6282 returned.
6284 2. Static data references, constant pool addresses, and code labels
6285 compute the address as an offset from the GOT, whose base is in
6286 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6287 differentiate them from global data objects. The returned
6288 address is the PIC reg + an unspec constant.
6290 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6291 reg also appears in the address. */
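/* For example, with -fpic on ia32 a local symbol "foo" becomes
   (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF))),
   i.e. foo@GOTOFF(%ebx), while a global symbol is loaded from its GOT
   slot, foo@GOT(%ebx), into a register. */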
6293 static rtx
6294 legitimize_pic_address (rtx orig, rtx reg)
6296 rtx addr = orig;
6297 rtx new = orig;
6298 rtx base;
6300 #if TARGET_MACHO
6301 if (reg == 0)
6302 reg = gen_reg_rtx (Pmode);
6303 /* Use the generic Mach-O PIC machinery. */
6304 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6305 #endif
6307 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6308 new = addr;
6309 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6311 /* This symbol may be referenced via a displacement from the PIC
6312 base address (@GOTOFF). */
6314 if (reload_in_progress)
6315 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6316 if (GET_CODE (addr) == CONST)
6317 addr = XEXP (addr, 0);
6318 if (GET_CODE (addr) == PLUS)
6320 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6321 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6323 else
6324 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6325 new = gen_rtx_CONST (Pmode, new);
6326 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6328 if (reg != 0)
6330 emit_move_insn (reg, new);
6331 new = reg;
6334 else if (GET_CODE (addr) == SYMBOL_REF)
6336 if (TARGET_64BIT)
6338 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6339 new = gen_rtx_CONST (Pmode, new);
6340 new = gen_rtx_MEM (Pmode, new);
6341 RTX_UNCHANGING_P (new) = 1;
6342 set_mem_alias_set (new, ix86_GOT_alias_set ());
6344 if (reg == 0)
6345 reg = gen_reg_rtx (Pmode);
6346 /* Use gen_movsi directly; otherwise the address is loaded
6347 into a register for CSE. We don't want to CSE these addresses;
6348 instead we CSE addresses from the GOT table, so skip this. */
6349 emit_insn (gen_movsi (reg, new));
6350 new = reg;
6352 else
6354 /* This symbol must be referenced via a load from the
6355 Global Offset Table (@GOT). */
6357 if (reload_in_progress)
6358 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6359 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6360 new = gen_rtx_CONST (Pmode, new);
6361 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6362 new = gen_rtx_MEM (Pmode, new);
6363 RTX_UNCHANGING_P (new) = 1;
6364 set_mem_alias_set (new, ix86_GOT_alias_set ());
6366 if (reg == 0)
6367 reg = gen_reg_rtx (Pmode);
6368 emit_move_insn (reg, new);
6369 new = reg;
6372 else
6374 if (GET_CODE (addr) == CONST)
6376 addr = XEXP (addr, 0);
6378 /* We must match stuff we generated before. Assume the only
6379 unspecs that can get here are ours. Not that we could do
6380 anything with them anyway.... */
6381 if (GET_CODE (addr) == UNSPEC
6382 || (GET_CODE (addr) == PLUS
6383 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6384 return orig;
6385 if (GET_CODE (addr) != PLUS)
6386 abort ();
6388 if (GET_CODE (addr) == PLUS)
6390 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6392 /* Check first to see if this is a constant offset from a @GOTOFF
6393 symbol reference. */
6394 if (local_symbolic_operand (op0, Pmode)
6395 && GET_CODE (op1) == CONST_INT)
6397 if (!TARGET_64BIT)
6399 if (reload_in_progress)
6400 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6401 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6402 UNSPEC_GOTOFF);
6403 new = gen_rtx_PLUS (Pmode, new, op1);
6404 new = gen_rtx_CONST (Pmode, new);
6405 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6407 if (reg != 0)
6409 emit_move_insn (reg, new);
6410 new = reg;
6413 else
6415 if (INTVAL (op1) < -16*1024*1024
6416 || INTVAL (op1) >= 16*1024*1024)
6417 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6420 else
6422 base = legitimize_pic_address (XEXP (addr, 0), reg);
6423 new = legitimize_pic_address (XEXP (addr, 1),
6424 base == reg ? NULL_RTX : reg);
6426 if (GET_CODE (new) == CONST_INT)
6427 new = plus_constant (base, INTVAL (new));
6428 else
6430 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6432 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6433 new = XEXP (new, 1);
6435 new = gen_rtx_PLUS (Pmode, base, new);
6440 return new;
6443 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6445 static rtx
6446 get_thread_pointer (int to_reg)
6448 rtx tp, reg, insn;
6450 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6451 if (!to_reg)
6452 return tp;
6454 reg = gen_reg_rtx (Pmode);
6455 insn = gen_rtx_SET (VOIDmode, reg, tp);
6456 insn = emit_insn (insn);
6458 return reg;
6461 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6462 false if we expect this to be used for a memory address and true if
6463 we expect to load the address into a register. */
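/* The cases below correspond to the four TLS access models: global
   dynamic and local dynamic obtain the address through a call (the
   tls_global_dynamic_* / tls_local_dynamic_base_* patterns), while
   initial exec and local exec combine an offset -- loaded from the GOT
   or known at link time -- with the thread pointer (UNSPEC_TP). */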
6465 static rtx
6466 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6468 rtx dest, base, off, pic;
6469 int type;
6471 switch (model)
6473 case TLS_MODEL_GLOBAL_DYNAMIC:
6474 dest = gen_reg_rtx (Pmode);
6475 if (TARGET_64BIT)
6477 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6479 start_sequence ();
6480 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6481 insns = get_insns ();
6482 end_sequence ();
6484 emit_libcall_block (insns, dest, rax, x);
6486 else
6487 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6488 break;
6490 case TLS_MODEL_LOCAL_DYNAMIC:
6491 base = gen_reg_rtx (Pmode);
6492 if (TARGET_64BIT)
6494 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6496 start_sequence ();
6497 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6498 insns = get_insns ();
6499 end_sequence ();
6501 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6502 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6503 emit_libcall_block (insns, base, rax, note);
6505 else
6506 emit_insn (gen_tls_local_dynamic_base_32 (base));
6508 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6509 off = gen_rtx_CONST (Pmode, off);
6511 return gen_rtx_PLUS (Pmode, base, off);
6513 case TLS_MODEL_INITIAL_EXEC:
6514 if (TARGET_64BIT)
6516 pic = NULL;
6517 type = UNSPEC_GOTNTPOFF;
6519 else if (flag_pic)
6521 if (reload_in_progress)
6522 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6523 pic = pic_offset_table_rtx;
6524 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6526 else if (!TARGET_GNU_TLS)
6528 pic = gen_reg_rtx (Pmode);
6529 emit_insn (gen_set_got (pic));
6530 type = UNSPEC_GOTTPOFF;
6532 else
6534 pic = NULL;
6535 type = UNSPEC_INDNTPOFF;
6538 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6539 off = gen_rtx_CONST (Pmode, off);
6540 if (pic)
6541 off = gen_rtx_PLUS (Pmode, pic, off);
6542 off = gen_rtx_MEM (Pmode, off);
6543 RTX_UNCHANGING_P (off) = 1;
6544 set_mem_alias_set (off, ix86_GOT_alias_set ());
6546 if (TARGET_64BIT || TARGET_GNU_TLS)
6548 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6549 off = force_reg (Pmode, off);
6550 return gen_rtx_PLUS (Pmode, base, off);
6552 else
6554 base = get_thread_pointer (true);
6555 dest = gen_reg_rtx (Pmode);
6556 emit_insn (gen_subsi3 (dest, base, off));
6558 break;
6560 case TLS_MODEL_LOCAL_EXEC:
6561 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6562 (TARGET_64BIT || TARGET_GNU_TLS)
6563 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6564 off = gen_rtx_CONST (Pmode, off);
6566 if (TARGET_64BIT || TARGET_GNU_TLS)
6568 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6569 return gen_rtx_PLUS (Pmode, base, off);
6571 else
6573 base = get_thread_pointer (true);
6574 dest = gen_reg_rtx (Pmode);
6575 emit_insn (gen_subsi3 (dest, base, off));
6577 break;
6579 default:
6580 abort ();
6583 return dest;
6586 /* Try machine-dependent ways of modifying an illegitimate address
6587 to be legitimate. If we find one, return the new, valid address.
6588 This macro is used in only one place: `memory_address' in explow.c.
6590 OLDX is the address as it was before break_out_memory_refs was called.
6591 In some cases it is useful to look at this to decide what needs to be done.
6593 MODE and WIN are passed so that this macro can use
6594 GO_IF_LEGITIMATE_ADDRESS.
6596 It is always safe for this macro to do nothing. It exists to recognize
6597 opportunities to optimize the output.
6599 For the 80386, we handle X+REG by loading X into a register R and
6600 using R+REG. R will go in a general reg and indexing will be used.
6601 However, if REG is a broken-out memory address or multiplication,
6602 nothing needs to be done because REG can certainly go in a general reg.
6604 When -fpic is used, special handling is needed for symbolic references.
6605 See comments by legitimize_pic_address in i386.c for details. */
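/* For instance, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   below as (plus (mult (reg) (const_int 4)) (reg)) so that it matches
   the base + index*scale addressing form. */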
6608 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6610 int changed = 0;
6611 unsigned log;
6613 if (TARGET_DEBUG_ADDR)
6615 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6616 GET_MODE_NAME (mode));
6617 debug_rtx (x);
6620 log = tls_symbolic_operand (x, mode);
6621 if (log)
6622 return legitimize_tls_address (x, log, false);
6623 if (GET_CODE (x) == CONST
6624 && GET_CODE (XEXP (x, 0)) == PLUS
6625 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6627 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6628 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6631 if (flag_pic && SYMBOLIC_CONST (x))
6632 return legitimize_pic_address (x, 0);
6634 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6635 if (GET_CODE (x) == ASHIFT
6636 && GET_CODE (XEXP (x, 1)) == CONST_INT
6637 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6639 changed = 1;
6640 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6641 GEN_INT (1 << log));
6644 if (GET_CODE (x) == PLUS)
6646 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6648 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6649 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6650 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6652 changed = 1;
6653 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6654 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6655 GEN_INT (1 << log));
6658 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6659 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6660 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6662 changed = 1;
6663 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6664 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6665 GEN_INT (1 << log));
6668 /* Put multiply first if it isn't already. */
6669 if (GET_CODE (XEXP (x, 1)) == MULT)
6671 rtx tmp = XEXP (x, 0);
6672 XEXP (x, 0) = XEXP (x, 1);
6673 XEXP (x, 1) = tmp;
6674 changed = 1;
6677 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6678 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6679 created by virtual register instantiation, register elimination, and
6680 similar optimizations. */
6681 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6683 changed = 1;
6684 x = gen_rtx_PLUS (Pmode,
6685 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6686 XEXP (XEXP (x, 1), 0)),
6687 XEXP (XEXP (x, 1), 1));
6690 /* Canonicalize
6691 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6692 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6693 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6694 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6695 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6696 && CONSTANT_P (XEXP (x, 1)))
6698 rtx constant;
6699 rtx other = NULL_RTX;
6701 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6703 constant = XEXP (x, 1);
6704 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6706 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6708 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6709 other = XEXP (x, 1);
6711 else
6712 constant = 0;
6714 if (constant)
6716 changed = 1;
6717 x = gen_rtx_PLUS (Pmode,
6718 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6719 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6720 plus_constant (other, INTVAL (constant)));
6724 if (changed && legitimate_address_p (mode, x, FALSE))
6725 return x;
6727 if (GET_CODE (XEXP (x, 0)) == MULT)
6729 changed = 1;
6730 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6733 if (GET_CODE (XEXP (x, 1)) == MULT)
6735 changed = 1;
6736 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6739 if (changed
6740 && GET_CODE (XEXP (x, 1)) == REG
6741 && GET_CODE (XEXP (x, 0)) == REG)
6742 return x;
6744 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6746 changed = 1;
6747 x = legitimize_pic_address (x, 0);
6750 if (changed && legitimate_address_p (mode, x, FALSE))
6751 return x;
6753 if (GET_CODE (XEXP (x, 0)) == REG)
6755 rtx temp = gen_reg_rtx (Pmode);
6756 rtx val = force_operand (XEXP (x, 1), temp);
6757 if (val != temp)
6758 emit_move_insn (temp, val);
6760 XEXP (x, 1) = temp;
6761 return x;
6764 else if (GET_CODE (XEXP (x, 1)) == REG)
6766 rtx temp = gen_reg_rtx (Pmode);
6767 rtx val = force_operand (XEXP (x, 0), temp);
6768 if (val != temp)
6769 emit_move_insn (temp, val);
6771 XEXP (x, 0) = temp;
6772 return x;
6776 return x;
6779 /* Print an integer constant expression in assembler syntax. Addition
6780 and subtraction are the only arithmetic that may appear in these
6781 expressions. FILE is the stdio stream to write to, X is the rtx, and
6782 CODE is the operand print code from the output string. */
6784 static void
6785 output_pic_addr_const (FILE *file, rtx x, int code)
6787 char buf[256];
6789 switch (GET_CODE (x))
6791 case PC:
6792 if (flag_pic)
6793 putc ('.', file);
6794 else
6795 abort ();
6796 break;
6798 case SYMBOL_REF:
6799 /* Mark the decl as referenced so that cgraph will output the function. */
6800 if (SYMBOL_REF_DECL (x))
6801 mark_decl_referenced (SYMBOL_REF_DECL (x));
6803 assemble_name (file, XSTR (x, 0));
6804 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6805 fputs ("@PLT", file);
6806 break;
6808 case LABEL_REF:
6809 x = XEXP (x, 0);
6810 /* FALLTHRU */
6811 case CODE_LABEL:
6812 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6813 assemble_name (asm_out_file, buf);
6814 break;
6816 case CONST_INT:
6817 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6818 break;
6820 case CONST:
6821 /* This used to output parentheses around the expression,
6822 but that does not work on the 386 (either ATT or BSD assembler). */
6823 output_pic_addr_const (file, XEXP (x, 0), code);
6824 break;
6826 case CONST_DOUBLE:
6827 if (GET_MODE (x) == VOIDmode)
6829 /* We can use %d if the number is <32 bits and positive. */
6830 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6831 fprintf (file, "0x%lx%08lx",
6832 (unsigned long) CONST_DOUBLE_HIGH (x),
6833 (unsigned long) CONST_DOUBLE_LOW (x));
6834 else
6835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6837 else
6838 /* We can't handle floating point constants;
6839 PRINT_OPERAND must handle them. */
6840 output_operand_lossage ("floating constant misused");
6841 break;
6843 case PLUS:
6844 /* Some assemblers need integer constants to appear first. */
6845 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6847 output_pic_addr_const (file, XEXP (x, 0), code);
6848 putc ('+', file);
6849 output_pic_addr_const (file, XEXP (x, 1), code);
6851 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6853 output_pic_addr_const (file, XEXP (x, 1), code);
6854 putc ('+', file);
6855 output_pic_addr_const (file, XEXP (x, 0), code);
6857 else
6858 abort ();
6859 break;
6861 case MINUS:
6862 if (!TARGET_MACHO)
6863 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6864 output_pic_addr_const (file, XEXP (x, 0), code);
6865 putc ('-', file);
6866 output_pic_addr_const (file, XEXP (x, 1), code);
6867 if (!TARGET_MACHO)
6868 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6869 break;
6871 case UNSPEC:
6872 if (XVECLEN (x, 0) != 1)
6873 abort ();
6874 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6875 switch (XINT (x, 1))
6877 case UNSPEC_GOT:
6878 fputs ("@GOT", file);
6879 break;
6880 case UNSPEC_GOTOFF:
6881 fputs ("@GOTOFF", file);
6882 break;
6883 case UNSPEC_GOTPCREL:
6884 fputs ("@GOTPCREL(%rip)", file);
6885 break;
6886 case UNSPEC_GOTTPOFF:
6887 /* FIXME: This might be @TPOFF in Sun ld too. */
6888 fputs ("@GOTTPOFF", file);
6889 break;
6890 case UNSPEC_TPOFF:
6891 fputs ("@TPOFF", file);
6892 break;
6893 case UNSPEC_NTPOFF:
6894 if (TARGET_64BIT)
6895 fputs ("@TPOFF", file);
6896 else
6897 fputs ("@NTPOFF", file);
6898 break;
6899 case UNSPEC_DTPOFF:
6900 fputs ("@DTPOFF", file);
6901 break;
6902 case UNSPEC_GOTNTPOFF:
6903 if (TARGET_64BIT)
6904 fputs ("@GOTTPOFF(%rip)", file);
6905 else
6906 fputs ("@GOTNTPOFF", file);
6907 break;
6908 case UNSPEC_INDNTPOFF:
6909 fputs ("@INDNTPOFF", file);
6910 break;
6911 default:
6912 output_operand_lossage ("invalid UNSPEC as operand");
6913 break;
6915 break;
6917 default:
6918 output_operand_lossage ("invalid expression as operand");
6922 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6923 We need to handle our special PIC relocations. */
6925 void
6926 i386_dwarf_output_addr_const (FILE *file, rtx x)
6928 #ifdef ASM_QUAD
6929 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6930 #else
6931 if (TARGET_64BIT)
6932 abort ();
6933 fprintf (file, "%s", ASM_LONG);
6934 #endif
6935 if (flag_pic)
6936 output_pic_addr_const (file, x, '\0');
6937 else
6938 output_addr_const (file, x);
6939 fputc ('\n', file);
6942 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6943 We need to emit DTP-relative relocations. */
6945 void
6946 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6948 fputs (ASM_LONG, file);
6949 output_addr_const (file, x);
6950 fputs ("@DTPOFF", file);
6951 switch (size)
6953 case 4:
6954 break;
6955 case 8:
6956 fputs (", 0", file);
6957 break;
6958 default:
6959 abort ();
6963 /* In the name of slightly smaller debug output, and to cater to
6964 general assembler lossage, recognize PIC+GOTOFF and turn it back
6965 into a direct symbol reference. */
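/* E.g. (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into plain "foo" so that debug output sees the symbol
   itself rather than the PIC arithmetic. */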
6967 static rtx
6968 ix86_delegitimize_address (rtx orig_x)
6970 rtx x = orig_x, y;
6972 if (GET_CODE (x) == MEM)
6973 x = XEXP (x, 0);
6975 if (TARGET_64BIT)
6977 if (GET_CODE (x) != CONST
6978 || GET_CODE (XEXP (x, 0)) != UNSPEC
6979 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6980 || GET_CODE (orig_x) != MEM)
6981 return orig_x;
6982 return XVECEXP (XEXP (x, 0), 0, 0);
6985 if (GET_CODE (x) != PLUS
6986 || GET_CODE (XEXP (x, 1)) != CONST)
6987 return orig_x;
6989 if (GET_CODE (XEXP (x, 0)) == REG
6990 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6991 /* %ebx + GOT/GOTOFF */
6992 y = NULL;
6993 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6995 /* %ebx + %reg * scale + GOT/GOTOFF */
6996 y = XEXP (x, 0);
6997 if (GET_CODE (XEXP (y, 0)) == REG
6998 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6999 y = XEXP (y, 1);
7000 else if (GET_CODE (XEXP (y, 1)) == REG
7001 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7002 y = XEXP (y, 0);
7003 else
7004 return orig_x;
7005 if (GET_CODE (y) != REG
7006 && GET_CODE (y) != MULT
7007 && GET_CODE (y) != ASHIFT)
7008 return orig_x;
7010 else
7011 return orig_x;
7013 x = XEXP (XEXP (x, 1), 0);
7014 if (GET_CODE (x) == UNSPEC
7015 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7016 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7018 if (y)
7019 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7020 return XVECEXP (x, 0, 0);
7023 if (GET_CODE (x) == PLUS
7024 && GET_CODE (XEXP (x, 0)) == UNSPEC
7025 && GET_CODE (XEXP (x, 1)) == CONST_INT
7026 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7027 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7028 && GET_CODE (orig_x) != MEM)))
7030 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7031 if (y)
7032 return gen_rtx_PLUS (Pmode, y, x);
7033 return x;
7036 return orig_x;
7039 static void
7040 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7041 int fp, FILE *file)
7043 const char *suffix;
7045 if (mode == CCFPmode || mode == CCFPUmode)
7047 enum rtx_code second_code, bypass_code;
7048 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7049 if (bypass_code != NIL || second_code != NIL)
7050 abort ();
7051 code = ix86_fp_compare_code_to_integer (code);
7052 mode = CCmode;
7054 if (reverse)
7055 code = reverse_condition (code);
7057 switch (code)
7059 case EQ:
7060 suffix = "e";
7061 break;
7062 case NE:
7063 suffix = "ne";
7064 break;
7065 case GT:
7066 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7067 abort ();
7068 suffix = "g";
7069 break;
7070 case GTU:
7071 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7072 Those same assemblers have the same but opposite lossage on cmov. */
7073 if (mode != CCmode)
7074 abort ();
7075 suffix = fp ? "nbe" : "a";
7076 break;
7077 case LT:
7078 if (mode == CCNOmode || mode == CCGOCmode)
7079 suffix = "s";
7080 else if (mode == CCmode || mode == CCGCmode)
7081 suffix = "l";
7082 else
7083 abort ();
7084 break;
7085 case LTU:
7086 if (mode != CCmode)
7087 abort ();
7088 suffix = "b";
7089 break;
7090 case GE:
7091 if (mode == CCNOmode || mode == CCGOCmode)
7092 suffix = "ns";
7093 else if (mode == CCmode || mode == CCGCmode)
7094 suffix = "ge";
7095 else
7096 abort ();
7097 break;
7098 case GEU:
7099 /* ??? As above. */
7100 if (mode != CCmode)
7101 abort ();
7102 suffix = fp ? "nb" : "ae";
7103 break;
7104 case LE:
7105 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7106 abort ();
7107 suffix = "le";
7108 break;
7109 case LEU:
7110 if (mode != CCmode)
7111 abort ();
7112 suffix = "be";
7113 break;
7114 case UNORDERED:
7115 suffix = fp ? "u" : "p";
7116 break;
7117 case ORDERED:
7118 suffix = fp ? "nu" : "np";
7119 break;
7120 default:
7121 abort ();
7123 fputs (suffix, file);
7126 /* Print the name of register X to FILE based on its machine mode and number.
7127 If CODE is 'w', pretend the mode is HImode.
7128 If CODE is 'b', pretend the mode is QImode.
7129 If CODE is 'k', pretend the mode is SImode.
7130 If CODE is 'q', pretend the mode is DImode.
7131 If CODE is 'h', pretend the reg is the `high' byte register.
7132 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
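/* So for hard register 0, code 'b' prints %al, 'h' prints %ah, 'w'
   prints %ax, 'k' prints %eax, and 'q' prints %rax in 64-bit mode
   (AT&T dialect). */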
7134 void
7135 print_reg (rtx x, int code, FILE *file)
7137 if (REGNO (x) == ARG_POINTER_REGNUM
7138 || REGNO (x) == FRAME_POINTER_REGNUM
7139 || REGNO (x) == FLAGS_REG
7140 || REGNO (x) == FPSR_REG)
7141 abort ();
7143 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7144 putc ('%', file);
7146 if (code == 'w' || MMX_REG_P (x))
7147 code = 2;
7148 else if (code == 'b')
7149 code = 1;
7150 else if (code == 'k')
7151 code = 4;
7152 else if (code == 'q')
7153 code = 8;
7154 else if (code == 'y')
7155 code = 3;
7156 else if (code == 'h')
7157 code = 0;
7158 else
7159 code = GET_MODE_SIZE (GET_MODE (x));
7161 /* Irritatingly, the AMD extended registers use a different naming convention
7162 from the normal registers. */
7163 if (REX_INT_REG_P (x))
7165 if (!TARGET_64BIT)
7166 abort ();
7167 switch (code)
7169 case 0:
7170 error ("extended registers have no high halves");
7171 break;
7172 case 1:
7173 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7174 break;
7175 case 2:
7176 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7177 break;
7178 case 4:
7179 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7180 break;
7181 case 8:
7182 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7183 break;
7184 default:
7185 error ("unsupported operand size for extended register");
7186 break;
7188 return;
7190 switch (code)
7192 case 3:
7193 if (STACK_TOP_P (x))
7195 fputs ("st(0)", file);
7196 break;
7198 /* FALLTHRU */
7199 case 8:
7200 case 4:
7201 case 12:
7202 if (! ANY_FP_REG_P (x))
7203 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7204 /* FALLTHRU */
7205 case 16:
7206 case 2:
7207 normal:
7208 fputs (hi_reg_name[REGNO (x)], file);
7209 break;
7210 case 1:
7211 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7212 goto normal;
7213 fputs (qi_reg_name[REGNO (x)], file);
7214 break;
7215 case 0:
7216 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7217 goto normal;
7218 fputs (qi_high_reg_name[REGNO (x)], file);
7219 break;
7220 default:
7221 abort ();
7225 /* Locate some local-dynamic symbol still in use by this function
7226 so that we can print its name in some tls_local_dynamic_base
7227 pattern. */
7229 static const char *
7230 get_some_local_dynamic_name (void)
7232 rtx insn;
7234 if (cfun->machine->some_ld_name)
7235 return cfun->machine->some_ld_name;
7237 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7238 if (INSN_P (insn)
7239 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7240 return cfun->machine->some_ld_name;
7242 abort ();
7245 static int
7246 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7248 rtx x = *px;
7250 if (GET_CODE (x) == SYMBOL_REF
7251 && local_dynamic_symbolic_operand (x, Pmode))
7253 cfun->machine->some_ld_name = XSTR (x, 0);
7254 return 1;
7257 return 0;
7260 /* Meaning of CODE:
7261 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7262 C -- print opcode suffix for set/cmov insn.
7263 c -- like C, but print reversed condition
7264 F,f -- likewise, but for floating-point.
7265 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7266 otherwise nothing
7267 R -- print the prefix for register names.
7268 z -- print the opcode suffix for the size of the current operand.
7269 * -- print a star (in certain assembler syntax)
7270 A -- print an absolute memory reference.
7271 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7272 s -- print a shift double count, followed by the assembler's argument
7273 delimiter.
7274 b -- print the QImode name of the register for the indicated operand.
7275 %b0 would print %al if operands[0] is reg 0.
7276 w -- likewise, print the HImode name of the register.
7277 k -- likewise, print the SImode name of the register.
7278 q -- likewise, print the DImode name of the register.
7279 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7280 y -- print "st(0)" instead of "st" as a register.
7281 D -- print condition for SSE cmp instruction.
7282 P -- if PIC, print an @PLT suffix.
7283 X -- don't print any sort of PIC '@' suffix for a symbol.
7284 & -- print some in-use local-dynamic symbol name.
7287 void
7288 print_operand (FILE *file, rtx x, int code)
7290 if (code)
7292 switch (code)
7294 case '*':
7295 if (ASSEMBLER_DIALECT == ASM_ATT)
7296 putc ('*', file);
7297 return;
7299 case '&':
7300 assemble_name (file, get_some_local_dynamic_name ());
7301 return;
7303 case 'A':
7304 if (ASSEMBLER_DIALECT == ASM_ATT)
7305 putc ('*', file);
7306 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7308 /* Intel syntax. For absolute addresses, registers should not
7309 be surrounded by brackets. */
7310 if (GET_CODE (x) != REG)
7312 putc ('[', file);
7313 PRINT_OPERAND (file, x, 0);
7314 putc (']', file);
7315 return;
7318 else
7319 abort ();
7321 PRINT_OPERAND (file, x, 0);
7322 return;
7325 case 'L':
7326 if (ASSEMBLER_DIALECT == ASM_ATT)
7327 putc ('l', file);
7328 return;
7330 case 'W':
7331 if (ASSEMBLER_DIALECT == ASM_ATT)
7332 putc ('w', file);
7333 return;
7335 case 'B':
7336 if (ASSEMBLER_DIALECT == ASM_ATT)
7337 putc ('b', file);
7338 return;
7340 case 'Q':
7341 if (ASSEMBLER_DIALECT == ASM_ATT)
7342 putc ('l', file);
7343 return;
7345 case 'S':
7346 if (ASSEMBLER_DIALECT == ASM_ATT)
7347 putc ('s', file);
7348 return;
7350 case 'T':
7351 if (ASSEMBLER_DIALECT == ASM_ATT)
7352 putc ('t', file);
7353 return;
7355 case 'z':
7356 /* 387 opcodes don't get size suffixes if the operands are
7357 registers. */
7358 if (STACK_REG_P (x))
7359 return;
7361 /* Likewise if using Intel opcodes. */
7362 if (ASSEMBLER_DIALECT == ASM_INTEL)
7363 return;
7365 /* Derive the opcode suffix from the size of the operand. */
7366 switch (GET_MODE_SIZE (GET_MODE (x)))
7368 case 2:
7369 #ifdef HAVE_GAS_FILDS_FISTS
7370 putc ('s', file);
7371 #endif
7372 return;
7374 case 4:
7375 if (GET_MODE (x) == SFmode)
7377 putc ('s', file);
7378 return;
7380 else
7381 putc ('l', file);
7382 return;
7384 case 12:
7385 case 16:
7386 putc ('t', file);
7387 return;
7389 case 8:
7390 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7392 #ifdef GAS_MNEMONICS
7393 putc ('q', file);
7394 #else
7395 putc ('l', file);
7396 putc ('l', file);
7397 #endif
7399 else
7400 putc ('l', file);
7401 return;
7403 default:
7404 abort ();
7407 case 'b':
7408 case 'w':
7409 case 'k':
7410 case 'q':
7411 case 'h':
7412 case 'y':
7413 case 'X':
7414 case 'P':
7415 break;
7417 case 's':
7418 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7420 PRINT_OPERAND (file, x, 0);
7421 putc (',', file);
7423 return;
7425 case 'D':
7426 /* Little bit of braindamage here. The SSE compare instructions
7427 use completely different names for the comparisons than the
7428 fp conditional moves do. */
7429 switch (GET_CODE (x))
7431 case EQ:
7432 case UNEQ:
7433 fputs ("eq", file);
7434 break;
7435 case LT:
7436 case UNLT:
7437 fputs ("lt", file);
7438 break;
7439 case LE:
7440 case UNLE:
7441 fputs ("le", file);
7442 break;
7443 case UNORDERED:
7444 fputs ("unord", file);
7445 break;
7446 case NE:
7447 case LTGT:
7448 fputs ("neq", file);
7449 break;
7450 case UNGE:
7451 case GE:
7452 fputs ("nlt", file);
7453 break;
7454 case UNGT:
7455 case GT:
7456 fputs ("nle", file);
7457 break;
7458 case ORDERED:
7459 fputs ("ord", file);
7460 break;
7461 default:
7462 abort ();
7463 break;
7465 return;
7466 case 'O':
7467 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7468 if (ASSEMBLER_DIALECT == ASM_ATT)
7470 switch (GET_MODE (x))
7472 case HImode: putc ('w', file); break;
7473 case SImode:
7474 case SFmode: putc ('l', file); break;
7475 case DImode:
7476 case DFmode: putc ('q', file); break;
7477 default: abort ();
7479 putc ('.', file);
7481 #endif
7482 return;
7483 case 'C':
7484 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7485 return;
7486 case 'F':
7487 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7488 if (ASSEMBLER_DIALECT == ASM_ATT)
7489 putc ('.', file);
7490 #endif
7491 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7492 return;
7494 /* Like above, but reverse condition */
7495 case 'c':
7496 /* Check to see if argument to %c is really a constant
7497 and not a condition code which needs to be reversed. */
7498 if (!COMPARISON_P (x))
7500 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7501 return;
7503 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7504 return;
7505 case 'f':
7506 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7507 if (ASSEMBLER_DIALECT == ASM_ATT)
7508 putc ('.', file);
7509 #endif
7510 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7511 return;
7512 case '+':
7514 rtx x;
7516 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7517 return;
7519 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7520 if (x)
7522 int pred_val = INTVAL (XEXP (x, 0));
7524 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7525 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7527 int taken = pred_val > REG_BR_PROB_BASE / 2;
7528 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7530 /* Emit hints only in the case where the default branch prediction
7531 heuristics would fail. */
7532 if (taken != cputaken)
7534 /* We use 3e (DS) prefix for taken branches and
7535 2e (CS) prefix for not taken branches. */
7536 if (taken)
7537 fputs ("ds ; ", file);
7538 else
7539 fputs ("cs ; ", file);
7543 return;
7545 default:
7546 output_operand_lossage ("invalid operand code `%c'", code);
7550 if (GET_CODE (x) == REG)
7551 print_reg (x, code, file);
7553 else if (GET_CODE (x) == MEM)
7555 /* No `byte ptr' prefix for call instructions. */
7556 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7558 const char * size;
7559 switch (GET_MODE_SIZE (GET_MODE (x)))
7561 case 1: size = "BYTE"; break;
7562 case 2: size = "WORD"; break;
7563 case 4: size = "DWORD"; break;
7564 case 8: size = "QWORD"; break;
7565 case 12: size = "XWORD"; break;
7566 case 16: size = "XMMWORD"; break;
7567 default:
7568 abort ();
7571 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7572 if (code == 'b')
7573 size = "BYTE";
7574 else if (code == 'w')
7575 size = "WORD";
7576 else if (code == 'k')
7577 size = "DWORD";
7579 fputs (size, file);
7580 fputs (" PTR ", file);
7583 x = XEXP (x, 0);
7584 /* Avoid (%rip) for call operands. */
7585 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7586 && GET_CODE (x) != CONST_INT)
7587 output_addr_const (file, x);
7588 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7589 output_operand_lossage ("invalid constraints for operand");
7590 else
7591 output_address (x);
7594 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7596 REAL_VALUE_TYPE r;
7597 long l;
7599 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7600 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7602 if (ASSEMBLER_DIALECT == ASM_ATT)
7603 putc ('$', file);
7604 fprintf (file, "0x%08lx", l);
7607 /* These float cases don't actually occur as immediate operands. */
7608 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7610 char dstr[30];
7612 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7613 fprintf (file, "%s", dstr);
7616 else if (GET_CODE (x) == CONST_DOUBLE
7617 && GET_MODE (x) == XFmode)
7619 char dstr[30];
7621 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7622 fprintf (file, "%s", dstr);
7625 else
7627 if (code != 'P')
7629 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7631 if (ASSEMBLER_DIALECT == ASM_ATT)
7632 putc ('$', file);
7634 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7635 || GET_CODE (x) == LABEL_REF)
7637 if (ASSEMBLER_DIALECT == ASM_ATT)
7638 putc ('$', file);
7639 else
7640 fputs ("OFFSET FLAT:", file);
7643 if (GET_CODE (x) == CONST_INT)
7644 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7645 else if (flag_pic)
7646 output_pic_addr_const (file, x, code);
7647 else
7648 output_addr_const (file, x);
7652 /* Print a memory operand whose address is ADDR. */
7654 void
7655 print_operand_address (FILE *file, rtx addr)
7657 struct ix86_address parts;
7658 rtx base, index, disp;
7659 int scale;
7661 if (! ix86_decompose_address (addr, &parts))
7662 abort ();
7664 base = parts.base;
7665 index = parts.index;
7666 disp = parts.disp;
7667 scale = parts.scale;
7669 switch (parts.seg)
7671 case SEG_DEFAULT:
7672 break;
7673 case SEG_FS:
7674 case SEG_GS:
7675 if (USER_LABEL_PREFIX[0] == 0)
7676 putc ('%', file);
7677 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7678 break;
7679 default:
7680 abort ();
7683 if (!base && !index)
7685 /* A displacement-only address requires special attention. */
7687 if (GET_CODE (disp) == CONST_INT)
7689 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7691 if (USER_LABEL_PREFIX[0] == 0)
7692 putc ('%', file);
7693 fputs ("ds:", file);
7695 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7697 else if (flag_pic)
7698 output_pic_addr_const (file, disp, 0);
7699 else
7700 output_addr_const (file, disp);
7702 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
7703 if (TARGET_64BIT
7704 && ((GET_CODE (disp) == SYMBOL_REF
7705 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7706 || GET_CODE (disp) == LABEL_REF
7707 || (GET_CODE (disp) == CONST
7708 && GET_CODE (XEXP (disp, 0)) == PLUS
7709 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7710 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7711 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7712 fputs ("(%rip)", file);
7714 else
7716 if (ASSEMBLER_DIALECT == ASM_ATT)
7718 if (disp)
7720 if (flag_pic)
7721 output_pic_addr_const (file, disp, 0);
7722 else if (GET_CODE (disp) == LABEL_REF)
7723 output_asm_label (disp);
7724 else
7725 output_addr_const (file, disp);
7728 putc ('(', file);
7729 if (base)
7730 print_reg (base, 0, file);
7731 if (index)
7733 putc (',', file);
7734 print_reg (index, 0, file);
7735 if (scale != 1)
7736 fprintf (file, ",%d", scale);
7738 putc (')', file);
7740 else
7742 rtx offset = NULL_RTX;
7744 if (disp)
7746 /* Pull out the offset of a symbol; print any symbol itself. */
7747 if (GET_CODE (disp) == CONST
7748 && GET_CODE (XEXP (disp, 0)) == PLUS
7749 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7751 offset = XEXP (XEXP (disp, 0), 1);
7752 disp = gen_rtx_CONST (VOIDmode,
7753 XEXP (XEXP (disp, 0), 0));
7756 if (flag_pic)
7757 output_pic_addr_const (file, disp, 0);
7758 else if (GET_CODE (disp) == LABEL_REF)
7759 output_asm_label (disp);
7760 else if (GET_CODE (disp) == CONST_INT)
7761 offset = disp;
7762 else
7763 output_addr_const (file, disp);
7766 putc ('[', file);
7767 if (base)
7769 print_reg (base, 0, file);
7770 if (offset)
7772 if (INTVAL (offset) >= 0)
7773 putc ('+', file);
7774 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7777 else if (offset)
7778 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7779 else
7780 putc ('0', file);
7782 if (index)
7784 putc ('+', file);
7785 print_reg (index, 0, file);
7786 if (scale != 1)
7787 fprintf (file, "*%d", scale);
7789 putc (']', file);
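/* As a rough illustration of the two dialects handled above: an address
   with base %eax, index %ebx, scale 4 and displacement 8 comes out as
   something like "8(%eax,%ebx,4)" in AT&T syntax and roughly
   "[eax+8+ebx*4]" in Intel syntax, modulo register prefixes and any
   segment override printed earlier.  */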
7794 bool
7795 output_addr_const_extra (FILE *file, rtx x)
7797 rtx op;
7799 if (GET_CODE (x) != UNSPEC)
7800 return false;
7802 op = XVECEXP (x, 0, 0);
7803 switch (XINT (x, 1))
7805 case UNSPEC_GOTTPOFF:
7806 output_addr_const (file, op);
7807 /* FIXME: This might be @TPOFF in Sun ld. */
7808 fputs ("@GOTTPOFF", file);
7809 break;
7810 case UNSPEC_TPOFF:
7811 output_addr_const (file, op);
7812 fputs ("@TPOFF", file);
7813 break;
7814 case UNSPEC_NTPOFF:
7815 output_addr_const (file, op);
7816 if (TARGET_64BIT)
7817 fputs ("@TPOFF", file);
7818 else
7819 fputs ("@NTPOFF", file);
7820 break;
7821 case UNSPEC_DTPOFF:
7822 output_addr_const (file, op);
7823 fputs ("@DTPOFF", file);
7824 break;
7825 case UNSPEC_GOTNTPOFF:
7826 output_addr_const (file, op);
7827 if (TARGET_64BIT)
7828 fputs ("@GOTTPOFF(%rip)", file);
7829 else
7830 fputs ("@GOTNTPOFF", file);
7831 break;
7832 case UNSPEC_INDNTPOFF:
7833 output_addr_const (file, op);
7834 fputs ("@INDNTPOFF", file);
7835 break;
7837 default:
7838 return false;
7841 return true;
7844 /* Split one or more DImode RTL references into pairs of SImode
7845 references. The RTL can be REG, offsettable MEM, integer constant, or
7846 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7847 split and "num" is its length. lo_half and hi_half are output arrays
7848 that parallel "operands". */
7850 void
7851 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7853 while (num--)
7855 rtx op = operands[num];
7857 /* simplify_subreg refuses to split volatile memory addresses,
7858 but we still have to handle them. */
7859 if (GET_CODE (op) == MEM)
7861 lo_half[num] = adjust_address (op, SImode, 0);
7862 hi_half[num] = adjust_address (op, SImode, 4);
7864 else
7866 lo_half[num] = simplify_gen_subreg (SImode, op,
7867 GET_MODE (op) == VOIDmode
7868 ? DImode : GET_MODE (op), 0);
7869 hi_half[num] = simplify_gen_subreg (SImode, op,
7870 GET_MODE (op) == VOIDmode
7871 ? DImode : GET_MODE (op), 4);
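/* For example, a DImode MEM is split above into two SImode MEMs at byte
   offsets 0 and 4; the target is little-endian, so the low half comes
   first.  REGs and constants go through simplify_gen_subreg instead.  */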
7875 /* Split one or more TImode RTL references into pairs of DImode
7876 references. The RTL can be REG, offsettable MEM, integer constant, or
7877 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7878 split and "num" is its length. lo_half and hi_half are output arrays
7879 that parallel "operands". */
7881 void
7882 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7884 while (num--)
7886 rtx op = operands[num];
7888 /* simplify_subreg refuses to split volatile memory addresses, but we
7889 still have to handle them. */
7890 if (GET_CODE (op) == MEM)
7892 lo_half[num] = adjust_address (op, DImode, 0);
7893 hi_half[num] = adjust_address (op, DImode, 8);
7895 else
7897 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7898 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7903 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7904 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7905 is the expression of the binary operation. The output may either be
7906 emitted here, or returned to the caller, like all output_* functions.
7908 There is no guarantee that the operands are the same mode, as they
7909 might be within FLOAT or FLOAT_EXTEND expressions. */
7911 #ifndef SYSV386_COMPAT
7912 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7913 wants to fix the assemblers because that causes incompatibility
7914 with gcc. No-one wants to fix gcc because that causes
7915 incompatibility with assemblers... You can use the option of
7916 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7917 #define SYSV386_COMPAT 1
7918 #endif
7920 const char *
7921 output_387_binary_op (rtx insn, rtx *operands)
7923 static char buf[30];
7924 const char *p;
7925 const char *ssep;
7926 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7928 #ifdef ENABLE_CHECKING
7929 /* Even if we do not want to check the inputs, this documents the input
7930 constraints, which helps in understanding the following code. */
7931 if (STACK_REG_P (operands[0])
7932 && ((REG_P (operands[1])
7933 && REGNO (operands[0]) == REGNO (operands[1])
7934 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7935 || (REG_P (operands[2])
7936 && REGNO (operands[0]) == REGNO (operands[2])
7937 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7938 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7939 ; /* ok */
7940 else if (!is_sse)
7941 abort ();
7942 #endif
7944 switch (GET_CODE (operands[3]))
7946 case PLUS:
7947 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7948 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7949 p = "fiadd";
7950 else
7951 p = "fadd";
7952 ssep = "add";
7953 break;
7955 case MINUS:
7956 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7957 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7958 p = "fisub";
7959 else
7960 p = "fsub";
7961 ssep = "sub";
7962 break;
7964 case MULT:
7965 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7966 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7967 p = "fimul";
7968 else
7969 p = "fmul";
7970 ssep = "mul";
7971 break;
7973 case DIV:
7974 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7975 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7976 p = "fidiv";
7977 else
7978 p = "fdiv";
7979 ssep = "div";
7980 break;
7982 default:
7983 abort ();
7986 if (is_sse)
7988 strcpy (buf, ssep);
7989 if (GET_MODE (operands[0]) == SFmode)
7990 strcat (buf, "ss\t{%2, %0|%0, %2}");
7991 else
7992 strcat (buf, "sd\t{%2, %0|%0, %2}");
7993 return buf;
7995 strcpy (buf, p);
7997 switch (GET_CODE (operands[3]))
7999 case MULT:
8000 case PLUS:
8001 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8003 rtx temp = operands[2];
8004 operands[2] = operands[1];
8005 operands[1] = temp;
8008 /* We know operands[0] == operands[1] at this point. */
8010 if (GET_CODE (operands[2]) == MEM)
8012 p = "%z2\t%2";
8013 break;
8016 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8018 if (STACK_TOP_P (operands[0]))
8019 /* How is it that we are storing to a dead operand[2]?
8020 Well, presumably operands[1] is dead too. We can't
8021 store the result to st(0) as st(0) gets popped on this
8022 instruction. Instead store to operands[2] (which I
8023 think has to be st(1)). st(1) will be popped later.
8024 gcc <= 2.8.1 didn't have this check and generated
8025 assembly code that the Unixware assembler rejected. */
8026 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8027 else
8028 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8029 break;
8032 if (STACK_TOP_P (operands[0]))
8033 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8034 else
8035 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8036 break;
8038 case MINUS:
8039 case DIV:
8040 if (GET_CODE (operands[1]) == MEM)
8042 p = "r%z1\t%1";
8043 break;
8046 if (GET_CODE (operands[2]) == MEM)
8048 p = "%z2\t%2";
8049 break;
8052 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8054 #if SYSV386_COMPAT
8055 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8056 derived assemblers, confusingly reverse the direction of
8057 the operation for fsub{r} and fdiv{r} when the
8058 destination register is not st(0). The Intel assembler
8059 doesn't have this brain damage. Read !SYSV386_COMPAT to
8060 figure out what the hardware really does. */
8061 if (STACK_TOP_P (operands[0]))
8062 p = "{p\t%0, %2|rp\t%2, %0}";
8063 else
8064 p = "{rp\t%2, %0|p\t%0, %2}";
8065 #else
8066 if (STACK_TOP_P (operands[0]))
8067 /* As above for fmul/fadd, we can't store to st(0). */
8068 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8069 else
8070 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8071 #endif
8072 break;
8075 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8077 #if SYSV386_COMPAT
8078 if (STACK_TOP_P (operands[0]))
8079 p = "{rp\t%0, %1|p\t%1, %0}";
8080 else
8081 p = "{p\t%1, %0|rp\t%0, %1}";
8082 #else
8083 if (STACK_TOP_P (operands[0]))
8084 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8085 else
8086 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8087 #endif
8088 break;
8091 if (STACK_TOP_P (operands[0]))
8093 if (STACK_TOP_P (operands[1]))
8094 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8095 else
8096 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8097 break;
8099 else if (STACK_TOP_P (operands[1]))
8101 #if SYSV386_COMPAT
8102 p = "{\t%1, %0|r\t%0, %1}";
8103 #else
8104 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8105 #endif
8107 else
8109 #if SYSV386_COMPAT
8110 p = "{r\t%2, %0|\t%0, %2}";
8111 #else
8112 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8113 #endif
8115 break;
8117 default:
8118 abort ();
8121 strcat (buf, p);
8122 return buf;
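/* As a sketch of the result: adding st(1) into st(0) when operands[2]
   dies yields a template along the lines of "faddp\t{%0, %2|%2, %0}",
   which prints roughly as "faddp %st, %st(1)" in AT&T syntax; the exact
   form depends on the operands and on SYSV386_COMPAT.  */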
8125 /* Output code to initialize the control word copies used by the
8126 trunc?f?i patterns. NORMAL is set to the current control word, while
8127 ROUND_DOWN is set to the control word for rounding downwards. */
8128 void
8129 emit_i387_cw_initialization (rtx normal, rtx round_down)
8131 rtx reg = gen_reg_rtx (HImode);
8133 emit_insn (gen_x86_fnstcw_1 (normal));
8134 emit_move_insn (reg, normal);
8135 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8136 && !TARGET_64BIT)
8137 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8138 else
8139 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8140 emit_move_insn (round_down, reg);
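/* ORing in 0xc00 (or, in the insv path above, inserting 0xc into bits
   8..15) sets the rounding-control field of the 387 control word,
   bits 10 and 11, to 11b, i.e. round toward zero, which is what the
   truncating conversion patterns need.  */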
8143 /* Output code for INSN to convert a float to a signed int. OPERANDS
8144 are the insn operands. The output may be [HSD]Imode and the input
8145 operand may be [SDX]Fmode. */
8147 const char *
8148 output_fix_trunc (rtx insn, rtx *operands)
8150 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8151 int dimode_p = GET_MODE (operands[0]) == DImode;
8153 /* Jump through a hoop or two for DImode, since the hardware has no
8154 non-popping instruction. We used to do this a different way, but
8155 that was somewhat fragile and broke with post-reload splitters. */
8156 if (dimode_p && !stack_top_dies)
8157 output_asm_insn ("fld\t%y1", operands);
8159 if (!STACK_TOP_P (operands[1]))
8160 abort ();
8162 if (GET_CODE (operands[0]) != MEM)
8163 abort ();
8165 output_asm_insn ("fldcw\t%3", operands);
8166 if (stack_top_dies || dimode_p)
8167 output_asm_insn ("fistp%z0\t%0", operands);
8168 else
8169 output_asm_insn ("fist%z0\t%0", operands);
8170 output_asm_insn ("fldcw\t%2", operands);
8172 return "";
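/* The sequence emitted above therefore looks roughly like
	fldcw  <truncating control word>
	fist(p)<size>  <destination>
	fldcw  <saved control word>
   with an extra "fld %y1" beforehand in the DImode case when the value
   must survive, because the 64-bit integer store exists only in a
   popping form.  */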
8175 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8176 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8177 when fucom should be used. */
8179 const char *
8180 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8182 int stack_top_dies;
8183 rtx cmp_op0 = operands[0];
8184 rtx cmp_op1 = operands[1];
8185 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8187 if (eflags_p == 2)
8189 cmp_op0 = cmp_op1;
8190 cmp_op1 = operands[2];
8192 if (is_sse)
8194 if (GET_MODE (operands[0]) == SFmode)
8195 if (unordered_p)
8196 return "ucomiss\t{%1, %0|%0, %1}";
8197 else
8198 return "comiss\t{%1, %0|%0, %1}";
8199 else
8200 if (unordered_p)
8201 return "ucomisd\t{%1, %0|%0, %1}";
8202 else
8203 return "comisd\t{%1, %0|%0, %1}";
8206 if (! STACK_TOP_P (cmp_op0))
8207 abort ();
8209 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8211 if (STACK_REG_P (cmp_op1)
8212 && stack_top_dies
8213 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8214 && REGNO (cmp_op1) != FIRST_STACK_REG)
8216 /* If both the top of the 387 stack and the other operand (itself
8217 a stack register) die, then this must be a `fcompp' float
8218 compare. */
8220 if (eflags_p == 1)
8222 /* There is no double popping fcomi variant. Fortunately,
8223 eflags is immune from the fstp's cc clobbering. */
8224 if (unordered_p)
8225 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8226 else
8227 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8228 return "fstp\t%y0";
8230 else
8232 if (eflags_p == 2)
8234 if (unordered_p)
8235 return "fucompp\n\tfnstsw\t%0";
8236 else
8237 return "fcompp\n\tfnstsw\t%0";
8239 else
8241 if (unordered_p)
8242 return "fucompp";
8243 else
8244 return "fcompp";
8248 else
8250 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8252 static const char * const alt[24] =
8254 "fcom%z1\t%y1",
8255 "fcomp%z1\t%y1",
8256 "fucom%z1\t%y1",
8257 "fucomp%z1\t%y1",
8259 "ficom%z1\t%y1",
8260 "ficomp%z1\t%y1",
8261 NULL,
8262 NULL,
8264 "fcomi\t{%y1, %0|%0, %y1}",
8265 "fcomip\t{%y1, %0|%0, %y1}",
8266 "fucomi\t{%y1, %0|%0, %y1}",
8267 "fucomip\t{%y1, %0|%0, %y1}",
8269 NULL,
8270 NULL,
8271 NULL,
8272 NULL,
8274 "fcom%z2\t%y2\n\tfnstsw\t%0",
8275 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8276 "fucom%z2\t%y2\n\tfnstsw\t%0",
8277 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8279 "ficom%z2\t%y2\n\tfnstsw\t%0",
8280 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8281 NULL,
8282 NULL
8285 int mask;
8286 const char *ret;
8288 mask = eflags_p << 3;
8289 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8290 mask |= unordered_p << 1;
8291 mask |= stack_top_dies;
8293 if (mask >= 24)
8294 abort ();
8295 ret = alt[mask];
8296 if (ret == NULL)
8297 abort ();
8299 return ret;
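/* As an example of the encoding above: a plain fcom-style compare
   (eflags_p == 0) of an integer operand where the stack top dies gives
   mask (1 << 2) + 1 == 5 and so selects "ficomp%z1\t%y1" from the
   table.  */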
8303 void
8304 ix86_output_addr_vec_elt (FILE *file, int value)
8306 const char *directive = ASM_LONG;
8308 if (TARGET_64BIT)
8310 #ifdef ASM_QUAD
8311 directive = ASM_QUAD;
8312 #else
8313 abort ();
8314 #endif
8317 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
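/* With the usual ".L" local label prefix this prints e.g. ".long .L42"
   on 32-bit targets and ".quad .L42" when TARGET_64BIT (assuming the
   assembler provides ASM_QUAD).  */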
8320 void
8321 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8323 if (TARGET_64BIT)
8324 fprintf (file, "%s%s%d-%s%d\n",
8325 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8326 else if (HAVE_AS_GOTOFF_IN_DATA)
8327 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8328 #if TARGET_MACHO
8329 else if (TARGET_MACHO)
8331 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8332 machopic_output_function_base_name (file);
8333 fprintf(file, "\n");
8335 #endif
8336 else
8337 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8338 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8341 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8342 for the target. */
8344 void
8345 ix86_expand_clear (rtx dest)
8347 rtx tmp;
8349 /* We play register width games, which are only valid after reload. */
8350 if (!reload_completed)
8351 abort ();
8353 /* Avoid HImode and its attendant prefix byte. */
8354 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8355 dest = gen_rtx_REG (SImode, REGNO (dest));
8357 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8359 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8360 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8362 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8363 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8366 emit_insn (tmp);
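/* So clearing a SImode destination held in %eax typically becomes
   "xor %eax, %eax" (wrapped in a PARALLEL with a flags clobber, since
   xor sets the flags), with "mov $0, %eax" as the fallback on targets
   that prefer the explicit move.  */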
8369 /* X is an unchanging MEM. If it is a constant pool reference, return
8370 the constant pool rtx, else NULL. */
8372 static rtx
8373 maybe_get_pool_constant (rtx x)
8375 x = ix86_delegitimize_address (XEXP (x, 0));
8377 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8378 return get_pool_constant (x);
8380 return NULL_RTX;
8383 void
8384 ix86_expand_move (enum machine_mode mode, rtx operands[])
8386 int strict = (reload_in_progress || reload_completed);
8387 rtx op0, op1;
8388 enum tls_model model;
8390 op0 = operands[0];
8391 op1 = operands[1];
8393 model = tls_symbolic_operand (op1, Pmode);
8394 if (model)
8396 op1 = legitimize_tls_address (op1, model, true);
8397 op1 = force_operand (op1, op0);
8398 if (op1 == op0)
8399 return;
8402 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8404 #if TARGET_MACHO
8405 if (MACHOPIC_PURE)
8407 rtx temp = ((reload_in_progress
8408 || ((op0 && GET_CODE (op0) == REG)
8409 && mode == Pmode))
8410 ? op0 : gen_reg_rtx (Pmode));
8411 op1 = machopic_indirect_data_reference (op1, temp);
8412 op1 = machopic_legitimize_pic_address (op1, mode,
8413 temp == op1 ? 0 : temp);
8415 else if (MACHOPIC_INDIRECT)
8416 op1 = machopic_indirect_data_reference (op1, 0);
8417 if (op0 == op1)
8418 return;
8419 #else
8420 if (GET_CODE (op0) == MEM)
8421 op1 = force_reg (Pmode, op1);
8422 else
8423 op1 = legitimize_address (op1, op1, Pmode);
8424 #endif /* TARGET_MACHO */
8426 else
8428 if (GET_CODE (op0) == MEM
8429 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8430 || !push_operand (op0, mode))
8431 && GET_CODE (op1) == MEM)
8432 op1 = force_reg (mode, op1);
8434 if (push_operand (op0, mode)
8435 && ! general_no_elim_operand (op1, mode))
8436 op1 = copy_to_mode_reg (mode, op1);
8438 /* Force large constants in 64-bit compilation into a register
8439 to get them CSEd. */
8440 if (TARGET_64BIT && mode == DImode
8441 && immediate_operand (op1, mode)
8442 && !x86_64_zero_extended_value (op1)
8443 && !register_operand (op0, mode)
8444 && optimize && !reload_completed && !reload_in_progress)
8445 op1 = copy_to_mode_reg (mode, op1);
8447 if (FLOAT_MODE_P (mode))
8449 /* If we are loading a floating point constant to a register,
8450 force the value to memory now, since we'll get better code
8451 out of the back end. */
8453 if (strict)
8455 else if (GET_CODE (op1) == CONST_DOUBLE)
8457 op1 = validize_mem (force_const_mem (mode, op1));
8458 if (!register_operand (op0, mode))
8460 rtx temp = gen_reg_rtx (mode);
8461 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8462 emit_move_insn (op0, temp);
8463 return;
8469 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8472 void
8473 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8475 /* Force constants other than zero into memory. We do not know how
8476 the instructions used to build constants modify the upper 64 bits
8477 of the register; once we have that information we may be able
8478 to handle some of them more efficiently. */
8479 if ((reload_in_progress | reload_completed) == 0
8480 && register_operand (operands[0], mode)
8481 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8482 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8484 /* Make operand1 a register if it isn't already. */
8485 if (!no_new_pseudos
8486 && !register_operand (operands[0], mode)
8487 && !register_operand (operands[1], mode))
8489 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8490 emit_move_insn (operands[0], temp);
8491 return;
8494 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8497 /* Attempt to expand a binary operator. Make the expansion closer to the
8498 actual machine than just general_operand, which would allow 3 separate
8499 memory references (one output, two inputs) in a single insn. */
8501 void
8502 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8503 rtx operands[])
8505 int matching_memory;
8506 rtx src1, src2, dst, op, clob;
8508 dst = operands[0];
8509 src1 = operands[1];
8510 src2 = operands[2];
8512 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8513 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8514 && (rtx_equal_p (dst, src2)
8515 || immediate_operand (src1, mode)))
8517 rtx temp = src1;
8518 src1 = src2;
8519 src2 = temp;
8522 /* If the destination is memory, and we do not have matching source
8523 operands, do things in registers. */
8524 matching_memory = 0;
8525 if (GET_CODE (dst) == MEM)
8527 if (rtx_equal_p (dst, src1))
8528 matching_memory = 1;
8529 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8530 && rtx_equal_p (dst, src2))
8531 matching_memory = 2;
8532 else
8533 dst = gen_reg_rtx (mode);
8536 /* The two source operands cannot both be in memory. */
8537 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8539 if (matching_memory != 2)
8540 src2 = force_reg (mode, src2);
8541 else
8542 src1 = force_reg (mode, src1);
8545 /* If the operation is not commutative, source 1 cannot be a constant
8546 or non-matching memory. */
8547 if ((CONSTANT_P (src1)
8548 || (!matching_memory && GET_CODE (src1) == MEM))
8549 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8550 src1 = force_reg (mode, src1);
8552 /* If optimizing, copy to regs to improve CSE */
8553 if (optimize && ! no_new_pseudos)
8555 if (GET_CODE (dst) == MEM)
8556 dst = gen_reg_rtx (mode);
8557 if (GET_CODE (src1) == MEM)
8558 src1 = force_reg (mode, src1);
8559 if (GET_CODE (src2) == MEM)
8560 src2 = force_reg (mode, src2);
8563 /* Emit the instruction. */
8565 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8566 if (reload_in_progress)
8568 /* Reload doesn't know about the flags register, and doesn't know that
8569 it doesn't want to clobber it. We can only do this with PLUS. */
8570 if (code != PLUS)
8571 abort ();
8572 emit_insn (op);
8574 else
8576 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8577 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8580 /* Fix up the destination if needed. */
8581 if (dst != operands[0])
8582 emit_move_insn (operands[0], dst);
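/* For instance, a fully-memory "a = b + c" cannot be a single x86 insn:
   the code above forces at least one source into a register and, since
   the destination matches neither source, computes the result into a
   fresh register and stores it back with the final move.  */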
8585 /* Return TRUE or FALSE depending on whether the binary operator meets the
8586 appropriate constraints. */
8589 ix86_binary_operator_ok (enum rtx_code code,
8590 enum machine_mode mode ATTRIBUTE_UNUSED,
8591 rtx operands[3])
8593 /* The two source operands cannot both be in memory. */
8594 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8595 return 0;
8596 /* If the operation is not commutative, source 1 cannot be a constant. */
8597 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8598 return 0;
8599 /* If the destination is memory, we must have a matching source operand. */
8600 if (GET_CODE (operands[0]) == MEM
8601 && ! (rtx_equal_p (operands[0], operands[1])
8602 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8603 && rtx_equal_p (operands[0], operands[2]))))
8604 return 0;
8605 /* If the operation is not commutative and source 1 is memory, we must
8606 have a matching destination. */
8607 if (GET_CODE (operands[1]) == MEM
8608 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8609 && ! rtx_equal_p (operands[0], operands[1]))
8610 return 0;
8611 return 1;
8614 /* Attempt to expand a unary operator. Make the expansion closer to the
8615 actual machine than just general_operand, which would allow 2 separate
8616 memory references (one output, one input) in a single insn. */
8618 void
8619 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8620 rtx operands[])
8622 int matching_memory;
8623 rtx src, dst, op, clob;
8625 dst = operands[0];
8626 src = operands[1];
8628 /* If the destination is memory, and we do not have matching source
8629 operands, do things in registers. */
8630 matching_memory = 0;
8631 if (GET_CODE (dst) == MEM)
8633 if (rtx_equal_p (dst, src))
8634 matching_memory = 1;
8635 else
8636 dst = gen_reg_rtx (mode);
8639 /* When source operand is memory, destination must match. */
8640 if (!matching_memory && GET_CODE (src) == MEM)
8641 src = force_reg (mode, src);
8643 /* If optimizing, copy to regs to improve CSE */
8644 if (optimize && ! no_new_pseudos)
8646 if (GET_CODE (dst) == MEM)
8647 dst = gen_reg_rtx (mode);
8648 if (GET_CODE (src) == MEM)
8649 src = force_reg (mode, src);
8652 /* Emit the instruction. */
8654 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8655 if (reload_in_progress || code == NOT)
8657 /* Reload doesn't know about the flags register, and doesn't know that
8658 it doesn't want to clobber it. */
8659 if (code != NOT)
8660 abort ();
8661 emit_insn (op);
8663 else
8665 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8666 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8669 /* Fix up the destination if needed. */
8670 if (dst != operands[0])
8671 emit_move_insn (operands[0], dst);
8674 /* Return TRUE or FALSE depending on whether the unary operator meets the
8675 appropriate constraints. */
8678 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8679 enum machine_mode mode ATTRIBUTE_UNUSED,
8680 rtx operands[2] ATTRIBUTE_UNUSED)
8682 /* If one of operands is memory, source and destination must match. */
8683 if ((GET_CODE (operands[0]) == MEM
8684 || GET_CODE (operands[1]) == MEM)
8685 && ! rtx_equal_p (operands[0], operands[1]))
8686 return FALSE;
8687 return TRUE;
8690 /* Return TRUE or FALSE depending on whether the first SET in INSN
8691 has source and destination with matching CC modes, and whether the
8692 CC mode is at least as constrained as REQ_MODE. */
8695 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8697 rtx set;
8698 enum machine_mode set_mode;
8700 set = PATTERN (insn);
8701 if (GET_CODE (set) == PARALLEL)
8702 set = XVECEXP (set, 0, 0);
8703 if (GET_CODE (set) != SET)
8704 abort ();
8705 if (GET_CODE (SET_SRC (set)) != COMPARE)
8706 abort ();
8708 set_mode = GET_MODE (SET_DEST (set));
8709 switch (set_mode)
8711 case CCNOmode:
8712 if (req_mode != CCNOmode
8713 && (req_mode != CCmode
8714 || XEXP (SET_SRC (set), 1) != const0_rtx))
8715 return 0;
8716 break;
8717 case CCmode:
8718 if (req_mode == CCGCmode)
8719 return 0;
8720 /* FALLTHRU */
8721 case CCGCmode:
8722 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8723 return 0;
8724 /* FALLTHRU */
8725 case CCGOCmode:
8726 if (req_mode == CCZmode)
8727 return 0;
8728 /* FALLTHRU */
8729 case CCZmode:
8730 break;
8732 default:
8733 abort ();
8736 return (GET_MODE (SET_SRC (set)) == set_mode);
8739 /* Generate insn patterns to do an integer compare of OPERANDS. */
8741 static rtx
8742 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8744 enum machine_mode cmpmode;
8745 rtx tmp, flags;
8747 cmpmode = SELECT_CC_MODE (code, op0, op1);
8748 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8750 /* This is very simple, but making the interface the same as in the
8751 FP case makes the rest of the code easier. */
8752 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8753 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8755 /* Return the test that should be put into the flags user, i.e.
8756 the bcc, scc, or cmov instruction. */
8757 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8760 /* Figure out whether to use ordered or unordered fp comparisons.
8761 Return the appropriate mode to use. */
8763 enum machine_mode
8764 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8766 /* ??? In order to make all comparisons reversible, we do all comparisons
8767 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8768 between trapping and nontrapping forms of comparisons, we can make
8769 inequality comparisons trapping again, since that results in better
8770 code when using FCOM based compares. */
8771 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8774 enum machine_mode
8775 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8777 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8778 return ix86_fp_compare_mode (code);
8779 switch (code)
8781 /* Only zero flag is needed. */
8782 case EQ: /* ZF=0 */
8783 case NE: /* ZF!=0 */
8784 return CCZmode;
8785 /* Codes needing carry flag. */
8786 case GEU: /* CF=0 */
8787 case GTU: /* CF=0 & ZF=0 */
8788 case LTU: /* CF=1 */
8789 case LEU: /* CF=1 | ZF=1 */
8790 return CCmode;
8791 /* Codes possibly doable only with sign flag when
8792 comparing against zero. */
8793 case GE: /* SF=OF or SF=0 */
8794 case LT: /* SF<>OF or SF=1 */
8795 if (op1 == const0_rtx)
8796 return CCGOCmode;
8797 else
8798 /* For other cases the carry flag is not required. */
8799 return CCGCmode;
8800 /* Codes doable only with the sign flag when comparing
8801 against zero, but we lack a jump instruction for that,
8802 so we need to use relational tests against the overflow
8803 flag, which thus needs to be zero. */
8804 case GT: /* ZF=0 & SF=OF */
8805 case LE: /* ZF=1 | SF<>OF */
8806 if (op1 == const0_rtx)
8807 return CCNOmode;
8808 else
8809 return CCGCmode;
8810 /* The strcmp pattern does (use flags), and combine may ask us for the
8811 proper mode. */
8812 case USE:
8813 return CCmode;
8814 default:
8815 abort ();
8819 /* Return the fixed registers used for condition codes. */
8821 static bool
8822 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8824 *p1 = FLAGS_REG;
8825 *p2 = FPSR_REG;
8826 return true;
8829 /* If two condition code modes are compatible, return a condition code
8830 mode which is compatible with both. Otherwise, return
8831 VOIDmode. */
8833 static enum machine_mode
8834 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8836 if (m1 == m2)
8837 return m1;
8839 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8840 return VOIDmode;
8842 if ((m1 == CCGCmode && m2 == CCGOCmode)
8843 || (m1 == CCGOCmode && m2 == CCGCmode))
8844 return CCGCmode;
8846 switch (m1)
8848 default:
8849 abort ();
8851 case CCmode:
8852 case CCGCmode:
8853 case CCGOCmode:
8854 case CCNOmode:
8855 case CCZmode:
8856 switch (m2)
8858 default:
8859 return VOIDmode;
8861 case CCmode:
8862 case CCGCmode:
8863 case CCGOCmode:
8864 case CCNOmode:
8865 case CCZmode:
8866 return CCmode;
8869 case CCFPmode:
8870 case CCFPUmode:
8871 /* These are only compatible with themselves, which we already
8872 checked above. */
8873 return VOIDmode;
8877 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8880 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8882 enum rtx_code swapped_code = swap_condition (code);
8883 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8884 || (ix86_fp_comparison_cost (swapped_code)
8885 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8888 /* Swap, force into registers, or otherwise massage the two operands
8889 to a fp comparison. The operands are updated in place; the new
8890 comparison code is returned. */
8892 static enum rtx_code
8893 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8895 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8896 rtx op0 = *pop0, op1 = *pop1;
8897 enum machine_mode op_mode = GET_MODE (op0);
8898 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8900 /* All of the unordered compare instructions only work on registers.
8901 The same is true of the XFmode compare instructions. The same is
8902 true of the fcomi compare instructions. */
8904 if (!is_sse
8905 && (fpcmp_mode == CCFPUmode
8906 || op_mode == XFmode
8907 || ix86_use_fcomi_compare (code)))
8909 op0 = force_reg (op_mode, op0);
8910 op1 = force_reg (op_mode, op1);
8912 else
8914 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8915 things around if they appear profitable, otherwise force op0
8916 into a register. */
8918 if (standard_80387_constant_p (op0) == 0
8919 || (GET_CODE (op0) == MEM
8920 && ! (standard_80387_constant_p (op1) == 0
8921 || GET_CODE (op1) == MEM)))
8923 rtx tmp;
8924 tmp = op0, op0 = op1, op1 = tmp;
8925 code = swap_condition (code);
8928 if (GET_CODE (op0) != REG)
8929 op0 = force_reg (op_mode, op0);
8931 if (CONSTANT_P (op1))
8933 if (standard_80387_constant_p (op1))
8934 op1 = force_reg (op_mode, op1);
8935 else
8936 op1 = validize_mem (force_const_mem (op_mode, op1));
8940 /* Try to rearrange the comparison to make it cheaper. */
8941 if (ix86_fp_comparison_cost (code)
8942 > ix86_fp_comparison_cost (swap_condition (code))
8943 && (GET_CODE (op1) == REG || !no_new_pseudos))
8945 rtx tmp;
8946 tmp = op0, op0 = op1, op1 = tmp;
8947 code = swap_condition (code);
8948 if (GET_CODE (op0) != REG)
8949 op0 = force_reg (op_mode, op0);
8952 *pop0 = op0;
8953 *pop1 = op1;
8954 return code;
8957 /* Convert the comparison codes we use to represent an FP comparison into
8958 an integer code that will result in a proper branch. Return UNKNOWN if
8959 no such code is available. */
8960 static enum rtx_code
8961 ix86_fp_compare_code_to_integer (enum rtx_code code)
8963 switch (code)
8965 case GT:
8966 return GTU;
8967 case GE:
8968 return GEU;
8969 case ORDERED:
8970 case UNORDERED:
8971 return code;
8972 break;
8973 case UNEQ:
8974 return EQ;
8975 break;
8976 case UNLT:
8977 return LTU;
8978 break;
8979 case UNLE:
8980 return LEU;
8981 break;
8982 case LTGT:
8983 return NE;
8984 break;
8985 default:
8986 return UNKNOWN;
8990 /* Split comparison code CODE into comparisons we can do using branch
8991 instructions. BYPASS_CODE is the comparison code for a branch that
8992 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
8993 is not required, its value is set to NIL.
8994 We never require more than two branches. */
8995 static void
8996 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8997 enum rtx_code *first_code,
8998 enum rtx_code *second_code)
9000 *first_code = code;
9001 *bypass_code = NIL;
9002 *second_code = NIL;
9004 /* The fcomi comparison sets flags as follows:
9006 cmp ZF PF CF
9007 > 0 0 0
9008 < 0 0 1
9009 = 1 0 0
9010 un 1 1 1 */
9012 switch (code)
9014 case GT: /* GTU - CF=0 & ZF=0 */
9015 case GE: /* GEU - CF=0 */
9016 case ORDERED: /* PF=0 */
9017 case UNORDERED: /* PF=1 */
9018 case UNEQ: /* EQ - ZF=1 */
9019 case UNLT: /* LTU - CF=1 */
9020 case UNLE: /* LEU - CF=1 | ZF=1 */
9021 case LTGT: /* EQ - ZF=0 */
9022 break;
9023 case LT: /* LTU - CF=1 - fails on unordered */
9024 *first_code = UNLT;
9025 *bypass_code = UNORDERED;
9026 break;
9027 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9028 *first_code = UNLE;
9029 *bypass_code = UNORDERED;
9030 break;
9031 case EQ: /* EQ - ZF=1 - fails on unordered */
9032 *first_code = UNEQ;
9033 *bypass_code = UNORDERED;
9034 break;
9035 case NE: /* NE - ZF=0 - fails on unordered */
9036 *first_code = LTGT;
9037 *second_code = UNORDERED;
9038 break;
9039 case UNGE: /* GEU - CF=0 - fails on unordered */
9040 *first_code = GE;
9041 *second_code = UNORDERED;
9042 break;
9043 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9044 *first_code = GT;
9045 *second_code = UNORDERED;
9046 break;
9047 default:
9048 abort ();
9050 if (!TARGET_IEEE_FP)
9052 *second_code = NIL;
9053 *bypass_code = NIL;
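/* For example, with TARGET_IEEE_FP an EQ comparison is split above into
   FIRST_CODE == UNEQ guarded by BYPASS_CODE == UNORDERED: branch around
   the equality test whenever the operands are unordered, because UNEQ
   alone would wrongly accept NaN operands.  */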
9057 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9058 All of the following functions use the number of instructions as the cost metric.
9059 In the future this should be tweaked to compute bytes for optimize_size and
9060 take into account the performance of various instructions on various CPUs. */
9061 static int
9062 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9064 if (!TARGET_IEEE_FP)
9065 return 4;
9066 /* The cost of code output by ix86_expand_fp_compare. */
9067 switch (code)
9069 case UNLE:
9070 case UNLT:
9071 case LTGT:
9072 case GT:
9073 case GE:
9074 case UNORDERED:
9075 case ORDERED:
9076 case UNEQ:
9077 return 4;
9078 break;
9079 case LT:
9080 case NE:
9081 case EQ:
9082 case UNGE:
9083 return 5;
9084 break;
9085 case LE:
9086 case UNGT:
9087 return 6;
9088 break;
9089 default:
9090 abort ();
9094 /* Return cost of comparison done using fcomi operation.
9095 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9096 static int
9097 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9099 enum rtx_code bypass_code, first_code, second_code;
9100 /* Return an arbitrarily high cost when the instruction is not supported -
9101 this prevents gcc from using it. */
9102 if (!TARGET_CMOVE)
9103 return 1024;
9104 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9105 return (bypass_code != NIL || second_code != NIL) + 2;
9108 /* Return cost of comparison done using sahf operation.
9109 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9110 static int
9111 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9113 enum rtx_code bypass_code, first_code, second_code;
9114 /* Return an arbitrarily high cost when the instruction is not preferred -
9115 this prevents gcc from using it. */
9116 if (!TARGET_USE_SAHF && !optimize_size)
9117 return 1024;
9118 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9119 return (bypass_code != NIL || second_code != NIL) + 3;
9122 /* Compute cost of the comparison done using any method.
9123 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9124 static int
9125 ix86_fp_comparison_cost (enum rtx_code code)
9127 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9128 int min;
9130 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9131 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9133 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9134 if (min > sahf_cost)
9135 min = sahf_cost;
9136 if (min > fcomi_cost)
9137 min = fcomi_cost;
9138 return min;
9141 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9143 static rtx
9144 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9145 rtx *second_test, rtx *bypass_test)
9147 enum machine_mode fpcmp_mode, intcmp_mode;
9148 rtx tmp, tmp2;
9149 int cost = ix86_fp_comparison_cost (code);
9150 enum rtx_code bypass_code, first_code, second_code;
9152 fpcmp_mode = ix86_fp_compare_mode (code);
9153 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9155 if (second_test)
9156 *second_test = NULL_RTX;
9157 if (bypass_test)
9158 *bypass_test = NULL_RTX;
9160 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9162 /* Do fcomi/sahf based test when profitable. */
9163 if ((bypass_code == NIL || bypass_test)
9164 && (second_code == NIL || second_test)
9165 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9167 if (TARGET_CMOVE)
9169 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9170 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9171 tmp);
9172 emit_insn (tmp);
9174 else
9176 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9177 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9178 if (!scratch)
9179 scratch = gen_reg_rtx (HImode);
9180 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9181 emit_insn (gen_x86_sahf_1 (scratch));
9184 /* The FP codes work out to act like unsigned. */
9185 intcmp_mode = fpcmp_mode;
9186 code = first_code;
9187 if (bypass_code != NIL)
9188 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9189 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9190 const0_rtx);
9191 if (second_code != NIL)
9192 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9193 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9194 const0_rtx);
9196 else
9198 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9200 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9201 if (!scratch)
9202 scratch = gen_reg_rtx (HImode);
9203 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9205 /* In the unordered case, we have to check C2 for NaN's, which
9206 doesn't happen to work out to anything nice combination-wise.
9207 So do some bit twiddling on the value we've got in AH to come
9208 up with an appropriate set of condition codes. */
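/* After fnstsw the condition bits end up in the high byte of the status
   word roughly as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so masks such as
   0x45 in the tests below cover the combination C0|C2|C3.  */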
9210 intcmp_mode = CCNOmode;
9211 switch (code)
9213 case GT:
9214 case UNGT:
9215 if (code == GT || !TARGET_IEEE_FP)
9217 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9218 code = EQ;
9220 else
9222 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9223 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9224 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9225 intcmp_mode = CCmode;
9226 code = GEU;
9228 break;
9229 case LT:
9230 case UNLT:
9231 if (code == LT && TARGET_IEEE_FP)
9233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9234 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9235 intcmp_mode = CCmode;
9236 code = EQ;
9238 else
9240 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9241 code = NE;
9243 break;
9244 case GE:
9245 case UNGE:
9246 if (code == GE || !TARGET_IEEE_FP)
9248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9249 code = EQ;
9251 else
9253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9254 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9255 GEN_INT (0x01)));
9256 code = NE;
9258 break;
9259 case LE:
9260 case UNLE:
9261 if (code == LE && TARGET_IEEE_FP)
9263 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9264 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9265 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9266 intcmp_mode = CCmode;
9267 code = LTU;
9269 else
9271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9272 code = NE;
9274 break;
9275 case EQ:
9276 case UNEQ:
9277 if (code == EQ && TARGET_IEEE_FP)
9279 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9280 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9281 intcmp_mode = CCmode;
9282 code = EQ;
9284 else
9286 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9287 code = NE;
9288 break;
9290 break;
9291 case NE:
9292 case LTGT:
9293 if (code == NE && TARGET_IEEE_FP)
9295 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9296 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9297 GEN_INT (0x40)));
9298 code = NE;
9300 else
9302 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9303 code = EQ;
9305 break;
9307 case UNORDERED:
9308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9309 code = NE;
9310 break;
9311 case ORDERED:
9312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9313 code = EQ;
9314 break;
9316 default:
9317 abort ();
9321 /* Return the test that should be put into the flags user, i.e.
9322 the bcc, scc, or cmov instruction. */
9323 return gen_rtx_fmt_ee (code, VOIDmode,
9324 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9325 const0_rtx);
9329 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9331 rtx op0, op1, ret;
9332 op0 = ix86_compare_op0;
9333 op1 = ix86_compare_op1;
9335 if (second_test)
9336 *second_test = NULL_RTX;
9337 if (bypass_test)
9338 *bypass_test = NULL_RTX;
9340 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9341 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9342 second_test, bypass_test);
9343 else
9344 ret = ix86_expand_int_compare (code, op0, op1);
9346 return ret;
9349 /* Return true if the CODE will result in nontrivial jump sequence. */
9350 bool
9351 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9353 enum rtx_code bypass_code, first_code, second_code;
9354 if (!TARGET_CMOVE)
9355 return true;
9356 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9357 return bypass_code != NIL || second_code != NIL;
9360 void
9361 ix86_expand_branch (enum rtx_code code, rtx label)
9363 rtx tmp;
9365 switch (GET_MODE (ix86_compare_op0))
9367 case QImode:
9368 case HImode:
9369 case SImode:
9370 simple:
9371 tmp = ix86_expand_compare (code, NULL, NULL);
9372 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9373 gen_rtx_LABEL_REF (VOIDmode, label),
9374 pc_rtx);
9375 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9376 return;
9378 case SFmode:
9379 case DFmode:
9380 case XFmode:
9382 rtvec vec;
9383 int use_fcomi;
9384 enum rtx_code bypass_code, first_code, second_code;
9386 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9387 &ix86_compare_op1);
9389 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9391 /* Check whether we will use the natural sequence with one jump. If
9392 so, we can expand the jump early. Otherwise delay expansion by
9393 creating a compound insn so as not to confuse the optimizers. */
9394 if (bypass_code == NIL && second_code == NIL
9395 && TARGET_CMOVE)
9397 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9398 gen_rtx_LABEL_REF (VOIDmode, label),
9399 pc_rtx, NULL_RTX);
9401 else
9403 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9404 ix86_compare_op0, ix86_compare_op1);
9405 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9406 gen_rtx_LABEL_REF (VOIDmode, label),
9407 pc_rtx);
9408 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9410 use_fcomi = ix86_use_fcomi_compare (code);
9411 vec = rtvec_alloc (3 + !use_fcomi);
9412 RTVEC_ELT (vec, 0) = tmp;
9413 RTVEC_ELT (vec, 1)
9414 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9415 RTVEC_ELT (vec, 2)
9416 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9417 if (! use_fcomi)
9418 RTVEC_ELT (vec, 3)
9419 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9421 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9423 return;
9426 case DImode:
9427 if (TARGET_64BIT)
9428 goto simple;
9429 /* Expand DImode branch into multiple compare+branch. */
9431 rtx lo[2], hi[2], label2;
9432 enum rtx_code code1, code2, code3;
9434 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9436 tmp = ix86_compare_op0;
9437 ix86_compare_op0 = ix86_compare_op1;
9438 ix86_compare_op1 = tmp;
9439 code = swap_condition (code);
9441 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9442 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9444 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9445 avoid two branches. This costs one extra insn, so disable when
9446 optimizing for size. */
9448 if ((code == EQ || code == NE)
9449 && (!optimize_size
9450 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9452 rtx xor0, xor1;
9454 xor1 = hi[0];
9455 if (hi[1] != const0_rtx)
9456 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9457 NULL_RTX, 0, OPTAB_WIDEN);
9459 xor0 = lo[0];
9460 if (lo[1] != const0_rtx)
9461 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9462 NULL_RTX, 0, OPTAB_WIDEN);
9464 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9465 NULL_RTX, 0, OPTAB_WIDEN);
9467 ix86_compare_op0 = tmp;
9468 ix86_compare_op1 = const0_rtx;
9469 ix86_expand_branch (code, label);
9470 return;
9473 /* Otherwise, if we are doing a less-than or greater-or-equal-than
9474 comparison, op1 is a constant, and the low word is zero, then we can
9475 just examine the high word. */
9477 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9478 switch (code)
9480 case LT: case LTU: case GE: case GEU:
9481 ix86_compare_op0 = hi[0];
9482 ix86_compare_op1 = hi[1];
9483 ix86_expand_branch (code, label);
9484 return;
9485 default:
9486 break;
9489 /* Otherwise, we need two or three jumps. */
9491 label2 = gen_label_rtx ();
9493 code1 = code;
9494 code2 = swap_condition (code);
9495 code3 = unsigned_condition (code);
9497 switch (code)
9499 case LT: case GT: case LTU: case GTU:
9500 break;
9502 case LE: code1 = LT; code2 = GT; break;
9503 case GE: code1 = GT; code2 = LT; break;
9504 case LEU: code1 = LTU; code2 = GTU; break;
9505 case GEU: code1 = GTU; code2 = LTU; break;
9507 case EQ: code1 = NIL; code2 = NE; break;
9508 case NE: code2 = NIL; break;
9510 default:
9511 abort ();
9515 * a < b =>
9516 * if (hi(a) < hi(b)) goto true;
9517 * if (hi(a) > hi(b)) goto false;
9518 * if (lo(a) < lo(b)) goto true;
9519 * false:
9522 ix86_compare_op0 = hi[0];
9523 ix86_compare_op1 = hi[1];
9525 if (code1 != NIL)
9526 ix86_expand_branch (code1, label);
9527 if (code2 != NIL)
9528 ix86_expand_branch (code2, label2);
9530 ix86_compare_op0 = lo[0];
9531 ix86_compare_op1 = lo[1];
9532 ix86_expand_branch (code3, label);
9534 if (code2 != NIL)
9535 emit_label (label2);
9536 return;
9539 default:
9540 abort ();
9544 /* Split branch based on floating point condition. */
9545 void
9546 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9547 rtx target1, rtx target2, rtx tmp)
9549 rtx second, bypass;
9550 rtx label = NULL_RTX;
9551 rtx condition;
9552 int bypass_probability = -1, second_probability = -1, probability = -1;
9553 rtx i;
9555 if (target2 != pc_rtx)
9557 rtx tmp = target2;
9558 code = reverse_condition_maybe_unordered (code);
9559 target2 = target1;
9560 target1 = tmp;
9563 condition = ix86_expand_fp_compare (code, op1, op2,
9564 tmp, &second, &bypass);
9566 if (split_branch_probability >= 0)
9568 /* Distribute the probabilities across the jumps.
9569 Assume that BYPASS and SECOND are always tests
9570 for UNORDERED. */
9571 probability = split_branch_probability;
9573 /* A value of 1 is low enough that there is no need for the probability
9574 to be updated. Later we may run some experiments and see
9575 if unordered values are more frequent in practice. */
9576 if (bypass)
9577 bypass_probability = 1;
9578 if (second)
9579 second_probability = 1;
9581 if (bypass != NULL_RTX)
9583 label = gen_label_rtx ();
9584 i = emit_jump_insn (gen_rtx_SET
9585 (VOIDmode, pc_rtx,
9586 gen_rtx_IF_THEN_ELSE (VOIDmode,
9587 bypass,
9588 gen_rtx_LABEL_REF (VOIDmode,
9589 label),
9590 pc_rtx)));
9591 if (bypass_probability >= 0)
9592 REG_NOTES (i)
9593 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9594 GEN_INT (bypass_probability),
9595 REG_NOTES (i));
9597 i = emit_jump_insn (gen_rtx_SET
9598 (VOIDmode, pc_rtx,
9599 gen_rtx_IF_THEN_ELSE (VOIDmode,
9600 condition, target1, target2)));
9601 if (probability >= 0)
9602 REG_NOTES (i)
9603 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9604 GEN_INT (probability),
9605 REG_NOTES (i));
9606 if (second != NULL_RTX)
9608 i = emit_jump_insn (gen_rtx_SET
9609 (VOIDmode, pc_rtx,
9610 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9611 target2)));
9612 if (second_probability >= 0)
9613 REG_NOTES (i)
9614 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9615 GEN_INT (second_probability),
9616 REG_NOTES (i));
9618 if (label != NULL_RTX)
9619 emit_label (label);
9623 ix86_expand_setcc (enum rtx_code code, rtx dest)
9625 rtx ret, tmp, tmpreg, equiv;
9626 rtx second_test, bypass_test;
9628 if (GET_MODE (ix86_compare_op0) == DImode
9629 && !TARGET_64BIT)
9630 return 0; /* FAIL */
9632 if (GET_MODE (dest) != QImode)
9633 abort ();
9635 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9636 PUT_MODE (ret, QImode);
9638 tmp = dest;
9639 tmpreg = dest;
9641 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9642 if (bypass_test || second_test)
9644 rtx test = second_test;
9645 int bypass = 0;
9646 rtx tmp2 = gen_reg_rtx (QImode);
9647 if (bypass_test)
9649 if (second_test)
9650 abort ();
9651 test = bypass_test;
9652 bypass = 1;
9653 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9655 PUT_MODE (test, QImode);
9656 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9658 if (bypass)
9659 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9660 else
9661 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9664 /* Attach a REG_EQUAL note describing the comparison result. */
9665 equiv = simplify_gen_relational (code, QImode,
9666 GET_MODE (ix86_compare_op0),
9667 ix86_compare_op0, ix86_compare_op1);
9668 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9670 return 1; /* DONE */
9673 /* Expand a comparison setting or clearing the carry flag. Return true when
9674 successful and set *pop to the comparison operation. */
9675 static bool
9676 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9678 enum machine_mode mode =
9679 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9681 /* Do not handle DImode compares that go through the special path. Also we
9682 can't deal with FP compares yet; it would be possible to add this. */
9683 if ((mode == DImode && !TARGET_64BIT))
9684 return false;
9685 if (FLOAT_MODE_P (mode))
9687 rtx second_test = NULL, bypass_test = NULL;
9688 rtx compare_op, compare_seq;
9690 /* Shortcut: the following common codes never translate into carry flag compares. */
9691 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9692 || code == ORDERED || code == UNORDERED)
9693 return false;
9695 /* These comparisons require the zero flag; swap the operands so they won't. */
9696 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9697 && !TARGET_IEEE_FP)
9699 rtx tmp = op0;
9700 op0 = op1;
9701 op1 = tmp;
9702 code = swap_condition (code);
9705 /* Try to expand the comparison and verify that we end up with a carry-flag-
9706 based comparison. This fails to be true only when we decide to expand the
9707 comparison using arithmetic, which is not a common scenario. */
9708 start_sequence ();
9709 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9710 &second_test, &bypass_test);
9711 compare_seq = get_insns ();
9712 end_sequence ();
9714 if (second_test || bypass_test)
9715 return false;
9716 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9717 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9718 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9719 else
9720 code = GET_CODE (compare_op);
9721 if (code != LTU && code != GEU)
9722 return false;
9723 emit_insn (compare_seq);
9724 *pop = compare_op;
9725 return true;
9727 if (!INTEGRAL_MODE_P (mode))
9728 return false;
9729 switch (code)
9731 case LTU:
9732 case GEU:
9733 break;
9735 /* Convert a==0 into (unsigned)a<1. */
9736 case EQ:
9737 case NE:
9738 if (op1 != const0_rtx)
9739 return false;
9740 op1 = const1_rtx;
9741 code = (code == EQ ? LTU : GEU);
9742 break;
9744 /* Convert a>b into b<a or a>=b+1. */
9745 case GTU:
9746 case LEU:
9747 if (GET_CODE (op1) == CONST_INT)
9749 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9750 /* Bail out on overflow. We could still swap the operands, but that
9751 would force loading of the constant into a register. */
9752 if (op1 == const0_rtx
9753 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9754 return false;
9755 code = (code == GTU ? GEU : LTU);
9757 else
9759 rtx tmp = op1;
9760 op1 = op0;
9761 op0 = tmp;
9762 code = (code == GTU ? LTU : GEU);
9764 break;
9766 /* Convert a>=0 into (unsigned)a<0x80000000. */
9767 case LT:
9768 case GE:
9769 if (mode == DImode || op1 != const0_rtx)
9770 return false;
9771 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9772 code = (code == LT ? GEU : LTU);
9773 break;
9774 case LE:
9775 case GT:
9776 if (mode == DImode || op1 != constm1_rtx)
9777 return false;
9778 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9779 code = (code == LE ? GEU : LTU);
9780 break;
9782 default:
9783 return false;
9785 /* Swapping the operands may cause a constant to appear as the first operand. */
9786 if (!nonimmediate_operand (op0, VOIDmode))
9788 if (no_new_pseudos)
9789 return false;
9790 op0 = force_reg (mode, op0);
9792 ix86_compare_op0 = op0;
9793 ix86_compare_op1 = op1;
9794 *pop = ix86_expand_compare (code, NULL, NULL);
9795 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9796 abort ();
9797 return true;
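/* A minimal sketch of why the rewrites above help (hypothetical register
   choice): "a == 0" becomes the unsigned test "a < 1", which later
   expands into something like

	cmpl	$1, %eax
	sbbl	%eax, %eax

   leaving -1 in %eax when a == 0 and 0 otherwise.  */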
9801 ix86_expand_int_movcc (rtx operands[])
9803 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9804 rtx compare_seq, compare_op;
9805 rtx second_test, bypass_test;
9806 enum machine_mode mode = GET_MODE (operands[0]);
9807 bool sign_bit_compare_p = false;
9809 start_sequence ();
9810 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9811 compare_seq = get_insns ();
9812 end_sequence ();
9814 compare_code = GET_CODE (compare_op);
9816 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9817 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9818 sign_bit_compare_p = true;
9820 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9821 HImode insns, we'd be swallowed in word prefix ops. */
9823 if ((mode != HImode || TARGET_FAST_PREFIX)
9824 && (mode != DImode || TARGET_64BIT)
9825 && GET_CODE (operands[2]) == CONST_INT
9826 && GET_CODE (operands[3]) == CONST_INT)
9828 rtx out = operands[0];
9829 HOST_WIDE_INT ct = INTVAL (operands[2]);
9830 HOST_WIDE_INT cf = INTVAL (operands[3]);
9831 HOST_WIDE_INT diff;
9833 diff = ct - cf;
9834 /* Sign bit compares are better done using shifts than by using
9835 sbb. */
9836 if (sign_bit_compare_p
9837 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9838 ix86_compare_op1, &compare_op))
9840 /* Detect overlap between destination and compare sources. */
9841 rtx tmp = out;
9843 if (!sign_bit_compare_p)
9845 bool fpcmp = false;
9847 compare_code = GET_CODE (compare_op);
9849 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9850 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9852 fpcmp = true;
9853 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9856 /* To simplify the rest of the code, restrict to the GEU case. */
9857 if (compare_code == LTU)
9859 HOST_WIDE_INT tmp = ct;
9860 ct = cf;
9861 cf = tmp;
9862 compare_code = reverse_condition (compare_code);
9863 code = reverse_condition (code);
9865 else
9867 if (fpcmp)
9868 PUT_CODE (compare_op,
9869 reverse_condition_maybe_unordered
9870 (GET_CODE (compare_op)));
9871 else
9872 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9874 diff = ct - cf;
9876 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9877 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9878 tmp = gen_reg_rtx (mode);
9880 if (mode == DImode)
9881 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9882 else
9883 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9885 else
9887 if (code == GT || code == GE)
9888 code = reverse_condition (code);
9889 else
9891 HOST_WIDE_INT tmp = ct;
9892 ct = cf;
9893 cf = tmp;
9894 diff = ct - cf;
9896 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9897 ix86_compare_op1, VOIDmode, 0, -1);
9900 if (diff == 1)
9903 * cmpl op0,op1
9904 * sbbl dest,dest
9905 * [addl dest, ct]
9907 * Size 5 - 8.
9909 if (ct)
9910 tmp = expand_simple_binop (mode, PLUS,
9911 tmp, GEN_INT (ct),
9912 copy_rtx (tmp), 1, OPTAB_DIRECT);
9914 else if (cf == -1)
9917 * cmpl op0,op1
9918 * sbbl dest,dest
9919 * orl $ct, dest
9921 * Size 8.
9923 tmp = expand_simple_binop (mode, IOR,
9924 tmp, GEN_INT (ct),
9925 copy_rtx (tmp), 1, OPTAB_DIRECT);
9927 else if (diff == -1 && ct)
9930 * cmpl op0,op1
9931 * sbbl dest,dest
9932 * notl dest
9933 * [addl dest, cf]
9935 * Size 8 - 11.
9937 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9938 if (cf)
9939 tmp = expand_simple_binop (mode, PLUS,
9940 copy_rtx (tmp), GEN_INT (cf),
9941 copy_rtx (tmp), 1, OPTAB_DIRECT);
9943 else
9946 * cmpl op0,op1
9947 * sbbl dest,dest
9948 * [notl dest]
9949 * andl cf - ct, dest
9950 * [addl dest, ct]
9952 * Size 8 - 11.
9955 if (cf == 0)
9957 cf = ct;
9958 ct = 0;
9959 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9962 tmp = expand_simple_binop (mode, AND,
9963 copy_rtx (tmp),
9964 gen_int_mode (cf - ct, mode),
9965 copy_rtx (tmp), 1, OPTAB_DIRECT);
9966 if (ct)
9967 tmp = expand_simple_binop (mode, PLUS,
9968 copy_rtx (tmp), GEN_INT (ct),
9969 copy_rtx (tmp), 1, OPTAB_DIRECT);
9972 if (!rtx_equal_p (tmp, out))
9973 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9975 return 1; /* DONE */
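  /* Editorial note (added illustration): after the compare, "sbb dest,dest"
     leaves dest == -1 when the carry flag is set and 0 otherwise.  With such
     a 0/-1 mask the selection is branch-free:
        dest = (mask & (cf - ct)) + ct      (cf if mask == -1, ct if mask == 0)
     e.g. ct = 10, cf = 3: mask == -1 gives (-7 & -1) + 10 = 3, while
     mask == 0 gives 10.  The diff == 1, cf == -1 and diff == -1 cases above
     are just cheaper forms of the same identity.  */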
9978 if (diff < 0)
9980 HOST_WIDE_INT tmp;
9981 tmp = ct, ct = cf, cf = tmp;
9982 diff = -diff;
9983 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9985 /* We may be reversing an unordered compare to a normal compare, which
9986 is not valid in general (we may convert a non-trapping condition
9987 into a trapping one); however, on i386 we currently emit all
9988 comparisons unordered. */
9989 compare_code = reverse_condition_maybe_unordered (compare_code);
9990 code = reverse_condition_maybe_unordered (code);
9992 else
9994 compare_code = reverse_condition (compare_code);
9995 code = reverse_condition (code);
9999 compare_code = NIL;
10000 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10001 && GET_CODE (ix86_compare_op1) == CONST_INT)
10003 if (ix86_compare_op1 == const0_rtx
10004 && (code == LT || code == GE))
10005 compare_code = code;
10006 else if (ix86_compare_op1 == constm1_rtx)
10008 if (code == LE)
10009 compare_code = LT;
10010 else if (code == GT)
10011 compare_code = GE;
10015 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10016 if (compare_code != NIL
10017 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10018 && (cf == -1 || ct == -1))
10020 /* If lea code below could be used, only optimize
10021 if it results in a 2 insn sequence. */
10023 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10024 || diff == 3 || diff == 5 || diff == 9)
10025 || (compare_code == LT && ct == -1)
10026 || (compare_code == GE && cf == -1))
10029 * notl op1 (if necessary)
10030 * sarl $31, op1
10031 * orl cf, op1
10033 if (ct != -1)
10035 cf = ct;
10036 ct = -1;
10037 code = reverse_condition (code);
10040 out = emit_store_flag (out, code, ix86_compare_op0,
10041 ix86_compare_op1, VOIDmode, 0, -1);
10043 out = expand_simple_binop (mode, IOR,
10044 out, GEN_INT (cf),
10045 out, 1, OPTAB_DIRECT);
10046 if (out != operands[0])
10047 emit_move_insn (operands[0], out);
10049 return 1; /* DONE */
10054 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10055 || diff == 3 || diff == 5 || diff == 9)
10056 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10057 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10060 * xorl dest,dest
10061 * cmpl op1,op2
10062 * setcc dest
10063 * lea cf(dest*(ct-cf)),dest
10065 * Size 14.
10067 * This also catches the degenerate setcc-only case.
10070 rtx tmp;
10071 int nops;
10073 out = emit_store_flag (out, code, ix86_compare_op0,
10074 ix86_compare_op1, VOIDmode, 0, 1);
10076 nops = 0;
10077 /* On x86_64 the lea instruction operates on Pmode, so we need
10078 to do the arithmetic in the proper mode to match. */
10079 if (diff == 1)
10080 tmp = copy_rtx (out);
10081 else
10083 rtx out1;
10084 out1 = copy_rtx (out);
10085 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10086 nops++;
10087 if (diff & 1)
10089 tmp = gen_rtx_PLUS (mode, tmp, out1);
10090 nops++;
10093 if (cf != 0)
10095 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10096 nops++;
10098 if (!rtx_equal_p (tmp, out))
10100 if (nops == 1)
10101 out = force_operand (tmp, copy_rtx (out));
10102 else
10103 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10105 if (!rtx_equal_p (out, operands[0]))
10106 emit_move_insn (operands[0], copy_rtx (out));
10108 return 1; /* DONE */
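  /* Editorial note (added illustration): the setcc + lea variant above
     computes dest = flag * diff + cf with flag in {0, 1} and diff = ct - cf.
     E.g. for ct = 5, cf = 2 (diff = 3) the address expression is
     dest * 2 + dest + 2, which yields 5 when the condition holds and 2
     otherwise, all without a branch.  */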
10112 * General case: Jumpful:
10113 * xorl dest,dest cmpl op1, op2
10114 * cmpl op1, op2 movl ct, dest
10115 * setcc dest jcc 1f
10116 * decl dest movl cf, dest
10117 * andl (cf-ct),dest 1:
10118 * addl ct,dest
10120 * Size 20. Size 14.
10122 * This is reasonably steep, but branch mispredict costs are
10123 * high on modern CPUs, so consider failing only if optimizing
10124 * for space.
10127 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10128 && BRANCH_COST >= 2)
10130 if (cf == 0)
10132 cf = ct;
10133 ct = 0;
10134 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10135 /* We may be reversing an unordered compare to a normal compare,
10136 which is not valid in general (we may convert a non-trapping
10137 condition into a trapping one); however, on i386 we currently
10138 emit all comparisons unordered. */
10139 code = reverse_condition_maybe_unordered (code);
10140 else
10142 code = reverse_condition (code);
10143 if (compare_code != NIL)
10144 compare_code = reverse_condition (compare_code);
10148 if (compare_code != NIL)
10150 /* notl op1 (if needed)
10151 sarl $31, op1
10152 andl (cf-ct), op1
10153 addl ct, op1
10155 For x < 0 (resp. x <= -1) there will be no notl,
10156 so if possible swap the constants to get rid of the
10157 complement.
10158 True/false will be -1/0 while code below (store flag
10159 followed by decrement) is 0/-1, so the constants need
10160 to be exchanged once more. */
10162 if (compare_code == GE || !cf)
10164 code = reverse_condition (code);
10165 compare_code = LT;
10167 else
10169 HOST_WIDE_INT tmp = cf;
10170 cf = ct;
10171 ct = tmp;
10174 out = emit_store_flag (out, code, ix86_compare_op0,
10175 ix86_compare_op1, VOIDmode, 0, -1);
10177 else
10179 out = emit_store_flag (out, code, ix86_compare_op0,
10180 ix86_compare_op1, VOIDmode, 0, 1);
10182 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10183 copy_rtx (out), 1, OPTAB_DIRECT);
10186 out = expand_simple_binop (mode, AND, copy_rtx (out),
10187 gen_int_mode (cf - ct, mode),
10188 copy_rtx (out), 1, OPTAB_DIRECT);
10189 if (ct)
10190 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10191 copy_rtx (out), 1, OPTAB_DIRECT);
10192 if (!rtx_equal_p (out, operands[0]))
10193 emit_move_insn (operands[0], copy_rtx (out));
10195 return 1; /* DONE */
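  /* Editorial note (added illustration): the jumpless sequence above turns
     the 0/1 setcc result into a 0/-1 mask by decrementing it, so
        dest = (((cond ? 1 : 0) - 1) & (cf - ct)) + ct
     gives ct when the condition holds (mask 0) and cf otherwise (mask -1);
     this inversion is why the constants may have to be exchanged first.  */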
10199 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10201 /* Try a few more things with specific constants and a variable. */
10203 optab op;
10204 rtx var, orig_out, out, tmp;
10206 if (BRANCH_COST <= 2)
10207 return 0; /* FAIL */
10209 /* If one of the two operands is an interesting constant, load a
10210 constant with the code above and mask in the variable with a logical operation. */
10212 if (GET_CODE (operands[2]) == CONST_INT)
10214 var = operands[3];
10215 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10216 operands[3] = constm1_rtx, op = and_optab;
10217 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10218 operands[3] = const0_rtx, op = ior_optab;
10219 else
10220 return 0; /* FAIL */
10222 else if (GET_CODE (operands[3]) == CONST_INT)
10224 var = operands[2];
10225 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10226 operands[2] = constm1_rtx, op = and_optab;
10227 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10228 operands[2] = const0_rtx, op = ior_optab;
10229 else
10230 return 0; /* FAIL */
10232 else
10233 return 0; /* FAIL */
10235 orig_out = operands[0];
10236 tmp = gen_reg_rtx (mode);
10237 operands[0] = tmp;
10239 /* Recurse to get the constant loaded. */
10240 if (ix86_expand_int_movcc (operands) == 0)
10241 return 0; /* FAIL */
10243 /* Mask in the interesting variable. */
10244 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10245 OPTAB_WIDEN);
10246 if (!rtx_equal_p (out, orig_out))
10247 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10249 return 1; /* DONE */
10253 * For comparison with above,
10255 * movl cf,dest
10256 * movl ct,tmp
10257 * cmpl op1,op2
10258 * cmovcc tmp,dest
10260 * Size 15.
10263 if (! nonimmediate_operand (operands[2], mode))
10264 operands[2] = force_reg (mode, operands[2]);
10265 if (! nonimmediate_operand (operands[3], mode))
10266 operands[3] = force_reg (mode, operands[3]);
10268 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10270 rtx tmp = gen_reg_rtx (mode);
10271 emit_move_insn (tmp, operands[3]);
10272 operands[3] = tmp;
10274 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10276 rtx tmp = gen_reg_rtx (mode);
10277 emit_move_insn (tmp, operands[2]);
10278 operands[2] = tmp;
10281 if (! register_operand (operands[2], VOIDmode)
10282 && (mode == QImode
10283 || ! register_operand (operands[3], VOIDmode)))
10284 operands[2] = force_reg (mode, operands[2]);
10286 if (mode == QImode
10287 && ! register_operand (operands[3], VOIDmode))
10288 operands[3] = force_reg (mode, operands[3]);
10290 emit_insn (compare_seq);
10291 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10292 gen_rtx_IF_THEN_ELSE (mode,
10293 compare_op, operands[2],
10294 operands[3])));
10295 if (bypass_test)
10296 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10297 gen_rtx_IF_THEN_ELSE (mode,
10298 bypass_test,
10299 copy_rtx (operands[3]),
10300 copy_rtx (operands[0]))));
10301 if (second_test)
10302 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10303 gen_rtx_IF_THEN_ELSE (mode,
10304 second_test,
10305 copy_rtx (operands[2]),
10306 copy_rtx (operands[0]))));
10308 return 1; /* DONE */
10312 ix86_expand_fp_movcc (rtx operands[])
10314 enum rtx_code code;
10315 rtx tmp;
10316 rtx compare_op, second_test, bypass_test;
10318 /* For SF/DFmode conditional moves based on comparisons
10319 in the same mode, we may want to use SSE min/max instructions. */
10320 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10321 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10322 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10323 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10324 && (!TARGET_IEEE_FP
10325 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10326 /* We may be called from the post-reload splitter. */
10327 && (!REG_P (operands[0])
10328 || SSE_REG_P (operands[0])
10329 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10331 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10332 code = GET_CODE (operands[1]);
10334 /* See if we have a (cross) match between the comparison operands and
10335 the conditional move operands. */
10336 if (rtx_equal_p (operands[2], op1))
10338 rtx tmp = op0;
10339 op0 = op1;
10340 op1 = tmp;
10341 code = reverse_condition_maybe_unordered (code);
10343 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10345 /* Check for min operation. */
10346 if (code == LT || code == UNLE)
10348 if (code == UNLE)
10350 rtx tmp = op0;
10351 op0 = op1;
10352 op1 = tmp;
10354 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10355 if (memory_operand (op0, VOIDmode))
10356 op0 = force_reg (GET_MODE (operands[0]), op0);
10357 if (GET_MODE (operands[0]) == SFmode)
10358 emit_insn (gen_minsf3 (operands[0], op0, op1));
10359 else
10360 emit_insn (gen_mindf3 (operands[0], op0, op1));
10361 return 1;
10363 /* Check for max operation. */
10364 if (code == GT || code == UNGE)
10366 if (code == UNGE)
10368 rtx tmp = op0;
10369 op0 = op1;
10370 op1 = tmp;
10372 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10373 if (memory_operand (op0, VOIDmode))
10374 op0 = force_reg (GET_MODE (operands[0]), op0);
10375 if (GET_MODE (operands[0]) == SFmode)
10376 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10377 else
10378 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10379 return 1;
10382 /* Arrange for the condition to be a sse_comparison_operator. In
10383 non-IEEE mode, try to canonicalize the destination operand
10384 to be first in the comparison - this helps reload avoid extra
10385 moves. */
10386 if (!sse_comparison_operator (operands[1], VOIDmode)
10387 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10389 rtx tmp = ix86_compare_op0;
10390 ix86_compare_op0 = ix86_compare_op1;
10391 ix86_compare_op1 = tmp;
10392 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10393 VOIDmode, ix86_compare_op0,
10394 ix86_compare_op1);
10396 /* Similarly, try to arrange for the result to be the first operand of the
10397 conditional move. We also don't support the NE comparison on SSE, so try to
10398 avoid it. */
10399 if ((rtx_equal_p (operands[0], operands[3])
10400 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10401 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10403 rtx tmp = operands[2];
10404 operands[2] = operands[3];
10405 operands[3] = tmp;
10406 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10407 (GET_CODE (operands[1])),
10408 VOIDmode, ix86_compare_op0,
10409 ix86_compare_op1);
10411 if (GET_MODE (operands[0]) == SFmode)
10412 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10413 operands[2], operands[3],
10414 ix86_compare_op0, ix86_compare_op1));
10415 else
10416 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10417 operands[2], operands[3],
10418 ix86_compare_op0, ix86_compare_op1));
10419 return 1;
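  /* Editorial note (added illustration): the min/max recognition above maps,
     e.g., "x = a < b ? a : b" on SFmode operands to the minsf3 pattern
     (mindf3 for DFmode) and "x = a > b ? a : b" to maxsf3/maxdf3, provided
     the comparison operands match the arms of the conditional move.  */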
10422 /* The floating point conditional move instructions don't directly
10423 support conditions resulting from a signed integer comparison. */
10425 code = GET_CODE (operands[1]);
10426 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10428 /* The floating point conditional move instructions don't directly
10429 support signed integer comparisons. */
10431 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10433 if (second_test != NULL || bypass_test != NULL)
10434 abort ();
10435 tmp = gen_reg_rtx (QImode);
10436 ix86_expand_setcc (code, tmp);
10437 code = NE;
10438 ix86_compare_op0 = tmp;
10439 ix86_compare_op1 = const0_rtx;
10440 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10442 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10444 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10445 emit_move_insn (tmp, operands[3]);
10446 operands[3] = tmp;
10448 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10450 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10451 emit_move_insn (tmp, operands[2]);
10452 operands[2] = tmp;
10455 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10456 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10457 compare_op,
10458 operands[2],
10459 operands[3])));
10460 if (bypass_test)
10461 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10462 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10463 bypass_test,
10464 operands[3],
10465 operands[0])));
10466 if (second_test)
10467 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10468 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10469 second_test,
10470 operands[2],
10471 operands[0])));
10473 return 1;
10476 /* Expand conditional increment or decrement using adc/sbb instructions.
10477 The default case using setcc followed by the conditional move can be
10478 done by generic code. */
10480 ix86_expand_int_addcc (rtx operands[])
10482 enum rtx_code code = GET_CODE (operands[1]);
10483 rtx compare_op;
10484 rtx val = const0_rtx;
10485 bool fpcmp = false;
10486 enum machine_mode mode = GET_MODE (operands[0]);
10488 if (operands[3] != const1_rtx
10489 && operands[3] != constm1_rtx)
10490 return 0;
10491 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10492 ix86_compare_op1, &compare_op))
10493 return 0;
10494 code = GET_CODE (compare_op);
10496 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10497 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10499 fpcmp = true;
10500 code = ix86_fp_compare_code_to_integer (code);
10503 if (code != LTU)
10505 val = constm1_rtx;
10506 if (fpcmp)
10507 PUT_CODE (compare_op,
10508 reverse_condition_maybe_unordered
10509 (GET_CODE (compare_op)));
10510 else
10511 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10513 PUT_MODE (compare_op, mode);
10515 /* Construct either adc or sbb insn. */
10516 if ((code == LTU) == (operands[3] == constm1_rtx))
10518 switch (GET_MODE (operands[0]))
10520 case QImode:
10521 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10522 break;
10523 case HImode:
10524 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10525 break;
10526 case SImode:
10527 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10528 break;
10529 case DImode:
10530 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10531 break;
10532 default:
10533 abort ();
10536 else
10538 switch (GET_MODE (operands[0]))
10540 case QImode:
10541 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10542 break;
10543 case HImode:
10544 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10545 break;
10546 case SImode:
10547 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10548 break;
10549 case DImode:
10550 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10551 break;
10552 default:
10553 abort ();
10556 return 1; /* DONE */
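  /* Editorial note (added illustration): a typical use of the expansion
     above is, for unsigned a and b,
        x += (a < b);
     which becomes a compare that sets the carry flag exactly when a < b,
     followed by "adc $0, x"; the mirror case x -= (a < b) becomes
     "sbb $0, x".  No setcc or branch is needed.  */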
10560 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10561 works for floating point parameters and non-offsettable memories.
10562 For pushes, it returns just stack offsets; the values will be saved
10563 in the right order. At most three parts are generated. */
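  /* Editorial note (added illustration): for example, on ia32 a DFmode
     operand in memory at address X is returned as two SImode parts at X and
     X + 4, and an XFmode operand adds a third part at X + 8; in 64-bit mode
     an XFmode or TFmode operand is split into a DImode part at X and an
     SImode or DImode part at X + 8.  */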
10565 static int
10566 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10568 int size;
10570 if (!TARGET_64BIT)
10571 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10572 else
10573 size = (GET_MODE_SIZE (mode) + 4) / 8;
10575 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10576 abort ();
10577 if (size < 2 || size > 3)
10578 abort ();
10580 /* Optimize constant pool references to immediates. This is used by fp
10581 moves, which force all constants to memory to allow combining. */
10582 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10584 rtx tmp = maybe_get_pool_constant (operand);
10585 if (tmp)
10586 operand = tmp;
10589 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10591 /* The only non-offsettable memories we handle are pushes. */
10592 if (! push_operand (operand, VOIDmode))
10593 abort ();
10595 operand = copy_rtx (operand);
10596 PUT_MODE (operand, Pmode);
10597 parts[0] = parts[1] = parts[2] = operand;
10599 else if (!TARGET_64BIT)
10601 if (mode == DImode)
10602 split_di (&operand, 1, &parts[0], &parts[1]);
10603 else
10605 if (REG_P (operand))
10607 if (!reload_completed)
10608 abort ();
10609 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10610 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10611 if (size == 3)
10612 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10614 else if (offsettable_memref_p (operand))
10616 operand = adjust_address (operand, SImode, 0);
10617 parts[0] = operand;
10618 parts[1] = adjust_address (operand, SImode, 4);
10619 if (size == 3)
10620 parts[2] = adjust_address (operand, SImode, 8);
10622 else if (GET_CODE (operand) == CONST_DOUBLE)
10624 REAL_VALUE_TYPE r;
10625 long l[4];
10627 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10628 switch (mode)
10630 case XFmode:
10631 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10632 parts[2] = gen_int_mode (l[2], SImode);
10633 break;
10634 case DFmode:
10635 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10636 break;
10637 default:
10638 abort ();
10640 parts[1] = gen_int_mode (l[1], SImode);
10641 parts[0] = gen_int_mode (l[0], SImode);
10643 else
10644 abort ();
10647 else
10649 if (mode == TImode)
10650 split_ti (&operand, 1, &parts[0], &parts[1]);
10651 if (mode == XFmode || mode == TFmode)
10653 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10654 if (REG_P (operand))
10656 if (!reload_completed)
10657 abort ();
10658 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10659 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10661 else if (offsettable_memref_p (operand))
10663 operand = adjust_address (operand, DImode, 0);
10664 parts[0] = operand;
10665 parts[1] = adjust_address (operand, upper_mode, 8);
10667 else if (GET_CODE (operand) == CONST_DOUBLE)
10669 REAL_VALUE_TYPE r;
10670 long l[3];
10672 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10673 real_to_target (l, &r, mode);
10674 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10675 if (HOST_BITS_PER_WIDE_INT >= 64)
10676 parts[0]
10677 = gen_int_mode
10678 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10679 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10680 DImode);
10681 else
10682 parts[0] = immed_double_const (l[0], l[1], DImode);
10683 if (upper_mode == SImode)
10684 parts[1] = gen_int_mode (l[2], SImode);
10685 else if (HOST_BITS_PER_WIDE_INT >= 64)
10686 parts[1]
10687 = gen_int_mode
10688 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10689 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10690 DImode);
10691 else
10692 parts[1] = immed_double_const (l[2], l[3], DImode);
10694 else
10695 abort ();
10699 return size;
10702 /* Emit insns to perform a move or push of DI, DF, and XF values.
10703 Operands 2-4 contain the input values in the correct order;
10704 operands 5-7 contain the output values. */
10707 void
10708 ix86_split_long_move (rtx operands[])
10710 rtx part[2][3];
10711 int nparts;
10712 int push = 0;
10713 int collisions = 0;
10714 enum machine_mode mode = GET_MODE (operands[0]);
10716 /* The DFmode expanders may ask us to move a double.
10717 For a 64-bit target this is a single move. By hiding the fact
10718 here we simplify the i386.md splitters. */
10719 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10721 /* Optimize constant pool references to immediates. This is used by
10722 fp moves, which force all constants to memory to allow combining. */
10724 if (GET_CODE (operands[1]) == MEM
10725 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10726 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10727 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10728 if (push_operand (operands[0], VOIDmode))
10730 operands[0] = copy_rtx (operands[0]);
10731 PUT_MODE (operands[0], Pmode);
10733 else
10734 operands[0] = gen_lowpart (DImode, operands[0]);
10735 operands[1] = gen_lowpart (DImode, operands[1]);
10736 emit_move_insn (operands[0], operands[1]);
10737 return;
10740 /* The only non-offsettable memory we handle is push. */
10741 if (push_operand (operands[0], VOIDmode))
10742 push = 1;
10743 else if (GET_CODE (operands[0]) == MEM
10744 && ! offsettable_memref_p (operands[0]))
10745 abort ();
10747 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10748 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10750 /* When emitting a push, take care with source operands on the stack. */
10751 if (push && GET_CODE (operands[1]) == MEM
10752 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10754 if (nparts == 3)
10755 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10756 XEXP (part[1][2], 0));
10757 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10758 XEXP (part[1][1], 0));
10761 /* We need to do the copy in the right order in case an address register
10762 of the source overlaps the destination. */
10763 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10765 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10766 collisions++;
10767 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10768 collisions++;
10769 if (nparts == 3
10770 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10771 collisions++;
10773 /* Collision in the middle part can be handled by reordering. */
10774 if (collisions == 1 && nparts == 3
10775 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10777 rtx tmp;
10778 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10779 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10782 /* If there are more collisions, we can't handle it by reordering.
10783 Do an lea to the last part and use only one colliding move. */
10784 else if (collisions > 1)
10786 rtx base;
10788 collisions = 1;
10790 base = part[0][nparts - 1];
10792 /* Handle the case when the last part isn't valid for lea.
10793 Happens in 64-bit mode storing the 12-byte XFmode. */
10794 if (GET_MODE (base) != Pmode)
10795 base = gen_rtx_REG (Pmode, REGNO (base));
10797 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10798 part[1][0] = replace_equiv_address (part[1][0], base);
10799 part[1][1] = replace_equiv_address (part[1][1],
10800 plus_constant (base, UNITS_PER_WORD));
10801 if (nparts == 3)
10802 part[1][2] = replace_equiv_address (part[1][2],
10803 plus_constant (base, 8));
10807 if (push)
10809 if (!TARGET_64BIT)
10811 if (nparts == 3)
10813 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10814 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10815 emit_move_insn (part[0][2], part[1][2]);
10818 else
10820 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
10821 register, it is OK - we will just use the larger counterpart. We also
10822 retype the memory - this comes from an attempt to avoid a REX prefix on
10823 moving the second half of a TFmode value. */
10824 if (GET_MODE (part[1][1]) == SImode)
10826 if (GET_CODE (part[1][1]) == MEM)
10827 part[1][1] = adjust_address (part[1][1], DImode, 0);
10828 else if (REG_P (part[1][1]))
10829 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10830 else
10831 abort ();
10832 if (GET_MODE (part[1][0]) == SImode)
10833 part[1][0] = part[1][1];
10836 emit_move_insn (part[0][1], part[1][1]);
10837 emit_move_insn (part[0][0], part[1][0]);
10838 return;
10841 /* Choose the correct order so we do not overwrite the source before it is copied. */
10842 if ((REG_P (part[0][0])
10843 && REG_P (part[1][1])
10844 && (REGNO (part[0][0]) == REGNO (part[1][1])
10845 || (nparts == 3
10846 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10847 || (collisions > 0
10848 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10850 if (nparts == 3)
10852 operands[2] = part[0][2];
10853 operands[3] = part[0][1];
10854 operands[4] = part[0][0];
10855 operands[5] = part[1][2];
10856 operands[6] = part[1][1];
10857 operands[7] = part[1][0];
10859 else
10861 operands[2] = part[0][1];
10862 operands[3] = part[0][0];
10863 operands[5] = part[1][1];
10864 operands[6] = part[1][0];
10867 else
10869 if (nparts == 3)
10871 operands[2] = part[0][0];
10872 operands[3] = part[0][1];
10873 operands[4] = part[0][2];
10874 operands[5] = part[1][0];
10875 operands[6] = part[1][1];
10876 operands[7] = part[1][2];
10878 else
10880 operands[2] = part[0][0];
10881 operands[3] = part[0][1];
10882 operands[5] = part[1][0];
10883 operands[6] = part[1][1];
10886 emit_move_insn (operands[2], operands[5]);
10887 emit_move_insn (operands[3], operands[6]);
10888 if (nparts == 3)
10889 emit_move_insn (operands[4], operands[7]);
10891 return;
10894 void
10895 ix86_split_ashldi (rtx *operands, rtx scratch)
10897 rtx low[2], high[2];
10898 int count;
10900 if (GET_CODE (operands[2]) == CONST_INT)
10902 split_di (operands, 2, low, high);
10903 count = INTVAL (operands[2]) & 63;
10905 if (count >= 32)
10907 emit_move_insn (high[0], low[1]);
10908 emit_move_insn (low[0], const0_rtx);
10910 if (count > 32)
10911 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10913 else
10915 if (!rtx_equal_p (operands[0], operands[1]))
10916 emit_move_insn (operands[0], operands[1]);
10917 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10918 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10921 else
10923 if (!rtx_equal_p (operands[0], operands[1]))
10924 emit_move_insn (operands[0], operands[1]);
10926 split_di (operands, 1, low, high);
10928 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10929 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10931 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10933 if (! no_new_pseudos)
10934 scratch = force_reg (SImode, const0_rtx);
10935 else
10936 emit_move_insn (scratch, const0_rtx);
10938 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10939 scratch));
10941 else
10942 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
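  /* Editorial note (added illustration): for a constant count the DImode
     left shift above reduces to SImode operations, roughly
        if (count >= 32) { high = low << (count - 32); low = 0; }
        else             { high = (high << count) | (low >> (32 - count));
                           low <<= count; }
     (for 0 < count < 32); the second case is what the shld instruction
     implements in one step.  */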
10946 void
10947 ix86_split_ashrdi (rtx *operands, rtx scratch)
10949 rtx low[2], high[2];
10950 int count;
10952 if (GET_CODE (operands[2]) == CONST_INT)
10954 split_di (operands, 2, low, high);
10955 count = INTVAL (operands[2]) & 63;
10957 if (count == 63)
10959 emit_move_insn (high[0], high[1]);
10960 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10961 emit_move_insn (low[0], high[0]);
10964 else if (count >= 32)
10966 emit_move_insn (low[0], high[1]);
10968 if (! reload_completed)
10969 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10970 else
10972 emit_move_insn (high[0], low[0]);
10973 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10976 if (count > 32)
10977 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10979 else
10981 if (!rtx_equal_p (operands[0], operands[1]))
10982 emit_move_insn (operands[0], operands[1]);
10983 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10984 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10987 else
10989 if (!rtx_equal_p (operands[0], operands[1]))
10990 emit_move_insn (operands[0], operands[1]);
10992 split_di (operands, 1, low, high);
10994 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10995 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10997 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10999 if (! no_new_pseudos)
11000 scratch = gen_reg_rtx (SImode);
11001 emit_move_insn (scratch, high[0]);
11002 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11003 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11004 scratch));
11006 else
11007 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11011 void
11012 ix86_split_lshrdi (rtx *operands, rtx scratch)
11014 rtx low[2], high[2];
11015 int count;
11017 if (GET_CODE (operands[2]) == CONST_INT)
11019 split_di (operands, 2, low, high);
11020 count = INTVAL (operands[2]) & 63;
11022 if (count >= 32)
11024 emit_move_insn (low[0], high[1]);
11025 emit_move_insn (high[0], const0_rtx);
11027 if (count > 32)
11028 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11030 else
11032 if (!rtx_equal_p (operands[0], operands[1]))
11033 emit_move_insn (operands[0], operands[1]);
11034 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11035 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11038 else
11040 if (!rtx_equal_p (operands[0], operands[1]))
11041 emit_move_insn (operands[0], operands[1]);
11043 split_di (operands, 1, low, high);
11045 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11046 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11048 /* Heh. By reversing the arguments, we can reuse this pattern. */
11049 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11051 if (! no_new_pseudos)
11052 scratch = force_reg (SImode, const0_rtx);
11053 else
11054 emit_move_insn (scratch, const0_rtx);
11056 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11057 scratch));
11059 else
11060 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11064 /* Helper function for the string operations below. Test whether VARIABLE
11065 is aligned to VALUE bytes. If it is, jump to the label. */
11066 static rtx
11067 ix86_expand_aligntest (rtx variable, int value)
11069 rtx label = gen_label_rtx ();
11070 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11071 if (GET_MODE (variable) == DImode)
11072 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11073 else
11074 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11075 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11076 1, label);
11077 return label;
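  /* Editorial note (added illustration): in C terms the helper above emits
     the equivalent of
        if ((variable & value) == 0) goto label;
     and returns the label so that the caller can emit the code for the
     unaligned case and then place the label after it.  */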
11080 /* Adjust COUNTER by the VALUE. */
11081 static void
11082 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11084 if (GET_MODE (countreg) == DImode)
11085 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11086 else
11087 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11090 /* Zero-extend a possibly SImode EXP to a Pmode register. */
11092 ix86_zero_extend_to_Pmode (rtx exp)
11094 rtx r;
11095 if (GET_MODE (exp) == VOIDmode)
11096 return force_reg (Pmode, exp);
11097 if (GET_MODE (exp) == Pmode)
11098 return copy_to_mode_reg (Pmode, exp);
11099 r = gen_reg_rtx (Pmode);
11100 emit_insn (gen_zero_extendsidi2 (r, exp));
11101 return r;
11104 /* Expand string move (memcpy) operation. Use i386 string operations when
11105 profitable. expand_clrmem contains similar code. */
11107 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11109 rtx srcreg, destreg, countreg, srcexp, destexp;
11110 enum machine_mode counter_mode;
11111 HOST_WIDE_INT align = 0;
11112 unsigned HOST_WIDE_INT count = 0;
11114 if (GET_CODE (align_exp) == CONST_INT)
11115 align = INTVAL (align_exp);
11117 /* Can't use any of this if the user has appropriated esi or edi. */
11118 if (global_regs[4] || global_regs[5])
11119 return 0;
11121 /* This simple hack avoids all inlining code and simplifies code below. */
11122 if (!TARGET_ALIGN_STRINGOPS)
11123 align = 64;
11125 if (GET_CODE (count_exp) == CONST_INT)
11127 count = INTVAL (count_exp);
11128 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11129 return 0;
11132 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
11133 for 64 bits use SImode when possible, otherwise DImode.
11134 Set count to the number of bytes copied when known at compile time. */
11135 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11136 || x86_64_zero_extended_value (count_exp))
11137 counter_mode = SImode;
11138 else
11139 counter_mode = DImode;
11141 if (counter_mode != SImode && counter_mode != DImode)
11142 abort ();
11144 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11145 if (destreg != XEXP (dst, 0))
11146 dst = replace_equiv_address_nv (dst, destreg);
11147 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11148 if (srcreg != XEXP (src, 0))
11149 src = replace_equiv_address_nv (src, srcreg);
11151 /* When optimizing for size, emit a simple rep ; movsb instruction for
11152 counts not divisible by 4. */
11154 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11156 emit_insn (gen_cld ());
11157 countreg = ix86_zero_extend_to_Pmode (count_exp);
11158 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11159 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11160 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11161 destexp, srcexp));
11164 /* For constant aligned (or small unaligned) copies use rep movsl
11165 followed by code copying the rest. For PentiumPro ensure 8 byte
11166 alignment to allow rep movsl acceleration. */
11168 else if (count != 0
11169 && (align >= 8
11170 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11171 || optimize_size || count < (unsigned int) 64))
11173 unsigned HOST_WIDE_INT offset = 0;
11174 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11175 rtx srcmem, dstmem;
11177 emit_insn (gen_cld ());
11178 if (count & ~(size - 1))
11180 countreg = copy_to_mode_reg (counter_mode,
11181 GEN_INT ((count >> (size == 4 ? 2 : 3))
11182 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11183 countreg = ix86_zero_extend_to_Pmode (countreg);
11185 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11186 GEN_INT (size == 4 ? 2 : 3));
11187 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11188 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11190 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11191 countreg, destexp, srcexp));
11192 offset = count & ~(size - 1);
11194 if (size == 8 && (count & 0x04))
11196 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11197 offset);
11198 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11199 offset);
11200 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11201 offset += 4;
11203 if (count & 0x02)
11205 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11206 offset);
11207 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11208 offset);
11209 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11210 offset += 2;
11212 if (count & 0x01)
11214 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11215 offset);
11216 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11217 offset);
11218 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
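  /* Editorial note (added illustration): with a known count this branch
     copies the bulk with a rep move and finishes the tail with at most one
     4-byte, one 2-byte and one 1-byte move.  E.g. count == 11 with 4-byte
     chunks does rep movsl for 2 words (8 bytes) and then a 2-byte and a
     1-byte move for the remaining 3 bytes.  */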
11221 /* The generic code based on the glibc implementation:
11222 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11223 allowing accelerated copying there)
11224 - copy the data using rep movsl
11225 - copy the rest. */
11226 else
11228 rtx countreg2;
11229 rtx label = NULL;
11230 rtx srcmem, dstmem;
11231 int desired_alignment = (TARGET_PENTIUMPRO
11232 && (count == 0 || count >= (unsigned int) 260)
11233 ? 8 : UNITS_PER_WORD);
11234 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11235 dst = change_address (dst, BLKmode, destreg);
11236 src = change_address (src, BLKmode, srcreg);
11238 /* In case we don't know anything about the alignment, default to
11239 the library version, since it is usually equally fast and results in
11240 shorter code.
11242 Also emit the call when we know that the count is large and call overhead
11243 will not be important. */
11244 if (!TARGET_INLINE_ALL_STRINGOPS
11245 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11246 return 0;
11248 if (TARGET_SINGLE_STRINGOP)
11249 emit_insn (gen_cld ());
11251 countreg2 = gen_reg_rtx (Pmode);
11252 countreg = copy_to_mode_reg (counter_mode, count_exp);
11254 /* We don't use loops to align the destination or to copy parts smaller
11255 than 4 bytes, because gcc is able to optimize such code better (in
11256 case the destination or the count really is aligned, gcc is often
11257 able to predict the branches) and it is also friendlier to
11258 hardware branch prediction.
11260 Using loops is beneficial for the generic case, because we can
11261 handle small counts using the loops. Many CPUs (such as Athlon)
11262 have large REP prefix setup costs.
11264 This is quite costly. Maybe we can revisit this decision later or
11265 add some customizability to this code. */
11267 if (count == 0 && align < desired_alignment)
11269 label = gen_label_rtx ();
11270 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11271 LEU, 0, counter_mode, 1, label);
11273 if (align <= 1)
11275 rtx label = ix86_expand_aligntest (destreg, 1);
11276 srcmem = change_address (src, QImode, srcreg);
11277 dstmem = change_address (dst, QImode, destreg);
11278 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11279 ix86_adjust_counter (countreg, 1);
11280 emit_label (label);
11281 LABEL_NUSES (label) = 1;
11283 if (align <= 2)
11285 rtx label = ix86_expand_aligntest (destreg, 2);
11286 srcmem = change_address (src, HImode, srcreg);
11287 dstmem = change_address (dst, HImode, destreg);
11288 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11289 ix86_adjust_counter (countreg, 2);
11290 emit_label (label);
11291 LABEL_NUSES (label) = 1;
11293 if (align <= 4 && desired_alignment > 4)
11295 rtx label = ix86_expand_aligntest (destreg, 4);
11296 srcmem = change_address (src, SImode, srcreg);
11297 dstmem = change_address (dst, SImode, destreg);
11298 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11299 ix86_adjust_counter (countreg, 4);
11300 emit_label (label);
11301 LABEL_NUSES (label) = 1;
11304 if (label && desired_alignment > 4 && !TARGET_64BIT)
11306 emit_label (label);
11307 LABEL_NUSES (label) = 1;
11308 label = NULL_RTX;
11310 if (!TARGET_SINGLE_STRINGOP)
11311 emit_insn (gen_cld ());
11312 if (TARGET_64BIT)
11314 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11315 GEN_INT (3)));
11316 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11318 else
11320 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11321 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11323 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11324 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11325 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11326 countreg2, destexp, srcexp));
11328 if (label)
11330 emit_label (label);
11331 LABEL_NUSES (label) = 1;
11333 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11335 srcmem = change_address (src, SImode, srcreg);
11336 dstmem = change_address (dst, SImode, destreg);
11337 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11339 if ((align <= 4 || count == 0) && TARGET_64BIT)
11341 rtx label = ix86_expand_aligntest (countreg, 4);
11342 srcmem = change_address (src, SImode, srcreg);
11343 dstmem = change_address (dst, SImode, destreg);
11344 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11345 emit_label (label);
11346 LABEL_NUSES (label) = 1;
11348 if (align > 2 && count != 0 && (count & 2))
11350 srcmem = change_address (src, HImode, srcreg);
11351 dstmem = change_address (dst, HImode, destreg);
11352 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11354 if (align <= 2 || count == 0)
11356 rtx label = ix86_expand_aligntest (countreg, 2);
11357 srcmem = change_address (src, HImode, srcreg);
11358 dstmem = change_address (dst, HImode, destreg);
11359 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11360 emit_label (label);
11361 LABEL_NUSES (label) = 1;
11363 if (align > 1 && count != 0 && (count & 1))
11365 srcmem = change_address (src, QImode, srcreg);
11366 dstmem = change_address (dst, QImode, destreg);
11367 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11369 if (align <= 1 || count == 0)
11371 rtx label = ix86_expand_aligntest (countreg, 1);
11372 srcmem = change_address (src, QImode, srcreg);
11373 dstmem = change_address (dst, QImode, destreg);
11374 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11375 emit_label (label);
11376 LABEL_NUSES (label) = 1;
11380 return 1;
11383 /* Expand string clear operation (bzero). Use i386 string operations when
11384 profitable. expand_movmem contains similar code. */
11386 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11388 rtx destreg, zeroreg, countreg, destexp;
11389 enum machine_mode counter_mode;
11390 HOST_WIDE_INT align = 0;
11391 unsigned HOST_WIDE_INT count = 0;
11393 if (GET_CODE (align_exp) == CONST_INT)
11394 align = INTVAL (align_exp);
11396 /* Can't use any of this if the user has appropriated esi. */
11397 if (global_regs[4])
11398 return 0;
11400 /* This simple hack avoids all inlining code and simplifies code below. */
11401 if (!TARGET_ALIGN_STRINGOPS)
11402 align = 32;
11404 if (GET_CODE (count_exp) == CONST_INT)
11406 count = INTVAL (count_exp);
11407 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11408 return 0;
11410 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
11411 for 64 bits use SImode when possible, otherwise DImode.
11412 Set count to the number of bytes cleared when known at compile time. */
11413 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11414 || x86_64_zero_extended_value (count_exp))
11415 counter_mode = SImode;
11416 else
11417 counter_mode = DImode;
11419 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11420 if (destreg != XEXP (dst, 0))
11421 dst = replace_equiv_address_nv (dst, destreg);
11423 emit_insn (gen_cld ());
11425 /* When optimizing for size, emit a simple rep ; stosb instruction for
11426 counts not divisible by 4. */
11428 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11430 countreg = ix86_zero_extend_to_Pmode (count_exp);
11431 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11432 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11433 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11435 else if (count != 0
11436 && (align >= 8
11437 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11438 || optimize_size || count < (unsigned int) 64))
11440 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11441 unsigned HOST_WIDE_INT offset = 0;
11443 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11444 if (count & ~(size - 1))
11446 countreg = copy_to_mode_reg (counter_mode,
11447 GEN_INT ((count >> (size == 4 ? 2 : 3))
11448 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11449 countreg = ix86_zero_extend_to_Pmode (countreg);
11450 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11451 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11452 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11453 offset = count & ~(size - 1);
11455 if (size == 8 && (count & 0x04))
11457 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11458 offset);
11459 emit_insn (gen_strset (destreg, mem,
11460 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11461 offset += 4;
11463 if (count & 0x02)
11465 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11466 offset);
11467 emit_insn (gen_strset (destreg, mem,
11468 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11469 offset += 2;
11471 if (count & 0x01)
11473 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11474 offset);
11475 emit_insn (gen_strset (destreg, mem,
11476 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11479 else
11481 rtx countreg2;
11482 rtx label = NULL;
11483 /* Compute desired alignment of the string operation. */
11484 int desired_alignment = (TARGET_PENTIUMPRO
11485 && (count == 0 || count >= (unsigned int) 260)
11486 ? 8 : UNITS_PER_WORD);
11488 /* In case we don't know anything about the alignment, default to
11489 the library version, since it is usually equally fast and results in
11490 shorter code.
11492 Also emit the call when we know that the count is large and call overhead
11493 will not be important. */
11494 if (!TARGET_INLINE_ALL_STRINGOPS
11495 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11496 return 0;
11498 if (TARGET_SINGLE_STRINGOP)
11499 emit_insn (gen_cld ());
11501 countreg2 = gen_reg_rtx (Pmode);
11502 countreg = copy_to_mode_reg (counter_mode, count_exp);
11503 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11504 /* Get rid of MEM_OFFSET, it won't be accurate. */
11505 dst = change_address (dst, BLKmode, destreg);
11507 if (count == 0 && align < desired_alignment)
11509 label = gen_label_rtx ();
11510 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11511 LEU, 0, counter_mode, 1, label);
11513 if (align <= 1)
11515 rtx label = ix86_expand_aligntest (destreg, 1);
11516 emit_insn (gen_strset (destreg, dst,
11517 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11518 ix86_adjust_counter (countreg, 1);
11519 emit_label (label);
11520 LABEL_NUSES (label) = 1;
11522 if (align <= 2)
11524 rtx label = ix86_expand_aligntest (destreg, 2);
11525 emit_insn (gen_strset (destreg, dst,
11526 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11527 ix86_adjust_counter (countreg, 2);
11528 emit_label (label);
11529 LABEL_NUSES (label) = 1;
11531 if (align <= 4 && desired_alignment > 4)
11533 rtx label = ix86_expand_aligntest (destreg, 4);
11534 emit_insn (gen_strset (destreg, dst,
11535 (TARGET_64BIT
11536 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11537 : zeroreg)));
11538 ix86_adjust_counter (countreg, 4);
11539 emit_label (label);
11540 LABEL_NUSES (label) = 1;
11543 if (label && desired_alignment > 4 && !TARGET_64BIT)
11545 emit_label (label);
11546 LABEL_NUSES (label) = 1;
11547 label = NULL_RTX;
11550 if (!TARGET_SINGLE_STRINGOP)
11551 emit_insn (gen_cld ());
11552 if (TARGET_64BIT)
11554 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11555 GEN_INT (3)));
11556 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11558 else
11560 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11561 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11563 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11564 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11566 if (label)
11568 emit_label (label);
11569 LABEL_NUSES (label) = 1;
11572 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11573 emit_insn (gen_strset (destreg, dst,
11574 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11575 if (TARGET_64BIT && (align <= 4 || count == 0))
11577 rtx label = ix86_expand_aligntest (countreg, 4);
11578 emit_insn (gen_strset (destreg, dst,
11579 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11580 emit_label (label);
11581 LABEL_NUSES (label) = 1;
11583 if (align > 2 && count != 0 && (count & 2))
11584 emit_insn (gen_strset (destreg, dst,
11585 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11586 if (align <= 2 || count == 0)
11588 rtx label = ix86_expand_aligntest (countreg, 2);
11589 emit_insn (gen_strset (destreg, dst,
11590 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11591 emit_label (label);
11592 LABEL_NUSES (label) = 1;
11594 if (align > 1 && count != 0 && (count & 1))
11595 emit_insn (gen_strset (destreg, dst,
11596 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11597 if (align <= 1 || count == 0)
11599 rtx label = ix86_expand_aligntest (countreg, 1);
11600 emit_insn (gen_strset (destreg, dst,
11601 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11602 emit_label (label);
11603 LABEL_NUSES (label) = 1;
11606 return 1;
11609 /* Expand strlen. */
11611 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11613 rtx addr, scratch1, scratch2, scratch3, scratch4;
11615 /* The generic case of the strlen expander is long. Avoid its
11616 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11618 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11619 && !TARGET_INLINE_ALL_STRINGOPS
11620 && !optimize_size
11621 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11622 return 0;
11624 addr = force_reg (Pmode, XEXP (src, 0));
11625 scratch1 = gen_reg_rtx (Pmode);
11627 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11628 && !optimize_size)
11630 /* Well, it seems that some optimizer does not combine a call like
11631 foo(strlen(bar), strlen(bar));
11632 when the move and the subtraction are done here. It does calculate
11633 the length just once when these instructions are done inside of
11634 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11635 often used and I use one fewer register for the lifetime of
11636 output_strlen_unroll(), this is better. */
11638 emit_move_insn (out, addr);
11640 ix86_expand_strlensi_unroll_1 (out, src, align);
11642 /* strlensi_unroll_1 returns the address of the zero at the end of
11643 the string, like memchr(), so compute the length by subtracting
11644 the start address. */
11645 if (TARGET_64BIT)
11646 emit_insn (gen_subdi3 (out, out, addr));
11647 else
11648 emit_insn (gen_subsi3 (out, out, addr));
11650 else
11652 rtx unspec;
11653 scratch2 = gen_reg_rtx (Pmode);
11654 scratch3 = gen_reg_rtx (Pmode);
11655 scratch4 = force_reg (Pmode, constm1_rtx);
11657 emit_move_insn (scratch3, addr);
11658 eoschar = force_reg (QImode, eoschar);
11660 emit_insn (gen_cld ());
11661 src = replace_equiv_address_nv (src, scratch3);
11663 /* If .md starts supporting :P, this can be done in .md. */
11664 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11665 scratch4), UNSPEC_SCAS);
11666 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11667 if (TARGET_64BIT)
11669 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11670 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11672 else
11674 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11675 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11678 return 1;
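  /* Editorial note (added illustration): in the repnz scasb path above the
     count register starts at -1 and is decremented once per byte scanned,
     including the terminating zero, so it ends up holding -(n + 2) for a
     string of length n.  The one's complement computed above gives n + 1,
     and adding -1 yields the length n.  */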
11681 /* Expand the appropriate insns for doing strlen if not just doing
11682 repnz; scasb
11684 out = result, initialized with the start address
11685 align_rtx = alignment of the address.
11686 scratch = scratch register, initialized with the start address when
11687 not aligned, otherwise undefined
11689 This is just the body. It needs the initializations mentioned above and
11690 some address computation at the end. These things are done in i386.md. */
11692 static void
11693 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11695 int align;
11696 rtx tmp;
11697 rtx align_2_label = NULL_RTX;
11698 rtx align_3_label = NULL_RTX;
11699 rtx align_4_label = gen_label_rtx ();
11700 rtx end_0_label = gen_label_rtx ();
11701 rtx mem;
11702 rtx tmpreg = gen_reg_rtx (SImode);
11703 rtx scratch = gen_reg_rtx (SImode);
11704 rtx cmp;
11706 align = 0;
11707 if (GET_CODE (align_rtx) == CONST_INT)
11708 align = INTVAL (align_rtx);
11710 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11712 /* Is there a known alignment and is it less than 4? */
11713 if (align < 4)
11715 rtx scratch1 = gen_reg_rtx (Pmode);
11716 emit_move_insn (scratch1, out);
11717 /* Is there a known alignment and is it not 2? */
11718 if (align != 2)
11720 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11721 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11723 /* Leave just the 3 lower bits. */
11724 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11725 NULL_RTX, 0, OPTAB_WIDEN);
11727 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11728 Pmode, 1, align_4_label);
11729 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11730 Pmode, 1, align_2_label);
11731 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11732 Pmode, 1, align_3_label);
11734 else
11736 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11737 check whether it is aligned to 4 bytes. */
11739 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11740 NULL_RTX, 0, OPTAB_WIDEN);
11742 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11743 Pmode, 1, align_4_label);
11746 mem = change_address (src, QImode, out);
11748 /* Now compare the bytes. */
11750 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
11751 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11752 QImode, 1, end_0_label);
11754 /* Increment the address. */
11755 if (TARGET_64BIT)
11756 emit_insn (gen_adddi3 (out, out, const1_rtx));
11757 else
11758 emit_insn (gen_addsi3 (out, out, const1_rtx));
11760 /* Not needed with an alignment of 2 */
11761 if (align != 2)
11763 emit_label (align_2_label);
11765 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11766 end_0_label);
11768 if (TARGET_64BIT)
11769 emit_insn (gen_adddi3 (out, out, const1_rtx));
11770 else
11771 emit_insn (gen_addsi3 (out, out, const1_rtx));
11773 emit_label (align_3_label);
11776 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11777 end_0_label);
11779 if (TARGET_64BIT)
11780 emit_insn (gen_adddi3 (out, out, const1_rtx));
11781 else
11782 emit_insn (gen_addsi3 (out, out, const1_rtx));
11785 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11786 align this loop; doing so only makes the code larger and does not
11787 speed it up. */
11788 emit_label (align_4_label);
11790 mem = change_address (src, SImode, out);
11791 emit_move_insn (scratch, mem);
11792 if (TARGET_64BIT)
11793 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11794 else
11795 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11797 /* This formula yields a nonzero result iff one of the bytes is zero.
11798 This saves three branches inside the loop and many cycles. */
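/* Worked example: for SCRATCH = 0x12003456 (one zero byte),
   SCRATCH - 0x01010101 = 0x10ff3355 and ~SCRATCH = 0xedffcba9, so the
   two ANDs leave 0x00800000 -- nonzero, with the 0x80 bit surviving in
   the position of the zero byte.  For a word with no zero byte, such as
   0x12343456, the same computation gives 0x01030301 & 0x80808080,
   which is zero.  */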
11800 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11801 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11802 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11803 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11804 gen_int_mode (0x80808080, SImode)));
11805 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11806 align_4_label);
11808 if (TARGET_CMOVE)
11810 rtx reg = gen_reg_rtx (SImode);
11811 rtx reg2 = gen_reg_rtx (Pmode);
11812 emit_move_insn (reg, tmpreg);
11813 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11815 /* If zero is not in the first two bytes, move two bytes forward. */
11816 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11817 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11818 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11819 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11820 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11821 reg,
11822 tmpreg)));
11823 /* Emit lea manually to avoid clobbering of flags. */
11824 emit_insn (gen_rtx_SET (SImode, reg2,
11825 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11827 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11828 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11829 emit_insn (gen_rtx_SET (VOIDmode, out,
11830 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11831 reg2,
11832 out)));
11835 else
11837 rtx end_2_label = gen_label_rtx ();
11838 /* Is zero in the first two bytes? */
11840 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11841 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11842 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11843 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11844 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11845 pc_rtx);
11846 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11847 JUMP_LABEL (tmp) = end_2_label;
11849 /* Not in the first two. Move two bytes forward. */
11850 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11851 if (TARGET_64BIT)
11852 emit_insn (gen_adddi3 (out, out, const2_rtx));
11853 else
11854 emit_insn (gen_addsi3 (out, out, const2_rtx));
11856 emit_label (end_2_label);
11860 /* Avoid a branch when fixing up the final byte. */
11861 tmpreg = gen_lowpart (QImode, tmpreg);
11862 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11863 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11864 if (TARGET_64BIT)
11865 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11866 else
11867 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
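/* Roughly why this works: OUT now points 4 bytes (6 after the two-byte
   adjustment above) past the start of the word that contained the zero,
   and bit 7 of the low byte of TMPREG is set exactly when the zero is
   the first of the two remaining candidate bytes.  Adding TMPREG to
   itself copies that bit into the carry flag, so subtracting 3 plus the
   carry leaves OUT pointing at the terminating byte itself.  */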
11869 emit_label (end_0_label);
11872 void
11873 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11874 rtx callarg2 ATTRIBUTE_UNUSED,
11875 rtx pop, int sibcall)
11877 rtx use = NULL, call;
11879 if (pop == const0_rtx)
11880 pop = NULL;
11881 if (TARGET_64BIT && pop)
11882 abort ();
11884 #if TARGET_MACHO
11885 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11886 fnaddr = machopic_indirect_call_target (fnaddr);
11887 #else
11888 /* Static functions and indirect calls don't need the pic register. */
11889 if (! TARGET_64BIT && flag_pic
11890 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11891 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11892 use_reg (&use, pic_offset_table_rtx);
11894 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11896 rtx al = gen_rtx_REG (QImode, 0);
11897 emit_move_insn (al, callarg2);
11898 use_reg (&use, al);
11900 #endif /* TARGET_MACHO */
11902 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11904 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11905 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11907 if (sibcall && TARGET_64BIT
11908 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11910 rtx addr;
11911 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11912 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11913 emit_move_insn (fnaddr, addr);
11914 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11917 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11918 if (retval)
11919 call = gen_rtx_SET (VOIDmode, retval, call);
11920 if (pop)
11922 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11923 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11924 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
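/* As an illustration (not taken from the sources), a 32-bit call whose
   callee pops 8 bytes of arguments ends up wrapped roughly as

     (parallel [(set (reg:SI ax)
                     (call (mem:QI (symbol_ref "foo")) (const_int 16)))
                (set (reg:SI sp)
                     (plus:SI (reg:SI sp) (const_int 8)))])

   which is the shape the call_pop/call_value_pop patterns in i386.md
   are presumably written to match.  */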
11927 call = emit_call_insn (call);
11928 if (use)
11929 CALL_INSN_FUNCTION_USAGE (call) = use;
11933 /* Clear stack slot assignments remembered from previous functions.
11934 This is called from INIT_EXPANDERS once before RTL is emitted for each
11935 function. */
11937 static struct machine_function *
11938 ix86_init_machine_status (void)
11940 struct machine_function *f;
11942 f = ggc_alloc_cleared (sizeof (struct machine_function));
11943 f->use_fast_prologue_epilogue_nregs = -1;
11945 return f;
11948 /* Return a MEM corresponding to a stack slot with mode MODE.
11949 Allocate a new slot if necessary.
11951 The RTL for a function can have several slots available: N is
11952 which slot to use. */
11955 assign_386_stack_local (enum machine_mode mode, int n)
11957 struct stack_local_entry *s;
11959 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11960 abort ();
11962 for (s = ix86_stack_locals; s; s = s->next)
11963 if (s->mode == mode && s->n == n)
11964 return s->rtl;
11966 s = (struct stack_local_entry *)
11967 ggc_alloc (sizeof (struct stack_local_entry));
11968 s->n = n;
11969 s->mode = mode;
11970 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11972 s->next = ix86_stack_locals;
11973 ix86_stack_locals = s;
11974 return s->rtl;
11977 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11979 static GTY(()) rtx ix86_tls_symbol;
11981 ix86_tls_get_addr (void)
11984 if (!ix86_tls_symbol)
11986 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11987 (TARGET_GNU_TLS && !TARGET_64BIT)
11988 ? "___tls_get_addr"
11989 : "__tls_get_addr");
11992 return ix86_tls_symbol;
11995 /* Calculate the length of the memory address in the instruction
11996 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11998 static int
11999 memory_address_length (rtx addr)
12001 struct ix86_address parts;
12002 rtx base, index, disp;
12003 int len;
12005 if (GET_CODE (addr) == PRE_DEC
12006 || GET_CODE (addr) == POST_INC
12007 || GET_CODE (addr) == PRE_MODIFY
12008 || GET_CODE (addr) == POST_MODIFY)
12009 return 0;
12011 if (! ix86_decompose_address (addr, &parts))
12012 abort ();
12014 base = parts.base;
12015 index = parts.index;
12016 disp = parts.disp;
12017 len = 0;
12019 /* Rule of thumb:
12020 - esp as the base always wants an index,
12021 - ebp as the base always wants a displacement. */
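/* A few examples of the rules below, assuming 32-bit operands:
   (%eax) needs no extra bytes (len 0); (%esp) needs a SIB byte (len 1);
   8(%ebp) needs a disp8 (len 1); 8(%ebp,%esi) needs both a SIB byte and
   a disp8 (len 2); and foo(,%ebx,4) needs a SIB byte plus a disp32
   (len 5).  */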
12023 /* Register Indirect. */
12024 if (base && !index && !disp)
12026 /* esp (for its index) and ebp (for its displacement) need
12027 the two-byte modrm form. */
12028 if (addr == stack_pointer_rtx
12029 || addr == arg_pointer_rtx
12030 || addr == frame_pointer_rtx
12031 || addr == hard_frame_pointer_rtx)
12032 len = 1;
12035 /* Direct Addressing. */
12036 else if (disp && !base && !index)
12037 len = 4;
12039 else
12041 /* Find the length of the displacement constant. */
12042 if (disp)
12044 if (GET_CODE (disp) == CONST_INT
12045 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12046 && base)
12047 len = 1;
12048 else
12049 len = 4;
12051 /* ebp always wants a displacement. */
12052 else if (base == hard_frame_pointer_rtx)
12053 len = 1;
12055 /* An index requires the two-byte modrm form.... */
12056 if (index
12057 /* ...like esp, which always wants an index. */
12058 || base == stack_pointer_rtx
12059 || base == arg_pointer_rtx
12060 || base == frame_pointer_rtx)
12061 len += 1;
12064 return len;
12067 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12068 is set, expect that the insn has an 8-bit immediate alternative. */
12070 ix86_attr_length_immediate_default (rtx insn, int shortform)
12072 int len = 0;
12073 int i;
12074 extract_insn_cached (insn);
12075 for (i = recog_data.n_operands - 1; i >= 0; --i)
12076 if (CONSTANT_P (recog_data.operand[i]))
12078 if (len)
12079 abort ();
12080 if (shortform
12081 && GET_CODE (recog_data.operand[i]) == CONST_INT
12082 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12083 len = 1;
12084 else
12086 switch (get_attr_mode (insn))
12088 case MODE_QI:
12089 len+=1;
12090 break;
12091 case MODE_HI:
12092 len+=2;
12093 break;
12094 case MODE_SI:
12095 len+=4;
12096 break;
12097 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12098 case MODE_DI:
12099 len+=4;
12100 break;
12101 default:
12102 fatal_insn ("unknown insn mode", insn);
12106 return len;
12108 /* Compute default value for "length_address" attribute. */
12110 ix86_attr_length_address_default (rtx insn)
12112 int i;
12114 if (get_attr_type (insn) == TYPE_LEA)
12116 rtx set = PATTERN (insn);
12117 if (GET_CODE (set) == SET)
12119 else if (GET_CODE (set) == PARALLEL
12120 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12121 set = XVECEXP (set, 0, 0);
12122 else
12124 #ifdef ENABLE_CHECKING
12125 abort ();
12126 #endif
12127 return 0;
12130 return memory_address_length (SET_SRC (set));
12133 extract_insn_cached (insn);
12134 for (i = recog_data.n_operands - 1; i >= 0; --i)
12135 if (GET_CODE (recog_data.operand[i]) == MEM)
12137 return memory_address_length (XEXP (recog_data.operand[i], 0));
12138 break;
12140 return 0;
12143 /* Return the maximum number of instructions a cpu can issue. */
12145 static int
12146 ix86_issue_rate (void)
12148 switch (ix86_tune)
12150 case PROCESSOR_PENTIUM:
12151 case PROCESSOR_K6:
12152 return 2;
12154 case PROCESSOR_PENTIUMPRO:
12155 case PROCESSOR_PENTIUM4:
12156 case PROCESSOR_ATHLON:
12157 case PROCESSOR_K8:
12158 case PROCESSOR_NOCONA:
12159 return 3;
12161 default:
12162 return 1;
12166 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12167 by DEP_INSN and nothing else set by DEP_INSN. */
12169 static int
12170 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12172 rtx set, set2;
12174 /* Simplify the test for uninteresting insns. */
12175 if (insn_type != TYPE_SETCC
12176 && insn_type != TYPE_ICMOV
12177 && insn_type != TYPE_FCMOV
12178 && insn_type != TYPE_IBR)
12179 return 0;
12181 if ((set = single_set (dep_insn)) != 0)
12183 set = SET_DEST (set);
12184 set2 = NULL_RTX;
12186 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12187 && XVECLEN (PATTERN (dep_insn), 0) == 2
12188 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12189 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12191 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12192 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12194 else
12195 return 0;
12197 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12198 return 0;
12200 /* This test is true if the dependent insn reads the flags but
12201 not any other potentially set register. */
12202 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12203 return 0;
12205 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12206 return 0;
12208 return 1;
12211 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12212 address with operands set by DEP_INSN. */
12214 static int
12215 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12217 rtx addr;
12219 if (insn_type == TYPE_LEA
12220 && TARGET_PENTIUM)
12222 addr = PATTERN (insn);
12223 if (GET_CODE (addr) == SET)
12225 else if (GET_CODE (addr) == PARALLEL
12226 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12227 addr = XVECEXP (addr, 0, 0);
12228 else
12229 abort ();
12230 addr = SET_SRC (addr);
12232 else
12234 int i;
12235 extract_insn_cached (insn);
12236 for (i = recog_data.n_operands - 1; i >= 0; --i)
12237 if (GET_CODE (recog_data.operand[i]) == MEM)
12239 addr = XEXP (recog_data.operand[i], 0);
12240 goto found;
12242 return 0;
12243 found:;
12246 return modified_in_p (addr, dep_insn);
12249 static int
12250 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12252 enum attr_type insn_type, dep_insn_type;
12253 enum attr_memory memory;
12254 rtx set, set2;
12255 int dep_insn_code_number;
12257 /* Anti and output dependencies have zero cost on all CPUs. */
12258 if (REG_NOTE_KIND (link) != 0)
12259 return 0;
12261 dep_insn_code_number = recog_memoized (dep_insn);
12263 /* If we can't recognize the insns, we can't really do anything. */
12264 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12265 return cost;
12267 insn_type = get_attr_type (insn);
12268 dep_insn_type = get_attr_type (dep_insn);
12270 switch (ix86_tune)
12272 case PROCESSOR_PENTIUM:
12273 /* Address Generation Interlock adds a cycle of latency. */
12274 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12275 cost += 1;
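/* Illustration (not from the sources): on the Pentium a load such as
   "movl (%ebx), %eax" issued right after "addl $4, %ebx" pays an extra
   cycle because address generation needs the freshly computed %ebx.  */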
12277 /* ??? Compares pair with jump/setcc. */
12278 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12279 cost = 0;
12281 /* Floating point stores require value to be ready one cycle earlier. */
12282 if (insn_type == TYPE_FMOV
12283 && get_attr_memory (insn) == MEMORY_STORE
12284 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12285 cost += 1;
12286 break;
12288 case PROCESSOR_PENTIUMPRO:
12289 memory = get_attr_memory (insn);
12291 /* INT->FP conversion is expensive. */
12292 if (get_attr_fp_int_src (dep_insn))
12293 cost += 5;
12295 /* There is one cycle extra latency between an FP op and a store. */
12296 if (insn_type == TYPE_FMOV
12297 && (set = single_set (dep_insn)) != NULL_RTX
12298 && (set2 = single_set (insn)) != NULL_RTX
12299 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12300 && GET_CODE (SET_DEST (set2)) == MEM)
12301 cost += 1;
12303 /* Model the ability of the reorder buffer to hide the latency of a load
12304 by executing it in parallel with the previous instruction when the
12305 previous instruction is not needed to compute the address. */
12306 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12307 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12309 /* Claim that moves take one cycle, as the core can issue one load
12310 at a time and the next load can start a cycle later. */
12311 if (dep_insn_type == TYPE_IMOV
12312 || dep_insn_type == TYPE_FMOV)
12313 cost = 1;
12314 else if (cost > 1)
12315 cost--;
12317 break;
12319 case PROCESSOR_K6:
12320 memory = get_attr_memory (insn);
12322 /* The esp dependency is resolved before the instruction is really
12323 finished. */
12324 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12325 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12326 return 1;
12328 /* INT->FP conversion is expensive. */
12329 if (get_attr_fp_int_src (dep_insn))
12330 cost += 5;
12332 /* Model the ability of the reorder buffer to hide the latency of a load
12333 by executing it in parallel with the previous instruction when the
12334 previous instruction is not needed to compute the address. */
12335 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12336 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12339 /* Claim that moves take one cycle, as the core can issue one load
12340 at a time and the next load can start a cycle later. */
12340 if (dep_insn_type == TYPE_IMOV
12341 || dep_insn_type == TYPE_FMOV)
12342 cost = 1;
12343 else if (cost > 2)
12344 cost -= 2;
12345 else
12346 cost = 1;
12348 break;
12350 case PROCESSOR_ATHLON:
12351 case PROCESSOR_K8:
12352 memory = get_attr_memory (insn);
12354 /* Model the ability of the reorder buffer to hide the latency of a load
12355 by executing it in parallel with the previous instruction when the
12356 previous instruction is not needed to compute the address. */
12357 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12358 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12360 enum attr_unit unit = get_attr_unit (insn);
12361 int loadcost = 3;
12363 /* Because of the difference between the length of integer and
12364 floating unit pipeline preparation stages, the memory operands
12365 for floating point are cheaper.
12367 ??? For Athlon the difference is most probably 2. */
12368 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12369 loadcost = 3;
12370 else
12371 loadcost = TARGET_ATHLON ? 2 : 0;
12373 if (cost >= loadcost)
12374 cost -= loadcost;
12375 else
12376 cost = 0;
12379 default:
12380 break;
12383 return cost;
12386 /* How many alternative schedules to try. This should be as wide as the
12387 scheduling freedom in the DFA, but no wider. Making this value too
12388 large results in extra work for the scheduler. */
12390 static int
12391 ia32_multipass_dfa_lookahead (void)
12393 if (ix86_tune == PROCESSOR_PENTIUM)
12394 return 2;
12396 if (ix86_tune == PROCESSOR_PENTIUMPRO
12397 || ix86_tune == PROCESSOR_K6)
12398 return 1;
12400 else
12401 return 0;
12405 /* Compute the alignment given to a constant that is being placed in memory.
12406 EXP is the constant and ALIGN is the alignment that the object would
12407 ordinarily have.
12408 The value of this function is used instead of that alignment to align
12409 the object. */
12412 ix86_constant_alignment (tree exp, int align)
12414 if (TREE_CODE (exp) == REAL_CST)
12416 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12417 return 64;
12418 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12419 return 128;
12421 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12422 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12423 return BITS_PER_WORD;
12425 return align;
12428 /* Compute the alignment for a static variable.
12429 TYPE is the data type, and ALIGN is the alignment that
12430 the object would ordinarily have. The value of this function is used
12431 instead of that alignment to align the object. */
12434 ix86_data_alignment (tree type, int align)
12436 if (AGGREGATE_TYPE_P (type)
12437 && TYPE_SIZE (type)
12438 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12439 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12440 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12441 return 256;
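/* For instance, a 512-byte global array satisfies the size test above
   (TYPE_SIZE is in bits) and is therefore given 256-bit, i.e. 32-byte,
   alignment unless it was already aligned more strictly.  */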
12443 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12444 to a 16-byte boundary. */
12445 if (TARGET_64BIT)
12447 if (AGGREGATE_TYPE_P (type)
12448 && TYPE_SIZE (type)
12449 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12450 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12451 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12452 return 128;
12455 if (TREE_CODE (type) == ARRAY_TYPE)
12457 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12458 return 64;
12459 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12460 return 128;
12462 else if (TREE_CODE (type) == COMPLEX_TYPE)
12465 if (TYPE_MODE (type) == DCmode && align < 64)
12466 return 64;
12467 if (TYPE_MODE (type) == XCmode && align < 128)
12468 return 128;
12470 else if ((TREE_CODE (type) == RECORD_TYPE
12471 || TREE_CODE (type) == UNION_TYPE
12472 || TREE_CODE (type) == QUAL_UNION_TYPE)
12473 && TYPE_FIELDS (type))
12475 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12476 return 64;
12477 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12478 return 128;
12480 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12481 || TREE_CODE (type) == INTEGER_TYPE)
12483 if (TYPE_MODE (type) == DFmode && align < 64)
12484 return 64;
12485 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12486 return 128;
12489 return align;
12492 /* Compute the alignment for a local variable.
12493 TYPE is the data type, and ALIGN is the alignment that
12494 the object would ordinarily have. The value of this macro is used
12495 instead of that alignment to align the object. */
12498 ix86_local_alignment (tree type, int align)
12500 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12501 to a 16-byte boundary. */
12502 if (TARGET_64BIT)
12504 if (AGGREGATE_TYPE_P (type)
12505 && TYPE_SIZE (type)
12506 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12507 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12508 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12509 return 128;
12511 if (TREE_CODE (type) == ARRAY_TYPE)
12513 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12514 return 64;
12515 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12516 return 128;
12518 else if (TREE_CODE (type) == COMPLEX_TYPE)
12520 if (TYPE_MODE (type) == DCmode && align < 64)
12521 return 64;
12522 if (TYPE_MODE (type) == XCmode && align < 128)
12523 return 128;
12525 else if ((TREE_CODE (type) == RECORD_TYPE
12526 || TREE_CODE (type) == UNION_TYPE
12527 || TREE_CODE (type) == QUAL_UNION_TYPE)
12528 && TYPE_FIELDS (type))
12530 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12531 return 64;
12532 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12533 return 128;
12535 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12536 || TREE_CODE (type) == INTEGER_TYPE)
12539 if (TYPE_MODE (type) == DFmode && align < 64)
12540 return 64;
12541 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12542 return 128;
12544 return align;
12547 /* Emit RTL insns to initialize the variable parts of a trampoline.
12548 FNADDR is an RTX for the address of the function's pure code.
12549 CXT is an RTX for the static chain value for the function. */
12550 void
12551 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12553 if (!TARGET_64BIT)
12555 /* Compute offset from the end of the jmp to the target function. */
12556 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12557 plus_constant (tramp, 10),
12558 NULL_RTX, 1, OPTAB_DIRECT);
12559 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12560 gen_int_mode (0xb9, QImode));
12561 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12562 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12563 gen_int_mode (0xe9, QImode));
12564 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
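/* The ten bytes emitted above decode roughly as

     b9 <cxt>      movl  $cxt, %ecx    (ecx is the ia32 static chain reg)
     e9 <disp>     jmp   fnaddr

   with DISP computed relative to tramp + 10, the address following the
   jmp instruction.  */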
12566 else
12568 int offset = 0;
12569 /* Try to load the address using the shorter movl instead of movabs.
12570 We may want to support movq for kernel mode, but the kernel does not
12571 use trampolines at the moment. */
12572 if (x86_64_zero_extended_value (fnaddr))
12574 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12575 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12576 gen_int_mode (0xbb41, HImode));
12577 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12578 gen_lowpart (SImode, fnaddr));
12579 offset += 6;
12581 else
12583 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12584 gen_int_mode (0xbb49, HImode));
12585 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12586 fnaddr);
12587 offset += 10;
12589 /* Load static chain using movabs to r10. */
12590 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12591 gen_int_mode (0xba49, HImode));
12592 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12593 cxt);
12594 offset += 10;
12595 /* Jump to r11. */
12596 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12597 gen_int_mode (0xff49, HImode));
12598 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12599 gen_int_mode (0xe3, QImode));
12600 offset += 3;
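/* Taken together, the 64-bit trampoline assembles roughly to

     41 bb <imm32>   movl   $fnaddr, %r11d     (or 49 bb <imm64>, movabs)
     49 ba <imm64>   movabs $cxt, %r10
     49 ff e3        jmpq   *%r11

   with %r10 serving as the static chain register and %r11 as a
   call-clobbered scratch register, as the comments above note.  */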
12601 if (offset > TRAMPOLINE_SIZE)
12602 abort ();
12605 #ifdef TRANSFER_FROM_TRAMPOLINE
12606 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12607 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12608 #endif
12611 #define def_builtin(MASK, NAME, TYPE, CODE) \
12612 do { \
12613 if ((MASK) & target_flags \
12614 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12615 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12616 NULL, NULL_TREE); \
12617 } while (0)
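/* A hypothetical use of the macro above (the actual calls are made
   further down, largely by looping over the bdesc_* tables) would be

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   which registers the builtin only when the required ISA bit is set in
   target_flags (and, for 64-bit-only builtins, only when TARGET_64BIT).  */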
12619 struct builtin_description
12621 const unsigned int mask;
12622 const enum insn_code icode;
12623 const char *const name;
12624 const enum ix86_builtins code;
12625 const enum rtx_code comparison;
12626 const unsigned int flag;
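/* In the tables that follow, COMPARISON and FLAG only matter for the
   comparison builtins: COMPARISON is the rtx code to emit, and a nonzero
   FLAG presumably asks the expander to swap the two operands, which is
   why e.g. __builtin_ia32_cmpgtps is listed as LT with FLAG set.  */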
12629 static const struct builtin_description bdesc_comi[] =
12631 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12632 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12633 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12634 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12635 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12636 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12637 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12638 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12639 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12640 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12641 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12642 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12643 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12644 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12645 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12646 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12647 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12648 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12649 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12650 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12651 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12652 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12653 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12654 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12657 static const struct builtin_description bdesc_2arg[] =
12659 /* SSE */
12660 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12661 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12662 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12663 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12664 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12665 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12666 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12667 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12669 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12670 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12671 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12672 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12673 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12674 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12675 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12676 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12677 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12678 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12679 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12680 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12681 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12682 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12683 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12684 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12685 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12686 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12687 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12688 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12690 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12692 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12693 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12695 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12696 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12697 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12698 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12700 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12701 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12703 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12704 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12706 /* MMX */
12707 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12708 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12709 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12710 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12711 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12712 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12713 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12714 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12716 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12717 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12718 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12719 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12720 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12721 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12722 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12723 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12725 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12726 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12727 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12729 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12730 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12731 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12732 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12735 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12737 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12738 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12739 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12740 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12741 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12742 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12745 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12746 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12747 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12749 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12750 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12751 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12752 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12753 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12754 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12756 /* Special. */
12757 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12761 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12762 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12763 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12765 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12766 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12767 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12768 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12770 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12772 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12773 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12774 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12775 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12776 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12777 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12779 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12781 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12782 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12784 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12785 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12787 /* SSE2 */
12788 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12790 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12793 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12795 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12798 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12799 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12800 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12801 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12802 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12803 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12804 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12805 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12806 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12807 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12808 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12809 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12810 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12811 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12812 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12813 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12814 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12815 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12816 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12818 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12832 /* SSE2 MMX */
12833 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12837 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12839 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12842 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12843 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12844 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12845 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12846 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12847 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12848 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12849 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12859 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12914 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12918 /* SSE3 MMX */
12919 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12920 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12921 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12922 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12923 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12924 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12927 static const struct builtin_description bdesc_1arg[] =
12929 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12930 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12932 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12933 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12934 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12936 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12937 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12938 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12939 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12940 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12941 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12963 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12964 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12972 /* SSE3 */
12973 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12974 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12975 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12978 void
12979 ix86_init_builtins (void)
12981 if (TARGET_MMX)
12982 ix86_init_mmx_sse_builtins ();
12985 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12986 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12987 builtins. */
12988 static void
12989 ix86_init_mmx_sse_builtins (void)
12991 const struct builtin_description * d;
12992 size_t i;
12994 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12995 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12996 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12997 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12998 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12999 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13000 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13001 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13002 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13003 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13005 tree pchar_type_node = build_pointer_type (char_type_node);
13006 tree pcchar_type_node = build_pointer_type (
13007 build_type_variant (char_type_node, 1, 0));
13008 tree pfloat_type_node = build_pointer_type (float_type_node);
13009 tree pcfloat_type_node = build_pointer_type (
13010 build_type_variant (float_type_node, 1, 0));
13011 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13012 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13013 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13015 /* Comparisons. */
13016 tree int_ftype_v4sf_v4sf
13017 = build_function_type_list (integer_type_node,
13018 V4SF_type_node, V4SF_type_node, NULL_TREE);
13019 tree v4si_ftype_v4sf_v4sf
13020 = build_function_type_list (V4SI_type_node,
13021 V4SF_type_node, V4SF_type_node, NULL_TREE);
13022 /* MMX/SSE/integer conversions. */
13023 tree int_ftype_v4sf
13024 = build_function_type_list (integer_type_node,
13025 V4SF_type_node, NULL_TREE);
13026 tree int64_ftype_v4sf
13027 = build_function_type_list (long_long_integer_type_node,
13028 V4SF_type_node, NULL_TREE);
13029 tree int_ftype_v8qi
13030 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13031 tree v4sf_ftype_v4sf_int
13032 = build_function_type_list (V4SF_type_node,
13033 V4SF_type_node, integer_type_node, NULL_TREE);
13034 tree v4sf_ftype_v4sf_int64
13035 = build_function_type_list (V4SF_type_node,
13036 V4SF_type_node, long_long_integer_type_node,
13037 NULL_TREE);
13038 tree v4sf_ftype_v4sf_v2si
13039 = build_function_type_list (V4SF_type_node,
13040 V4SF_type_node, V2SI_type_node, NULL_TREE);
13041 tree int_ftype_v4hi_int
13042 = build_function_type_list (integer_type_node,
13043 V4HI_type_node, integer_type_node, NULL_TREE);
13044 tree v4hi_ftype_v4hi_int_int
13045 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13046 integer_type_node, integer_type_node,
13047 NULL_TREE);
13048 /* Miscellaneous. */
13049 tree v8qi_ftype_v4hi_v4hi
13050 = build_function_type_list (V8QI_type_node,
13051 V4HI_type_node, V4HI_type_node, NULL_TREE);
13052 tree v4hi_ftype_v2si_v2si
13053 = build_function_type_list (V4HI_type_node,
13054 V2SI_type_node, V2SI_type_node, NULL_TREE);
13055 tree v4sf_ftype_v4sf_v4sf_int
13056 = build_function_type_list (V4SF_type_node,
13057 V4SF_type_node, V4SF_type_node,
13058 integer_type_node, NULL_TREE);
13059 tree v2si_ftype_v4hi_v4hi
13060 = build_function_type_list (V2SI_type_node,
13061 V4HI_type_node, V4HI_type_node, NULL_TREE);
13062 tree v4hi_ftype_v4hi_int
13063 = build_function_type_list (V4HI_type_node,
13064 V4HI_type_node, integer_type_node, NULL_TREE);
13065 tree v4hi_ftype_v4hi_di
13066 = build_function_type_list (V4HI_type_node,
13067 V4HI_type_node, long_long_unsigned_type_node,
13068 NULL_TREE);
13069 tree v2si_ftype_v2si_di
13070 = build_function_type_list (V2SI_type_node,
13071 V2SI_type_node, long_long_unsigned_type_node,
13072 NULL_TREE);
13073 tree void_ftype_void
13074 = build_function_type (void_type_node, void_list_node);
13075 tree void_ftype_unsigned
13076 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13077 tree void_ftype_unsigned_unsigned
13078 = build_function_type_list (void_type_node, unsigned_type_node,
13079 unsigned_type_node, NULL_TREE);
13080 tree void_ftype_pcvoid_unsigned_unsigned
13081 = build_function_type_list (void_type_node, const_ptr_type_node,
13082 unsigned_type_node, unsigned_type_node,
13083 NULL_TREE);
13084 tree unsigned_ftype_void
13085 = build_function_type (unsigned_type_node, void_list_node);
13086 tree di_ftype_void
13087 = build_function_type (long_long_unsigned_type_node, void_list_node);
13088 tree v4sf_ftype_void
13089 = build_function_type (V4SF_type_node, void_list_node);
13090 tree v2si_ftype_v4sf
13091 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13092 /* Loads/stores. */
13093 tree void_ftype_v8qi_v8qi_pchar
13094 = build_function_type_list (void_type_node,
13095 V8QI_type_node, V8QI_type_node,
13096 pchar_type_node, NULL_TREE);
13097 tree v4sf_ftype_pcfloat
13098 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13099 /* @@@ the type is bogus */
13100 tree v4sf_ftype_v4sf_pv2si
13101 = build_function_type_list (V4SF_type_node,
13102 V4SF_type_node, pv2si_type_node, NULL_TREE);
13103 tree void_ftype_pv2si_v4sf
13104 = build_function_type_list (void_type_node,
13105 pv2si_type_node, V4SF_type_node, NULL_TREE);
13106 tree void_ftype_pfloat_v4sf
13107 = build_function_type_list (void_type_node,
13108 pfloat_type_node, V4SF_type_node, NULL_TREE);
13109 tree void_ftype_pdi_di
13110 = build_function_type_list (void_type_node,
13111 pdi_type_node, long_long_unsigned_type_node,
13112 NULL_TREE);
13113 tree void_ftype_pv2di_v2di
13114 = build_function_type_list (void_type_node,
13115 pv2di_type_node, V2DI_type_node, NULL_TREE);
13116 /* Normal vector unops. */
13117 tree v4sf_ftype_v4sf
13118 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13120 /* Normal vector binops. */
13121 tree v4sf_ftype_v4sf_v4sf
13122 = build_function_type_list (V4SF_type_node,
13123 V4SF_type_node, V4SF_type_node, NULL_TREE);
13124 tree v8qi_ftype_v8qi_v8qi
13125 = build_function_type_list (V8QI_type_node,
13126 V8QI_type_node, V8QI_type_node, NULL_TREE);
13127 tree v4hi_ftype_v4hi_v4hi
13128 = build_function_type_list (V4HI_type_node,
13129 V4HI_type_node, V4HI_type_node, NULL_TREE);
13130 tree v2si_ftype_v2si_v2si
13131 = build_function_type_list (V2SI_type_node,
13132 V2SI_type_node, V2SI_type_node, NULL_TREE);
13133 tree di_ftype_di_di
13134 = build_function_type_list (long_long_unsigned_type_node,
13135 long_long_unsigned_type_node,
13136 long_long_unsigned_type_node, NULL_TREE);
13138 tree v2si_ftype_v2sf
13139 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13140 tree v2sf_ftype_v2si
13141 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13142 tree v2si_ftype_v2si
13143 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13144 tree v2sf_ftype_v2sf
13145 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13146 tree v2sf_ftype_v2sf_v2sf
13147 = build_function_type_list (V2SF_type_node,
13148 V2SF_type_node, V2SF_type_node, NULL_TREE);
13149 tree v2si_ftype_v2sf_v2sf
13150 = build_function_type_list (V2SI_type_node,
13151 V2SF_type_node, V2SF_type_node, NULL_TREE);
13152 tree pint_type_node = build_pointer_type (integer_type_node);
13153 tree pcint_type_node = build_pointer_type (
13154 build_type_variant (integer_type_node, 1, 0));
13155 tree pdouble_type_node = build_pointer_type (double_type_node);
13156 tree pcdouble_type_node = build_pointer_type (
13157 build_type_variant (double_type_node, 1, 0));
13158 tree int_ftype_v2df_v2df
13159 = build_function_type_list (integer_type_node,
13160 V2DF_type_node, V2DF_type_node, NULL_TREE);
13162 tree ti_ftype_void
13163 = build_function_type (intTI_type_node, void_list_node);
13164 tree v2di_ftype_void
13165 = build_function_type (V2DI_type_node, void_list_node);
13166 tree ti_ftype_ti_ti
13167 = build_function_type_list (intTI_type_node,
13168 intTI_type_node, intTI_type_node, NULL_TREE);
13169 tree void_ftype_pcvoid
13170 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13171 tree v2di_ftype_di
13172 = build_function_type_list (V2DI_type_node,
13173 long_long_unsigned_type_node, NULL_TREE);
13174 tree di_ftype_v2di
13175 = build_function_type_list (long_long_unsigned_type_node,
13176 V2DI_type_node, NULL_TREE);
13177 tree v4sf_ftype_v4si
13178 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13179 tree v4si_ftype_v4sf
13180 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13181 tree v2df_ftype_v4si
13182 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13183 tree v4si_ftype_v2df
13184 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13185 tree v2si_ftype_v2df
13186 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13187 tree v4sf_ftype_v2df
13188 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13189 tree v2df_ftype_v2si
13190 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13191 tree v2df_ftype_v4sf
13192 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13193 tree int_ftype_v2df
13194 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13195 tree int64_ftype_v2df
13196 = build_function_type_list (long_long_integer_type_node,
13197 V2DF_type_node, NULL_TREE);
13198 tree v2df_ftype_v2df_int
13199 = build_function_type_list (V2DF_type_node,
13200 V2DF_type_node, integer_type_node, NULL_TREE);
13201 tree v2df_ftype_v2df_int64
13202 = build_function_type_list (V2DF_type_node,
13203 V2DF_type_node, long_long_integer_type_node,
13204 NULL_TREE);
13205 tree v4sf_ftype_v4sf_v2df
13206 = build_function_type_list (V4SF_type_node,
13207 V4SF_type_node, V2DF_type_node, NULL_TREE);
13208 tree v2df_ftype_v2df_v4sf
13209 = build_function_type_list (V2DF_type_node,
13210 V2DF_type_node, V4SF_type_node, NULL_TREE);
13211 tree v2df_ftype_v2df_v2df_int
13212 = build_function_type_list (V2DF_type_node,
13213 V2DF_type_node, V2DF_type_node,
13214 integer_type_node,
13215 NULL_TREE);
13216 tree v2df_ftype_v2df_pv2si
13217 = build_function_type_list (V2DF_type_node,
13218 V2DF_type_node, pv2si_type_node, NULL_TREE);
13219 tree void_ftype_pv2si_v2df
13220 = build_function_type_list (void_type_node,
13221 pv2si_type_node, V2DF_type_node, NULL_TREE);
13222 tree void_ftype_pdouble_v2df
13223 = build_function_type_list (void_type_node,
13224 pdouble_type_node, V2DF_type_node, NULL_TREE);
13225 tree void_ftype_pint_int
13226 = build_function_type_list (void_type_node,
13227 pint_type_node, integer_type_node, NULL_TREE);
13228 tree void_ftype_v16qi_v16qi_pchar
13229 = build_function_type_list (void_type_node,
13230 V16QI_type_node, V16QI_type_node,
13231 pchar_type_node, NULL_TREE);
13232 tree v2df_ftype_pcdouble
13233 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13234 tree v2df_ftype_v2df_v2df
13235 = build_function_type_list (V2DF_type_node,
13236 V2DF_type_node, V2DF_type_node, NULL_TREE);
13237 tree v16qi_ftype_v16qi_v16qi
13238 = build_function_type_list (V16QI_type_node,
13239 V16QI_type_node, V16QI_type_node, NULL_TREE);
13240 tree v8hi_ftype_v8hi_v8hi
13241 = build_function_type_list (V8HI_type_node,
13242 V8HI_type_node, V8HI_type_node, NULL_TREE);
13243 tree v4si_ftype_v4si_v4si
13244 = build_function_type_list (V4SI_type_node,
13245 V4SI_type_node, V4SI_type_node, NULL_TREE);
13246 tree v2di_ftype_v2di_v2di
13247 = build_function_type_list (V2DI_type_node,
13248 V2DI_type_node, V2DI_type_node, NULL_TREE);
13249 tree v2di_ftype_v2df_v2df
13250 = build_function_type_list (V2DI_type_node,
13251 V2DF_type_node, V2DF_type_node, NULL_TREE);
13252 tree v2df_ftype_v2df
13253 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13254 tree v2df_ftype_double
13255 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13256 tree v2df_ftype_double_double
13257 = build_function_type_list (V2DF_type_node,
13258 double_type_node, double_type_node, NULL_TREE);
13259 tree int_ftype_v8hi_int
13260 = build_function_type_list (integer_type_node,
13261 V8HI_type_node, integer_type_node, NULL_TREE);
13262 tree v8hi_ftype_v8hi_int_int
13263 = build_function_type_list (V8HI_type_node,
13264 V8HI_type_node, integer_type_node,
13265 integer_type_node, NULL_TREE);
13266 tree v2di_ftype_v2di_int
13267 = build_function_type_list (V2DI_type_node,
13268 V2DI_type_node, integer_type_node, NULL_TREE);
13269 tree v4si_ftype_v4si_int
13270 = build_function_type_list (V4SI_type_node,
13271 V4SI_type_node, integer_type_node, NULL_TREE);
13272 tree v8hi_ftype_v8hi_int
13273 = build_function_type_list (V8HI_type_node,
13274 V8HI_type_node, integer_type_node, NULL_TREE);
13275 tree v8hi_ftype_v8hi_v2di
13276 = build_function_type_list (V8HI_type_node,
13277 V8HI_type_node, V2DI_type_node, NULL_TREE);
13278 tree v4si_ftype_v4si_v2di
13279 = build_function_type_list (V4SI_type_node,
13280 V4SI_type_node, V2DI_type_node, NULL_TREE);
13281 tree v4si_ftype_v8hi_v8hi
13282 = build_function_type_list (V4SI_type_node,
13283 V8HI_type_node, V8HI_type_node, NULL_TREE);
13284 tree di_ftype_v8qi_v8qi
13285 = build_function_type_list (long_long_unsigned_type_node,
13286 V8QI_type_node, V8QI_type_node, NULL_TREE);
13287 tree v2di_ftype_v16qi_v16qi
13288 = build_function_type_list (V2DI_type_node,
13289 V16QI_type_node, V16QI_type_node, NULL_TREE);
13290 tree int_ftype_v16qi
13291 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13292 tree v16qi_ftype_pcchar
13293 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13294 tree void_ftype_pchar_v16qi
13295 = build_function_type_list (void_type_node,
13296 pchar_type_node, V16QI_type_node, NULL_TREE);
13297 tree v4si_ftype_pcint
13298 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13299 tree void_ftype_pcint_v4si
13300 = build_function_type_list (void_type_node,
13301 pcint_type_node, V4SI_type_node, NULL_TREE);
13302 tree v2di_ftype_v2di
13303 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13305 tree float80_type;
13306 tree float128_type;
13308 /* The __float80 type. */
13309 if (TYPE_MODE (long_double_type_node) == XFmode)
13310 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13311 "__float80");
13312 else
13313 {
13314 /* The __float80 type. */
13315 float80_type = make_node (REAL_TYPE);
13316 TYPE_PRECISION (float80_type) = 96;
13317 layout_type (float80_type);
13318 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13319 }
13321 float128_type = make_node (REAL_TYPE);
13322 TYPE_PRECISION (float128_type) = 128;
13323 layout_type (float128_type);
13324 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
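/* Illustrative only: once registered, the two type names are visible to the
   front ends, so user code on this target can write, roughly,
       __float80 x = 1.0;
       __float128 y = 2.0;
   with __float80 simply aliasing long double when long double is already
   XFmode (the branch above).  */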
13326 /* Add all builtins that are more or less simple operations on two
13327 operands. */
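/* Each bdesc_2arg entry (the table is defined earlier in this file) carries
   the enable mask, insn code, builtin name, IX86_BUILTIN_* code and the
   comparison/flag fields used by the compare expanders; a representative
   entry looks roughly like this (illustrative):
       { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 }
   so the loop below only needs to pick a function type from the insn's
   operand mode.  */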
13328 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13329 {
13330 /* Use one of the operands; the target can have a different mode for
13331 mask-generating compares. */
13332 enum machine_mode mode;
13333 tree type;
13335 if (d->name == 0)
13336 continue;
13337 mode = insn_data[d->icode].operand[1].mode;
13339 switch (mode)
13340 {
13341 case V16QImode:
13342 type = v16qi_ftype_v16qi_v16qi;
13343 break;
13344 case V8HImode:
13345 type = v8hi_ftype_v8hi_v8hi;
13346 break;
13347 case V4SImode:
13348 type = v4si_ftype_v4si_v4si;
13349 break;
13350 case V2DImode:
13351 type = v2di_ftype_v2di_v2di;
13352 break;
13353 case V2DFmode:
13354 type = v2df_ftype_v2df_v2df;
13355 break;
13356 case TImode:
13357 type = ti_ftype_ti_ti;
13358 break;
13359 case V4SFmode:
13360 type = v4sf_ftype_v4sf_v4sf;
13361 break;
13362 case V8QImode:
13363 type = v8qi_ftype_v8qi_v8qi;
13364 break;
13365 case V4HImode:
13366 type = v4hi_ftype_v4hi_v4hi;
13367 break;
13368 case V2SImode:
13369 type = v2si_ftype_v2si_v2si;
13370 break;
13371 case DImode:
13372 type = di_ftype_di_di;
13373 break;
13375 default:
13376 abort ();
13377 }
13379 /* Override for comparisons. */
13380 if (d->icode == CODE_FOR_maskcmpv4sf3
13381 || d->icode == CODE_FOR_maskncmpv4sf3
13382 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13383 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13384 type = v4si_ftype_v4sf_v4sf;
13386 if (d->icode == CODE_FOR_maskcmpv2df3
13387 || d->icode == CODE_FOR_maskncmpv2df3
13388 || d->icode == CODE_FOR_vmmaskcmpv2df3
13389 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13390 type = v2di_ftype_v2df_v2df;
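/* The mask compares above produce per-element all-ones/all-zeros results,
   so they are given an integer-vector return type of the same width instead
   of the float-vector type the mode switch would have chosen.  */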
13392 def_builtin (d->mask, d->name, type, d->code);
13393 }
13395 /* Add the remaining MMX insns with somewhat more complicated types. */
13396 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13397 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13398 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13400 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13402 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13403 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13404 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13406 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13407 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13409 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13410 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13412 /* comi/ucomi insns. */
13413 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13414 if (d->mask == MASK_SSE2)
13415 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13416 else
13417 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
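/* Each comi/ucomi builtin therefore returns a plain int, comparing v2df
   operands for the SSE2 entries and v4sf operands otherwise; they are
   expanded by ix86_expand_sse_comi below into the compare insn followed by a
   read of the resulting flag.  */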
13419 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13420 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13421 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13423 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13424 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13425 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13426 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13427 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13428 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13429 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13430 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13431 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13432 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13433 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13435 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13436 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13438 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13440 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13441 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13442 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13443 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13445 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13447 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13448 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13449 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13450 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13452 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13453 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13454 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13455 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13457 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13459 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13461 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13462 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13463 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13464 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13465 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13466 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13468 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
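/* Illustrative use of the builtin just defined, assuming the __v4sf vector
   typedef that xmmintrin.h provides:
       __v4sf r = __builtin_ia32_shufps (a, b, 0x1b);
   The third argument must be an immediate; the SHUFPS case in
   ix86_expand_builtin below rejects non-constant masks.  */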
13470 /* Original 3DNow! */
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13480 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13481 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13482 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13483 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13484 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13485 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13486 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13487 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13488 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13489 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13490 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13492 /* 3DNow! extension as used in the Athlon CPU. */
13493 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13494 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13495 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13496 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13497 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13498 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13500 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13502 /* SSE2 */
13503 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13515 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13526 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13551 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13552 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13559 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13583 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13611 /* Prescott New Instructions. */
13612 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13613 void_ftype_pcvoid_unsigned_unsigned,
13614 IX86_BUILTIN_MONITOR);
13615 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13616 void_ftype_unsigned_unsigned,
13617 IX86_BUILTIN_MWAIT);
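/* Illustrative call sequence for the two builtins above, assuming ADDR
   points to the monitored line:
       __builtin_ia32_monitor (addr, 0, 0);
       __builtin_ia32_mwait (0, 0);
   The MONITOR/MWAIT cases in ix86_expand_builtin below just force the
   arguments into registers and emit the corresponding patterns.  */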
13618 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13619 v4sf_ftype_v4sf,
13620 IX86_BUILTIN_MOVSHDUP);
13621 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13622 v4sf_ftype_v4sf,
13623 IX86_BUILTIN_MOVSLDUP);
13624 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13625 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13626 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13627 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13628 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13629 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13632 /* Errors in the source file can cause expand_expr to return const0_rtx
13633 where we expect a vector. To avoid crashing, use one of the vector
13634 clear instructions. */
13635 static rtx
13636 safe_vector_operand (rtx x, enum machine_mode mode)
13637 {
13638 if (x != const0_rtx)
13639 return x;
13640 x = gen_reg_rtx (mode);
13642 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13643 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13644 : gen_rtx_SUBREG (DImode, x, 0)));
13645 else
13646 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13647 : gen_rtx_SUBREG (V4SFmode, x, 0),
13648 CONST0_RTX (V4SFmode)));
13649 return x;
13650 }
13652 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13654 static rtx
13655 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13656 {
13657 rtx pat;
13658 tree arg0 = TREE_VALUE (arglist);
13659 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13660 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13661 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13662 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13663 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13664 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13666 if (VECTOR_MODE_P (mode0))
13667 op0 = safe_vector_operand (op0, mode0);
13668 if (VECTOR_MODE_P (mode1))
13669 op1 = safe_vector_operand (op1, mode1);
13671 if (! target
13672 || GET_MODE (target) != tmode
13673 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13674 target = gen_reg_rtx (tmode);
13676 if (GET_MODE (op1) == SImode && mode1 == TImode)
13677 {
13678 rtx x = gen_reg_rtx (V4SImode);
13679 emit_insn (gen_sse2_loadd (x, op1));
13680 op1 = gen_lowpart (TImode, x);
13681 }
13683 /* In case the insn wants input operands in modes different from
13684 the result, abort. */
13685 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13686 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13687 abort ();
13689 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13690 op0 = copy_to_mode_reg (mode0, op0);
13691 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13692 op1 = copy_to_mode_reg (mode1, op1);
13694 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13695 yet one of the two must not be a memory. This is normally enforced
13696 by expanders, but we didn't bother to create one here. */
13697 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13698 op0 = copy_to_mode_reg (mode0, op0);
13700 pat = GEN_FCN (icode) (target, op0, op1);
13701 if (! pat)
13702 return 0;
13703 emit_insn (pat);
13704 return target;
13705 }
13707 /* Subroutine of ix86_expand_builtin to take care of stores. */
13709 static rtx
13710 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13711 {
13712 rtx pat;
13713 tree arg0 = TREE_VALUE (arglist);
13714 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13715 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13716 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13717 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13718 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13720 if (VECTOR_MODE_P (mode1))
13721 op1 = safe_vector_operand (op1, mode1);
13723 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13724 op1 = copy_to_mode_reg (mode1, op1);
13726 pat = GEN_FCN (icode) (op0, op1);
13727 if (pat)
13728 emit_insn (pat);
13729 return 0;
13730 }
13732 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13734 static rtx
13735 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13736 rtx target, int do_load)
13737 {
13738 rtx pat;
13739 tree arg0 = TREE_VALUE (arglist);
13740 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13741 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13742 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13744 if (! target
13745 || GET_MODE (target) != tmode
13746 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13747 target = gen_reg_rtx (tmode);
13748 if (do_load)
13749 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13750 else
13751 {
13752 if (VECTOR_MODE_P (mode0))
13753 op0 = safe_vector_operand (op0, mode0);
13755 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13756 op0 = copy_to_mode_reg (mode0, op0);
13757 }
13759 pat = GEN_FCN (icode) (target, op0);
13760 if (! pat)
13761 return 0;
13762 emit_insn (pat);
13763 return target;
13764 }
13766 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13767 sqrtss, rsqrtss, rcpss. */
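/* These patterns take two vector inputs even though the builtin has a single
   argument: element 0 gets the scalar result while the remaining elements
   are merged from the other operand, so the expander below simply passes the
   same register twice (op1 = op0).  */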
13769 static rtx
13770 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13771 {
13772 rtx pat;
13773 tree arg0 = TREE_VALUE (arglist);
13774 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13775 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13776 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13778 if (! target
13779 || GET_MODE (target) != tmode
13780 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13781 target = gen_reg_rtx (tmode);
13783 if (VECTOR_MODE_P (mode0))
13784 op0 = safe_vector_operand (op0, mode0);
13786 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13787 op0 = copy_to_mode_reg (mode0, op0);
13789 op1 = op0;
13790 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13791 op1 = copy_to_mode_reg (mode0, op1);
13793 pat = GEN_FCN (icode) (target, op0, op1);
13794 if (! pat)
13795 return 0;
13796 emit_insn (pat);
13797 return target;
13798 }
13800 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13802 static rtx
13803 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13804 rtx target)
13805 {
13806 rtx pat;
13807 tree arg0 = TREE_VALUE (arglist);
13808 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13809 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13810 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13811 rtx op2;
13812 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13813 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13814 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13815 enum rtx_code comparison = d->comparison;
13817 if (VECTOR_MODE_P (mode0))
13818 op0 = safe_vector_operand (op0, mode0);
13819 if (VECTOR_MODE_P (mode1))
13820 op1 = safe_vector_operand (op1, mode1);
13822 /* Swap operands if we have a comparison that isn't available in
13823 hardware. */
13824 if (d->flag)
13825 {
13826 rtx tmp = gen_reg_rtx (mode1);
13827 emit_move_insn (tmp, op1);
13828 op1 = op0;
13829 op0 = tmp;
13830 }
13832 if (! target
13833 || GET_MODE (target) != tmode
13834 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13835 target = gen_reg_rtx (tmode);
13837 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13838 op0 = copy_to_mode_reg (mode0, op0);
13839 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13840 op1 = copy_to_mode_reg (mode1, op1);
13842 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13843 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13844 if (! pat)
13845 return 0;
13846 emit_insn (pat);
13847 return target;
13848 }
13850 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13852 static rtx
13853 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13854 rtx target)
13855 {
13856 rtx pat;
13857 tree arg0 = TREE_VALUE (arglist);
13858 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13859 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13860 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13861 rtx op2;
13862 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13863 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13864 enum rtx_code comparison = d->comparison;
13866 if (VECTOR_MODE_P (mode0))
13867 op0 = safe_vector_operand (op0, mode0);
13868 if (VECTOR_MODE_P (mode1))
13869 op1 = safe_vector_operand (op1, mode1);
13871 /* Swap operands if we have a comparison that isn't available in
13872 hardware. */
13873 if (d->flag)
13874 {
13875 rtx tmp = op1;
13876 op1 = op0;
13877 op0 = tmp;
13878 }
13880 target = gen_reg_rtx (SImode);
13881 emit_move_insn (target, const0_rtx);
13882 target = gen_rtx_SUBREG (QImode, target, 0);
13884 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13885 op0 = copy_to_mode_reg (mode0, op0);
13886 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13887 op1 = copy_to_mode_reg (mode1, op1);
13889 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13890 pat = GEN_FCN (d->icode) (op0, op1);
13891 if (! pat)
13892 return 0;
13893 emit_insn (pat);
13894 emit_insn (gen_rtx_SET (VOIDmode,
13895 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13896 gen_rtx_fmt_ee (comparison, QImode,
13897 SET_DEST (pat),
13898 const0_rtx)));
13900 return SUBREG_REG (target);
13901 }
13903 /* Expand an expression EXP that calls a built-in function,
13904 with result going to TARGET if that's convenient
13905 (and in mode MODE if that's convenient).
13906 SUBTARGET may be used as the target for computing one of EXP's operands.
13907 IGNORE is nonzero if the value is to be ignored. */
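/* For example, a call to __builtin_ia32_emms () arrives here with
   fcode == IX86_BUILTIN_EMMS and the switch below just emits the emms
   pattern; most other builtins are routed through the generic
   binop/unop/store helpers defined above.  */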
13909 rtx
13910 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13911 enum machine_mode mode ATTRIBUTE_UNUSED,
13912 int ignore ATTRIBUTE_UNUSED)
13913 {
13914 const struct builtin_description *d;
13915 size_t i;
13916 enum insn_code icode;
13917 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13918 tree arglist = TREE_OPERAND (exp, 1);
13919 tree arg0, arg1, arg2;
13920 rtx op0, op1, op2, pat;
13921 enum machine_mode tmode, mode0, mode1, mode2;
13922 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13924 switch (fcode)
13925 {
13926 case IX86_BUILTIN_EMMS:
13927 emit_insn (gen_emms ());
13928 return 0;
13930 case IX86_BUILTIN_SFENCE:
13931 emit_insn (gen_sfence ());
13932 return 0;
13934 case IX86_BUILTIN_PEXTRW:
13935 case IX86_BUILTIN_PEXTRW128:
13936 icode = (fcode == IX86_BUILTIN_PEXTRW
13937 ? CODE_FOR_mmx_pextrw
13938 : CODE_FOR_sse2_pextrw);
13939 arg0 = TREE_VALUE (arglist);
13940 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13941 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13942 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13943 tmode = insn_data[icode].operand[0].mode;
13944 mode0 = insn_data[icode].operand[1].mode;
13945 mode1 = insn_data[icode].operand[2].mode;
13947 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13948 op0 = copy_to_mode_reg (mode0, op0);
13949 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13950 {
13951 error ("selector must be an integer constant in the range 0..%i",
13952 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13953 return gen_reg_rtx (tmode);
13954 }
13955 if (target == 0
13956 || GET_MODE (target) != tmode
13957 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13958 target = gen_reg_rtx (tmode);
13959 pat = GEN_FCN (icode) (target, op0, op1);
13960 if (! pat)
13961 return 0;
13962 emit_insn (pat);
13963 return target;
13965 case IX86_BUILTIN_PINSRW:
13966 case IX86_BUILTIN_PINSRW128:
13967 icode = (fcode == IX86_BUILTIN_PINSRW
13968 ? CODE_FOR_mmx_pinsrw
13969 : CODE_FOR_sse2_pinsrw);
13970 arg0 = TREE_VALUE (arglist);
13971 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13972 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13973 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13974 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13975 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13976 tmode = insn_data[icode].operand[0].mode;
13977 mode0 = insn_data[icode].operand[1].mode;
13978 mode1 = insn_data[icode].operand[2].mode;
13979 mode2 = insn_data[icode].operand[3].mode;
13981 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13982 op0 = copy_to_mode_reg (mode0, op0);
13983 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13984 op1 = copy_to_mode_reg (mode1, op1);
13985 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13986 {
13987 error ("selector must be an integer constant in the range 0..%i",
13988 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13989 return const0_rtx;
13990 }
13991 if (target == 0
13992 || GET_MODE (target) != tmode
13993 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13994 target = gen_reg_rtx (tmode);
13995 pat = GEN_FCN (icode) (target, op0, op1, op2);
13996 if (! pat)
13997 return 0;
13998 emit_insn (pat);
13999 return target;
14001 case IX86_BUILTIN_MASKMOVQ:
14002 case IX86_BUILTIN_MASKMOVDQU:
14003 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14004 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14005 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14006 : CODE_FOR_sse2_maskmovdqu));
14007 /* Note the arg order is different from the operand order. */
14008 arg1 = TREE_VALUE (arglist);
14009 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14010 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14011 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14012 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14013 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14014 mode0 = insn_data[icode].operand[0].mode;
14015 mode1 = insn_data[icode].operand[1].mode;
14016 mode2 = insn_data[icode].operand[2].mode;
14018 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14019 op0 = copy_to_mode_reg (mode0, op0);
14020 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14021 op1 = copy_to_mode_reg (mode1, op1);
14022 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14023 op2 = copy_to_mode_reg (mode2, op2);
14024 pat = GEN_FCN (icode) (op0, op1, op2);
14025 if (! pat)
14026 return 0;
14027 emit_insn (pat);
14028 return 0;
14030 case IX86_BUILTIN_SQRTSS:
14031 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14032 case IX86_BUILTIN_RSQRTSS:
14033 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14034 case IX86_BUILTIN_RCPSS:
14035 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14037 case IX86_BUILTIN_LOADAPS:
14038 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14040 case IX86_BUILTIN_LOADUPS:
14041 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14043 case IX86_BUILTIN_STOREAPS:
14044 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14046 case IX86_BUILTIN_STOREUPS:
14047 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14049 case IX86_BUILTIN_LOADSS:
14050 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14052 case IX86_BUILTIN_STORESS:
14053 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14055 case IX86_BUILTIN_LOADHPS:
14056 case IX86_BUILTIN_LOADLPS:
14057 case IX86_BUILTIN_LOADHPD:
14058 case IX86_BUILTIN_LOADLPD:
14059 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14060 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14061 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14062 : CODE_FOR_sse2_movsd);
14063 arg0 = TREE_VALUE (arglist);
14064 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14065 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14066 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14067 tmode = insn_data[icode].operand[0].mode;
14068 mode0 = insn_data[icode].operand[1].mode;
14069 mode1 = insn_data[icode].operand[2].mode;
14071 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14072 op0 = copy_to_mode_reg (mode0, op0);
14073 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14074 if (target == 0
14075 || GET_MODE (target) != tmode
14076 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14077 target = gen_reg_rtx (tmode);
14078 pat = GEN_FCN (icode) (target, op0, op1);
14079 if (! pat)
14080 return 0;
14081 emit_insn (pat);
14082 return target;
14084 case IX86_BUILTIN_STOREHPS:
14085 case IX86_BUILTIN_STORELPS:
14086 case IX86_BUILTIN_STOREHPD:
14087 case IX86_BUILTIN_STORELPD:
14088 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14089 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14090 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14091 : CODE_FOR_sse2_movsd);
14092 arg0 = TREE_VALUE (arglist);
14093 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14094 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14095 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14096 mode0 = insn_data[icode].operand[1].mode;
14097 mode1 = insn_data[icode].operand[2].mode;
14099 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14100 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14101 op1 = copy_to_mode_reg (mode1, op1);
14103 pat = GEN_FCN (icode) (op0, op0, op1);
14104 if (! pat)
14105 return 0;
14106 emit_insn (pat);
14107 return 0;
14109 case IX86_BUILTIN_MOVNTPS:
14110 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14111 case IX86_BUILTIN_MOVNTQ:
14112 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14114 case IX86_BUILTIN_LDMXCSR:
14115 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14116 target = assign_386_stack_local (SImode, 0);
14117 emit_move_insn (target, op0);
14118 emit_insn (gen_ldmxcsr (target));
14119 return 0;
14121 case IX86_BUILTIN_STMXCSR:
14122 target = assign_386_stack_local (SImode, 0);
14123 emit_insn (gen_stmxcsr (target));
14124 return copy_to_mode_reg (SImode, target);
14126 case IX86_BUILTIN_SHUFPS:
14127 case IX86_BUILTIN_SHUFPD:
14128 icode = (fcode == IX86_BUILTIN_SHUFPS
14129 ? CODE_FOR_sse_shufps
14130 : CODE_FOR_sse2_shufpd);
14131 arg0 = TREE_VALUE (arglist);
14132 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14133 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14134 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14135 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14136 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14137 tmode = insn_data[icode].operand[0].mode;
14138 mode0 = insn_data[icode].operand[1].mode;
14139 mode1 = insn_data[icode].operand[2].mode;
14140 mode2 = insn_data[icode].operand[3].mode;
14142 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14143 op0 = copy_to_mode_reg (mode0, op0);
14144 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14145 op1 = copy_to_mode_reg (mode1, op1);
14146 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14147 {
14148 /* @@@ better error message */
14149 error ("mask must be an immediate");
14150 return gen_reg_rtx (tmode);
14151 }
14152 if (target == 0
14153 || GET_MODE (target) != tmode
14154 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14155 target = gen_reg_rtx (tmode);
14156 pat = GEN_FCN (icode) (target, op0, op1, op2);
14157 if (! pat)
14158 return 0;
14159 emit_insn (pat);
14160 return target;
14162 case IX86_BUILTIN_PSHUFW:
14163 case IX86_BUILTIN_PSHUFD:
14164 case IX86_BUILTIN_PSHUFHW:
14165 case IX86_BUILTIN_PSHUFLW:
14166 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14167 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14168 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14169 : CODE_FOR_mmx_pshufw);
14170 arg0 = TREE_VALUE (arglist);
14171 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14172 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14173 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14174 tmode = insn_data[icode].operand[0].mode;
14175 mode1 = insn_data[icode].operand[1].mode;
14176 mode2 = insn_data[icode].operand[2].mode;
14178 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14179 op0 = copy_to_mode_reg (mode1, op0);
14180 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14181 {
14182 /* @@@ better error message */
14183 error ("mask must be an immediate");
14184 return const0_rtx;
14185 }
14186 if (target == 0
14187 || GET_MODE (target) != tmode
14188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14189 target = gen_reg_rtx (tmode);
14190 pat = GEN_FCN (icode) (target, op0, op1);
14191 if (! pat)
14192 return 0;
14193 emit_insn (pat);
14194 return target;
14196 case IX86_BUILTIN_PSLLDQI128:
14197 case IX86_BUILTIN_PSRLDQI128:
14198 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14199 : CODE_FOR_sse2_lshrti3);
14200 arg0 = TREE_VALUE (arglist);
14201 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14202 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14203 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14204 tmode = insn_data[icode].operand[0].mode;
14205 mode1 = insn_data[icode].operand[1].mode;
14206 mode2 = insn_data[icode].operand[2].mode;
14208 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14209 {
14210 op0 = copy_to_reg (op0);
14211 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14212 }
14213 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14214 {
14215 error ("shift must be an immediate");
14216 return const0_rtx;
14217 }
14218 target = gen_reg_rtx (V2DImode);
14219 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14220 if (! pat)
14221 return 0;
14222 emit_insn (pat);
14223 return target;
14225 case IX86_BUILTIN_FEMMS:
14226 emit_insn (gen_femms ());
14227 return NULL_RTX;
14229 case IX86_BUILTIN_PAVGUSB:
14230 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14232 case IX86_BUILTIN_PF2ID:
14233 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14235 case IX86_BUILTIN_PFACC:
14236 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14238 case IX86_BUILTIN_PFADD:
14239 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14241 case IX86_BUILTIN_PFCMPEQ:
14242 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14244 case IX86_BUILTIN_PFCMPGE:
14245 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14247 case IX86_BUILTIN_PFCMPGT:
14248 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14250 case IX86_BUILTIN_PFMAX:
14251 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14253 case IX86_BUILTIN_PFMIN:
14254 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14256 case IX86_BUILTIN_PFMUL:
14257 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14259 case IX86_BUILTIN_PFRCP:
14260 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14262 case IX86_BUILTIN_PFRCPIT1:
14263 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14265 case IX86_BUILTIN_PFRCPIT2:
14266 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14268 case IX86_BUILTIN_PFRSQIT1:
14269 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14271 case IX86_BUILTIN_PFRSQRT:
14272 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14274 case IX86_BUILTIN_PFSUB:
14275 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14277 case IX86_BUILTIN_PFSUBR:
14278 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14280 case IX86_BUILTIN_PI2FD:
14281 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14283 case IX86_BUILTIN_PMULHRW:
14284 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14286 case IX86_BUILTIN_PF2IW:
14287 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14289 case IX86_BUILTIN_PFNACC:
14290 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14292 case IX86_BUILTIN_PFPNACC:
14293 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14295 case IX86_BUILTIN_PI2FW:
14296 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14298 case IX86_BUILTIN_PSWAPDSI:
14299 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14301 case IX86_BUILTIN_PSWAPDSF:
14302 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14304 case IX86_BUILTIN_SSE_ZERO:
14305 target = gen_reg_rtx (V4SFmode);
14306 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14307 return target;
14309 case IX86_BUILTIN_MMX_ZERO:
14310 target = gen_reg_rtx (DImode);
14311 emit_insn (gen_mmx_clrdi (target));
14312 return target;
14314 case IX86_BUILTIN_CLRTI:
14315 target = gen_reg_rtx (V2DImode);
14316 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14317 return target;
14320 case IX86_BUILTIN_SQRTSD:
14321 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14322 case IX86_BUILTIN_LOADAPD:
14323 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14324 case IX86_BUILTIN_LOADUPD:
14325 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14327 case IX86_BUILTIN_STOREAPD:
14328 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14329 case IX86_BUILTIN_STOREUPD:
14330 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14332 case IX86_BUILTIN_LOADSD:
14333 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14335 case IX86_BUILTIN_STORESD:
14336 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14338 case IX86_BUILTIN_SETPD1:
14339 target = assign_386_stack_local (DFmode, 0);
14340 arg0 = TREE_VALUE (arglist);
14341 emit_move_insn (adjust_address (target, DFmode, 0),
14342 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14343 op0 = gen_reg_rtx (V2DFmode);
14344 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14345 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14346 return op0;
14348 case IX86_BUILTIN_SETPD:
14349 target = assign_386_stack_local (V2DFmode, 0);
14350 arg0 = TREE_VALUE (arglist);
14351 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14352 emit_move_insn (adjust_address (target, DFmode, 0),
14353 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14354 emit_move_insn (adjust_address (target, DFmode, 8),
14355 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14356 op0 = gen_reg_rtx (V2DFmode);
14357 emit_insn (gen_sse2_movapd (op0, target));
14358 return op0;
14360 case IX86_BUILTIN_LOADRPD:
14361 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14362 gen_reg_rtx (V2DFmode), 1);
14363 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14364 return target;
14366 case IX86_BUILTIN_LOADPD1:
14367 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14368 gen_reg_rtx (V2DFmode), 1);
14369 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14370 return target;
14372 case IX86_BUILTIN_STOREPD1:
14373 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14374 case IX86_BUILTIN_STORERPD:
14375 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14377 case IX86_BUILTIN_CLRPD:
14378 target = gen_reg_rtx (V2DFmode);
14379 emit_insn (gen_sse_clrv2df (target));
14380 return target;
14382 case IX86_BUILTIN_MFENCE:
14383 emit_insn (gen_sse2_mfence ());
14384 return 0;
14385 case IX86_BUILTIN_LFENCE:
14386 emit_insn (gen_sse2_lfence ());
14387 return 0;
14389 case IX86_BUILTIN_CLFLUSH:
14390 arg0 = TREE_VALUE (arglist);
14391 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14392 icode = CODE_FOR_sse2_clflush;
14393 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14394 op0 = copy_to_mode_reg (Pmode, op0);
14396 emit_insn (gen_sse2_clflush (op0));
14397 return 0;
14399 case IX86_BUILTIN_MOVNTPD:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14401 case IX86_BUILTIN_MOVNTDQ:
14402 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14403 case IX86_BUILTIN_MOVNTI:
14404 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14406 case IX86_BUILTIN_LOADDQA:
14407 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14408 case IX86_BUILTIN_LOADDQU:
14409 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14410 case IX86_BUILTIN_LOADD:
14411 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14413 case IX86_BUILTIN_STOREDQA:
14414 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14415 case IX86_BUILTIN_STOREDQU:
14416 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14417 case IX86_BUILTIN_STORED:
14418 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14420 case IX86_BUILTIN_MONITOR:
14421 arg0 = TREE_VALUE (arglist);
14422 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14423 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14424 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14425 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14426 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14427 if (!REG_P (op0))
14428 op0 = copy_to_mode_reg (SImode, op0);
14429 if (!REG_P (op1))
14430 op1 = copy_to_mode_reg (SImode, op1);
14431 if (!REG_P (op2))
14432 op2 = copy_to_mode_reg (SImode, op2);
14433 emit_insn (gen_monitor (op0, op1, op2));
14434 return 0;
14436 case IX86_BUILTIN_MWAIT:
14437 arg0 = TREE_VALUE (arglist);
14438 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14439 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14440 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14441 if (!REG_P (op0))
14442 op0 = copy_to_mode_reg (SImode, op0);
14443 if (!REG_P (op1))
14444 op1 = copy_to_mode_reg (SImode, op1);
14445 emit_insn (gen_mwait (op0, op1));
14446 return 0;
14448 case IX86_BUILTIN_LOADDDUP:
14449 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14451 case IX86_BUILTIN_LDDQU:
14452 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14453 1);
14455 default:
14456 break;
14459 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14460 if (d->code == fcode)
14462 /* Compares are treated specially. */
14463 if (d->icode == CODE_FOR_maskcmpv4sf3
14464 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14465 || d->icode == CODE_FOR_maskncmpv4sf3
14466 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14467 || d->icode == CODE_FOR_maskcmpv2df3
14468 || d->icode == CODE_FOR_vmmaskcmpv2df3
14469 || d->icode == CODE_FOR_maskncmpv2df3
14470 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14471 return ix86_expand_sse_compare (d, arglist, target);
14473 return ix86_expand_binop_builtin (d->icode, arglist, target);
14476 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14477 if (d->code == fcode)
14478 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14480 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14481 if (d->code == fcode)
14482 return ix86_expand_sse_comi (d, arglist, target);
14484 /* @@@ Should really do something sensible here. */
14485 return 0;
14488 /* Store OPERAND into memory after reload has completed. This means
14489 that we can't easily use assign_stack_local. */
14490 rtx
14491 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14493 rtx result;
14494 if (!reload_completed)
14495 abort ();
14496 if (TARGET_RED_ZONE)
14498 result = gen_rtx_MEM (mode,
14499 gen_rtx_PLUS (Pmode,
14500 stack_pointer_rtx,
14501 GEN_INT (-RED_ZONE_SIZE)));
14502 emit_move_insn (result, operand);
14504 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14506 switch (mode)
14508 case HImode:
14509 case SImode:
14510 operand = gen_lowpart (DImode, operand);
14511 /* FALLTHRU */
14512 case DImode:
14513 emit_insn (
14514 gen_rtx_SET (VOIDmode,
14515 gen_rtx_MEM (DImode,
14516 gen_rtx_PRE_DEC (DImode,
14517 stack_pointer_rtx)),
14518 operand));
14519 break;
14520 default:
14521 abort ();
14523 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14525 else
14527 switch (mode)
14529 case DImode:
14531 rtx operands[2];
14532 split_di (&operand, 1, operands, operands + 1);
14533 emit_insn (
14534 gen_rtx_SET (VOIDmode,
14535 gen_rtx_MEM (SImode,
14536 gen_rtx_PRE_DEC (Pmode,
14537 stack_pointer_rtx)),
14538 operands[1]));
14539 emit_insn (
14540 gen_rtx_SET (VOIDmode,
14541 gen_rtx_MEM (SImode,
14542 gen_rtx_PRE_DEC (Pmode,
14543 stack_pointer_rtx)),
14544 operands[0]));
14546 break;
14547 case HImode:
14548 /* It is better to store HImodes as SImodes. */
14549 if (!TARGET_PARTIAL_REG_STALL)
14550 operand = gen_lowpart (SImode, operand);
14551 /* FALLTHRU */
14552 case SImode:
14553 emit_insn (
14554 gen_rtx_SET (VOIDmode,
14555 gen_rtx_MEM (GET_MODE (operand),
14556 gen_rtx_PRE_DEC (SImode,
14557 stack_pointer_rtx)),
14558 operand));
14559 break;
14560 default:
14561 abort ();
14563 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14565 return result;
14568 /* Free operand from the memory. */
14569 void
14570 ix86_free_from_memory (enum machine_mode mode)
14572 if (!TARGET_RED_ZONE)
14574 int size;
14576 if (mode == DImode || TARGET_64BIT)
14577 size = 8;
14578 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14579 size = 2;
14580 else
14581 size = 4;
14582 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14583 to pop or add instruction if registers are available. */
14584 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14585 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14586 GEN_INT (size))));
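/* A minimal usage sketch of the two helpers above (illustrative only, the
   names OP and SLOT are made up): after reload, a splitter can do

     rtx slot = ix86_force_to_memory (SImode, op);
     ... emit an insn that uses SLOT as its memory operand ...
     ix86_free_from_memory (SImode);

   provided nothing else adjusts the stack pointer in between.  */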
14590 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14591 QImode must go into class Q_REGS.
14592 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14593 movdf to do mem-to-mem moves through integer regs. */
14594 enum reg_class
14595 ix86_preferred_reload_class (rtx x, enum reg_class class)
14597 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14598 return NO_REGS;
14599 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14601 /* SSE can't load any constant directly yet. */
14602 if (SSE_CLASS_P (class))
14603 return NO_REGS;
14604 /* Floats can load 0 and 1. */
14605 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14607 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14608 if (MAYBE_SSE_CLASS_P (class))
14609 return (reg_class_subset_p (class, GENERAL_REGS)
14610 ? GENERAL_REGS : FLOAT_REGS);
14611 else
14612 return class;
14614 /* General regs can load everything. */
14615 if (reg_class_subset_p (class, GENERAL_REGS))
14616 return GENERAL_REGS;
14617 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14618 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14619 return NO_REGS;
14621 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14622 return NO_REGS;
14623 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14624 return Q_REGS;
14625 return class;
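/* Two illustrative consequences of the rules above (examples, not new
   behaviour): reloading a DFmode CONST_DOUBLE into an SSE class yields
   NO_REGS, forcing the constant into memory, while reloading a QImode
   value into a class that is not a subset of Q_REGS is narrowed to
   Q_REGS so that the byte registers can address it.  */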
14628 /* If we are copying between general and FP registers, we need a memory
14629 location. The same is true for SSE and MMX registers.
14631 The macro can't work reliably when one of the CLASSES is a class containing
14632 registers from multiple units (SSE, MMX, integer). We avoid this by never
14633 combining those units in a single alternative in the machine description.
14634 Ensure that this constraint holds to avoid unexpected surprises.
14636 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14637 enforce these sanity checks. */
14638 int
14639 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14640 enum machine_mode mode, int strict)
14642 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14643 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14644 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14645 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14646 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14647 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14649 if (strict)
14650 abort ();
14651 else
14652 return 1;
14654 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14655 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14656 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14657 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14658 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14660 /* Return the cost of moving data from a register in class CLASS1 to
14661 one in class CLASS2.
14663 It is not required that the cost always equal 2 when FROM is the same as TO;
14664 on some machines it is expensive to move between registers if they are not
14665 general registers. */
14666 int
14667 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14668 enum reg_class class2)
14670 /* In case we require secondary memory, compute the cost of the store followed
14671 by the load. In order to avoid bad register allocation choices, we need
14672 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14674 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14676 int cost = 1;
14678 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14679 MEMORY_MOVE_COST (mode, class1, 1));
14680 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14681 MEMORY_MOVE_COST (mode, class2, 1));
14683 /* In the case of copying from a general purpose register we may emit multiple
14684 stores followed by a single load, causing a memory-size-mismatch stall.
14685 Count this as an arbitrarily high cost of 20. */
14686 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14687 cost += 20;
14689 /* In the case of FP/MMX moves, the registers actually overlap, and we
14690 have to switch modes in order to treat them differently. */
14691 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14692 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14693 cost += 20;
14695 return cost;
14698 /* Moves between SSE/MMX and integer unit are expensive. */
14699 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14700 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14701 return ix86_cost->mmxsse_to_integer;
14702 if (MAYBE_FLOAT_CLASS_P (class1))
14703 return ix86_cost->fp_move;
14704 if (MAYBE_SSE_CLASS_P (class1))
14705 return ix86_cost->sse_move;
14706 if (MAYBE_MMX_CLASS_P (class1))
14707 return ix86_cost->mmx_move;
14708 return 2;
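/* Worked example (a sketch using the cost tables, not measured numbers):
   moving a DFmode value between FLOAT_REGS and SSE_REGS needs secondary
   memory, so the charge is
     1 + MAX (fp_load[1], fp_store[1]) + MAX (sse_load[1], sse_store[1]),
   i.e. at least as much as the store/load pair it stands for; the +20
   penalties above only kick in when the register counts differ or when
   FP and MMX registers overlap.  */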
14711 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14712 int
14713 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14715 /* Flags and only flags can only hold CCmode values. */
14716 if (CC_REGNO_P (regno))
14717 return GET_MODE_CLASS (mode) == MODE_CC;
14718 if (GET_MODE_CLASS (mode) == MODE_CC
14719 || GET_MODE_CLASS (mode) == MODE_RANDOM
14720 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14721 return 0;
14722 if (FP_REGNO_P (regno))
14723 return VALID_FP_MODE_P (mode);
14724 if (SSE_REGNO_P (regno))
14725 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14726 if (MMX_REGNO_P (regno))
14727 return (TARGET_MMX
14728 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14729 /* We handle both integers and floats in the general purpose registers.
14730 In the future we should be able to handle vector modes as well. */
14731 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14732 return 0;
14733 /* Take care of QImode values - they can be in non-QI regs, but then
14734 they do cause partial register stalls. */
14735 if (regno < 4 || mode != QImode || TARGET_64BIT)
14736 return 1;
14737 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14740 /* Return the cost of moving data of mode M between a
14741 register and memory. A value of 2 is the default; this cost is
14742 relative to those in `REGISTER_MOVE_COST'.
14744 If moving between registers and memory is more expensive than
14745 between two registers, you should define this macro to express the
14746 relative cost.
14748 Also model the increased cost of moving QImode registers in non-Q_REGS
14749 classes.
14750 */
14751 int
14752 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14754 if (FLOAT_CLASS_P (class))
14756 int index;
14757 switch (mode)
14759 case SFmode:
14760 index = 0;
14761 break;
14762 case DFmode:
14763 index = 1;
14764 break;
14765 case XFmode:
14766 index = 2;
14767 break;
14768 default:
14769 return 100;
14771 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14773 if (SSE_CLASS_P (class))
14775 int index;
14776 switch (GET_MODE_SIZE (mode))
14778 case 4:
14779 index = 0;
14780 break;
14781 case 8:
14782 index = 1;
14783 break;
14784 case 16:
14785 index = 2;
14786 break;
14787 default:
14788 return 100;
14790 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14792 if (MMX_CLASS_P (class))
14794 int index;
14795 switch (GET_MODE_SIZE (mode))
14797 case 4:
14798 index = 0;
14799 break;
14800 case 8:
14801 index = 1;
14802 break;
14803 default:
14804 return 100;
14806 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14808 switch (GET_MODE_SIZE (mode))
14810 case 1:
14811 if (in)
14812 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14813 : ix86_cost->movzbl_load);
14814 else
14815 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14816 : ix86_cost->int_store[0] + 4);
14817 break;
14818 case 2:
14819 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14820 default:
14821 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14822 if (mode == TFmode)
14823 mode = XFmode;
14824 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14825 * (((int) GET_MODE_SIZE (mode)
14826 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14830 /* Compute a (partial) cost for rtx X. Return true if the complete
14831 cost has been computed, and false if subexpressions should be
14832 scanned. In either case, *TOTAL contains the cost result. */
14834 static bool
14835 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14837 enum machine_mode mode = GET_MODE (x);
14839 switch (code)
14841 case CONST_INT:
14842 case CONST:
14843 case LABEL_REF:
14844 case SYMBOL_REF:
14845 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14846 *total = 3;
14847 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14848 *total = 2;
14849 else if (flag_pic && SYMBOLIC_CONST (x)
14850 && (!TARGET_64BIT
14851 || (GET_CODE (x) != LABEL_REF
14852 && (GET_CODE (x) != SYMBOL_REF
14853 || !SYMBOL_REF_LOCAL_P (x)))))
14854 *total = 1;
14855 else
14856 *total = 0;
14857 return true;
14859 case CONST_DOUBLE:
14860 if (mode == VOIDmode)
14861 *total = 0;
14862 else
14863 switch (standard_80387_constant_p (x))
14865 case 1: /* 0.0 */
14866 *total = 1;
14867 break;
14868 default: /* Other constants */
14869 *total = 2;
14870 break;
14871 case 0:
14872 case -1:
14873 /* Start with (MEM (SYMBOL_REF)), since that's where
14874 it'll probably end up. Add a penalty for size. */
14875 *total = (COSTS_N_INSNS (1)
14876 + (flag_pic != 0 && !TARGET_64BIT)
14877 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14878 break;
14880 return true;
14882 case ZERO_EXTEND:
14883 /* The zero extension is often completely free on x86_64, so make
14884 it as cheap as possible. */
14885 if (TARGET_64BIT && mode == DImode
14886 && GET_MODE (XEXP (x, 0)) == SImode)
14887 *total = 1;
14888 else if (TARGET_ZERO_EXTEND_WITH_AND)
14889 *total = COSTS_N_INSNS (ix86_cost->add);
14890 else
14891 *total = COSTS_N_INSNS (ix86_cost->movzx);
14892 return false;
14894 case SIGN_EXTEND:
14895 *total = COSTS_N_INSNS (ix86_cost->movsx);
14896 return false;
14898 case ASHIFT:
14899 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14900 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14902 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14903 if (value == 1)
14905 *total = COSTS_N_INSNS (ix86_cost->add);
14906 return false;
14908 if ((value == 2 || value == 3)
14909 && ix86_cost->lea <= ix86_cost->shift_const)
14911 *total = COSTS_N_INSNS (ix86_cost->lea);
14912 return false;
14915 /* FALLTHRU */
14917 case ROTATE:
14918 case ASHIFTRT:
14919 case LSHIFTRT:
14920 case ROTATERT:
14921 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14923 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14925 if (INTVAL (XEXP (x, 1)) > 32)
14926 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14927 else
14928 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14930 else
14932 if (GET_CODE (XEXP (x, 1)) == AND)
14933 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14934 else
14935 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14938 else
14940 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14941 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14942 else
14943 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14945 return false;
14947 case MULT:
14948 if (FLOAT_MODE_P (mode))
14950 *total = COSTS_N_INSNS (ix86_cost->fmul);
14951 return false;
14953 else
14955 rtx op0 = XEXP (x, 0);
14956 rtx op1 = XEXP (x, 1);
14957 int nbits;
14958 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14960 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14961 for (nbits = 0; value != 0; value &= value - 1)
14962 nbits++;
14964 else
14965 /* This is arbitrary. */
14966 nbits = 7;
14968 /* Compute costs correctly for widening multiplication. */
14969 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14970 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14971 == GET_MODE_SIZE (mode))
14973 int is_mulwiden = 0;
14974 enum machine_mode inner_mode = GET_MODE (op0);
14976 if (GET_CODE (op0) == GET_CODE (op1))
14977 is_mulwiden = 1, op1 = XEXP (op1, 0);
14978 else if (GET_CODE (op1) == CONST_INT)
14980 if (GET_CODE (op0) == SIGN_EXTEND)
14981 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14982 == INTVAL (op1);
14983 else
14984 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14987 if (is_mulwiden)
14988 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14991 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14992 + nbits * ix86_cost->mult_bit)
14993 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14995 return true;
14998 case DIV:
14999 case UDIV:
15000 case MOD:
15001 case UMOD:
15002 if (FLOAT_MODE_P (mode))
15003 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15004 else
15005 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15006 return false;
15008 case PLUS:
15009 if (FLOAT_MODE_P (mode))
15010 *total = COSTS_N_INSNS (ix86_cost->fadd);
15011 else if (GET_MODE_CLASS (mode) == MODE_INT
15012 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15014 if (GET_CODE (XEXP (x, 0)) == PLUS
15015 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15016 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15017 && CONSTANT_P (XEXP (x, 1)))
15019 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15020 if (val == 2 || val == 4 || val == 8)
15022 *total = COSTS_N_INSNS (ix86_cost->lea);
15023 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15024 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15025 outer_code);
15026 *total += rtx_cost (XEXP (x, 1), outer_code);
15027 return true;
15030 else if (GET_CODE (XEXP (x, 0)) == MULT
15031 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15033 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15034 if (val == 2 || val == 4 || val == 8)
15036 *total = COSTS_N_INSNS (ix86_cost->lea);
15037 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15038 *total += rtx_cost (XEXP (x, 1), outer_code);
15039 return true;
15042 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15044 *total = COSTS_N_INSNS (ix86_cost->lea);
15045 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15046 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15047 *total += rtx_cost (XEXP (x, 1), outer_code);
15048 return true;
15051 /* FALLTHRU */
15053 case MINUS:
15054 if (FLOAT_MODE_P (mode))
15056 *total = COSTS_N_INSNS (ix86_cost->fadd);
15057 return false;
15059 /* FALLTHRU */
15061 case AND:
15062 case IOR:
15063 case XOR:
15064 if (!TARGET_64BIT && mode == DImode)
15066 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15067 + (rtx_cost (XEXP (x, 0), outer_code)
15068 << (GET_MODE (XEXP (x, 0)) != DImode))
15069 + (rtx_cost (XEXP (x, 1), outer_code)
15070 << (GET_MODE (XEXP (x, 1)) != DImode)));
15071 return true;
15073 /* FALLTHRU */
15075 case NEG:
15076 if (FLOAT_MODE_P (mode))
15078 *total = COSTS_N_INSNS (ix86_cost->fchs);
15079 return false;
15081 /* FALLTHRU */
15083 case NOT:
15084 if (!TARGET_64BIT && mode == DImode)
15085 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15086 else
15087 *total = COSTS_N_INSNS (ix86_cost->add);
15088 return false;
15090 case FLOAT_EXTEND:
15091 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15092 *total = 0;
15093 return false;
15095 case ABS:
15096 if (FLOAT_MODE_P (mode))
15097 *total = COSTS_N_INSNS (ix86_cost->fabs);
15098 return false;
15100 case SQRT:
15101 if (FLOAT_MODE_P (mode))
15102 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15103 return false;
15105 case UNSPEC:
15106 if (XINT (x, 1) == UNSPEC_TP)
15107 *total = 0;
15108 return false;
15110 default:
15111 return false;
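/* Example of the PLUS handling above (a sketch with made-up operands):
   an address-style expression such as
     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))
   is charged one lea plus the costs of its sub-operands, rather than
   being priced as a multiply followed by two additions.  */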
15115 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15116 static void
15117 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15119 init_section ();
15120 fputs ("\tpushl $", asm_out_file);
15121 assemble_name (asm_out_file, XSTR (symbol, 0));
15122 fputc ('\n', asm_out_file);
15124 #endif
15126 #if TARGET_MACHO
15128 static int current_machopic_label_num;
15130 /* Given a symbol name and its associated stub, write out the
15131 definition of the stub. */
15133 void
15134 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15136 unsigned int length;
15137 char *binder_name, *symbol_name, lazy_ptr_name[32];
15138 int label = ++current_machopic_label_num;
15140 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15141 symb = (*targetm.strip_name_encoding) (symb);
15143 length = strlen (stub);
15144 binder_name = alloca (length + 32);
15145 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15147 length = strlen (symb);
15148 symbol_name = alloca (length + 32);
15149 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15151 sprintf (lazy_ptr_name, "L%d$lz", label);
15153 if (MACHOPIC_PURE)
15154 machopic_picsymbol_stub_section ();
15155 else
15156 machopic_symbol_stub_section ();
15158 fprintf (file, "%s:\n", stub);
15159 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15161 if (MACHOPIC_PURE)
15163 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15164 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15165 fprintf (file, "\tjmp %%edx\n");
15167 else
15168 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15170 fprintf (file, "%s:\n", binder_name);
15172 if (MACHOPIC_PURE)
15174 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15175 fprintf (file, "\tpushl %%eax\n");
15177 else
15178 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15180 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15182 machopic_lazy_symbol_ptr_section ();
15183 fprintf (file, "%s:\n", lazy_ptr_name);
15184 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15185 fprintf (file, "\t.long %s\n", binder_name);
15187 #endif /* TARGET_MACHO */
15189 /* Order the registers for register allocator. */
15191 void
15192 x86_order_regs_for_local_alloc (void)
15194 int pos = 0;
15195 int i;
15197 /* First allocate the local general purpose registers. */
15198 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15199 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15200 reg_alloc_order [pos++] = i;
15202 /* Global general purpose registers. */
15203 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15204 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15205 reg_alloc_order [pos++] = i;
15207 /* x87 registers come first in case we are doing FP math
15208 using them. */
15209 if (!TARGET_SSE_MATH)
15210 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15211 reg_alloc_order [pos++] = i;
15213 /* SSE registers. */
15214 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15215 reg_alloc_order [pos++] = i;
15216 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15217 reg_alloc_order [pos++] = i;
15219 /* x87 registers. */
15220 if (TARGET_SSE_MATH)
15221 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15222 reg_alloc_order [pos++] = i;
15224 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15225 reg_alloc_order [pos++] = i;
15227 /* Initialize the rest of the array, as we do not allocate some registers
15228 at all. */
15229 while (pos < FIRST_PSEUDO_REGISTER)
15230 reg_alloc_order [pos++] = 0;
15233 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15234 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15235 #endif
15237 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15238 struct attribute_spec.handler. */
15239 static tree
15240 ix86_handle_struct_attribute (tree *node, tree name,
15241 tree args ATTRIBUTE_UNUSED,
15242 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15244 tree *type = NULL;
15245 if (DECL_P (*node))
15247 if (TREE_CODE (*node) == TYPE_DECL)
15248 type = &TREE_TYPE (*node);
15250 else
15251 type = node;
15253 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15254 || TREE_CODE (*type) == UNION_TYPE)))
15256 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15257 *no_add_attrs = true;
15260 else if ((is_attribute_p ("ms_struct", name)
15261 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15262 || ((is_attribute_p ("gcc_struct", name)
15263 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15265 warning ("`%s' incompatible attribute ignored",
15266 IDENTIFIER_POINTER (name));
15267 *no_add_attrs = true;
15270 return NULL_TREE;
15273 static bool
15274 ix86_ms_bitfield_layout_p (tree record_type)
15276 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15277 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15278 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15281 /* Returns an expression indicating where the this parameter is
15282 located on entry to the FUNCTION. */
15284 static rtx
15285 x86_this_parameter (tree function)
15287 tree type = TREE_TYPE (function);
15289 if (TARGET_64BIT)
15291 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15292 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15295 if (ix86_function_regparm (type, function) > 0)
15297 tree parm;
15299 parm = TYPE_ARG_TYPES (type);
15300 /* Figure out whether or not the function has a variable number of
15301 arguments. */
15302 for (; parm; parm = TREE_CHAIN (parm))
15303 if (TREE_VALUE (parm) == void_type_node)
15304 break;
15305 /* If not, the this parameter is in the first argument. */
15306 if (parm)
15308 int regno = 0;
15309 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15310 regno = 2;
15311 return gen_rtx_REG (SImode, regno);
15315 if (aggregate_value_p (TREE_TYPE (type), type))
15316 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15317 else
15318 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15321 /* Determine whether x86_output_mi_thunk can succeed. */
15323 static bool
15324 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15325 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15326 HOST_WIDE_INT vcall_offset, tree function)
15328 /* 64-bit can handle anything. */
15329 if (TARGET_64BIT)
15330 return true;
15332 /* For 32-bit, everything's fine if we have one free register. */
15333 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15334 return true;
15336 /* Need a free register for vcall_offset. */
15337 if (vcall_offset)
15338 return false;
15340 /* Need a free register for GOT references. */
15341 if (flag_pic && !(*targetm.binds_local_p) (function))
15342 return false;
15344 /* Otherwise ok. */
15345 return true;
15348 /* Output the assembler code for a thunk function. THUNK_DECL is the
15349 declaration for the thunk function itself, FUNCTION is the decl for
15350 the target function. DELTA is an immediate constant offset to be
15351 added to THIS. If VCALL_OFFSET is nonzero, the word at
15352 *(*this + vcall_offset) should be added to THIS. */
15354 static void
15355 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15356 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15357 HOST_WIDE_INT vcall_offset, tree function)
15359 rtx xops[3];
15360 rtx this = x86_this_parameter (function);
15361 rtx this_reg, tmp;
15363 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15364 pull it in now and let DELTA benefit. */
15365 if (REG_P (this))
15366 this_reg = this;
15367 else if (vcall_offset)
15369 /* Put the this parameter into %eax. */
15370 xops[0] = this;
15371 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15372 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15374 else
15375 this_reg = NULL_RTX;
15377 /* Adjust the this parameter by a fixed constant. */
15378 if (delta)
15380 xops[0] = GEN_INT (delta);
15381 xops[1] = this_reg ? this_reg : this;
15382 if (TARGET_64BIT)
15384 if (!x86_64_general_operand (xops[0], DImode))
15386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15387 xops[1] = tmp;
15388 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15389 xops[0] = tmp;
15390 xops[1] = this;
15392 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15394 else
15395 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15398 /* Adjust the this parameter by a value stored in the vtable. */
15399 if (vcall_offset)
15401 if (TARGET_64BIT)
15402 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15403 else
15405 int tmp_regno = 2 /* ECX */;
15406 if (lookup_attribute ("fastcall",
15407 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15408 tmp_regno = 0 /* EAX */;
15409 tmp = gen_rtx_REG (SImode, tmp_regno);
15412 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15413 xops[1] = tmp;
15414 if (TARGET_64BIT)
15415 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15416 else
15417 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15419 /* Adjust the this parameter. */
15420 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15421 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15423 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15424 xops[0] = GEN_INT (vcall_offset);
15425 xops[1] = tmp2;
15426 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15427 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15429 xops[1] = this_reg;
15430 if (TARGET_64BIT)
15431 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15432 else
15433 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15436 /* If necessary, drop THIS back to its stack slot. */
15437 if (this_reg && this_reg != this)
15439 xops[0] = this_reg;
15440 xops[1] = this;
15441 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15444 xops[0] = XEXP (DECL_RTL (function), 0);
15445 if (TARGET_64BIT)
15447 if (!flag_pic || (*targetm.binds_local_p) (function))
15448 output_asm_insn ("jmp\t%P0", xops);
15449 else
15451 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15452 tmp = gen_rtx_CONST (Pmode, tmp);
15453 tmp = gen_rtx_MEM (QImode, tmp);
15454 xops[0] = tmp;
15455 output_asm_insn ("jmp\t%A0", xops);
15458 else
15460 if (!flag_pic || (*targetm.binds_local_p) (function))
15461 output_asm_insn ("jmp\t%P0", xops);
15462 else
15463 #if TARGET_MACHO
15464 if (TARGET_MACHO)
15466 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15467 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15468 tmp = gen_rtx_MEM (QImode, tmp);
15469 xops[0] = tmp;
15470 output_asm_insn ("jmp\t%0", xops);
15472 else
15473 #endif /* TARGET_MACHO */
15475 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15476 output_set_got (tmp);
15478 xops[1] = tmp;
15479 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15480 output_asm_insn ("jmp\t{*}%1", xops);
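/* As an illustration (a sketch, not output captured from this function):
   for a 32-bit, non-PIC thunk with DELTA == 4, no VCALL_OFFSET and THIS
   living at 4(%esp), the code above emits roughly

       addl $4, 4(%esp)
       jmp  target_function

   where target_function is a stand-in for the real symbol; the
   adjustment is applied in place and control leaves with a tail jump.  */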
15485 static void
15486 x86_file_start (void)
15488 default_file_start ();
15489 if (X86_FILE_START_VERSION_DIRECTIVE)
15490 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15491 if (X86_FILE_START_FLTUSED)
15492 fputs ("\t.global\t__fltused\n", asm_out_file);
15493 if (ix86_asm_dialect == ASM_INTEL)
15494 fputs ("\t.intel_syntax\n", asm_out_file);
15497 int
15498 x86_field_alignment (tree field, int computed)
15500 enum machine_mode mode;
15501 tree type = TREE_TYPE (field);
15503 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15504 return computed;
15505 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15506 ? get_inner_array_type (type) : type);
15507 if (mode == DFmode || mode == DCmode
15508 || GET_MODE_CLASS (mode) == MODE_INT
15509 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15510 return MIN (32, computed);
15511 return computed;
15514 /* Output assembler code to FILE to increment profiler label # LABELNO
15515 for profiling a function entry. */
15516 void
15517 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15519 if (TARGET_64BIT)
15520 if (flag_pic)
15522 #ifndef NO_PROFILE_COUNTERS
15523 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15524 #endif
15525 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15527 else
15529 #ifndef NO_PROFILE_COUNTERS
15530 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15531 #endif
15532 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15534 else if (flag_pic)
15536 #ifndef NO_PROFILE_COUNTERS
15537 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15538 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15539 #endif
15540 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15542 else
15544 #ifndef NO_PROFILE_COUNTERS
15545 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15546 PROFILE_COUNT_REGISTER);
15547 #endif
15548 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15552 /* We don't have exact information about the insn sizes, but we may assume
15553 quite safely that we are informed about all 1 byte insns and memory
15554 address sizes. This is enough to eliminate unnecessary padding in
15555 99% of cases. */
15557 static int
15558 min_insn_size (rtx insn)
15560 int l = 0;
15562 if (!INSN_P (insn) || !active_insn_p (insn))
15563 return 0;
15565 /* Discard alignments we've emitted and jump table bodies (ADDR_VEC / ADDR_DIFF_VEC). */
15566 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15567 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15568 return 0;
15569 if (GET_CODE (insn) == JUMP_INSN
15570 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15571 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15572 return 0;
15574 /* Important case - calls are always 5 bytes.
15575 It is common to have many calls in a row. */
15576 if (GET_CODE (insn) == CALL_INSN
15577 && symbolic_reference_mentioned_p (PATTERN (insn))
15578 && !SIBLING_CALL_P (insn))
15579 return 5;
15580 if (get_attr_length (insn) <= 1)
15581 return 1;
15583 /* For normal instructions we rely on the sizes of addresses and on the
15584 presence of a symbol to require 4 bytes of encoding.
15585 This is not the case for jumps, where references are PC-relative. */
15586 if (GET_CODE (insn) != JUMP_INSN)
15588 l = get_attr_length_address (insn);
15589 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15590 l = 4;
15592 if (l)
15593 return 1+l;
15594 else
15595 return 2;
15598 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15599 window. */
15601 static void
15602 ix86_avoid_jump_misspredicts (void)
15604 rtx insn, start = get_insns ();
15605 int nbytes = 0, njumps = 0;
15606 int isjump = 0;
15608 /* Look for all minimal intervals of instructions containing 4 jumps.
15609 The intervals are bounded by START and INSN. NBYTES is the total
15610 size of the instructions in the interval, including INSN and excluding
15611 START. When NBYTES is smaller than 16 bytes, it is possible
15612 that the end of START and the end of INSN fall into the same 16-byte window.
15614 The smallest offset in the window at which INSN can start is the case where
15615 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15616 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
15618 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15621 nbytes += min_insn_size (insn);
15622 if (dump_file)
15623 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15624 INSN_UID (insn), min_insn_size (insn));
15625 if ((GET_CODE (insn) == JUMP_INSN
15626 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15627 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15628 || GET_CODE (insn) == CALL_INSN)
15629 njumps++;
15630 else
15631 continue;
15633 while (njumps > 3)
15635 start = NEXT_INSN (start);
15636 if ((GET_CODE (start) == JUMP_INSN
15637 && GET_CODE (PATTERN (start)) != ADDR_VEC
15638 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15639 || GET_CODE (start) == CALL_INSN)
15640 njumps--, isjump = 1;
15641 else
15642 isjump = 0;
15643 nbytes -= min_insn_size (start);
15645 if (njumps < 0)
15646 abort ();
15647 if (dump_file)
15648 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15649 INSN_UID (start), INSN_UID (insn), nbytes);
15651 if (njumps == 3 && isjump && nbytes < 16)
15653 int padsize = 15 - nbytes + min_insn_size (insn);
15655 if (dump_file)
15656 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15657 INSN_UID (insn), padsize);
15658 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
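/* A worked example of the padding decision above (illustrative numbers):
   if the three preceding jumps plus INSN span NBYTES == 12 bytes and
   INSN itself is 2 bytes, PADSIZE is 15 - 12 + 2 = 5, so an align with
   that maxskip is emitted before INSN, keeping the fourth jump out of a
   16-byte window that already holds the other three.  */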
15663 /* The AMD Athlon works faster when a RET is not the destination of a
15664 conditional jump and is not directly preceded by another jump
15665 instruction. We avoid the penalty by inserting a NOP just before
15666 the RET instructions in such cases. */
15667 static void
15668 ix86_pad_returns (void)
15670 edge e;
15672 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15674 basic_block bb = e->src;
15675 rtx ret = BB_END (bb);
15676 rtx prev;
15677 bool replace = false;
15679 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15680 || !maybe_hot_bb_p (bb))
15681 continue;
15682 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15683 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15684 break;
15685 if (prev && GET_CODE (prev) == CODE_LABEL)
15687 edge e;
15688 for (e = bb->pred; e; e = e->pred_next)
15689 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15690 && !(e->flags & EDGE_FALLTHRU))
15691 replace = true;
15693 if (!replace)
15695 prev = prev_active_insn (ret);
15696 if (prev
15697 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15698 || GET_CODE (prev) == CALL_INSN))
15699 replace = true;
15700 /* Empty functions get a branch mispredict even when the jump destination
15701 is not visible to us. */
15702 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15703 replace = true;
15705 if (replace)
15707 emit_insn_before (gen_return_internal_long (), ret);
15708 delete_insn (ret);
15713 /* Implement machine-specific optimizations. We implement padding of returns
15714 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
15715 static void
15716 ix86_reorg (void)
15718 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15719 ix86_pad_returns ();
15720 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15721 ix86_avoid_jump_misspredicts ();
15724 /* Return nonzero when a QImode register that must be represented via a REX
15725 prefix is used. */
15726 bool
15727 x86_extended_QIreg_mentioned_p (rtx insn)
15729 int i;
15730 extract_insn_cached (insn);
15731 for (i = 0; i < recog_data.n_operands; i++)
15732 if (REG_P (recog_data.operand[i])
15733 && REGNO (recog_data.operand[i]) >= 4)
15734 return true;
15735 return false;
15738 /* Return nonzero when P points to a register encoded via a REX prefix.
15739 Called via for_each_rtx. */
15740 static int
15741 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15743 unsigned int regno;
15744 if (!REG_P (*p))
15745 return 0;
15746 regno = REGNO (*p);
15747 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15750 /* Return true when INSN mentions register that must be encoded using REX
15751 prefix. */
15752 bool
15753 x86_extended_reg_mentioned_p (rtx insn)
15755 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15758 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15759 optabs would emit if we didn't have TFmode patterns. */
15761 void
15762 x86_emit_floatuns (rtx operands[2])
15764 rtx neglab, donelab, i0, i1, f0, in, out;
15765 enum machine_mode mode, inmode;
15767 inmode = GET_MODE (operands[1]);
15768 if (inmode != SImode
15769 && inmode != DImode)
15770 abort ();
15772 out = operands[0];
15773 in = force_reg (inmode, operands[1]);
15774 mode = GET_MODE (out);
15775 neglab = gen_label_rtx ();
15776 donelab = gen_label_rtx ();
15777 i1 = gen_reg_rtx (Pmode);
15778 f0 = gen_reg_rtx (mode);
15780 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15782 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15783 emit_jump_insn (gen_jump (donelab));
15784 emit_barrier ();
15786 emit_label (neglab);
15788 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15789 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15790 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15791 expand_float (f0, i0, 0);
15792 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15794 emit_label (donelab);
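/* The negative path above is the usual halving trick (a sketch of the
   arithmetic, not additional code): for an input X with the top bit set
   we form I0 = (X >> 1) | (X & 1), convert I0 as a signed value and
   double the result.  OR-ing the shifted-out bit back in keeps it as a
   sticky bit, so in the default round-to-nearest mode 2 * (FP) I0
   rounds the same way the exact value of X would.  */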
15797 /* Return true if we do not know how to pass TYPE solely in registers. */
15798 bool
15799 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15801 if (default_must_pass_in_stack (mode, type))
15802 return true;
15803 return (!TARGET_64BIT && type && mode == TImode);
15806 /* Initialize vector TARGET via VALS. */
15807 void
15808 ix86_expand_vector_init (rtx target, rtx vals)
15810 enum machine_mode mode = GET_MODE (target);
15811 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15812 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15813 int i;
15815 for (i = n_elts - 1; i >= 0; i--)
15816 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15817 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15818 break;
15820 /* A few special cases first...
15821 ... constants are best loaded from the constant pool. */
15822 if (i < 0)
15824 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15825 return;
15828 /* ... values where only the first field is non-constant are best loaded
15829 from the pool and overwritten via a move later. */
15830 if (!i)
15832 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15833 GET_MODE_INNER (mode), 0);
15835 op = force_reg (mode, op);
15836 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15837 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15838 switch (GET_MODE (target))
15840 case V2DFmode:
15841 emit_insn (gen_sse2_movsd (target, target, op));
15842 break;
15843 case V4SFmode:
15844 emit_insn (gen_sse_movss (target, target, op));
15845 break;
15846 default:
15847 break;
15849 return;
15852 /* Otherwise, the general sequence: build the vector by interleaving the
elements with unpack instructions. */
15853 switch (GET_MODE (target))
15855 case V2DFmode:
15857 rtx vecop0 =
15858 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15859 rtx vecop1 =
15860 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15862 vecop0 = force_reg (V2DFmode, vecop0);
15863 vecop1 = force_reg (V2DFmode, vecop1);
15864 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15866 break;
15867 case V4SFmode:
15869 rtx vecop0 =
15870 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15871 rtx vecop1 =
15872 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15873 rtx vecop2 =
15874 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15875 rtx vecop3 =
15876 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15877 rtx tmp1 = gen_reg_rtx (V4SFmode);
15878 rtx tmp2 = gen_reg_rtx (V4SFmode);
15880 vecop0 = force_reg (V4SFmode, vecop0);
15881 vecop1 = force_reg (V4SFmode, vecop1);
15882 vecop2 = force_reg (V4SFmode, vecop2);
15883 vecop3 = force_reg (V4SFmode, vecop3);
15884 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15885 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15886 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15888 break;
15889 default:
15890 abort ();
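/* For reference, how the V4SFmode case above interleaves, assuming the
   usual unpcklps semantics dst = { a0, b0, a1, b1 } (a sketch; v0..v3
   stand for the scalar elements of VALS, x for don't-care slots):

     tmp1   = unpcklps (v1, v3)     -> { v1, v3, x, x }
     tmp2   = unpcklps (v0, v2)     -> { v0, v2, x, x }
     target = unpcklps (tmp2, tmp1) -> { v0, v1, v2, v3 }  */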
15894 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15896 We do this in the new i386 backend to maintain source compatibility
15897 with the old cc0-based compiler. */
15899 static tree
15900 ix86_md_asm_clobbers (tree clobbers)
15902 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15903 clobbers);
15904 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15905 clobbers);
15906 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15907 clobbers);
15908 return clobbers;
15911 /* Worker function for REVERSE_CONDITION. */
15913 enum rtx_code
15914 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15916 return (mode != CCFPmode && mode != CCFPUmode
15917 ? reverse_condition (code)
15918 : reverse_condition_maybe_unordered (code));
15921 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15922 to OPERANDS[0]. */
15924 const char *
15925 output_387_reg_move (rtx insn, rtx *operands)
15927 if (REG_P (operands[1])
15928 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15930 if (REGNO (operands[0]) == FIRST_STACK_REG
15931 && TARGET_USE_FFREEP)
15932 return "ffreep\t%y0";
15933 return "fstp\t%y0";
15935 if (STACK_TOP_P (operands[0]))
15936 return "fld%z1\t%y1";
15937 return "fst\t%y0";
15940 /* Output code to perform a conditional jump to LABEL, if C2 flag in
15941 FP status register is set. */
15943 void
15944 ix86_emit_fp_unordered_jump (rtx label)
15946 rtx reg = gen_reg_rtx (HImode);
15947 rtx temp;
15949 emit_insn (gen_x86_fnstsw_1 (reg));
15951 if (TARGET_USE_SAHF)
15953 emit_insn (gen_x86_sahf_1 (reg));
15955 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15956 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15958 else
15960 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15962 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15963 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15966 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15967 gen_rtx_LABEL_REF (VOIDmode, label),
15968 pc_rtx);
15969 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15970 emit_jump_insn (temp);
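/* Background for the two branches above (a note, not new behaviour):
   fnstsw stores the FPU status word into %ax, whose high byte carries
   C0, C2 and C3 in bits 0, 2 and 6.  With TARGET_USE_SAHF, sahf copies
   that high byte into EFLAGS, so C2 lands in the parity flag and the
   jump can test the unordered condition directly; otherwise the 0x04
   mask tests the C2 bit of the high byte explicitly.  */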
15973 /* Output code to perform a log1p XFmode calculation. */
15975 void ix86_emit_i387_log1p (rtx op0, rtx op1)
15977 rtx label1 = gen_label_rtx ();
15978 rtx label2 = gen_label_rtx ();
15980 rtx tmp = gen_reg_rtx (XFmode);
15981 rtx tmp2 = gen_reg_rtx (XFmode);
15983 emit_insn (gen_absxf2 (tmp, op1));
15984 emit_insn (gen_cmpxf (tmp,
15985 CONST_DOUBLE_FROM_REAL_VALUE (
15986 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15987 XFmode)));
15988 emit_jump_insn (gen_bge (label1));
15990 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15991 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15992 emit_jump (label2);
15994 emit_label (label1);
15995 emit_move_insn (tmp, CONST1_RTX (XFmode));
15996 emit_insn (gen_addxf3 (tmp, op1, tmp));
15997 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15998 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16000 emit_label (label2);
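/* The identity used above, for reference: log1p(x) = ln(2) * log2(1 + x).
   fyl2xp1 computes y * log2(x + 1) accurately only for small x, which is
   why the code compares |x| against 1 - sqrt(2)/2 ~= 0.2928932 (the
   constant above) and falls back to fyl2x on 1 + x otherwise; in both
   cases y is ln(2), loaded via fldln2.  */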
16003 #include "gt-i386.h"