1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
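/* Illustrative sketch, not part of the original source: MODE_INDEX picks the
   per-mode column of the multiply/divide cost arrays below.  Assuming the
   mult_init and divide member names used by struct processor_costs in i386.h,
   a cost lookup looks like:

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];  // column 2
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];     // column 3
*/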
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
506 const struct processor_costs *ix86_cost = &pentium_cost;
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just the lower part of
563 scalar values in the proper format, leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16 byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
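/* Sketch, not part of the original source, of how the masks above are
   consulted.  i386.h wraps them in TARGET_* macros; assuming the usual
   TUNEMASK convention (1 << ix86_tune), a test reads:

     if (x86_use_leave & (1 << ix86_tune))
       ;  // this tuning target prefers the "leave" instruction in epilogues
     if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
       ;  // architecture masks are tested against ix86_arch instead
*/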
578 /* In case the average insn count for single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
583 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
614 /* The "default" register map used in 32bit mode. */
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
627 static int const x86_64_int_parameter_registers[6] =
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
633 static int const x86_64_int_return_registers[4] =
635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
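/* Worked example, not part of the original source: assuming the 64-bit
   values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 from i386.h, this is
   6 * UNITS_PER_WORD (8) + 8 * 16 == 176 bytes, the x86-64 register save
   area that the varargs prologue spills into.  */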
725 /* Define the structure for the machine field in struct function. */
727 struct stack_local_entry GTY(())
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
735 /* Structure describing stack frame layout.
736 Stack grows downward:
738 [arguments]
739 <- ARG_POINTER
740 saved pc
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
746 [padding1] \
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
752 [padding2] /
754 struct ix86_frame
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
791 /* Which cpu are we scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
804 /* True if the SSE prefetch instruction is not a NOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
810 /* Alignment to use for loops and jumps: */
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
821 /* Preferred alignment for stack boundary in bits. */
822 unsigned int ix86_preferred_stack_boundary;
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
839 int, int, FILE *);
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
845 rtx *);
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
848 enum machine_mode);
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
883 struct ix86_address
885 rtx base, index, disp;
886 HOST_WIDE_INT scale;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
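/* Illustration, not part of the original source: ix86_decompose_address
   below fills this struct so that an operand such as 4(%ebx,%esi,8) in
   AT&T syntax comes back as base = %ebx, index = %esi, scale = 8,
   disp = 4 and seg = SEG_DEFAULT.  */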
890 static int ix86_decompose_address (rtx, struct ix86_address *);
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 tree, rtx);
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 tree, rtx);
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
931 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
932 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
933 tree, bool);
935 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
936 static void ix86_svr3_asm_out_constructor (rtx, int);
937 #endif
939 /* Register class used for passing a given 64bit part of the argument.
940 These represent classes as documented by the psABI, with the exception of
941 the SSESF and SSEDF classes, which are basically the SSE class, except that
942 gcc will use an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
944 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
945 whenever possible (the upper half then contains only padding). */
947 enum x86_64_reg_class
949 X86_64_NO_CLASS,
950 X86_64_INTEGER_CLASS,
951 X86_64_INTEGERSI_CLASS,
952 X86_64_SSE_CLASS,
953 X86_64_SSESF_CLASS,
954 X86_64_SSEDF_CLASS,
955 X86_64_SSEUP_CLASS,
956 X86_64_X87_CLASS,
957 X86_64_X87UP_CLASS,
958 X86_64_MEMORY_CLASS
960 static const char * const x86_64_reg_class_name[] =
961 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
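/* Example, not part of the original source: under the x86-64 ABI a
   struct { double d; int i; } argument spans two eightbytes, and
   classify_argument would be expected to give the first eightbyte
   X86_64_SSEDF_CLASS (the double) and the second X86_64_INTEGERSI_CLASS
   (the int plus 4 bytes of padding), so the value travels in one SSE
   register and one general-purpose register.  */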
963 #define MAX_CLASSES 4
964 static int classify_argument (enum machine_mode, tree,
965 enum x86_64_reg_class [MAX_CLASSES], int);
966 static int examine_argument (enum machine_mode, tree, int, int *, int *);
967 static rtx construct_container (enum machine_mode, tree, int, int, int,
968 const int *, int);
969 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
970 enum x86_64_reg_class);
972 /* Table of constants used by fldpi, fldln2, etc.... */
973 static REAL_VALUE_TYPE ext_80387_constants_table [5];
974 static bool ext_80387_constants_init = 0;
975 static void init_ext_80387_constants (void);
977 /* Initialize the GCC target structure. */
978 #undef TARGET_ATTRIBUTE_TABLE
979 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
980 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
981 # undef TARGET_MERGE_DECL_ATTRIBUTES
982 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
983 #endif
985 #undef TARGET_COMP_TYPE_ATTRIBUTES
986 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
988 #undef TARGET_INIT_BUILTINS
989 #define TARGET_INIT_BUILTINS ix86_init_builtins
991 #undef TARGET_EXPAND_BUILTIN
992 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
994 #undef TARGET_ASM_FUNCTION_EPILOGUE
995 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
997 #undef TARGET_ASM_OPEN_PAREN
998 #define TARGET_ASM_OPEN_PAREN ""
999 #undef TARGET_ASM_CLOSE_PAREN
1000 #define TARGET_ASM_CLOSE_PAREN ""
1002 #undef TARGET_ASM_ALIGNED_HI_OP
1003 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1004 #undef TARGET_ASM_ALIGNED_SI_OP
1005 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1006 #ifdef ASM_QUAD
1007 #undef TARGET_ASM_ALIGNED_DI_OP
1008 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1009 #endif
1011 #undef TARGET_ASM_UNALIGNED_HI_OP
1012 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1013 #undef TARGET_ASM_UNALIGNED_SI_OP
1014 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1015 #undef TARGET_ASM_UNALIGNED_DI_OP
1016 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1018 #undef TARGET_SCHED_ADJUST_COST
1019 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1020 #undef TARGET_SCHED_ISSUE_RATE
1021 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1022 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1023 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1024 ia32_multipass_dfa_lookahead
1026 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1027 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1029 #ifdef HAVE_AS_TLS
1030 #undef TARGET_HAVE_TLS
1031 #define TARGET_HAVE_TLS true
1032 #endif
1033 #undef TARGET_CANNOT_FORCE_CONST_MEM
1034 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1036 #undef TARGET_DELEGITIMIZE_ADDRESS
1037 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1039 #undef TARGET_MS_BITFIELD_LAYOUT_P
1040 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1042 #undef TARGET_ASM_OUTPUT_MI_THUNK
1043 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1044 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1045 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1047 #undef TARGET_ASM_FILE_START
1048 #define TARGET_ASM_FILE_START x86_file_start
1050 #undef TARGET_RTX_COSTS
1051 #define TARGET_RTX_COSTS ix86_rtx_costs
1052 #undef TARGET_ADDRESS_COST
1053 #define TARGET_ADDRESS_COST ix86_address_cost
1055 #undef TARGET_FIXED_CONDITION_CODE_REGS
1056 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1057 #undef TARGET_CC_MODES_COMPATIBLE
1058 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1060 #undef TARGET_MACHINE_DEPENDENT_REORG
1061 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1063 #undef TARGET_BUILD_BUILTIN_VA_LIST
1064 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1066 #undef TARGET_MD_ASM_CLOBBERS
1067 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1069 #undef TARGET_PROMOTE_PROTOTYPES
1070 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1071 #undef TARGET_STRUCT_VALUE_RTX
1072 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1073 #undef TARGET_SETUP_INCOMING_VARARGS
1074 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1075 #undef TARGET_MUST_PASS_IN_STACK
1076 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1077 #undef TARGET_PASS_BY_REFERENCE
1078 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1080 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1081 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1083 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1084 #undef TARGET_INSERT_ATTRIBUTES
1085 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1086 #endif
1088 struct gcc_target targetm = TARGET_INITIALIZER;
1091 /* The svr4 ABI for the i386 says that records and unions are returned
1092 in memory. */
1093 #ifndef DEFAULT_PCC_STRUCT_RETURN
1094 #define DEFAULT_PCC_STRUCT_RETURN 1
1095 #endif
1097 /* Sometimes certain combinations of command options do not make
1098 sense on a particular target machine. You can define a macro
1099 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1100 defined, is executed once just after all the command options have
1101 been parsed.
1103 Don't use this macro to turn on various extra optimizations for
1104 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1106 void
1107 override_options (void)
1109 int i;
1110 int ix86_tune_defaulted = 0;
1112 /* Comes from final.c -- no real reason to change it. */
1113 #define MAX_CODE_ALIGN 16
1115 static struct ptt
1117 const struct processor_costs *cost; /* Processor costs */
1118 const int target_enable; /* Target flags to enable. */
1119 const int target_disable; /* Target flags to disable. */
1120 const int align_loop; /* Default alignments. */
1121 const int align_loop_max_skip;
1122 const int align_jump;
1123 const int align_jump_max_skip;
1124 const int align_func;
1126 const processor_target_table[PROCESSOR_max] =
1128 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1129 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1130 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1131 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1132 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1133 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1134 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1135 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1136 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
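/* Example, not part of the original source: for -mtune=k6 the row above
   supplies the defaults align_loops = 32 (max skip 7), align_jumps = 32
   (max skip 7) and align_functions = 32, which are applied further down
   when no -falign-* options are given.  */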
1139 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1140 static struct pta
1142 const char *const name; /* processor name or nickname. */
1143 const enum processor_type processor;
1144 const enum pta_flags
1146 PTA_SSE = 1,
1147 PTA_SSE2 = 2,
1148 PTA_SSE3 = 4,
1149 PTA_MMX = 8,
1150 PTA_PREFETCH_SSE = 16,
1151 PTA_3DNOW = 32,
1152 PTA_3DNOW_A = 64,
1153 PTA_64BIT = 128
1154 } flags;
1156 const processor_alias_table[] =
1158 {"i386", PROCESSOR_I386, 0},
1159 {"i486", PROCESSOR_I486, 0},
1160 {"i586", PROCESSOR_PENTIUM, 0},
1161 {"pentium", PROCESSOR_PENTIUM, 0},
1162 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1163 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1164 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1165 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1166 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1167 {"i686", PROCESSOR_PENTIUMPRO, 0},
1168 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1169 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1170 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1171 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1172 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1173 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1174 | PTA_MMX | PTA_PREFETCH_SSE},
1175 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1176 | PTA_MMX | PTA_PREFETCH_SSE},
1177 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1178 | PTA_MMX | PTA_PREFETCH_SSE},
1179 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1180 | PTA_MMX | PTA_PREFETCH_SSE},
1181 {"k6", PROCESSOR_K6, PTA_MMX},
1182 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1183 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1184 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1185 | PTA_3DNOW_A},
1186 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1187 | PTA_3DNOW | PTA_3DNOW_A},
1188 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1189 | PTA_3DNOW_A | PTA_SSE},
1190 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1191 | PTA_3DNOW_A | PTA_SSE},
1192 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1193 | PTA_3DNOW_A | PTA_SSE},
1194 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1195 | PTA_SSE | PTA_SSE2 },
1196 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1197 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1198 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1199 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1200 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1201 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1202 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1203 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1206 int const pta_size = ARRAY_SIZE (processor_alias_table);
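/* Example, not part of the original source: with -march=k8 the matching
   loop below sets ix86_arch = PROCESSOR_K8, enables MASK_MMX, MASK_3DNOW,
   MASK_3DNOW_A, MASK_SSE and MASK_SSE2 unless the user set those flags
   explicitly, turns on x86_prefetch_sse, and, if -mtune was not given,
   also defaults ix86_tune to the same processor.  */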
1208 /* Set the default values for switches whose default depends on TARGET_64BIT
1209 in case they weren't overwritten by command line options. */
1210 if (TARGET_64BIT)
1212 if (flag_omit_frame_pointer == 2)
1213 flag_omit_frame_pointer = 1;
1214 if (flag_asynchronous_unwind_tables == 2)
1215 flag_asynchronous_unwind_tables = 1;
1216 if (flag_pcc_struct_return == 2)
1217 flag_pcc_struct_return = 0;
1219 else
1221 if (flag_omit_frame_pointer == 2)
1222 flag_omit_frame_pointer = 0;
1223 if (flag_asynchronous_unwind_tables == 2)
1224 flag_asynchronous_unwind_tables = 0;
1225 if (flag_pcc_struct_return == 2)
1226 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1229 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1230 SUBTARGET_OVERRIDE_OPTIONS;
1231 #endif
1233 if (!ix86_tune_string && ix86_arch_string)
1234 ix86_tune_string = ix86_arch_string;
1235 if (!ix86_tune_string)
1237 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1238 ix86_tune_defaulted = 1;
1240 if (!ix86_arch_string)
1241 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1243 if (ix86_cmodel_string != 0)
1245 if (!strcmp (ix86_cmodel_string, "small"))
1246 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1247 else if (flag_pic)
1248 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1249 else if (!strcmp (ix86_cmodel_string, "32"))
1250 ix86_cmodel = CM_32;
1251 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1252 ix86_cmodel = CM_KERNEL;
1253 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1254 ix86_cmodel = CM_MEDIUM;
1255 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1256 ix86_cmodel = CM_LARGE;
1257 else
1258 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1260 else
1262 ix86_cmodel = CM_32;
1263 if (TARGET_64BIT)
1264 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1266 if (ix86_asm_string != 0)
1268 if (!strcmp (ix86_asm_string, "intel"))
1269 ix86_asm_dialect = ASM_INTEL;
1270 else if (!strcmp (ix86_asm_string, "att"))
1271 ix86_asm_dialect = ASM_ATT;
1272 else
1273 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1275 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1276 error ("code model `%s' not supported in the %s bit mode",
1277 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1278 if (ix86_cmodel == CM_LARGE)
1279 sorry ("code model `large' not supported yet");
1280 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1281 sorry ("%i-bit mode not compiled in",
1282 (target_flags & MASK_64BIT) ? 64 : 32);
1284 for (i = 0; i < pta_size; i++)
1285 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1287 ix86_arch = processor_alias_table[i].processor;
1288 /* Default cpu tuning to the architecture. */
1289 ix86_tune = ix86_arch;
1290 if (processor_alias_table[i].flags & PTA_MMX
1291 && !(target_flags_explicit & MASK_MMX))
1292 target_flags |= MASK_MMX;
1293 if (processor_alias_table[i].flags & PTA_3DNOW
1294 && !(target_flags_explicit & MASK_3DNOW))
1295 target_flags |= MASK_3DNOW;
1296 if (processor_alias_table[i].flags & PTA_3DNOW_A
1297 && !(target_flags_explicit & MASK_3DNOW_A))
1298 target_flags |= MASK_3DNOW_A;
1299 if (processor_alias_table[i].flags & PTA_SSE
1300 && !(target_flags_explicit & MASK_SSE))
1301 target_flags |= MASK_SSE;
1302 if (processor_alias_table[i].flags & PTA_SSE2
1303 && !(target_flags_explicit & MASK_SSE2))
1304 target_flags |= MASK_SSE2;
1305 if (processor_alias_table[i].flags & PTA_SSE3
1306 && !(target_flags_explicit & MASK_SSE3))
1307 target_flags |= MASK_SSE3;
1308 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1309 x86_prefetch_sse = true;
1310 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1312 if (ix86_tune_defaulted)
1314 ix86_tune_string = "x86-64";
1315 for (i = 0; i < pta_size; i++)
1316 if (! strcmp (ix86_tune_string,
1317 processor_alias_table[i].name))
1318 break;
1319 ix86_tune = processor_alias_table[i].processor;
1321 else
1322 error ("CPU you selected does not support x86-64 "
1323 "instruction set");
1325 break;
1328 if (i == pta_size)
1329 error ("bad value (%s) for -march= switch", ix86_arch_string);
1331 for (i = 0; i < pta_size; i++)
1332 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1334 ix86_tune = processor_alias_table[i].processor;
1335 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1336 error ("CPU you selected does not support x86-64 instruction set");
1338 /* Intel CPUs have always interpreted SSE prefetch instructions as
1339 NOPs; so, we can enable SSE prefetch instructions even when
1340 -mtune (rather than -march) points us to a processor that has them.
1341 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1342 higher processors. */
1343 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1344 x86_prefetch_sse = true;
1345 break;
1347 if (i == pta_size)
1348 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1350 if (optimize_size)
1351 ix86_cost = &size_cost;
1352 else
1353 ix86_cost = processor_target_table[ix86_tune].cost;
1354 target_flags |= processor_target_table[ix86_tune].target_enable;
1355 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1357 /* Arrange to set up i386_stack_locals for all functions. */
1358 init_machine_status = ix86_init_machine_status;
1360 /* Validate -mregparm= value. */
1361 if (ix86_regparm_string)
1363 i = atoi (ix86_regparm_string);
1364 if (i < 0 || i > REGPARM_MAX)
1365 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1366 else
1367 ix86_regparm = i;
1369 else
1370 if (TARGET_64BIT)
1371 ix86_regparm = REGPARM_MAX;
1373 /* If the user has provided any of the -malign-* options,
1374 warn and use that value only if -falign-* is not set.
1375 Remove this code in GCC 3.2 or later. */
1376 if (ix86_align_loops_string)
1378 warning ("-malign-loops is obsolete, use -falign-loops");
1379 if (align_loops == 0)
1381 i = atoi (ix86_align_loops_string);
1382 if (i < 0 || i > MAX_CODE_ALIGN)
1383 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1384 else
1385 align_loops = 1 << i;
1389 if (ix86_align_jumps_string)
1391 warning ("-malign-jumps is obsolete, use -falign-jumps");
1392 if (align_jumps == 0)
1394 i = atoi (ix86_align_jumps_string);
1395 if (i < 0 || i > MAX_CODE_ALIGN)
1396 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1397 else
1398 align_jumps = 1 << i;
1402 if (ix86_align_funcs_string)
1404 warning ("-malign-functions is obsolete, use -falign-functions");
1405 if (align_functions == 0)
1407 i = atoi (ix86_align_funcs_string);
1408 if (i < 0 || i > MAX_CODE_ALIGN)
1409 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1410 else
1411 align_functions = 1 << i;
1415 /* Default align_* from the processor table. */
1416 if (align_loops == 0)
1418 align_loops = processor_target_table[ix86_tune].align_loop;
1419 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1421 if (align_jumps == 0)
1423 align_jumps = processor_target_table[ix86_tune].align_jump;
1424 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1426 if (align_functions == 0)
1428 align_functions = processor_target_table[ix86_tune].align_func;
1431 /* Validate -mpreferred-stack-boundary= value, or provide default.
1432 The default of 128 bits is for Pentium III's SSE __m128, but we
1433 don't want additional code to keep the stack aligned when
1434 optimizing for code size. */
1435 ix86_preferred_stack_boundary = (optimize_size
1436 ? TARGET_64BIT ? 128 : 32
1437 : 128);
1438 if (ix86_preferred_stack_boundary_string)
1440 i = atoi (ix86_preferred_stack_boundary_string);
1441 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1442 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1443 TARGET_64BIT ? 4 : 2);
1444 else
1445 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
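/* Worked example, not part of the original source: the default of 128 bits
   above corresponds to -mpreferred-stack-boundary=4, since
   (1 << 4) * BITS_PER_UNIT == 128, i.e. 16-byte stack alignment.  */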
1448 /* Validate -mbranch-cost= value, or provide default. */
1449 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1450 if (ix86_branch_cost_string)
1452 i = atoi (ix86_branch_cost_string);
1453 if (i < 0 || i > 5)
1454 error ("-mbranch-cost=%d is not between 0 and 5", i);
1455 else
1456 ix86_branch_cost = i;
1459 if (ix86_tls_dialect_string)
1461 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1462 ix86_tls_dialect = TLS_DIALECT_GNU;
1463 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1464 ix86_tls_dialect = TLS_DIALECT_SUN;
1465 else
1466 error ("bad value (%s) for -mtls-dialect= switch",
1467 ix86_tls_dialect_string);
1470 /* Keep nonleaf frame pointers. */
1471 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1472 flag_omit_frame_pointer = 1;
1474 /* If we're doing fast math, we don't care about comparison order
1475 wrt NaNs. This lets us use a shorter comparison sequence. */
1476 if (flag_unsafe_math_optimizations)
1477 target_flags &= ~MASK_IEEE_FP;
1479 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1480 since the insns won't need emulation. */
1481 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1482 target_flags &= ~MASK_NO_FANCY_MATH_387;
1484 /* Turn on SSE2 builtins for -msse3. */
1485 if (TARGET_SSE3)
1486 target_flags |= MASK_SSE2;
1488 /* Turn on SSE builtins for -msse2. */
1489 if (TARGET_SSE2)
1490 target_flags |= MASK_SSE;
1492 if (TARGET_64BIT)
1494 if (TARGET_ALIGN_DOUBLE)
1495 error ("-malign-double makes no sense in the 64bit mode");
1496 if (TARGET_RTD)
1497 error ("-mrtd calling convention not supported in the 64bit mode");
1498 /* Enable by default the SSE and MMX builtins. */
1499 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1500 ix86_fpmath = FPMATH_SSE;
1502 else
1504 ix86_fpmath = FPMATH_387;
1505 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1506 when the programmer takes care to keep the stack from being destroyed. */
1507 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1508 target_flags |= MASK_NO_RED_ZONE;
1511 if (ix86_fpmath_string != 0)
1513 if (! strcmp (ix86_fpmath_string, "387"))
1514 ix86_fpmath = FPMATH_387;
1515 else if (! strcmp (ix86_fpmath_string, "sse"))
1517 if (!TARGET_SSE)
1519 warning ("SSE instruction set disabled, using 387 arithmetics");
1520 ix86_fpmath = FPMATH_387;
1522 else
1523 ix86_fpmath = FPMATH_SSE;
1525 else if (! strcmp (ix86_fpmath_string, "387,sse")
1526 || ! strcmp (ix86_fpmath_string, "sse,387"))
1528 if (!TARGET_SSE)
1530 warning ("SSE instruction set disabled, using 387 arithmetics");
1531 ix86_fpmath = FPMATH_387;
1533 else if (!TARGET_80387)
1535 warning ("387 instruction set disabled, using SSE arithmetics");
1536 ix86_fpmath = FPMATH_SSE;
1538 else
1539 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1541 else
1542 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1545 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1546 on by -msse. */
1547 if (TARGET_SSE)
1549 target_flags |= MASK_MMX;
1550 x86_prefetch_sse = true;
1553 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1554 if (TARGET_3DNOW)
1556 target_flags |= MASK_MMX;
1557 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1558 extensions it adds. */
1559 if (x86_3dnow_a & (1 << ix86_arch))
1560 target_flags |= MASK_3DNOW_A;
1562 if ((x86_accumulate_outgoing_args & TUNEMASK)
1563 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1564 && !optimize_size)
1565 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1567 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1569 char *p;
1570 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1571 p = strchr (internal_label_prefix, 'X');
1572 internal_label_prefix_len = p - internal_label_prefix;
1573 *p = '\0';
1577 void
1578 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1580 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1581 make the register-shortage problem even worse. */
1582 #ifdef INSN_SCHEDULING
1583 if (level > 1)
1584 flag_schedule_insns = 0;
1585 #endif
1587 /* The default values of these switches depend on TARGET_64BIT,
1588 which is not known at this moment. Mark these values with 2 and
1589 let the user override them. In case there is no command line option
1590 specifying them, we will set the defaults in override_options. */
1591 if (optimize >= 1)
1592 flag_omit_frame_pointer = 2;
1593 flag_pcc_struct_return = 2;
1594 flag_asynchronous_unwind_tables = 2;
1597 /* Table of valid machine attributes. */
1598 const struct attribute_spec ix86_attribute_table[] =
1600 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1601 /* Stdcall attribute says callee is responsible for popping arguments
1602 if they are not variable. */
1603 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1604 /* Fastcall attribute says callee is responsible for popping arguments
1605 if they are not variable. */
1606 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1607 /* Cdecl attribute says the callee is a normal C declaration */
1608 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1609 /* Regparm attribute specifies how many integer arguments are to be
1610 passed in registers. */
1611 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1612 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1613 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1614 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1615 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1616 #endif
1617 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1618 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1619 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1620 SUBTARGET_ATTRIBUTE_TABLE,
1621 #endif
1622 { NULL, 0, 0, false, false, false, NULL }
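/* Illustrative uses of the attributes above (hypothetical user code):

     int __attribute__ ((stdcall)) f (int a, int b);        callee pops its arguments
     int __attribute__ ((fastcall)) g (int a, int b);       first arguments in ECX/EDX
     int __attribute__ ((regparm (3))) h (int a, int b, int c);  arguments in EAX/EDX/ECX
     struct __attribute__ ((ms_struct)) s { int i : 3; };   MS-compatible field layout

   dllimport, dllexport and shared are only visible when
   TARGET_DLLIMPORT_DECL_ATTRIBUTES is defined for the target.  */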
1625 /* Decide whether we can make a sibling call to a function. DECL is the
1626 declaration of the function being targeted by the call and EXP is the
1627 CALL_EXPR representing the call. */
1629 static bool
1630 ix86_function_ok_for_sibcall (tree decl, tree exp)
1632 /* If we are generating position-independent code, we cannot sibcall
1633 optimize any indirect call, or a direct call to a global function,
1634 as the PLT requires %ebx be live. */
1635 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1636 return false;
1638 /* If we are returning floats on the 80387 register stack, we cannot
1639 make a sibcall from a function that doesn't return a float to a
1640 function that does or, conversely, from a function that does return
1641 a float to a function that doesn't; the necessary stack adjustment
1642 would not be executed. */
1643 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1644 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1645 return false;
1647 /* If this call is indirect, we'll need to be able to use a call-clobbered
1648 register for the address of the target function. Make sure that all
1649 such registers are not used for passing parameters. */
1650 if (!decl && !TARGET_64BIT)
1652 tree type;
1654 /* We're looking at the CALL_EXPR, we need the type of the function. */
1655 type = TREE_OPERAND (exp, 0); /* pointer expression */
1656 type = TREE_TYPE (type); /* pointer type */
1657 type = TREE_TYPE (type); /* function type */
1659 if (ix86_function_regparm (type, NULL) >= 3)
1661 /* ??? Need to count the actual number of registers to be used,
1662 not the possible number of registers. Fix later. */
1663 return false;
1667 /* Otherwise okay. That also includes certain types of indirect calls. */
1668 return true;
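/* Illustrative (hypothetical user code): when compiling 32-bit PIC code,
   the tail call in

     extern int bar (int);
     int foo (int x) { return bar (x); }

   is not sibcall-optimized, because the call to the global bar goes
   through the PLT and needs %ebx to carry the GOT pointer.  */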
1671 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1672 arguments as in struct attribute_spec.handler. */
1673 static tree
1674 ix86_handle_cdecl_attribute (tree *node, tree name,
1675 tree args ATTRIBUTE_UNUSED,
1676 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1678 if (TREE_CODE (*node) != FUNCTION_TYPE
1679 && TREE_CODE (*node) != METHOD_TYPE
1680 && TREE_CODE (*node) != FIELD_DECL
1681 && TREE_CODE (*node) != TYPE_DECL)
1683 warning ("`%s' attribute only applies to functions",
1684 IDENTIFIER_POINTER (name));
1685 *no_add_attrs = true;
1687 else
1689 if (is_attribute_p ("fastcall", name))
1691 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1693 error ("fastcall and stdcall attributes are not compatible");
1695 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1697 error ("fastcall and regparm attributes are not compatible");
1700 else if (is_attribute_p ("stdcall", name))
1702 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1704 error ("fastcall and stdcall attributes are not compatible");
1709 if (TARGET_64BIT)
1711 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1712 *no_add_attrs = true;
1715 return NULL_TREE;
1718 /* Handle a "regparm" attribute;
1719 arguments as in struct attribute_spec.handler. */
1720 static tree
1721 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1722 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1724 if (TREE_CODE (*node) != FUNCTION_TYPE
1725 && TREE_CODE (*node) != METHOD_TYPE
1726 && TREE_CODE (*node) != FIELD_DECL
1727 && TREE_CODE (*node) != TYPE_DECL)
1729 warning ("`%s' attribute only applies to functions",
1730 IDENTIFIER_POINTER (name));
1731 *no_add_attrs = true;
1733 else
1735 tree cst;
1737 cst = TREE_VALUE (args);
1738 if (TREE_CODE (cst) != INTEGER_CST)
1740 warning ("`%s' attribute requires an integer constant argument",
1741 IDENTIFIER_POINTER (name));
1742 *no_add_attrs = true;
1744 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1746 warning ("argument to `%s' attribute larger than %d",
1747 IDENTIFIER_POINTER (name), REGPARM_MAX);
1748 *no_add_attrs = true;
1751 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1753 error ("fastcall and regparm attributes are not compatible");
1757 return NULL_TREE;
1760 /* Return 0 if the attributes for two types are incompatible, 1 if they
1761 are compatible, and 2 if they are nearly compatible (which causes a
1762 warning to be generated). */
1764 static int
1765 ix86_comp_type_attributes (tree type1, tree type2)
1767 /* Check for mismatch of non-default calling convention. */
1768 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1770 if (TREE_CODE (type1) != FUNCTION_TYPE)
1771 return 1;
1773 /* Check for mismatched fastcall types */
1774 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1775 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1776 return 0;
1778 /* Check for mismatched return types (cdecl vs stdcall). */
1779 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1780 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1781 return 0;
1782 if (ix86_function_regparm (type1, NULL)
1783 != ix86_function_regparm (type2, NULL))
1784 return 0;
1785 return 1;
1788 /* Return the regparm value for a function with the indicated TYPE and DECL.
1789 DECL may be NULL when calling function indirectly
1790 or considering a libcall. */
1792 static int
1793 ix86_function_regparm (tree type, tree decl)
1795 tree attr;
1796 int regparm = ix86_regparm;
1797 bool user_convention = false;
1799 if (!TARGET_64BIT)
1801 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1802 if (attr)
1804 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1808 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1810 regparm = 2;
1811 user_convention = true;
1814 /* Use register calling convention for local functions when possible. */
1815 if (!TARGET_64BIT && !user_convention && decl
1816 && flag_unit_at_a_time && !profile_flag)
1818 struct cgraph_local_info *i = cgraph_local_info (decl);
1819 if (i && i->local)
1821 /* We can't use regparm(3) for nested functions as these use
1822 static chain pointer in third argument. */
1823 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1824 regparm = 2;
1825 else
1826 regparm = 3;
1830 return regparm;
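/* Illustrative (hypothetical user code): with -funit-at-a-time on ia32,
   a file-local function whose address is never taken, such as

     static int add (int a, int b) { return a + b; }

   may be promoted to the regparm(3) convention automatically, so its
   arguments arrive in registers without any explicit attribute.  */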
1833 /* Return true if EAX is live at the start of the function. Used by
1834 ix86_expand_prologue to determine if we need special help before
1835 calling allocate_stack_worker. */
1837 static bool
1838 ix86_eax_live_at_start_p (void)
1840 /* Cheat. Don't bother working forward from ix86_function_regparm
1841 to the function type to whether an actual argument is located in
1842 eax. Instead just look at cfg info, which is still close enough
1843 to correct at this point. This gives false positives for broken
1844 functions that might use uninitialized data that happens to be
1845 allocated in eax, but who cares? */
1846 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1849 /* Value is the number of bytes of arguments automatically
1850 popped when returning from a subroutine call.
1851 FUNDECL is the declaration node of the function (as a tree),
1852 FUNTYPE is the data type of the function (as a tree),
1853 or for a library call it is an identifier node for the subroutine name.
1854 SIZE is the number of bytes of arguments passed on the stack.
1856 On the 80386, the RTD insn may be used to pop them if the number
1857 of args is fixed, but if the number is variable then the caller
1858 must pop them all. RTD can't be used for library calls now
1859 because the library is compiled with the Unix compiler.
1860 Use of RTD is a selectable option, since it is incompatible with
1861 standard Unix calling sequences. If the option is not selected,
1862 the caller must always pop the args.
1864 The attribute stdcall is equivalent to RTD on a per module basis. */
1867 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1869 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1871 /* Cdecl functions override -mrtd, and never pop the stack. */
1872 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1874 /* Stdcall and fastcall functions will pop the stack if not
1875 variable args. */
1876 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1877 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1878 rtd = 1;
1880 if (rtd
1881 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1882 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1883 == void_type_node)))
1884 return size;
1887 /* Lose any fake structure return argument if it is passed on the stack. */
1888 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1889 && !TARGET_64BIT)
1891 int nregs = ix86_function_regparm (funtype, fundecl);
1893 if (!nregs)
1894 return GET_MODE_SIZE (Pmode);
1897 return 0;
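/* Illustrative (hypothetical user code): for

     void __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 and the argument list is fixed, so f itself pops the 8
   bytes (e.g. with "ret $8"); a cdecl or variadic function returns
   with a plain "ret" and the caller adjusts the stack.  */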
1900 /* Argument support functions. */
1902 /* Return true when register may be used to pass function parameters. */
1903 bool
1904 ix86_function_arg_regno_p (int regno)
1906 int i;
1907 if (!TARGET_64BIT)
1908 return (regno < REGPARM_MAX
1909 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1910 if (SSE_REGNO_P (regno) && TARGET_SSE)
1911 return true;
1912 /* RAX is used as a hidden argument to va_arg functions. */
1913 if (!regno)
1914 return true;
1915 for (i = 0; i < REGPARM_MAX; i++)
1916 if (regno == x86_64_int_parameter_registers[i])
1917 return true;
1918 return false;
1921 /* Return true if we do not know how to pass TYPE solely in registers. */
1923 static bool
1924 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
1926 if (must_pass_in_stack_var_size_or_pad (mode, type))
1927 return true;
1928 return (!TARGET_64BIT && type && mode == TImode);
1931 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1932 for a call to a function whose data type is FNTYPE.
1933 For a library call, FNTYPE is 0. */
1935 void
1936 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1937 tree fntype, /* tree ptr for function decl */
1938 rtx libname, /* SYMBOL_REF of library name or 0 */
1939 tree fndecl)
1941 static CUMULATIVE_ARGS zero_cum;
1942 tree param, next_param;
1944 if (TARGET_DEBUG_ARG)
1946 fprintf (stderr, "\ninit_cumulative_args (");
1947 if (fntype)
1948 fprintf (stderr, "fntype code = %s, ret code = %s",
1949 tree_code_name[(int) TREE_CODE (fntype)],
1950 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1951 else
1952 fprintf (stderr, "no fntype");
1954 if (libname)
1955 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1958 *cum = zero_cum;
1960 /* Set up the number of registers to use for passing arguments. */
1961 if (fntype)
1962 cum->nregs = ix86_function_regparm (fntype, fndecl);
1963 else
1964 cum->nregs = ix86_regparm;
1965 if (TARGET_SSE)
1966 cum->sse_nregs = SSE_REGPARM_MAX;
1967 if (TARGET_MMX)
1968 cum->mmx_nregs = MMX_REGPARM_MAX;
1969 cum->warn_sse = true;
1970 cum->warn_mmx = true;
1971 cum->maybe_vaarg = false;
1973 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1974 if (fntype && !TARGET_64BIT)
1976 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1978 cum->nregs = 2;
1979 cum->fastcall = 1;
1983 /* Determine if this function has variable arguments. This is
1984 indicated by the last argument being 'void_type_node' if there
1985 are no variable arguments. If there are variable arguments, then
1986 we won't pass anything in registers in 32-bit mode. */
1988 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
1990 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1991 param != 0; param = next_param)
1993 next_param = TREE_CHAIN (param);
1994 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1996 if (!TARGET_64BIT)
1998 cum->nregs = 0;
1999 cum->sse_nregs = 0;
2000 cum->mmx_nregs = 0;
2001 cum->warn_sse = 0;
2002 cum->warn_mmx = 0;
2003 cum->fastcall = 0;
2005 cum->maybe_vaarg = true;
2009 if ((!fntype && !libname)
2010 || (fntype && !TYPE_ARG_TYPES (fntype)))
2011 cum->maybe_vaarg = 1;
2013 if (TARGET_DEBUG_ARG)
2014 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2016 return;
2019 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2020 of this code is to classify each 8bytes of incoming argument by the register
2021 class and assign registers accordingly. */
2023 /* Return the union class of CLASS1 and CLASS2.
2024 See the x86-64 PS ABI for details. */
2026 static enum x86_64_reg_class
2027 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2029 /* Rule #1: If both classes are equal, this is the resulting class. */
2030 if (class1 == class2)
2031 return class1;
2033 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2034 the other class. */
2035 if (class1 == X86_64_NO_CLASS)
2036 return class2;
2037 if (class2 == X86_64_NO_CLASS)
2038 return class1;
2040 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2041 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2042 return X86_64_MEMORY_CLASS;
2044 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2045 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2046 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2047 return X86_64_INTEGERSI_CLASS;
2048 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2049 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2050 return X86_64_INTEGER_CLASS;
2052 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2053 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2054 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2055 return X86_64_MEMORY_CLASS;
2057 /* Rule #6: Otherwise class SSE is used. */
2058 return X86_64_SSE_CLASS;
2061 /* Classify the argument of type TYPE and mode MODE.
2062 CLASSES will be filled by the register class used to pass each word
2063 of the operand. The number of words is returned. In case the parameter
2064 should be passed in memory, 0 is returned. As a special case for zero
2065 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2067 BIT_OFFSET is used internally for handling records and specifies the
2068 offset in bits modulo 256 to avoid overflow cases.
2070 See the x86-64 PS ABI for details.
2073 static int
2074 classify_argument (enum machine_mode mode, tree type,
2075 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2077 HOST_WIDE_INT bytes =
2078 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2079 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2081 /* Variable sized entities are always passed/returned in memory. */
2082 if (bytes < 0)
2083 return 0;
2085 if (mode != VOIDmode
2086 && targetm.calls.must_pass_in_stack (mode, type))
2087 return 0;
2089 if (type && AGGREGATE_TYPE_P (type))
2091 int i;
2092 tree field;
2093 enum x86_64_reg_class subclasses[MAX_CLASSES];
2095 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2096 if (bytes > 16)
2097 return 0;
2099 for (i = 0; i < words; i++)
2100 classes[i] = X86_64_NO_CLASS;
2102 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2103 signal the memory class, so handle this as a special case. */
2104 if (!words)
2106 classes[0] = X86_64_NO_CLASS;
2107 return 1;
2110 /* Classify each field of record and merge classes. */
2111 if (TREE_CODE (type) == RECORD_TYPE)
2113 /* For classes first merge in the field of the subclasses. */
2114 if (TYPE_BINFO (type))
2116 tree binfo, base_binfo;
2117 int i;
2119 for (binfo = TYPE_BINFO (type), i = 0;
2120 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2122 int num;
2123 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2124 tree type = BINFO_TYPE (base_binfo);
2126 num = classify_argument (TYPE_MODE (type),
2127 type, subclasses,
2128 (offset + bit_offset) % 256);
2129 if (!num)
2130 return 0;
2131 for (i = 0; i < num; i++)
2133 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2134 classes[i + pos] =
2135 merge_classes (subclasses[i], classes[i + pos]);
2139 /* And now merge the fields of structure. */
2140 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2142 if (TREE_CODE (field) == FIELD_DECL)
2144 int num;
2146 /* Bitfields are always classified as integer. Handle them
2147 early, since later code would consider them to be
2148 misaligned integers. */
2149 if (DECL_BIT_FIELD (field))
2151 for (i = int_bit_position (field) / 8 / 8;
2152 i < (int_bit_position (field)
2153 + tree_low_cst (DECL_SIZE (field), 0)
2154 + 63) / 8 / 8; i++)
2155 classes[i] =
2156 merge_classes (X86_64_INTEGER_CLASS,
2157 classes[i]);
2159 else
2161 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2162 TREE_TYPE (field), subclasses,
2163 (int_bit_position (field)
2164 + bit_offset) % 256);
2165 if (!num)
2166 return 0;
2167 for (i = 0; i < num; i++)
2169 int pos =
2170 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2171 classes[i + pos] =
2172 merge_classes (subclasses[i], classes[i + pos]);
2178 /* Arrays are handled as small records. */
2179 else if (TREE_CODE (type) == ARRAY_TYPE)
2181 int num;
2182 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2183 TREE_TYPE (type), subclasses, bit_offset);
2184 if (!num)
2185 return 0;
2187 /* The partial classes are now full classes. */
2188 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2189 subclasses[0] = X86_64_SSE_CLASS;
2190 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2191 subclasses[0] = X86_64_INTEGER_CLASS;
2193 for (i = 0; i < words; i++)
2194 classes[i] = subclasses[i % num];
2196 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2197 else if (TREE_CODE (type) == UNION_TYPE
2198 || TREE_CODE (type) == QUAL_UNION_TYPE)
2200 /* For classes first merge in the field of the subclasses. */
2201 if (TYPE_BINFO (type))
2203 tree binfo, base_binfo;
2204 int i;
2206 for (binfo = TYPE_BINFO (type), i = 0;
2207 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2209 int num;
2210 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2211 tree type = BINFO_TYPE (base_binfo);
2213 num = classify_argument (TYPE_MODE (type),
2214 type, subclasses,
2215 (offset + (bit_offset % 64)) % 256);
2216 if (!num)
2217 return 0;
2218 for (i = 0; i < num; i++)
2220 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2221 classes[i + pos] =
2222 merge_classes (subclasses[i], classes[i + pos]);
2226 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2228 if (TREE_CODE (field) == FIELD_DECL)
2230 int num;
2231 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2232 TREE_TYPE (field), subclasses,
2233 bit_offset);
2234 if (!num)
2235 return 0;
2236 for (i = 0; i < num; i++)
2237 classes[i] = merge_classes (subclasses[i], classes[i]);
2241 else if (TREE_CODE (type) == SET_TYPE)
2243 if (bytes <= 4)
2245 classes[0] = X86_64_INTEGERSI_CLASS;
2246 return 1;
2248 else if (bytes <= 8)
2250 classes[0] = X86_64_INTEGER_CLASS;
2251 return 1;
2253 else if (bytes <= 12)
2255 classes[0] = X86_64_INTEGER_CLASS;
2256 classes[1] = X86_64_INTEGERSI_CLASS;
2257 return 2;
2259 else
2261 classes[0] = X86_64_INTEGER_CLASS;
2262 classes[1] = X86_64_INTEGER_CLASS;
2263 return 2;
2266 else
2267 abort ();
2269 /* Final merger cleanup. */
2270 for (i = 0; i < words; i++)
2272 /* If one class is MEMORY, everything should be passed in
2273 memory. */
2274 if (classes[i] == X86_64_MEMORY_CLASS)
2275 return 0;
2277 /* The X86_64_SSEUP_CLASS should be always preceded by
2278 X86_64_SSE_CLASS. */
2279 if (classes[i] == X86_64_SSEUP_CLASS
2280 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2281 classes[i] = X86_64_SSE_CLASS;
2283 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2284 if (classes[i] == X86_64_X87UP_CLASS
2285 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2286 classes[i] = X86_64_SSE_CLASS;
2288 return words;
2291 /* Compute the alignment needed. We align all types to natural boundaries with
2292 the exception of XFmode, which is aligned to 64 bits. */
2293 if (mode != VOIDmode && mode != BLKmode)
2295 int mode_alignment = GET_MODE_BITSIZE (mode);
2297 if (mode == XFmode)
2298 mode_alignment = 128;
2299 else if (mode == XCmode)
2300 mode_alignment = 256;
2301 if (COMPLEX_MODE_P (mode))
2302 mode_alignment /= 2;
2303 /* Misaligned fields are always returned in memory. */
2304 if (bit_offset % mode_alignment)
2305 return 0;
2308 /* For V1xx modes, just use the base mode. */
2309 if (VECTOR_MODE_P (mode)
2310 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2311 mode = GET_MODE_INNER (mode);
2313 /* Classification of atomic types. */
2314 switch (mode)
2316 case DImode:
2317 case SImode:
2318 case HImode:
2319 case QImode:
2320 case CSImode:
2321 case CHImode:
2322 case CQImode:
2323 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2324 classes[0] = X86_64_INTEGERSI_CLASS;
2325 else
2326 classes[0] = X86_64_INTEGER_CLASS;
2327 return 1;
2328 case CDImode:
2329 case TImode:
2330 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2331 return 2;
2332 case CTImode:
2333 return 0;
2334 case SFmode:
2335 if (!(bit_offset % 64))
2336 classes[0] = X86_64_SSESF_CLASS;
2337 else
2338 classes[0] = X86_64_SSE_CLASS;
2339 return 1;
2340 case DFmode:
2341 classes[0] = X86_64_SSEDF_CLASS;
2342 return 1;
2343 case XFmode:
2344 classes[0] = X86_64_X87_CLASS;
2345 classes[1] = X86_64_X87UP_CLASS;
2346 return 2;
2347 case TFmode:
2348 classes[0] = X86_64_SSE_CLASS;
2349 classes[1] = X86_64_SSEUP_CLASS;
2350 return 2;
2351 case SCmode:
2352 classes[0] = X86_64_SSE_CLASS;
2353 return 1;
2354 case DCmode:
2355 classes[0] = X86_64_SSEDF_CLASS;
2356 classes[1] = X86_64_SSEDF_CLASS;
2357 return 2;
2358 case XCmode:
2359 case TCmode:
2360 /* These modes are larger than 16 bytes. */
2361 return 0;
2362 case V4SFmode:
2363 case V4SImode:
2364 case V16QImode:
2365 case V8HImode:
2366 case V2DFmode:
2367 case V2DImode:
2368 classes[0] = X86_64_SSE_CLASS;
2369 classes[1] = X86_64_SSEUP_CLASS;
2370 return 2;
2371 case V2SFmode:
2372 case V2SImode:
2373 case V4HImode:
2374 case V8QImode:
2375 classes[0] = X86_64_SSE_CLASS;
2376 return 1;
2377 case BLKmode:
2378 case VOIDmode:
2379 return 0;
2380 default:
2381 if (VECTOR_MODE_P (mode))
2383 if (bytes > 16)
2384 return 0;
2385 if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT)
2387 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2388 classes[0] = X86_64_INTEGERSI_CLASS;
2389 else
2390 classes[0] = X86_64_INTEGER_CLASS;
2391 classes[1] = X86_64_INTEGER_CLASS;
2392 return 1 + (bytes > 8);
2395 abort ();
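/* Worked example (illustrative): on x86-64 the 16 byte struct

     struct s { double d; int i; };

   spans two 8 byte words.  The first word holds only the double and is
   classified X86_64_SSEDF_CLASS; the second holds the int and gets an
   integer class, so the struct travels in one SSE register plus one
   integer register.  A struct larger than 16 bytes, or one containing
   a long double (X87 classes), is passed in memory instead.  */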
2399 /* Examine the argument and return the number of registers required in each
2400 class. Return 0 iff the parameter should be passed in memory. */
2401 static int
2402 examine_argument (enum machine_mode mode, tree type, int in_return,
2403 int *int_nregs, int *sse_nregs)
2405 enum x86_64_reg_class class[MAX_CLASSES];
2406 int n = classify_argument (mode, type, class, 0);
2408 *int_nregs = 0;
2409 *sse_nregs = 0;
2410 if (!n)
2411 return 0;
2412 for (n--; n >= 0; n--)
2413 switch (class[n])
2415 case X86_64_INTEGER_CLASS:
2416 case X86_64_INTEGERSI_CLASS:
2417 (*int_nregs)++;
2418 break;
2419 case X86_64_SSE_CLASS:
2420 case X86_64_SSESF_CLASS:
2421 case X86_64_SSEDF_CLASS:
2422 (*sse_nregs)++;
2423 break;
2424 case X86_64_NO_CLASS:
2425 case X86_64_SSEUP_CLASS:
2426 break;
2427 case X86_64_X87_CLASS:
2428 case X86_64_X87UP_CLASS:
2429 if (!in_return)
2430 return 0;
2431 break;
2432 case X86_64_MEMORY_CLASS:
2433 abort ();
2435 return 1;
2437 /* Construct container for the argument used by GCC interface. See
2438 FUNCTION_ARG for the detailed description. */
2439 static rtx
2440 construct_container (enum machine_mode mode, tree type, int in_return,
2441 int nintregs, int nsseregs, const int * intreg,
2442 int sse_regno)
2444 enum machine_mode tmpmode;
2445 int bytes =
2446 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2447 enum x86_64_reg_class class[MAX_CLASSES];
2448 int n;
2449 int i;
2450 int nexps = 0;
2451 int needed_sseregs, needed_intregs;
2452 rtx exp[MAX_CLASSES];
2453 rtx ret;
2455 n = classify_argument (mode, type, class, 0);
2456 if (TARGET_DEBUG_ARG)
2458 if (!n)
2459 fprintf (stderr, "Memory class\n");
2460 else
2462 fprintf (stderr, "Classes:");
2463 for (i = 0; i < n; i++)
2465 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2467 fprintf (stderr, "\n");
2470 if (!n)
2471 return NULL;
2472 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2473 return NULL;
2474 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2475 return NULL;
2477 /* First construct simple cases. Avoid SCmode, since we want to use
2478 single register to pass this type. */
2479 if (n == 1 && mode != SCmode)
2480 switch (class[0])
2482 case X86_64_INTEGER_CLASS:
2483 case X86_64_INTEGERSI_CLASS:
2484 return gen_rtx_REG (mode, intreg[0]);
2485 case X86_64_SSE_CLASS:
2486 case X86_64_SSESF_CLASS:
2487 case X86_64_SSEDF_CLASS:
2488 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2489 case X86_64_X87_CLASS:
2490 return gen_rtx_REG (mode, FIRST_STACK_REG);
2491 case X86_64_NO_CLASS:
2492 /* Zero sized array, struct or class. */
2493 return NULL;
2494 default:
2495 abort ();
2497 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2498 && mode != BLKmode)
2499 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2500 if (n == 2
2501 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2502 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2503 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2504 && class[1] == X86_64_INTEGER_CLASS
2505 && (mode == CDImode || mode == TImode || mode == TFmode)
2506 && intreg[0] + 1 == intreg[1])
2507 return gen_rtx_REG (mode, intreg[0]);
2508 if (n == 4
2509 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2510 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2511 && mode != BLKmode)
2512 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2514 /* Otherwise figure out the entries of the PARALLEL. */
2515 for (i = 0; i < n; i++)
2517 switch (class[i])
2519 case X86_64_NO_CLASS:
2520 break;
2521 case X86_64_INTEGER_CLASS:
2522 case X86_64_INTEGERSI_CLASS:
2523 /* Merge TImodes on aligned occasions here too. */
2524 if (i * 8 + 8 > bytes)
2525 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2526 else if (class[i] == X86_64_INTEGERSI_CLASS)
2527 tmpmode = SImode;
2528 else
2529 tmpmode = DImode;
2530 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2531 if (tmpmode == BLKmode)
2532 tmpmode = DImode;
2533 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2534 gen_rtx_REG (tmpmode, *intreg),
2535 GEN_INT (i*8));
2536 intreg++;
2537 break;
2538 case X86_64_SSESF_CLASS:
2539 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2540 gen_rtx_REG (SFmode,
2541 SSE_REGNO (sse_regno)),
2542 GEN_INT (i*8));
2543 sse_regno++;
2544 break;
2545 case X86_64_SSEDF_CLASS:
2546 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2547 gen_rtx_REG (DFmode,
2548 SSE_REGNO (sse_regno)),
2549 GEN_INT (i*8));
2550 sse_regno++;
2551 break;
2552 case X86_64_SSE_CLASS:
2553 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2554 tmpmode = TImode;
2555 else
2556 tmpmode = DImode;
2557 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2558 gen_rtx_REG (tmpmode,
2559 SSE_REGNO (sse_regno)),
2560 GEN_INT (i*8));
2561 if (tmpmode == TImode)
2562 i++;
2563 sse_regno++;
2564 break;
2565 default:
2566 abort ();
2569 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2570 for (i = 0; i < nexps; i++)
2571 XVECEXP (ret, 0, i) = exp [i];
2572 return ret;
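/* Illustrative sketch: for the struct { double d; int i; } example
   above, passed as the first argument, the container built here is
   roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the low 8 bytes travel in an SSE register and the remaining
   word in an integer register.  */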
2575 /* Update the data in CUM to advance over an argument
2576 of mode MODE and data type TYPE.
2577 (TYPE is null for libcalls where that information may not be available.) */
2579 void
2580 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2581 enum machine_mode mode, /* current arg mode */
2582 tree type, /* type of the argument or 0 if lib support */
2583 int named) /* whether or not the argument was named */
2585 int bytes =
2586 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2587 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2589 if (TARGET_DEBUG_ARG)
2590 fprintf (stderr,
2591 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2592 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2593 if (TARGET_64BIT)
2595 int int_nregs, sse_nregs;
2596 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2597 cum->words += words;
2598 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2600 cum->nregs -= int_nregs;
2601 cum->sse_nregs -= sse_nregs;
2602 cum->regno += int_nregs;
2603 cum->sse_regno += sse_nregs;
2605 else
2606 cum->words += words;
2608 else
2610 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2611 && (!type || !AGGREGATE_TYPE_P (type)))
2613 cum->sse_words += words;
2614 cum->sse_nregs -= 1;
2615 cum->sse_regno += 1;
2616 if (cum->sse_nregs <= 0)
2618 cum->sse_nregs = 0;
2619 cum->sse_regno = 0;
2622 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2623 && (!type || !AGGREGATE_TYPE_P (type)))
2625 cum->mmx_words += words;
2626 cum->mmx_nregs -= 1;
2627 cum->mmx_regno += 1;
2628 if (cum->mmx_nregs <= 0)
2630 cum->mmx_nregs = 0;
2631 cum->mmx_regno = 0;
2634 else
2636 cum->words += words;
2637 cum->nregs -= words;
2638 cum->regno += words;
2640 if (cum->nregs <= 0)
2642 cum->nregs = 0;
2643 cum->regno = 0;
2647 return;
2650 /* Define where to put the arguments to a function.
2651 Value is zero to push the argument on the stack,
2652 or a hard register in which to store the argument.
2654 MODE is the argument's machine mode.
2655 TYPE is the data type of the argument (as a tree).
2656 This is null for libcalls where that information may
2657 not be available.
2658 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2659 the preceding args and about the function being called.
2660 NAMED is nonzero if this argument is a named parameter
2661 (otherwise it is an extra parameter matching an ellipsis). */
2664 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2665 enum machine_mode mode, /* current arg mode */
2666 tree type, /* type of the argument or 0 if lib support */
2667 int named) /* != 0 for normal args, == 0 for ... args */
2669 rtx ret = NULL_RTX;
2670 int bytes =
2671 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2672 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2673 static bool warnedsse, warnedmmx;
2675 /* To simplify the code below, represent vector types with a vector mode
2676 even if MMX/SSE are not active. */
2677 if (type
2678 && TREE_CODE (type) == VECTOR_TYPE
2679 && (bytes == 8 || bytes == 16)
2680 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_INT
2681 && GET_MODE_CLASS (TYPE_MODE (type)) != MODE_VECTOR_FLOAT)
2683 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2684 mode = TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
2685 ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT;
2687 /* Get the mode which has this inner mode and number of units. */
2688 while (GET_MODE_NUNITS (mode) != TYPE_VECTOR_SUBPARTS (type)
2689 || GET_MODE_INNER (mode) != innermode)
2691 mode = GET_MODE_WIDER_MODE (mode);
2692 if (mode == VOIDmode)
2693 abort ();
2697 /* Handle a hidden AL argument containing number of registers for varargs
2698 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2699 any AL settings. */
2700 if (mode == VOIDmode)
2702 if (TARGET_64BIT)
2703 return GEN_INT (cum->maybe_vaarg
2704 ? (cum->sse_nregs < 0
2705 ? SSE_REGPARM_MAX
2706 : cum->sse_regno)
2707 : -1);
2708 else
2709 return constm1_rtx;
2711 if (TARGET_64BIT)
2712 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2713 &x86_64_int_parameter_registers [cum->regno],
2714 cum->sse_regno);
2715 else
2716 switch (mode)
2718 /* For now, pass fp/complex values on the stack. */
2719 default:
2720 break;
2722 case BLKmode:
2723 if (bytes < 0)
2724 break;
2725 /* FALLTHRU */
2726 case DImode:
2727 case SImode:
2728 case HImode:
2729 case QImode:
2730 if (words <= cum->nregs)
2732 int regno = cum->regno;
2734 /* Fastcall allocates the first two DWORD (SImode) or
2735 smaller arguments to ECX and EDX. */
2736 if (cum->fastcall)
2738 if (mode == BLKmode || mode == DImode)
2739 break;
2741 /* ECX not EAX is the first allocated register. */
2742 if (regno == 0)
2743 regno = 2;
2745 ret = gen_rtx_REG (mode, regno);
2747 break;
2748 case TImode:
2749 case V16QImode:
2750 case V8HImode:
2751 case V4SImode:
2752 case V2DImode:
2753 case V4SFmode:
2754 case V2DFmode:
2755 if (!type || !AGGREGATE_TYPE_P (type))
2757 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2759 warnedsse = true;
2760 warning ("SSE vector argument without SSE enabled "
2761 "changes the ABI");
2763 if (cum->sse_nregs)
2764 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2766 break;
2767 case V8QImode:
2768 case V4HImode:
2769 case V2SImode:
2770 case V2SFmode:
2771 if (!type || !AGGREGATE_TYPE_P (type))
2773 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2775 warnedmmx = true;
2776 warning ("MMX vector argument without MMX enabled "
2777 "changes the ABI");
2779 if (cum->mmx_nregs)
2780 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2782 break;
2785 if (TARGET_DEBUG_ARG)
2787 fprintf (stderr,
2788 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2789 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2791 if (ret)
2792 print_simple_rtl (stderr, ret);
2793 else
2794 fprintf (stderr, ", stack");
2796 fprintf (stderr, " )\n");
2799 return ret;
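/* Illustrative (hypothetical user code): for

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above passes a in ECX and b in EDX, while c goes on the
   stack; BLKmode and DImode arguments are never put in registers for
   fastcall.  */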
2802 /* A C expression that indicates when an argument must be passed by
2803 reference. If nonzero for an argument, a copy of that argument is
2804 made in memory and a pointer to the argument is passed instead of
2805 the argument itself. The pointer is passed in whatever way is
2806 appropriate for passing a pointer to that type. */
2808 static bool
2809 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2810 enum machine_mode mode ATTRIBUTE_UNUSED,
2811 tree type, bool named ATTRIBUTE_UNUSED)
2813 if (!TARGET_64BIT)
2814 return 0;
2816 if (type && int_size_in_bytes (type) == -1)
2818 if (TARGET_DEBUG_ARG)
2819 fprintf (stderr, "function_arg_pass_by_reference\n");
2820 return 1;
2823 return 0;
2826 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2827 ABI. Only called if TARGET_SSE. */
2828 static bool
2829 contains_128bit_aligned_vector_p (tree type)
2831 enum machine_mode mode = TYPE_MODE (type);
2832 if (SSE_REG_MODE_P (mode)
2833 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2834 return true;
2835 if (TYPE_ALIGN (type) < 128)
2836 return false;
2838 if (AGGREGATE_TYPE_P (type))
2840 /* Walk the aggregates recursively. */
2841 if (TREE_CODE (type) == RECORD_TYPE
2842 || TREE_CODE (type) == UNION_TYPE
2843 || TREE_CODE (type) == QUAL_UNION_TYPE)
2845 tree field;
2847 if (TYPE_BINFO (type))
2849 tree binfo, base_binfo;
2850 int i;
2852 for (binfo = TYPE_BINFO (type), i = 0;
2853 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
2854 if (contains_128bit_aligned_vector_p (BINFO_TYPE (base_binfo)))
2855 return true;
2857 /* And now merge the fields of structure. */
2858 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2860 if (TREE_CODE (field) == FIELD_DECL
2861 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2862 return true;
2865 /* Just for use in case some language passes arrays by value. */
2866 else if (TREE_CODE (type) == ARRAY_TYPE)
2868 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2869 return true;
2871 else
2872 abort ();
2874 return false;
2877 /* Gives the alignment boundary, in bits, of an argument with the
2878 specified mode and type. */
2881 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2883 int align;
2884 if (type)
2885 align = TYPE_ALIGN (type);
2886 else
2887 align = GET_MODE_ALIGNMENT (mode);
2888 if (align < PARM_BOUNDARY)
2889 align = PARM_BOUNDARY;
2890 if (!TARGET_64BIT)
2892 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2893 make an exception for SSE modes since these require 128bit
2894 alignment.
2896 The handling here differs from field_alignment. ICC aligns MMX
2897 arguments to 4 byte boundaries, while structure fields are aligned
2898 to 8 byte boundaries. */
2899 if (!TARGET_SSE)
2900 align = PARM_BOUNDARY;
2901 else if (!type)
2903 if (!SSE_REG_MODE_P (mode))
2904 align = PARM_BOUNDARY;
2906 else
2908 if (!contains_128bit_aligned_vector_p (type))
2909 align = PARM_BOUNDARY;
2912 if (align > 128)
2913 align = 128;
2914 return align;
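/* Illustrative: on ia32 a plain int or double argument ends up on a
   PARM_BOUNDARY (32 bit) boundary, while an __m128 argument with SSE
   enabled is aligned to 128 bits; the result is always capped at 128.  */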
2917 /* Return true if N is a possible register number of function value. */
2918 bool
2919 ix86_function_value_regno_p (int regno)
2921 if (!TARGET_64BIT)
2923 return ((regno) == 0
2924 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2925 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2927 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2928 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2929 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2932 /* Define how to find the value returned by a function.
2933 VALTYPE is the data type of the value (as a tree).
2934 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2935 otherwise, FUNC is 0. */
2937 ix86_function_value (tree valtype)
2939 if (TARGET_64BIT)
2941 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2942 REGPARM_MAX, SSE_REGPARM_MAX,
2943 x86_64_int_return_registers, 0);
2944 /* For zero sized structures, construct_container returns NULL, but we need
2945 to keep the rest of the compiler happy by returning a meaningful value. */
2946 if (!ret)
2947 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2948 return ret;
2950 else
2951 return gen_rtx_REG (TYPE_MODE (valtype),
2952 ix86_value_regno (TYPE_MODE (valtype)));
2955 /* Return nonzero iff TYPE is returned in memory. */
2957 ix86_return_in_memory (tree type)
2959 int needed_intregs, needed_sseregs, size;
2960 enum machine_mode mode = TYPE_MODE (type);
2962 if (TARGET_64BIT)
2963 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2965 if (mode == BLKmode)
2966 return 1;
2968 size = int_size_in_bytes (type);
2970 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2971 return 0;
2973 if (VECTOR_MODE_P (mode) || mode == TImode)
2975 /* User-created vectors small enough to fit in EAX. */
2976 if (size < 8)
2977 return 0;
2979 /* MMX/3dNow values are returned on the stack, since we've
2980 got to EMMS/FEMMS before returning. */
2981 if (size == 8)
2982 return 1;
2984 /* SSE values are returned in XMM0, except when it doesn't exist. */
2985 if (size == 16)
2986 return (TARGET_SSE ? 0 : 1);
2989 if (mode == XFmode)
2990 return 0;
2992 if (size > 12)
2993 return 1;
2994 return 0;
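/* Illustrative examples for the ia32 path above: a 16 byte struct is
   returned in memory; long double (XFmode) comes back in st(0); an
   __m128 value is returned in %xmm0 when SSE is enabled and in memory
   otherwise; 8 byte MMX vectors always go through memory.  */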
2997 /* When returning SSE vector types, we have a choice of either
2998 (1) being abi incompatible with a -march switch, or
2999 (2) generating an error.
3000 Given no good solution, I think the safest thing is one warning.
3001 The user won't be able to use -Werror, but....
3003 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3004 called in response to actually generating a caller or callee that
3005 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3006 via aggregate_value_p for general type probing from tree-ssa. */
3008 static rtx
3009 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3011 static bool warned;
3013 if (!TARGET_SSE && type && !warned)
3015 /* Look at the return type of the function, not the function type. */
3016 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3018 if (mode == TImode
3019 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3021 warned = true;
3022 warning ("SSE vector return without SSE enabled changes the ABI");
3026 return NULL;
3029 /* Define how to find the value returned by a library function
3030 assuming the value has mode MODE. */
3032 ix86_libcall_value (enum machine_mode mode)
3034 if (TARGET_64BIT)
3036 switch (mode)
3038 case SFmode:
3039 case SCmode:
3040 case DFmode:
3041 case DCmode:
3042 case TFmode:
3043 return gen_rtx_REG (mode, FIRST_SSE_REG);
3044 case XFmode:
3045 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3046 case XCmode:
3047 case TCmode:
3048 return NULL;
3049 default:
3050 return gen_rtx_REG (mode, 0);
3053 else
3054 return gen_rtx_REG (mode, ix86_value_regno (mode));
3057 /* Given a mode, return the register to use for a return value. */
3059 static int
3060 ix86_value_regno (enum machine_mode mode)
3062 /* Floating point return values in %st(0). */
3063 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
3064 return FIRST_FLOAT_REG;
3065 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3066 we prevent this case when sse is not available. */
3067 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3068 return FIRST_SSE_REG;
3069 /* Everything else in %eax. */
3070 return 0;
3073 /* Create the va_list data type. */
3075 static tree
3076 ix86_build_builtin_va_list (void)
3078 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3080 /* For i386 we use plain pointer to argument area. */
3081 if (!TARGET_64BIT)
3082 return build_pointer_type (char_type_node);
3084 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3085 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3087 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3088 unsigned_type_node);
3089 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3090 unsigned_type_node);
3091 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3092 ptr_type_node);
3093 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3094 ptr_type_node);
3096 DECL_FIELD_CONTEXT (f_gpr) = record;
3097 DECL_FIELD_CONTEXT (f_fpr) = record;
3098 DECL_FIELD_CONTEXT (f_ovf) = record;
3099 DECL_FIELD_CONTEXT (f_sav) = record;
3101 TREE_CHAIN (record) = type_decl;
3102 TYPE_NAME (record) = type_decl;
3103 TYPE_FIELDS (record) = f_gpr;
3104 TREE_CHAIN (f_gpr) = f_fpr;
3105 TREE_CHAIN (f_fpr) = f_ovf;
3106 TREE_CHAIN (f_ovf) = f_sav;
3108 layout_type (record);
3110 /* The correct type is an array type of one element. */
3111 return build_array_type (record, build_index_type (size_zero_node));
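/* The record built above corresponds roughly to this C declaration
   (illustrative only; the real type is constructed as trees):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   The one-element array makes va_list decay to a pointer when passed.  */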
3114 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3116 static void
3117 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3118 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3119 int no_rtl)
3121 CUMULATIVE_ARGS next_cum;
3122 rtx save_area = NULL_RTX, mem;
3123 rtx label;
3124 rtx label_ref;
3125 rtx tmp_reg;
3126 rtx nsse_reg;
3127 int set;
3128 tree fntype;
3129 int stdarg_p;
3130 int i;
3132 if (!TARGET_64BIT)
3133 return;
3135 /* Indicate to allocate space on the stack for varargs save area. */
3136 ix86_save_varrargs_registers = 1;
3138 cfun->stack_alignment_needed = 128;
3140 fntype = TREE_TYPE (current_function_decl);
3141 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3142 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3143 != void_type_node));
3145 /* For varargs, we do not want to skip the dummy va_dcl argument.
3146 For stdargs, we do want to skip the last named argument. */
3147 next_cum = *cum;
3148 if (stdarg_p)
3149 function_arg_advance (&next_cum, mode, type, 1);
3151 if (!no_rtl)
3152 save_area = frame_pointer_rtx;
3154 set = get_varargs_alias_set ();
3156 for (i = next_cum.regno; i < ix86_regparm; i++)
3158 mem = gen_rtx_MEM (Pmode,
3159 plus_constant (save_area, i * UNITS_PER_WORD));
3160 set_mem_alias_set (mem, set);
3161 emit_move_insn (mem, gen_rtx_REG (Pmode,
3162 x86_64_int_parameter_registers[i]));
3165 if (next_cum.sse_nregs)
3167 /* Now emit code to save SSE registers. The AX parameter contains the number
3168 of SSE parameter registers used to call this function. We use the
3169 sse_prologue_save insn template, which produces a computed jump across
3170 the SSE saves. We need some preparation work to get this working. */
3172 label = gen_label_rtx ();
3173 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3175 /* Compute address to jump to :
3176 label - 5*eax + nnamed_sse_arguments*5 */
3177 tmp_reg = gen_reg_rtx (Pmode);
3178 nsse_reg = gen_reg_rtx (Pmode);
3179 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3180 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3181 gen_rtx_MULT (Pmode, nsse_reg,
3182 GEN_INT (4))));
3183 if (next_cum.sse_regno)
3184 emit_move_insn
3185 (nsse_reg,
3186 gen_rtx_CONST (DImode,
3187 gen_rtx_PLUS (DImode,
3188 label_ref,
3189 GEN_INT (next_cum.sse_regno * 4))));
3190 else
3191 emit_move_insn (nsse_reg, label_ref);
3192 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3194 /* Compute the address of the memory block we save into. We always use a
3195 pointer pointing 127 bytes after the first byte to store - this is needed
3196 to keep the instruction size limited to 4 bytes. */
3197 tmp_reg = gen_reg_rtx (Pmode);
3198 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3199 plus_constant (save_area,
3200 8 * REGPARM_MAX + 127)));
3201 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3202 set_mem_alias_set (mem, set);
3203 set_mem_align (mem, BITS_PER_WORD);
3205 /* And finally do the dirty job! */
3206 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3207 GEN_INT (next_cum.sse_regno), label));
3212 /* Implement va_start. */
3214 void
3215 ix86_va_start (tree valist, rtx nextarg)
3217 HOST_WIDE_INT words, n_gpr, n_fpr;
3218 tree f_gpr, f_fpr, f_ovf, f_sav;
3219 tree gpr, fpr, ovf, sav, t;
3221 /* Only 64bit target needs something special. */
3222 if (!TARGET_64BIT)
3224 std_expand_builtin_va_start (valist, nextarg);
3225 return;
3228 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3229 f_fpr = TREE_CHAIN (f_gpr);
3230 f_ovf = TREE_CHAIN (f_fpr);
3231 f_sav = TREE_CHAIN (f_ovf);
3233 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3234 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3235 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3236 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3237 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3239 /* Count number of gp and fp argument registers used. */
3240 words = current_function_args_info.words;
3241 n_gpr = current_function_args_info.regno;
3242 n_fpr = current_function_args_info.sse_regno;
3244 if (TARGET_DEBUG_ARG)
3245 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3246 (int) words, (int) n_gpr, (int) n_fpr);
3248 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3249 build_int_2 (n_gpr * 8, 0));
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3253 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3254 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3255 TREE_SIDE_EFFECTS (t) = 1;
3256 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3258 /* Find the overflow area. */
3259 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3260 if (words != 0)
3261 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3262 build_int_2 (words * UNITS_PER_WORD, 0));
3263 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3264 TREE_SIDE_EFFECTS (t) = 1;
3265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3267 /* Find the register save area.
3268 The function prologue saves it right above the stack frame. */
3269 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3270 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3271 TREE_SIDE_EFFECTS (t) = 1;
3272 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
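/* Worked example (illustrative, hypothetical prototype): for

     int f (const char *fmt, ...);

   the single named argument uses one GP register, so va_start sets
   gp_offset = 1 * 8 = 8 and fp_offset = 0 * 16 + 8 * REGPARM_MAX = 48,
   while overflow_arg_area points just past any named stack arguments
   and reg_save_area at the block saved by the prologue.  */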
3275 /* Implement va_arg. */
3277 tree
3278 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3280 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3281 tree f_gpr, f_fpr, f_ovf, f_sav;
3282 tree gpr, fpr, ovf, sav, t;
3283 int size, rsize;
3284 tree lab_false, lab_over = NULL_TREE;
3285 tree addr, t2;
3286 rtx container;
3287 int indirect_p = 0;
3288 tree ptrtype;
3290 /* Only 64bit target needs something special. */
3291 if (!TARGET_64BIT)
3292 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3294 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3295 f_fpr = TREE_CHAIN (f_gpr);
3296 f_ovf = TREE_CHAIN (f_fpr);
3297 f_sav = TREE_CHAIN (f_ovf);
3299 valist = build_fold_indirect_ref (valist);
3300 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3301 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3302 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3303 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3305 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3306 if (indirect_p)
3307 type = build_pointer_type (type);
3308 size = int_size_in_bytes (type);
3309 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3311 container = construct_container (TYPE_MODE (type), type, 0,
3312 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3314 /* Pull the value out of the saved registers ... */
3317 addr = create_tmp_var (ptr_type_node, "addr");
3318 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3320 if (container)
3322 int needed_intregs, needed_sseregs;
3323 bool need_temp;
3324 tree int_addr, sse_addr;
3326 lab_false = create_artificial_label ();
3327 lab_over = create_artificial_label ();
3329 examine_argument (TYPE_MODE (type), type, 0,
3330 &needed_intregs, &needed_sseregs);
3332 need_temp = (!REG_P (container)
3333 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3334 || TYPE_ALIGN (type) > 128));
3336 /* In case we are passing a structure, verify that it is a consecutive block
3337 on the register save area. If not, we need to do moves. */
3338 if (!need_temp && !REG_P (container))
3340 /* Verify that all registers are strictly consecutive */
3341 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3343 int i;
3345 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3347 rtx slot = XVECEXP (container, 0, i);
3348 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3349 || INTVAL (XEXP (slot, 1)) != i * 16)
3350 need_temp = 1;
3353 else
3355 int i;
3357 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3359 rtx slot = XVECEXP (container, 0, i);
3360 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3361 || INTVAL (XEXP (slot, 1)) != i * 8)
3362 need_temp = 1;
3366 if (!need_temp)
3368 int_addr = addr;
3369 sse_addr = addr;
3371 else
3373 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3374 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3375 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3376 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3378 /* First ensure that we fit completely in registers. */
3379 if (needed_intregs)
3381 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3382 TREE_TYPE (t) = TREE_TYPE (gpr);
3383 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3384 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3385 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3386 gimplify_and_add (t, pre_p);
3388 if (needed_sseregs)
3390 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3391 + REGPARM_MAX * 8, 0);
3392 TREE_TYPE (t) = TREE_TYPE (fpr);
3393 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3394 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3395 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3396 gimplify_and_add (t, pre_p);
3399 /* Compute index to start of area used for integer regs. */
3400 if (needed_intregs)
3402 /* int_addr = gpr + sav; */
3403 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3404 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3405 gimplify_and_add (t, pre_p);
3407 if (needed_sseregs)
3409 /* sse_addr = fpr + sav; */
3410 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3411 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3412 gimplify_and_add (t, pre_p);
3414 if (need_temp)
3416 int i;
3417 tree temp = create_tmp_var (type, "va_arg_tmp");
3419 /* addr = &temp; */
3420 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3421 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3422 gimplify_and_add (t, pre_p);
3424 for (i = 0; i < XVECLEN (container, 0); i++)
3426 rtx slot = XVECEXP (container, 0, i);
3427 rtx reg = XEXP (slot, 0);
3428 enum machine_mode mode = GET_MODE (reg);
3429 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3430 tree addr_type = build_pointer_type (piece_type);
3431 tree src_addr, src;
3432 int src_offset;
3433 tree dest_addr, dest;
3435 if (SSE_REGNO_P (REGNO (reg)))
3437 src_addr = sse_addr;
3438 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3440 else
3442 src_addr = int_addr;
3443 src_offset = REGNO (reg) * 8;
3445 src_addr = fold_convert (addr_type, src_addr);
3446 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3447 size_int (src_offset)));
3448 src = build_fold_indirect_ref (src_addr);
3450 dest_addr = fold_convert (addr_type, addr);
3451 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3452 size_int (INTVAL (XEXP (slot, 1)))));
3453 dest = build_fold_indirect_ref (dest_addr);
3455 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3456 gimplify_and_add (t, pre_p);
3460 if (needed_intregs)
3462 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3463 build_int_2 (needed_intregs * 8, 0));
3464 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3465 gimplify_and_add (t, pre_p);
3467 if (needed_sseregs)
3470 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3471 build_int_2 (needed_sseregs * 16, 0));
3472 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3473 gimplify_and_add (t, pre_p);
3476 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3477 gimplify_and_add (t, pre_p);
3479 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3480 append_to_statement_list (t, pre_p);
3483 /* ... otherwise out of the overflow area. */
3485 /* Care for on-stack alignment if needed. */
3486 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3487 t = ovf;
3488 else
3490 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3491 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3492 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3494 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3496 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3497 gimplify_and_add (t2, pre_p);
3499 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3500 build_int_2 (rsize * UNITS_PER_WORD, 0));
3501 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3502 gimplify_and_add (t, pre_p);
3504 if (container)
3506 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3507 append_to_statement_list (t, pre_p);
3510 ptrtype = build_pointer_type (type);
3511 addr = fold_convert (ptrtype, addr);
3513 if (indirect_p)
3514 addr = build_fold_indirect_ref (addr);
3515 return build_fold_indirect_ref (addr);
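/* As an aside, the gimplification above is easier to follow next to a
   hand-written C sketch of what the generated code does for a plain
   integer argument. The sketch below is illustrative only -- the struct
   and function names are hypothetical, not part of this file -- and is
   compiled out. */
#if 0
struct sketch_va_list		/* mirrors the gpr/fpr/ovf/sav fields used above */
{
  unsigned int gp_offset;
  unsigned int fp_offset;
  void *overflow_arg_area;
  void *reg_save_area;
};

static int
sketch_va_arg_int (struct sketch_va_list *ap)
{
  int value;

  if (ap->gp_offset < 6 * 8)	/* six 8-byte GPR slots in the save area */
    {
      /* ... pull the value out of the saved registers ... */
      value = *(int *) ((char *) ap->reg_save_area + ap->gp_offset);
      ap->gp_offset += 8;
    }
  else
    {
      /* ... otherwise out of the overflow area. */
      value = *(int *) ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
    }
  return value;
}
#endif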
3518 /* Return nonzero if OP is either a i387 or SSE fp register. */
3520 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3522 return ANY_FP_REG_P (op);
3525 /* Return nonzero if OP is an i387 fp register. */
3527 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3529 return FP_REG_P (op);
3532 /* Return nonzero if OP is a non-fp register_operand. */
3534 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3536 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3539 /* Return nonzero if OP is a register operand other than an
3540 i387 fp register. */
3542 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3544 return register_operand (op, mode) && !FP_REG_P (op);
3547 /* Return nonzero if OP is general operand representable on x86_64. */
3550 x86_64_general_operand (rtx op, enum machine_mode mode)
3552 if (!TARGET_64BIT)
3553 return general_operand (op, mode);
3554 if (nonimmediate_operand (op, mode))
3555 return 1;
3556 return x86_64_sign_extended_value (op);
3559 /* Return nonzero if OP is general operand representable on x86_64
3560 as either sign extended or zero extended constant. */
3563 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3565 if (!TARGET_64BIT)
3566 return general_operand (op, mode);
3567 if (nonimmediate_operand (op, mode))
3568 return 1;
3569 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3572 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3575 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3577 if (!TARGET_64BIT)
3578 return nonmemory_operand (op, mode);
3579 if (register_operand (op, mode))
3580 return 1;
3581 return x86_64_sign_extended_value (op);
3584 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3587 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3589 if (!TARGET_64BIT || !flag_pic)
3590 return nonmemory_operand (op, mode);
3591 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3592 return 1;
3593 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3594 return 1;
3595 return 0;
3598 /* Return nonzero if OPNUM's MEM should be matched
3599 in movabs* patterns. */
3602 ix86_check_movabs (rtx insn, int opnum)
3604 rtx set, mem;
3606 set = PATTERN (insn);
3607 if (GET_CODE (set) == PARALLEL)
3608 set = XVECEXP (set, 0, 0);
3609 if (GET_CODE (set) != SET)
3610 abort ();
3611 mem = XEXP (set, opnum);
3612 while (GET_CODE (mem) == SUBREG)
3613 mem = SUBREG_REG (mem);
3614 if (GET_CODE (mem) != MEM)
3615 abort ();
3616 return (volatile_ok || !MEM_VOLATILE_P (mem));
3619 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3622 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3624 if (!TARGET_64BIT)
3625 return nonmemory_operand (op, mode);
3626 if (register_operand (op, mode))
3627 return 1;
3628 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3631 /* Return nonzero if OP is immediate operand representable on x86_64. */
3634 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3636 if (!TARGET_64BIT)
3637 return immediate_operand (op, mode);
3638 return x86_64_sign_extended_value (op);
3641 /* Return nonzero if OP is immediate operand representable on x86_64. */
3644 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3646 return x86_64_zero_extended_value (op);
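/* A compact illustration of the two immediate classes distinguished by
   the x86_64_*_operand predicates above (the helpers are hypothetical
   and compiled out): a value is usable as a sign-extended imm32 when it
   equals its own 32-bit sign extension, and as a zero-extended imm32
   when its upper 32 bits are clear. */
#if 0
static int
sketch_fits_sign_extended_imm32 (long long v)
{
  return v == (int) v;			/* -1 fits; 0x80000000LL does not */
}

static int
sketch_fits_zero_extended_imm32 (unsigned long long v)
{
  return v == (unsigned int) v;		/* 0x80000000ULL fits; -1ULL does not */
}
#endif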
3649 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3650 for shift & compare patterns, as shifting by 0 does not change flags),
3651 else return zero. */
3654 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3656 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3659 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3660 reference and a constant. */
3663 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3665 switch (GET_CODE (op))
3667 case SYMBOL_REF:
3668 case LABEL_REF:
3669 return 1;
3671 case CONST:
3672 op = XEXP (op, 0);
3673 if (GET_CODE (op) == SYMBOL_REF
3674 || GET_CODE (op) == LABEL_REF
3675 || (GET_CODE (op) == UNSPEC
3676 && (XINT (op, 1) == UNSPEC_GOT
3677 || XINT (op, 1) == UNSPEC_GOTOFF
3678 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3679 return 1;
3680 if (GET_CODE (op) != PLUS
3681 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3682 return 0;
3684 op = XEXP (op, 0);
3685 if (GET_CODE (op) == SYMBOL_REF
3686 || GET_CODE (op) == LABEL_REF)
3687 return 1;
3688 /* Only @GOTOFF gets offsets. */
3689 if (GET_CODE (op) != UNSPEC
3690 || XINT (op, 1) != UNSPEC_GOTOFF)
3691 return 0;
3693 op = XVECEXP (op, 0, 0);
3694 if (GET_CODE (op) == SYMBOL_REF
3695 || GET_CODE (op) == LABEL_REF)
3696 return 1;
3697 return 0;
3699 default:
3700 return 0;
3704 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3707 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3709 if (GET_CODE (op) != CONST)
3710 return 0;
3711 op = XEXP (op, 0);
3712 if (TARGET_64BIT)
3714 if (GET_CODE (op) == UNSPEC
3715 && XINT (op, 1) == UNSPEC_GOTPCREL)
3716 return 1;
3717 if (GET_CODE (op) == PLUS
3718 && GET_CODE (XEXP (op, 0)) == UNSPEC
3719 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3720 return 1;
3722 else
3724 if (GET_CODE (op) == UNSPEC)
3725 return 1;
3726 if (GET_CODE (op) != PLUS
3727 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3728 return 0;
3729 op = XEXP (op, 0);
3730 if (GET_CODE (op) == UNSPEC)
3731 return 1;
3733 return 0;
3736 /* Return true if OP is a symbolic operand that resolves locally. */
3738 static int
3739 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3741 if (GET_CODE (op) == CONST
3742 && GET_CODE (XEXP (op, 0)) == PLUS
3743 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3744 op = XEXP (XEXP (op, 0), 0);
3746 if (GET_CODE (op) == LABEL_REF)
3747 return 1;
3749 if (GET_CODE (op) != SYMBOL_REF)
3750 return 0;
3752 if (SYMBOL_REF_LOCAL_P (op))
3753 return 1;
3755 /* There is, however, a not insubstantial body of code in the rest of
3756 the compiler that assumes it can just stick the results of
3757 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3758 /* ??? This is a hack. Should update the body of the compiler to
3759 always create a DECL and invoke targetm.encode_section_info. */
3760 if (strncmp (XSTR (op, 0), internal_label_prefix,
3761 internal_label_prefix_len) == 0)
3762 return 1;
3764 return 0;
3767 /* Test for various thread-local symbols. */
3770 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772 if (GET_CODE (op) != SYMBOL_REF)
3773 return 0;
3774 return SYMBOL_REF_TLS_MODEL (op);
3777 static inline int
3778 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3780 if (GET_CODE (op) != SYMBOL_REF)
3781 return 0;
3782 return SYMBOL_REF_TLS_MODEL (op) == kind;
3786 global_dynamic_symbolic_operand (rtx op,
3787 enum machine_mode mode ATTRIBUTE_UNUSED)
3789 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3793 local_dynamic_symbolic_operand (rtx op,
3794 enum machine_mode mode ATTRIBUTE_UNUSED)
3796 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3800 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3802 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3806 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3808 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3811 /* Test for a valid operand for a call instruction. Don't allow the
3812 arg pointer register or virtual regs since they may decay into
3813 reg + const, which the patterns can't handle. */
3816 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3818 /* Disallow indirect through a virtual register. This leads to
3819 compiler aborts when trying to eliminate them. */
3820 if (GET_CODE (op) == REG
3821 && (op == arg_pointer_rtx
3822 || op == frame_pointer_rtx
3823 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3824 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3825 return 0;
3827 /* Disallow `call 1234'. Due to varying assembler lameness this
3828 gets either rejected or translated to `call .+1234'. */
3829 if (GET_CODE (op) == CONST_INT)
3830 return 0;
3832 /* Explicitly allow SYMBOL_REF even if pic. */
3833 if (GET_CODE (op) == SYMBOL_REF)
3834 return 1;
3836 /* Otherwise we can allow any general_operand in the address. */
3837 return general_operand (op, Pmode);
3840 /* Test for a valid operand for a call instruction. Don't allow the
3841 arg pointer register or virtual regs since they may decay into
3842 reg + const, which the patterns can't handle. */
3845 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3847 /* Disallow indirect through a virtual register. This leads to
3848 compiler aborts when trying to eliminate them. */
3849 if (GET_CODE (op) == REG
3850 && (op == arg_pointer_rtx
3851 || op == frame_pointer_rtx
3852 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3853 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3854 return 0;
3856 /* Explicitly allow SYMBOL_REF even if pic. */
3857 if (GET_CODE (op) == SYMBOL_REF)
3858 return 1;
3860 /* Otherwise we can only allow register operands. */
3861 return register_operand (op, Pmode);
3865 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3867 if (GET_CODE (op) == CONST
3868 && GET_CODE (XEXP (op, 0)) == PLUS
3869 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3870 op = XEXP (XEXP (op, 0), 0);
3871 return GET_CODE (op) == SYMBOL_REF;
3874 /* Match exactly zero and one. */
3877 const0_operand (rtx op, enum machine_mode mode)
3879 return op == CONST0_RTX (mode);
3883 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3885 return op == const1_rtx;
3888 /* Match 2, 4, or 8. Used for leal multiplicands. */
3891 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3893 return (GET_CODE (op) == CONST_INT
3894 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
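/* For context: 2, 4 and 8 are exactly the index scale factors an x86
   addressing mode can encode, which is why lea can be used for cheap
   multiplies; e.g. (a hand-written example, not emitted verbatim here)

	leal	(%eax,%eax,4), %eax

   multiplies %eax by 5 in a single instruction. */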
3898 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3900 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3904 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3906 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3910 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3912 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3916 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3918 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3922 /* True if this is a constant appropriate for an increment or decrement. */
3925 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3927 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3928 flags register, since the carry flag is not set. */
3929 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3930 return 0;
3931 return op == const1_rtx || op == constm1_rtx;
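/* The flags issue mentioned above, spelled out: "incl %eax" leaves CF
   untouched while updating the other arithmetic flags, so an instruction
   that later reads the whole flags register inherits a dependency on the
   previous CF producer as well. "addl $1, %eax" rewrites every flag and
   so avoids that partial-flags dependency, which is why it is preferred
   on Pentium 4 and Nocona unless optimizing for size. */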
3934 /* Return nonzero if OP is acceptable as operand of DImode shift
3935 expander. */
3938 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3940 if (TARGET_64BIT)
3941 return nonimmediate_operand (op, mode);
3942 else
3943 return register_operand (op, mode);
3946 /* Return false if this is the stack pointer, or any other fake
3947 register eliminable to the stack pointer. Otherwise, this is
3948 a register operand.
3950 This is used to prevent esp from being used as an index reg,
3951 which would only happen in pathological cases. */
3954 reg_no_sp_operand (rtx op, enum machine_mode mode)
3956 rtx t = op;
3957 if (GET_CODE (t) == SUBREG)
3958 t = SUBREG_REG (t);
3959 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3960 return 0;
3962 return register_operand (op, mode);
3966 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3968 return MMX_REG_P (op);
3971 /* Return false if this is any eliminable register. Otherwise
3972 general_operand. */
3975 general_no_elim_operand (rtx op, enum machine_mode mode)
3977 rtx t = op;
3978 if (GET_CODE (t) == SUBREG)
3979 t = SUBREG_REG (t);
3980 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3981 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3982 || t == virtual_stack_dynamic_rtx)
3983 return 0;
3984 if (REG_P (t)
3985 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3986 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3987 return 0;
3989 return general_operand (op, mode);
3992 /* Return false if this is any eliminable register. Otherwise
3993 register_operand or const_int. */
3996 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3998 rtx t = op;
3999 if (GET_CODE (t) == SUBREG)
4000 t = SUBREG_REG (t);
4001 if (t == arg_pointer_rtx || t == frame_pointer_rtx
4002 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
4003 || t == virtual_stack_dynamic_rtx)
4004 return 0;
4006 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
4009 /* Return false if this is any eliminable register or stack register,
4010 otherwise work like register_operand. */
4013 index_register_operand (rtx op, enum machine_mode mode)
4015 rtx t = op;
4016 if (GET_CODE (t) == SUBREG)
4017 t = SUBREG_REG (t);
4018 if (!REG_P (t))
4019 return 0;
4020 if (t == arg_pointer_rtx
4021 || t == frame_pointer_rtx
4022 || t == virtual_incoming_args_rtx
4023 || t == virtual_stack_vars_rtx
4024 || t == virtual_stack_dynamic_rtx
4025 || REGNO (t) == STACK_POINTER_REGNUM)
4026 return 0;
4028 return general_operand (op, mode);
4031 /* Return true if op is a Q_REGS class register. */
4034 q_regs_operand (rtx op, enum machine_mode mode)
4036 if (mode != VOIDmode && GET_MODE (op) != mode)
4037 return 0;
4038 if (GET_CODE (op) == SUBREG)
4039 op = SUBREG_REG (op);
4040 return ANY_QI_REG_P (op);
4043 /* Return true if op is an flags register. */
4046 flags_reg_operand (rtx op, enum machine_mode mode)
4048 if (mode != VOIDmode && GET_MODE (op) != mode)
4049 return 0;
4050 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
4053 /* Return true if op is a NON_Q_REGS class register. */
4056 non_q_regs_operand (rtx op, enum machine_mode mode)
4058 if (mode != VOIDmode && GET_MODE (op) != mode)
4059 return 0;
4060 if (GET_CODE (op) == SUBREG)
4061 op = SUBREG_REG (op);
4062 return NON_QI_REG_P (op);
4066 zero_extended_scalar_load_operand (rtx op,
4067 enum machine_mode mode ATTRIBUTE_UNUSED)
4069 unsigned n_elts;
4070 if (GET_CODE (op) != MEM)
4071 return 0;
4072 op = maybe_get_pool_constant (op);
4073 if (!op)
4074 return 0;
4075 if (GET_CODE (op) != CONST_VECTOR)
4076 return 0;
4077 n_elts =
4078 (GET_MODE_SIZE (GET_MODE (op)) /
4079 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4080 for (n_elts--; n_elts > 0; n_elts--)
4082 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4083 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4084 return 0;
4086 return 1;
4089 /* Return 1 when OP is operand acceptable for standard SSE move. */
4091 vector_move_operand (rtx op, enum machine_mode mode)
4093 if (nonimmediate_operand (op, mode))
4094 return 1;
4095 if (GET_MODE (op) != mode && mode != VOIDmode)
4096 return 0;
4097 return (op == CONST0_RTX (GET_MODE (op)));
4100 /* Return true if OP is a valid address that does not contain
4101 a segment override. */
4104 no_seg_address_operand (rtx op, enum machine_mode mode)
4106 struct ix86_address parts;
4108 if (! address_operand (op, mode))
4109 return 0;
4111 if (! ix86_decompose_address (op, &parts))
4112 abort ();
4114 return parts.seg == SEG_DEFAULT;
4117 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4118 insns. */
4120 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4122 enum rtx_code code = GET_CODE (op);
4123 switch (code)
4125 /* Operations supported directly. */
4126 case EQ:
4127 case LT:
4128 case LE:
4129 case UNORDERED:
4130 case NE:
4131 case UNGE:
4132 case UNGT:
4133 case ORDERED:
4134 return 1;
4135 /* These are equivalent to ones above in non-IEEE comparisons. */
4136 case UNEQ:
4137 case UNLT:
4138 case UNLE:
4139 case LTGT:
4140 case GE:
4141 case GT:
4142 return !TARGET_IEEE_FP;
4143 default:
4144 return 0;
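/* The codes accepted unconditionally above correspond one-to-one to the
   eight CMPSS/CMPPS predicates:

	EQ -> cmpeqss		UNORDERED -> cmpunordss
	LT -> cmpltss		NE        -> cmpneqss
	LE -> cmpless		UNGE      -> cmpnltss
	ORDERED -> cmpordss	UNGT      -> cmpnless

   The remaining codes are usable only when !TARGET_IEEE_FP, since they
   differ from the ones above solely in how unordered operands are
   treated. */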
4147 /* Return 1 if OP is a valid comparison operator in valid mode. */
4149 ix86_comparison_operator (rtx op, enum machine_mode mode)
4151 enum machine_mode inmode;
4152 enum rtx_code code = GET_CODE (op);
4153 if (mode != VOIDmode && GET_MODE (op) != mode)
4154 return 0;
4155 if (!COMPARISON_P (op))
4156 return 0;
4157 inmode = GET_MODE (XEXP (op, 0));
4159 if (inmode == CCFPmode || inmode == CCFPUmode)
4161 enum rtx_code second_code, bypass_code;
4162 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4163 return (bypass_code == NIL && second_code == NIL);
4165 switch (code)
4167 case EQ: case NE:
4168 return 1;
4169 case LT: case GE:
4170 if (inmode == CCmode || inmode == CCGCmode
4171 || inmode == CCGOCmode || inmode == CCNOmode)
4172 return 1;
4173 return 0;
4174 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4175 if (inmode == CCmode)
4176 return 1;
4177 return 0;
4178 case GT: case LE:
4179 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4180 return 1;
4181 return 0;
4182 default:
4183 return 0;
4187 /* Return 1 if OP is a valid comparison operator testing carry flag
4188 to be set. */
4190 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4192 enum machine_mode inmode;
4193 enum rtx_code code = GET_CODE (op);
4195 if (mode != VOIDmode && GET_MODE (op) != mode)
4196 return 0;
4197 if (!COMPARISON_P (op))
4198 return 0;
4199 inmode = GET_MODE (XEXP (op, 0));
4200 if (GET_CODE (XEXP (op, 0)) != REG
4201 || REGNO (XEXP (op, 0)) != 17
4202 || XEXP (op, 1) != const0_rtx)
4203 return 0;
4205 if (inmode == CCFPmode || inmode == CCFPUmode)
4207 enum rtx_code second_code, bypass_code;
4209 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4210 if (bypass_code != NIL || second_code != NIL)
4211 return 0;
4212 code = ix86_fp_compare_code_to_integer (code);
4214 else if (inmode != CCmode)
4215 return 0;
4216 return code == LTU;
4219 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4222 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4224 enum machine_mode inmode;
4225 enum rtx_code code = GET_CODE (op);
4227 if (mode != VOIDmode && GET_MODE (op) != mode)
4228 return 0;
4229 if (!COMPARISON_P (op))
4230 return 0;
4231 inmode = GET_MODE (XEXP (op, 0));
4232 if (inmode == CCFPmode || inmode == CCFPUmode)
4234 enum rtx_code second_code, bypass_code;
4236 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4237 if (bypass_code != NIL || second_code != NIL)
4238 return 0;
4239 code = ix86_fp_compare_code_to_integer (code);
4241 /* i387 supports just limited amount of conditional codes. */
4242 switch (code)
4244 case LTU: case GTU: case LEU: case GEU:
4245 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4246 return 1;
4247 return 0;
4248 case ORDERED: case UNORDERED:
4249 case EQ: case NE:
4250 return 1;
4251 default:
4252 return 0;
4256 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4259 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4261 switch (GET_CODE (op))
4263 case MULT:
4264 /* Modern CPUs have same latency for HImode and SImode multiply,
4265 but 386 and 486 do HImode multiply faster. */
4266 return ix86_tune > PROCESSOR_I486;
4267 case PLUS:
4268 case AND:
4269 case IOR:
4270 case XOR:
4271 case ASHIFT:
4272 return 1;
4273 default:
4274 return 0;
4278 /* Nearly general operand, but accept any const_double, since we wish
4279 to be able to drop them into memory rather than have them get pulled
4280 into registers. */
4283 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4285 if (mode != VOIDmode && mode != GET_MODE (op))
4286 return 0;
4287 if (GET_CODE (op) == CONST_DOUBLE)
4288 return 1;
4289 return general_operand (op, mode);
4292 /* Match an SI or HImode register for a zero_extract. */
4295 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4297 int regno;
4298 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4299 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4300 return 0;
4302 if (!register_operand (op, VOIDmode))
4303 return 0;
4305 /* Be careful to accept only registers having upper parts. */
4306 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4307 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4310 /* Return 1 if this is a valid binary floating-point operation.
4311 OP is the expression matched, and MODE is its mode. */
4314 binary_fp_operator (rtx op, enum machine_mode mode)
4316 if (mode != VOIDmode && mode != GET_MODE (op))
4317 return 0;
4319 switch (GET_CODE (op))
4321 case PLUS:
4322 case MINUS:
4323 case MULT:
4324 case DIV:
4325 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4327 default:
4328 return 0;
4333 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4335 return GET_CODE (op) == MULT;
4339 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4341 return GET_CODE (op) == DIV;
4345 arith_or_logical_operator (rtx op, enum machine_mode mode)
4347 return ((mode == VOIDmode || GET_MODE (op) == mode)
4348 && ARITHMETIC_P (op));
4351 /* Returns 1 if OP is memory operand with a displacement. */
4354 memory_displacement_operand (rtx op, enum machine_mode mode)
4356 struct ix86_address parts;
4358 if (! memory_operand (op, mode))
4359 return 0;
4361 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4362 abort ();
4364 return parts.disp != NULL_RTX;
4367 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4368 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4370 ??? It seems likely that this will only work because cmpsi is an
4371 expander, and no actual insns use this. */
4374 cmpsi_operand (rtx op, enum machine_mode mode)
4376 if (nonimmediate_operand (op, mode))
4377 return 1;
4379 if (GET_CODE (op) == AND
4380 && GET_MODE (op) == SImode
4381 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4382 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4383 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4384 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4385 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4386 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4387 return 1;
4389 return 0;
4392 /* Returns 1 if OP is a memory operand that cannot be represented by the
4393 modRM array. */
4396 long_memory_operand (rtx op, enum machine_mode mode)
4398 if (! memory_operand (op, mode))
4399 return 0;
4401 return memory_address_length (op) != 0;
4404 /* Return nonzero if the rtx is known aligned. */
4407 aligned_operand (rtx op, enum machine_mode mode)
4409 struct ix86_address parts;
4411 if (!general_operand (op, mode))
4412 return 0;
4414 /* Registers and immediate operands are always "aligned". */
4415 if (GET_CODE (op) != MEM)
4416 return 1;
4418 /* Don't even try to do any aligned optimizations with volatiles. */
4419 if (MEM_VOLATILE_P (op))
4420 return 0;
4422 op = XEXP (op, 0);
4424 /* Pushes and pops are only valid on the stack pointer. */
4425 if (GET_CODE (op) == PRE_DEC
4426 || GET_CODE (op) == POST_INC)
4427 return 1;
4429 /* Decode the address. */
4430 if (! ix86_decompose_address (op, &parts))
4431 abort ();
4433 /* Look for some component that isn't known to be aligned. */
4434 if (parts.index)
4436 if (parts.scale < 4
4437 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4438 return 0;
4440 if (parts.base)
4442 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4443 return 0;
4445 if (parts.disp)
4447 if (GET_CODE (parts.disp) != CONST_INT
4448 || (INTVAL (parts.disp) & 3) != 0)
4449 return 0;
4452 /* Didn't find one -- this must be an aligned address. */
4453 return 1;
4456 /* Initialize the table of extra 80387 mathematical constants. */
4458 static void
4459 init_ext_80387_constants (void)
4461 static const char * cst[5] =
4463 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4464 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4465 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4466 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4467 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4469 int i;
4471 for (i = 0; i < 5; i++)
4473 real_from_string (&ext_80387_constants_table[i], cst[i]);
4474 /* Ensure each constant is rounded to XFmode precision. */
4475 real_convert (&ext_80387_constants_table[i],
4476 XFmode, &ext_80387_constants_table[i]);
4479 ext_80387_constants_init = 1;
4482 /* Return true if the constant is something that can be loaded with
4483 a special instruction. */
4486 standard_80387_constant_p (rtx x)
4488 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4489 return -1;
4491 if (x == CONST0_RTX (GET_MODE (x)))
4492 return 1;
4493 if (x == CONST1_RTX (GET_MODE (x)))
4494 return 2;
4496 /* For XFmode constants, try to find a special 80387 instruction when
4497 optimizing for size or on those CPUs that benefit from them. */
4498 if (GET_MODE (x) == XFmode
4499 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4501 REAL_VALUE_TYPE r;
4502 int i;
4504 if (! ext_80387_constants_init)
4505 init_ext_80387_constants ();
4507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4508 for (i = 0; i < 5; i++)
4509 if (real_identical (&r, &ext_80387_constants_table[i]))
4510 return i + 3;
4513 return 0;
4516 /* Return the opcode of the special instruction to be used to load
4517 the constant X. */
4519 const char *
4520 standard_80387_constant_opcode (rtx x)
4522 switch (standard_80387_constant_p (x))
4524 case 1:
4525 return "fldz";
4526 case 2:
4527 return "fld1";
4528 case 3:
4529 return "fldlg2";
4530 case 4:
4531 return "fldln2";
4532 case 5:
4533 return "fldl2e";
4534 case 6:
4535 return "fldl2t";
4536 case 7:
4537 return "fldpi";
4539 abort ();
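/* Usage note: the return values of standard_80387_constant_p index this
   table, so for example an XFmode constant equal to pi is classified as
   7 and loaded with a single "fldpi", while 0.0 and 1.0 (classes 1 and 2)
   become "fldz" and "fld1"; anything else has to come from memory. */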
4542 /* Return the CONST_DOUBLE representing the 80387 constant that is
4543 loaded by the specified special instruction. The argument IDX
4544 matches the return value from standard_80387_constant_p. */
4547 standard_80387_constant_rtx (int idx)
4549 int i;
4551 if (! ext_80387_constants_init)
4552 init_ext_80387_constants ();
4554 switch (idx)
4556 case 3:
4557 case 4:
4558 case 5:
4559 case 6:
4560 case 7:
4561 i = idx - 3;
4562 break;
4564 default:
4565 abort ();
4568 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4569 XFmode);
4572 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4575 standard_sse_constant_p (rtx x)
4577 if (x == const0_rtx)
4578 return 1;
4579 return (x == CONST0_RTX (GET_MODE (x)));
4582 /* Returns 1 if OP contains a symbol reference */
4585 symbolic_reference_mentioned_p (rtx op)
4587 const char *fmt;
4588 int i;
4590 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4591 return 1;
4593 fmt = GET_RTX_FORMAT (GET_CODE (op));
4594 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4596 if (fmt[i] == 'E')
4598 int j;
4600 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4601 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4602 return 1;
4605 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4606 return 1;
4609 return 0;
4612 /* Return 1 if it is appropriate to emit `ret' instructions in the
4613 body of a function. Do this only if the epilogue is simple, needing a
4614 couple of insns. Prior to reloading, we can't tell how many registers
4615 must be saved, so return 0 then. Return 0 if there is no frame
4616 marker to de-allocate.
4618 If NON_SAVING_SETJMP is defined and true, then it is not possible
4619 for the epilogue to be simple, so return 0. This is a special case
4620 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4621 until final, but jump_optimize may need to know sooner if a
4622 `return' is OK. */
4625 ix86_can_use_return_insn_p (void)
4627 struct ix86_frame frame;
4629 #ifdef NON_SAVING_SETJMP
4630 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4631 return 0;
4632 #endif
4634 if (! reload_completed || frame_pointer_needed)
4635 return 0;
4637 /* Don't allow popping more than 32K bytes of arguments, since that's
4638 all we handle with one instruction. */
4639 if (current_function_pops_args
4640 && current_function_args_size >= 32768)
4641 return 0;
4643 ix86_compute_frame_layout (&frame);
4644 return frame.to_allocate == 0 && frame.nregs == 0;
4647 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4649 x86_64_sign_extended_value (rtx value)
4651 switch (GET_CODE (value))
4653 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4654 to be at least 32, thus all acceptable constants are
4655 represented as CONST_INT. */
4656 case CONST_INT:
4657 if (HOST_BITS_PER_WIDE_INT == 32)
4658 return 1;
4659 else
4661 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4662 return trunc_int_for_mode (val, SImode) == val;
4664 break;
4666 /* For certain code models, the symbolic references are known to fit:
4667 in the CM_SMALL_PIC model we know it fits if it is local to the shared
4668 library. Don't count TLS SYMBOL_REFs here, since they should fit
4669 only if inside of UNSPEC handled below. */
4670 case SYMBOL_REF:
4671 /* TLS symbols are not constant. */
4672 if (tls_symbolic_operand (value, Pmode))
4673 return false;
4674 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4676 /* For certain code models, the code is near as well. */
4677 case LABEL_REF:
4678 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4679 || ix86_cmodel == CM_KERNEL);
4681 /* We also may accept the offsetted memory references in certain special
4682 cases. */
4683 case CONST:
4684 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4685 switch (XINT (XEXP (value, 0), 1))
4687 case UNSPEC_GOTPCREL:
4688 case UNSPEC_DTPOFF:
4689 case UNSPEC_GOTNTPOFF:
4690 case UNSPEC_NTPOFF:
4691 return 1;
4692 default:
4693 break;
4695 if (GET_CODE (XEXP (value, 0)) == PLUS)
4697 rtx op1 = XEXP (XEXP (value, 0), 0);
4698 rtx op2 = XEXP (XEXP (value, 0), 1);
4699 HOST_WIDE_INT offset;
4701 if (ix86_cmodel == CM_LARGE)
4702 return 0;
4703 if (GET_CODE (op2) != CONST_INT)
4704 return 0;
4705 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4706 switch (GET_CODE (op1))
4708 case SYMBOL_REF:
4709 /* For CM_SMALL assume that the latest object is 16MB before the
4710 end of the 31-bit boundary. We may also accept pretty
4711 large negative constants knowing that all objects are
4712 in the positive half of address space. */
4713 if (ix86_cmodel == CM_SMALL
4714 && offset < 16*1024*1024
4715 && trunc_int_for_mode (offset, SImode) == offset)
4716 return 1;
4717 /* For CM_KERNEL we know that all objects reside in the
4718 negative half of the 32-bit address space. We may not
4719 accept negative offsets, since they may be just off
4720 and we may accept pretty large positive ones. */
4721 if (ix86_cmodel == CM_KERNEL
4722 && offset > 0
4723 && trunc_int_for_mode (offset, SImode) == offset)
4724 return 1;
4725 break;
4726 case LABEL_REF:
4727 /* These conditions are similar to SYMBOL_REF ones, just the
4728 constraints for code models differ. */
4729 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4730 && offset < 16*1024*1024
4731 && trunc_int_for_mode (offset, SImode) == offset)
4732 return 1;
4733 if (ix86_cmodel == CM_KERNEL
4734 && offset > 0
4735 && trunc_int_for_mode (offset, SImode) == offset)
4736 return 1;
4737 break;
4738 case UNSPEC:
4739 switch (XINT (op1, 1))
4741 case UNSPEC_DTPOFF:
4742 case UNSPEC_NTPOFF:
4743 if (offset > 0
4744 && trunc_int_for_mode (offset, SImode) == offset)
4745 return 1;
4747 break;
4748 default:
4749 return 0;
4752 return 0;
4753 default:
4754 return 0;
4758 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4760 x86_64_zero_extended_value (rtx value)
4762 switch (GET_CODE (value))
4764 case CONST_DOUBLE:
4765 if (HOST_BITS_PER_WIDE_INT == 32)
4766 return (GET_MODE (value) == VOIDmode
4767 && !CONST_DOUBLE_HIGH (value));
4768 else
4769 return 0;
4770 case CONST_INT:
4771 if (HOST_BITS_PER_WIDE_INT == 32)
4772 return INTVAL (value) >= 0;
4773 else
4774 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4775 break;
4777 /* For certain code models, the symbolic references are known to fit. */
4778 case SYMBOL_REF:
4779 /* TLS symbols are not constant. */
4780 if (tls_symbolic_operand (value, Pmode))
4781 return false;
4782 return ix86_cmodel == CM_SMALL;
4784 /* For certain code models, the code is near as well. */
4785 case LABEL_REF:
4786 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4788 /* We also may accept the offsetted memory references in certain special
4789 cases. */
4790 case CONST:
4791 if (GET_CODE (XEXP (value, 0)) == PLUS)
4793 rtx op1 = XEXP (XEXP (value, 0), 0);
4794 rtx op2 = XEXP (XEXP (value, 0), 1);
4796 if (ix86_cmodel == CM_LARGE)
4797 return 0;
4798 switch (GET_CODE (op1))
4800 case SYMBOL_REF:
4801 return 0;
4802 /* For the small code model we may accept pretty large positive
4803 offsets, since one bit is available for free. Negative
4804 offsets are limited by the size of the NULL pointer area
4805 specified by the ABI. */
4806 if (ix86_cmodel == CM_SMALL
4807 && GET_CODE (op2) == CONST_INT
4808 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4809 && (trunc_int_for_mode (INTVAL (op2), SImode)
4810 == INTVAL (op2)))
4811 return 1;
4812 /* ??? For the kernel, we may accept adjustment of
4813 -0x10000000, since we know that it will just convert
4814 negative address space to positive, but perhaps this
4815 is not worthwhile. */
4816 break;
4817 case LABEL_REF:
4818 /* These conditions are similar to SYMBOL_REF ones, just the
4819 constraints for code models differ. */
4820 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4821 && GET_CODE (op2) == CONST_INT
4822 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4823 && (trunc_int_for_mode (INTVAL (op2), SImode)
4824 == INTVAL (op2)))
4825 return 1;
4826 break;
4827 default:
4828 return 0;
4831 return 0;
4832 default:
4833 return 0;
4837 /* Value should be nonzero if functions must have frame pointers.
4838 Zero means the frame pointer need not be set up (and parms may
4839 be accessed via the stack pointer) in functions that seem suitable. */
4842 ix86_frame_pointer_required (void)
4844 /* If we accessed previous frames, then the generated code expects
4845 to be able to access the saved ebp value in our frame. */
4846 if (cfun->machine->accesses_prev_frame)
4847 return 1;
4849 /* Several x86 os'es need a frame pointer for other reasons,
4850 usually pertaining to setjmp. */
4851 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4852 return 1;
4854 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4855 the frame pointer by default. Turn it back on now if we've not
4856 got a leaf function. */
4857 if (TARGET_OMIT_LEAF_FRAME_POINTER
4858 && (!current_function_is_leaf))
4859 return 1;
4861 if (current_function_profile)
4862 return 1;
4864 return 0;
4867 /* Record that the current function accesses previous call frames. */
4869 void
4870 ix86_setup_frame_addresses (void)
4872 cfun->machine->accesses_prev_frame = 1;
4875 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4876 # define USE_HIDDEN_LINKONCE 1
4877 #else
4878 # define USE_HIDDEN_LINKONCE 0
4879 #endif
4881 static int pic_labels_used;
4883 /* Fills in the label name that should be used for a pc thunk for
4884 the given register. */
4886 static void
4887 get_pc_thunk_name (char name[32], unsigned int regno)
4889 if (USE_HIDDEN_LINKONCE)
4890 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4891 else
4892 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4896 /* This function generates code for -fpic that loads %ebx with
4897 the return address of the caller and then returns. */
4899 void
4900 ix86_file_end (void)
4902 rtx xops[2];
4903 int regno;
4905 for (regno = 0; regno < 8; ++regno)
4907 char name[32];
4909 if (! ((pic_labels_used >> regno) & 1))
4910 continue;
4912 get_pc_thunk_name (name, regno);
4914 if (USE_HIDDEN_LINKONCE)
4916 tree decl;
4918 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4919 error_mark_node);
4920 TREE_PUBLIC (decl) = 1;
4921 TREE_STATIC (decl) = 1;
4922 DECL_ONE_ONLY (decl) = 1;
4924 (*targetm.asm_out.unique_section) (decl, 0);
4925 named_section (decl, NULL, 0);
4927 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4928 fputs ("\t.hidden\t", asm_out_file);
4929 assemble_name (asm_out_file, name);
4930 fputc ('\n', asm_out_file);
4931 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4933 else
4935 text_section ();
4936 ASM_OUTPUT_LABEL (asm_out_file, name);
4939 xops[0] = gen_rtx_REG (SImode, regno);
4940 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4941 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4942 output_asm_insn ("ret", xops);
4945 if (NEED_INDICATE_EXEC_STACK)
4946 file_end_indicate_exec_stack ();
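/* For reference, the thunk emitted above for %ebx looks roughly like
   (name taken from get_pc_thunk_name in the USE_HIDDEN_LINKONCE case):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address -- the address of the
   instruction following the call -- into the destination register. */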
4949 /* Emit code for the SET_GOT patterns. */
4951 const char *
4952 output_set_got (rtx dest)
4954 rtx xops[3];
4956 xops[0] = dest;
4957 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4959 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4961 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4963 if (!flag_pic)
4964 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4965 else
4966 output_asm_insn ("call\t%a2", xops);
4968 #if TARGET_MACHO
4969 /* Output the "canonical" label name ("Lxx$pb") here too. This
4970 is what will be referred to by the Mach-O PIC subsystem. */
4971 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4972 #endif
4973 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4974 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4976 if (flag_pic)
4977 output_asm_insn ("pop{l}\t%0", xops);
4979 else
4981 char name[32];
4982 get_pc_thunk_name (name, REGNO (dest));
4983 pic_labels_used |= 1 << REGNO (dest);
4985 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4986 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4987 output_asm_insn ("call\t%X2", xops);
4990 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4991 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4992 else if (!TARGET_MACHO)
4993 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4995 return "";
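/* An example of the assembly produced by the code above when flag_pic
   is set and TARGET_DEEP_BRANCH_PREDICTION is not (the label number is
   arbitrary):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction enabled, the call/pop pair is replaced by
   a call to the __i686.get_pc_thunk.* helper emitted by ix86_file_end,
   which keeps the return-address predictor stack balanced. */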
4998 /* Generate an "push" pattern for input ARG. */
5000 static rtx
5001 gen_push (rtx arg)
5003 return gen_rtx_SET (VOIDmode,
5004 gen_rtx_MEM (Pmode,
5005 gen_rtx_PRE_DEC (Pmode,
5006 stack_pointer_rtx)),
5007 arg);
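/* The SET built above is the RTL form of an ordinary push; on a 32-bit
   target it comes out roughly as

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI <arg>))

   with Pmode widening everything to DImode in 64-bit mode. */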
5010 /* Return >= 0 if there is an unused call-clobbered register available
5011 for the entire function. */
5013 static unsigned int
5014 ix86_select_alt_pic_regnum (void)
5016 if (current_function_is_leaf && !current_function_profile)
5018 int i;
5019 for (i = 2; i >= 0; --i)
5020 if (!regs_ever_live[i])
5021 return i;
5024 return INVALID_REGNUM;
5027 /* Return 1 if we need to save REGNO. */
5028 static int
5029 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5031 if (pic_offset_table_rtx
5032 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5033 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5034 || current_function_profile
5035 || current_function_calls_eh_return
5036 || current_function_uses_const_pool))
5038 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5039 return 0;
5040 return 1;
5043 if (current_function_calls_eh_return && maybe_eh_return)
5045 unsigned i;
5046 for (i = 0; ; i++)
5048 unsigned test = EH_RETURN_DATA_REGNO (i);
5049 if (test == INVALID_REGNUM)
5050 break;
5051 if (test == regno)
5052 return 1;
5056 return (regs_ever_live[regno]
5057 && !call_used_regs[regno]
5058 && !fixed_regs[regno]
5059 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5062 /* Return number of registers to be saved on the stack. */
5064 static int
5065 ix86_nsaved_regs (void)
5067 int nregs = 0;
5068 int regno;
5070 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5071 if (ix86_save_reg (regno, true))
5072 nregs++;
5073 return nregs;
5076 /* Return the offset between two registers, one to be eliminated, and the other
5077 its replacement, at the start of a routine. */
5079 HOST_WIDE_INT
5080 ix86_initial_elimination_offset (int from, int to)
5082 struct ix86_frame frame;
5083 ix86_compute_frame_layout (&frame);
5085 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5086 return frame.hard_frame_pointer_offset;
5087 else if (from == FRAME_POINTER_REGNUM
5088 && to == HARD_FRAME_POINTER_REGNUM)
5089 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5090 else
5092 if (to != STACK_POINTER_REGNUM)
5093 abort ();
5094 else if (from == ARG_POINTER_REGNUM)
5095 return frame.stack_pointer_offset;
5096 else if (from != FRAME_POINTER_REGNUM)
5097 abort ();
5098 else
5099 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5103 /* Fill structure ix86_frame about frame of currently computed function. */
5105 static void
5106 ix86_compute_frame_layout (struct ix86_frame *frame)
5108 HOST_WIDE_INT total_size;
5109 unsigned int stack_alignment_needed;
5110 HOST_WIDE_INT offset;
5111 unsigned int preferred_alignment;
5112 HOST_WIDE_INT size = get_frame_size ();
5114 frame->nregs = ix86_nsaved_regs ();
5115 total_size = size;
5117 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5118 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5120 /* During reload iterations the number of registers saved can change.
5121 Recompute the value as needed. Do not recompute when the number of registers
5122 didn't change, as reload makes multiple calls to the function and does not
5123 expect the decision to change within a single iteration. */
5124 if (!optimize_size
5125 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5127 int count = frame->nregs;
5129 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5130 /* The fast prologue uses move instead of push to save registers. This
5131 is significantly longer, but also executes faster as modern hardware
5132 can execute the moves in parallel, but can't do that for push/pop.
5134 Be careful about choosing what prologue to emit: when the function takes
5135 many instructions to execute, we may use the slow version, as well as when
5136 the function is known to be outside a hot spot (this is known only with
5137 profile feedback). Weight the size of the function by the number of registers
5138 to save, as it is cheap to use one or two push instructions but very
5139 slow to use many of them. */
5140 if (count)
5141 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5142 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5143 || (flag_branch_probabilities
5144 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5145 cfun->machine->use_fast_prologue_epilogue = false;
5146 else
5147 cfun->machine->use_fast_prologue_epilogue
5148 = !expensive_function_p (count);
5150 if (TARGET_PROLOGUE_USING_MOVE
5151 && cfun->machine->use_fast_prologue_epilogue)
5152 frame->save_regs_using_mov = true;
5153 else
5154 frame->save_regs_using_mov = false;
5157 /* Skip return address and saved base pointer. */
5158 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5160 frame->hard_frame_pointer_offset = offset;
5162 /* Do some sanity checking of stack_alignment_needed and
5163 preferred_alignment, since the i386 port is the only one using these features,
5164 which may break easily. */
5166 if (size && !stack_alignment_needed)
5167 abort ();
5168 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5169 abort ();
5170 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5171 abort ();
5172 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5173 abort ();
5175 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5176 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5178 /* Register save area */
5179 offset += frame->nregs * UNITS_PER_WORD;
5181 /* Va-arg area */
5182 if (ix86_save_varrargs_registers)
5184 offset += X86_64_VARARGS_SIZE;
5185 frame->va_arg_size = X86_64_VARARGS_SIZE;
5187 else
5188 frame->va_arg_size = 0;
5190 /* Align start of frame for local function. */
5191 frame->padding1 = ((offset + stack_alignment_needed - 1)
5192 & -stack_alignment_needed) - offset;
5194 offset += frame->padding1;
5196 /* Frame pointer points here. */
5197 frame->frame_pointer_offset = offset;
5199 offset += size;
5201 /* Add outgoing arguments area. Can be skipped if we eliminated
5202 all the function calls as dead code.
5203 Skipping is, however, impossible when the function calls alloca: the alloca
5204 expander assumes that the last current_function_outgoing_args_size bytes
5205 of the stack frame are unused. */
5206 if (ACCUMULATE_OUTGOING_ARGS
5207 && (!current_function_is_leaf || current_function_calls_alloca))
5209 offset += current_function_outgoing_args_size;
5210 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5212 else
5213 frame->outgoing_arguments_size = 0;
5215 /* Align stack boundary. Only needed if we're calling another function
5216 or using alloca. */
5217 if (!current_function_is_leaf || current_function_calls_alloca)
5218 frame->padding2 = ((offset + preferred_alignment - 1)
5219 & -preferred_alignment) - offset;
5220 else
5221 frame->padding2 = 0;
5223 offset += frame->padding2;
5225 /* We've reached end of stack frame. */
5226 frame->stack_pointer_offset = offset;
5228 /* Size prologue needs to allocate. */
5229 frame->to_allocate =
5230 (size + frame->padding1 + frame->padding2
5231 + frame->outgoing_arguments_size + frame->va_arg_size);
5233 if ((!frame->to_allocate && frame->nregs <= 1)
5234 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5235 frame->save_regs_using_mov = false;
5237 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5238 && current_function_is_leaf)
5240 frame->red_zone_size = frame->to_allocate;
5241 if (frame->save_regs_using_mov)
5242 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5243 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5244 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5246 else
5247 frame->red_zone_size = 0;
5248 frame->to_allocate -= frame->red_zone_size;
5249 frame->stack_pointer_offset -= frame->red_zone_size;
5250 #if 0
5251 fprintf (stderr, "nregs: %i\n", frame->nregs);
5252 fprintf (stderr, "size: %i\n", size);
5253 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5254 fprintf (stderr, "padding1: %i\n", frame->padding1);
5255 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5256 fprintf (stderr, "padding2: %i\n", frame->padding2);
5257 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5258 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5259 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5260 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5261 frame->hard_frame_pointer_offset);
5262 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5263 #endif
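/* A rough picture of the layout just computed, assuming a frame pointer
   is used; addresses decrease downwards and each label on the right is
   the value the running "offset" has reached at that point:

	return address
	saved %ebp/%rbp			hard_frame_pointer_offset
	saved registers (nregs words)
	va-arg register save area
	padding1			frame_pointer_offset
	local variables (size)
	outgoing argument area
	padding2			stack_pointer_offset

   to_allocate is the portion below the saved registers that the prologue
   must actually subtract from the stack pointer, reduced by whatever fits
   in the red zone when the red zone is usable. */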
5266 /* Emit code to save registers in the prologue. */
5268 static void
5269 ix86_emit_save_regs (void)
5271 int regno;
5272 rtx insn;
5274 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5275 if (ix86_save_reg (regno, true))
5277 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5278 RTX_FRAME_RELATED_P (insn) = 1;
5282 /* Emit code to save registers using MOV insns. First register
5283 is restored from POINTER + OFFSET. */
5284 static void
5285 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5287 int regno;
5288 rtx insn;
5290 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5291 if (ix86_save_reg (regno, true))
5293 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5294 Pmode, offset),
5295 gen_rtx_REG (Pmode, regno));
5296 RTX_FRAME_RELATED_P (insn) = 1;
5297 offset += UNITS_PER_WORD;
5301 /* Expand prologue or epilogue stack adjustment.
5302 The pattern exists to put a dependency on all ebp-based memory accesses.
5303 STYLE should be negative if instructions should be marked as frame related,
5304 zero if %r11 register is live and cannot be freely used and positive
5305 otherwise. */
5307 static void
5308 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5310 rtx insn;
5312 if (! TARGET_64BIT)
5313 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5314 else if (x86_64_immediate_operand (offset, DImode))
5315 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5316 else
5318 rtx r11;
5319 /* r11 is used by indirect sibcall return as well, set before the
5320 epilogue and used after the epilogue. ATM indirect sibcall
5321 shouldn't be used together with huge frame sizes in one
5322 function because of the frame_size check in sibcall.c. */
5323 if (style == 0)
5324 abort ();
5325 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5326 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5327 if (style < 0)
5328 RTX_FRAME_RELATED_P (insn) = 1;
5329 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5330 offset));
5332 if (style < 0)
5333 RTX_FRAME_RELATED_P (insn) = 1;
5336 /* Expand the prologue into a bunch of separate insns. */
5338 void
5339 ix86_expand_prologue (void)
5341 rtx insn;
5342 bool pic_reg_used;
5343 struct ix86_frame frame;
5344 HOST_WIDE_INT allocate;
5346 ix86_compute_frame_layout (&frame);
5348 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5349 slower on all targets. Also sdb doesn't like it. */
5351 if (frame_pointer_needed)
5353 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5354 RTX_FRAME_RELATED_P (insn) = 1;
5356 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5357 RTX_FRAME_RELATED_P (insn) = 1;
5360 allocate = frame.to_allocate;
5362 if (!frame.save_regs_using_mov)
5363 ix86_emit_save_regs ();
5364 else
5365 allocate += frame.nregs * UNITS_PER_WORD;
5367 /* When using the red zone we may start register saving before allocating
5368 the stack frame, saving one cycle of the prologue. */
5369 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5370 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5371 : stack_pointer_rtx,
5372 -frame.nregs * UNITS_PER_WORD);
5374 if (allocate == 0)
5376 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5377 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5378 GEN_INT (-allocate), -1);
5379 else
5381 /* Only valid for Win32. */
5382 rtx eax = gen_rtx_REG (SImode, 0);
5383 bool eax_live = ix86_eax_live_at_start_p ();
5385 if (TARGET_64BIT)
5386 abort ();
5388 if (eax_live)
5390 emit_insn (gen_push (eax));
5391 allocate -= 4;
5394 insn = emit_move_insn (eax, GEN_INT (allocate));
5395 RTX_FRAME_RELATED_P (insn) = 1;
5397 insn = emit_insn (gen_allocate_stack_worker (eax));
5398 RTX_FRAME_RELATED_P (insn) = 1;
5400 if (eax_live)
5402 rtx t;
5403 if (frame_pointer_needed)
5404 t = plus_constant (hard_frame_pointer_rtx,
5405 allocate
5406 - frame.to_allocate
5407 - frame.nregs * UNITS_PER_WORD);
5408 else
5409 t = plus_constant (stack_pointer_rtx, allocate);
5410 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5414 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5416 if (!frame_pointer_needed || !frame.to_allocate)
5417 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5418 else
5419 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5420 -frame.nregs * UNITS_PER_WORD);
5423 pic_reg_used = false;
5424 if (pic_offset_table_rtx
5425 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5426 || current_function_profile))
5428 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5430 if (alt_pic_reg_used != INVALID_REGNUM)
5431 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5433 pic_reg_used = true;
5436 if (pic_reg_used)
5438 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5440 /* Even with accurate pre-reload life analysis, we can wind up
5441 deleting all references to the pic register after reload.
5442 Consider if cross-jumping unifies two sides of a branch
5443 controlled by a comparison vs the only read from a global.
5444 In which case, allow the set_got to be deleted, though we're
5445 too late to do anything about the ebx save in the prologue. */
5446 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5449 /* Prevent function calls from being scheduled before the call to mcount.
5450 In the pic_reg_used case, make sure that the got load isn't deleted. */
5451 if (current_function_profile)
5452 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
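/* To make the expansion above concrete, a typical 32-bit prologue for a
   small function that needs a frame pointer, saves %ebx and allocates N
   bytes of locals comes out as (details vary with tuning and with
   save_regs_using_mov):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$N, %esp

   The fast-prologue variant replaces the pushes of call-saved registers
   by a larger subtraction followed by movl stores. */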
5455 /* Emit code to restore saved registers using MOV insns. First register
5456 is restored from POINTER + OFFSET. */
5457 static void
5458 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5459 int maybe_eh_return)
5461 int regno;
5462 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5464 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5465 if (ix86_save_reg (regno, maybe_eh_return))
5467 /* Ensure that adjust_address won't be forced to produce a pointer
5468 out of the range allowed by the x86-64 instruction set. */
5469 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5471 rtx r11;
5473 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5474 emit_move_insn (r11, GEN_INT (offset));
5475 emit_insn (gen_adddi3 (r11, r11, pointer));
5476 base_address = gen_rtx_MEM (Pmode, r11);
5477 offset = 0;
5479 emit_move_insn (gen_rtx_REG (Pmode, regno),
5480 adjust_address (base_address, Pmode, offset));
5481 offset += UNITS_PER_WORD;
5485 /* Restore function stack, frame, and registers. */
5487 void
5488 ix86_expand_epilogue (int style)
5490 int regno;
5491 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5492 struct ix86_frame frame;
5493 HOST_WIDE_INT offset;
5495 ix86_compute_frame_layout (&frame);
5497 /* Calculate start of saved registers relative to ebp. Special care
5498 must be taken for the normal return case of a function using
5499 eh_return: the eax and edx registers are marked as saved, but not
5500 restored along this path. */
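/* For instance (purely illustrative): with frame.nregs == 3 on a 32-bit
   target (UNITS_PER_WORD == 4) and no eh_return, the computation below
   yields offset == -12, the starting offset used for the mov-based
   restores relative to the frame pointer.  */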
5501 offset = frame.nregs;
5502 if (current_function_calls_eh_return && style != 2)
5503 offset -= 2;
5504 offset *= -UNITS_PER_WORD;
5506 /* If we're only restoring one register and sp is not valid then
5507 use a move instruction to restore the register, since it's
5508 less work than reloading sp and popping the register.
5510 The default code results in a stack adjustment using an add/lea instruction,
5511 while this code results in a LEAVE instruction (or discrete equivalent),
5512 so it is profitable in some other cases as well. Especially when there
5513 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5514 and there is exactly one register to pop. This heuristic may need some
5515 tuning in the future. */
5516 if ((!sp_valid && frame.nregs <= 1)
5517 || (TARGET_EPILOGUE_USING_MOVE
5518 && cfun->machine->use_fast_prologue_epilogue
5519 && (frame.nregs > 1 || frame.to_allocate))
5520 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5521 || (frame_pointer_needed && TARGET_USE_LEAVE
5522 && cfun->machine->use_fast_prologue_epilogue
5523 && frame.nregs == 1)
5524 || current_function_calls_eh_return)
5526 /* Restore registers. We can use ebp or esp to address the memory
5527 locations. If both are available, default to ebp, since offsets
5528 are known to be small. The only exception is esp pointing directly to the
5529 end of the block of saved registers, where we may simplify the addressing
5530 mode. */
5532 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5533 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5534 frame.to_allocate, style == 2);
5535 else
5536 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5537 offset, style == 2);
5539 /* eh_return epilogues need %ecx added to the stack pointer. */
5540 if (style == 2)
5542 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5544 if (frame_pointer_needed)
5546 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5547 tmp = plus_constant (tmp, UNITS_PER_WORD);
5548 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5550 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5551 emit_move_insn (hard_frame_pointer_rtx, tmp);
5553 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5554 const0_rtx, style);
5556 else
5558 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5559 tmp = plus_constant (tmp, (frame.to_allocate
5560 + frame.nregs * UNITS_PER_WORD));
5561 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5564 else if (!frame_pointer_needed)
5565 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5566 GEN_INT (frame.to_allocate
5567 + frame.nregs * UNITS_PER_WORD),
5568 style);
5569 /* If not an i386, mov & pop is faster than "leave". */
5570 else if (TARGET_USE_LEAVE || optimize_size
5571 || !cfun->machine->use_fast_prologue_epilogue)
5572 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5573 else
5575 pro_epilogue_adjust_stack (stack_pointer_rtx,
5576 hard_frame_pointer_rtx,
5577 const0_rtx, style);
5578 if (TARGET_64BIT)
5579 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5580 else
5581 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5584 else
5586 /* First step is to deallocate the stack frame so that we can
5587 pop the registers. */
5588 if (!sp_valid)
5590 if (!frame_pointer_needed)
5591 abort ();
5592 pro_epilogue_adjust_stack (stack_pointer_rtx,
5593 hard_frame_pointer_rtx,
5594 GEN_INT (offset), style);
5596 else if (frame.to_allocate)
5597 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5598 GEN_INT (frame.to_allocate), style);
5600 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5601 if (ix86_save_reg (regno, false))
5603 if (TARGET_64BIT)
5604 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5605 else
5606 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5608 if (frame_pointer_needed)
5610 /* Leave results in shorter dependency chains on CPUs that are
5611 able to grok it fast. */
5612 if (TARGET_USE_LEAVE)
5613 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5614 else if (TARGET_64BIT)
5615 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5616 else
5617 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5621 /* Sibcall epilogues don't want a return instruction. */
5622 if (style == 0)
5623 return;
5625 if (current_function_pops_args && current_function_args_size)
5627 rtx popc = GEN_INT (current_function_pops_args);
5629 /* i386 can only pop 64K bytes. If asked to pop more, pop
5630 return address, do explicit add, and jump indirectly to the
5631 caller. */
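/* Illustrative sketch of the sequence emitted below for this case, assuming
   AT&T syntax on a 32-bit target (N stands for the pop count):
	popl	%ecx		# fetch the return address
	addl	$N, %esp	# pop the >= 64K bytes of arguments explicitly
	jmp	*%ecx		# return to the caller indirectly  */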
5633 if (current_function_pops_args >= 65536)
5635 rtx ecx = gen_rtx_REG (SImode, 2);
5637 /* There is no "pascal" calling convention in 64bit ABI. */
5638 if (TARGET_64BIT)
5639 abort ();
5641 emit_insn (gen_popsi1 (ecx));
5642 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5643 emit_jump_insn (gen_return_indirect_internal (ecx));
5645 else
5646 emit_jump_insn (gen_return_pop_internal (popc));
5648 else
5649 emit_jump_insn (gen_return_internal ());
5652 /* Reset from the function's potential modifications. */
5654 static void
5655 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5656 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5658 if (pic_offset_table_rtx)
5659 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5662 /* Extract the parts of an RTL expression that is a valid memory address
5663 for an instruction. Return 0 if the structure of the address is
5664 grossly off. Return -1 if the address contains ASHIFT, so it is not
5665 strictly valid, but is still used for computing the length of a lea instruction. */
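/* For example (illustrative only): the address 12(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12 and
   seg = SEG_DEFAULT.  */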
5667 static int
5668 ix86_decompose_address (rtx addr, struct ix86_address *out)
5670 rtx base = NULL_RTX;
5671 rtx index = NULL_RTX;
5672 rtx disp = NULL_RTX;
5673 HOST_WIDE_INT scale = 1;
5674 rtx scale_rtx = NULL_RTX;
5675 int retval = 1;
5676 enum ix86_address_seg seg = SEG_DEFAULT;
5678 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5679 base = addr;
5680 else if (GET_CODE (addr) == PLUS)
5682 rtx addends[4], op;
5683 int n = 0, i;
5685 op = addr;
5688 if (n >= 4)
5689 return 0;
5690 addends[n++] = XEXP (op, 1);
5691 op = XEXP (op, 0);
5693 while (GET_CODE (op) == PLUS);
5694 if (n >= 4)
5695 return 0;
5696 addends[n] = op;
5698 for (i = n; i >= 0; --i)
5700 op = addends[i];
5701 switch (GET_CODE (op))
5703 case MULT:
5704 if (index)
5705 return 0;
5706 index = XEXP (op, 0);
5707 scale_rtx = XEXP (op, 1);
5708 break;
5710 case UNSPEC:
5711 if (XINT (op, 1) == UNSPEC_TP
5712 && TARGET_TLS_DIRECT_SEG_REFS
5713 && seg == SEG_DEFAULT)
5714 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5715 else
5716 return 0;
5717 break;
5719 case REG:
5720 case SUBREG:
5721 if (!base)
5722 base = op;
5723 else if (!index)
5724 index = op;
5725 else
5726 return 0;
5727 break;
5729 case CONST:
5730 case CONST_INT:
5731 case SYMBOL_REF:
5732 case LABEL_REF:
5733 if (disp)
5734 return 0;
5735 disp = op;
5736 break;
5738 default:
5739 return 0;
5743 else if (GET_CODE (addr) == MULT)
5745 index = XEXP (addr, 0); /* index*scale */
5746 scale_rtx = XEXP (addr, 1);
5748 else if (GET_CODE (addr) == ASHIFT)
5750 rtx tmp;
5752 /* We're called for lea too, which implements ashift on occasion. */
5753 index = XEXP (addr, 0);
5754 tmp = XEXP (addr, 1);
5755 if (GET_CODE (tmp) != CONST_INT)
5756 return 0;
5757 scale = INTVAL (tmp);
5758 if ((unsigned HOST_WIDE_INT) scale > 3)
5759 return 0;
5760 scale = 1 << scale;
5761 retval = -1;
5763 else
5764 disp = addr; /* displacement */
5766 /* Extract the integral value of scale. */
5767 if (scale_rtx)
5769 if (GET_CODE (scale_rtx) != CONST_INT)
5770 return 0;
5771 scale = INTVAL (scale_rtx);
5774 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5775 if (base && index && scale == 1
5776 && (index == arg_pointer_rtx
5777 || index == frame_pointer_rtx
5778 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5780 rtx tmp = base;
5781 base = index;
5782 index = tmp;
5785 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5786 if ((base == hard_frame_pointer_rtx
5787 || base == frame_pointer_rtx
5788 || base == arg_pointer_rtx) && !disp)
5789 disp = const0_rtx;
5791 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5792 Avoid this by transforming to [%esi+0]. */
5793 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5794 && base && !index && !disp
5795 && REG_P (base)
5796 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5797 disp = const0_rtx;
5799 /* Special case: encode reg+reg instead of reg*2. */
5800 if (!base && index && scale && scale == 2)
5801 base = index, scale = 1;
5803 /* Special case: scaling cannot be encoded without base or displacement. */
5804 if (!base && !disp && index && scale != 1)
5805 disp = const0_rtx;
5807 out->base = base;
5808 out->index = index;
5809 out->disp = disp;
5810 out->scale = scale;
5811 out->seg = seg;
5813 return retval;
5816 /* Return cost of the memory address x.
5817 For i386, it is better to use a complex address than let gcc copy
5818 the address into a reg and make a new pseudo. But not if the address
5819 requires two regs - that would mean more pseudos with longer
5820 lifetimes. */
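/* Rough illustration of the resulting costs (not exact timings): a bare
   symbolic displacement such as `foo' ends up with cost 0, while an address
   built from two distinct pseudo registers, e.g.
   (plus (reg pseudo1) (reg pseudo2)), ends up with cost 3, discouraging
   addresses that keep two registers live.  */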
5821 static int
5822 ix86_address_cost (rtx x)
5824 struct ix86_address parts;
5825 int cost = 1;
5827 if (!ix86_decompose_address (x, &parts))
5828 abort ();
5830 /* More complex memory references are better. */
5831 if (parts.disp && parts.disp != const0_rtx)
5832 cost--;
5833 if (parts.seg != SEG_DEFAULT)
5834 cost--;
5836 /* Attempt to minimize number of registers in the address. */
5837 if ((parts.base
5838 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5839 || (parts.index
5840 && (!REG_P (parts.index)
5841 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5842 cost++;
5844 if (parts.base
5845 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5846 && parts.index
5847 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5848 && parts.base != parts.index)
5849 cost++;
5851 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5852 since its predecode logic can't detect the length of such instructions
5853 and decoding degenerates to vector decoding. Increase the cost of such
5854 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5855 to split such addresses or even refuse such addresses at all.
5857 Following addressing modes are affected:
5858 [base+scale*index]
5859 [scale*index+disp]
5860 [base+index]
5862 The first and last case may be avoidable by explicitly coding the zero in
5863 the memory address, but I don't have an AMD-K6 machine handy to check this
5864 theory. */
5866 if (TARGET_K6
5867 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5868 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5869 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5870 cost += 10;
5872 return cost;
5875 /* If X is a machine specific address (i.e. a symbol or label being
5876 referenced as a displacement from the GOT implemented using an
5877 UNSPEC), then return the base term. Otherwise return X. */
5880 ix86_find_base_term (rtx x)
5882 rtx term;
5884 if (TARGET_64BIT)
5886 if (GET_CODE (x) != CONST)
5887 return x;
5888 term = XEXP (x, 0);
5889 if (GET_CODE (term) == PLUS
5890 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5891 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5892 term = XEXP (term, 0);
5893 if (GET_CODE (term) != UNSPEC
5894 || XINT (term, 1) != UNSPEC_GOTPCREL)
5895 return x;
5897 term = XVECEXP (term, 0, 0);
5899 if (GET_CODE (term) != SYMBOL_REF
5900 && GET_CODE (term) != LABEL_REF)
5901 return x;
5903 return term;
5906 term = ix86_delegitimize_address (x);
5908 if (GET_CODE (term) != SYMBOL_REF
5909 && GET_CODE (term) != LABEL_REF)
5910 return x;
5912 return term;
5915 /* Determine if a given RTX is a valid constant. We already know this
5916 satisfies CONSTANT_P. */
5918 bool
5919 legitimate_constant_p (rtx x)
5921 rtx inner;
5923 switch (GET_CODE (x))
5925 case SYMBOL_REF:
5926 /* TLS symbols are not constant. */
5927 if (tls_symbolic_operand (x, Pmode))
5928 return false;
5929 break;
5931 case CONST:
5932 inner = XEXP (x, 0);
5934 /* Offsets of TLS symbols are never valid.
5935 Discourage CSE from creating them. */
5936 if (GET_CODE (inner) == PLUS
5937 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5938 return false;
5940 if (GET_CODE (inner) == PLUS
5941 || GET_CODE (inner) == MINUS)
5943 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5944 return false;
5945 inner = XEXP (inner, 0);
5948 /* Only some unspecs are valid as "constants". */
5949 if (GET_CODE (inner) == UNSPEC)
5950 switch (XINT (inner, 1))
5952 case UNSPEC_TPOFF:
5953 case UNSPEC_NTPOFF:
5954 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5955 case UNSPEC_DTPOFF:
5956 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5957 default:
5958 return false;
5960 break;
5962 default:
5963 break;
5966 /* Otherwise we handle everything else in the move patterns. */
5967 return true;
5970 /* Determine if it's legal to put X into the constant pool. This
5971 is not possible for the address of thread-local symbols, which
5972 is checked above. */
5974 static bool
5975 ix86_cannot_force_const_mem (rtx x)
5977 return !legitimate_constant_p (x);
5980 /* Determine if a given RTX is a valid constant address. */
5982 bool
5983 constant_address_p (rtx x)
5985 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5988 /* Nonzero if the constant value X is a legitimate general operand
5989 when generating PIC code. It is given that flag_pic is on and
5990 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5992 bool
5993 legitimate_pic_operand_p (rtx x)
5995 rtx inner;
5997 switch (GET_CODE (x))
5999 case CONST:
6000 inner = XEXP (x, 0);
6002 /* Only some unspecs are valid as "constants". */
6003 if (GET_CODE (inner) == UNSPEC)
6004 switch (XINT (inner, 1))
6006 case UNSPEC_TPOFF:
6007 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
6008 default:
6009 return false;
6011 /* FALLTHRU */
6013 case SYMBOL_REF:
6014 case LABEL_REF:
6015 return legitimate_pic_address_disp_p (x);
6017 default:
6018 return true;
6022 /* Determine if a given CONST RTX is a valid memory displacement
6023 in PIC mode. */
6026 legitimate_pic_address_disp_p (rtx disp)
6028 bool saw_plus;
6030 /* In 64bit mode we can allow direct addresses of symbols and labels
6031 when they are not dynamic symbols. */
6032 if (TARGET_64BIT)
6034 /* TLS references should always be enclosed in UNSPEC. */
6035 if (tls_symbolic_operand (disp, GET_MODE (disp)))
6036 return 0;
6037 if (GET_CODE (disp) == SYMBOL_REF
6038 && ix86_cmodel == CM_SMALL_PIC
6039 && SYMBOL_REF_LOCAL_P (disp))
6040 return 1;
6041 if (GET_CODE (disp) == LABEL_REF)
6042 return 1;
6043 if (GET_CODE (disp) == CONST
6044 && GET_CODE (XEXP (disp, 0)) == PLUS)
6046 rtx op0 = XEXP (XEXP (disp, 0), 0);
6047 rtx op1 = XEXP (XEXP (disp, 0), 1);
6049 /* TLS references should always be enclosed in UNSPEC. */
6050 if (tls_symbolic_operand (op0, GET_MODE (op0)))
6051 return 0;
6052 if (((GET_CODE (op0) == SYMBOL_REF
6053 && ix86_cmodel == CM_SMALL_PIC
6054 && SYMBOL_REF_LOCAL_P (op0))
6055 || GET_CODE (op0) == LABEL_REF)
6056 && GET_CODE (op1) == CONST_INT
6057 && INTVAL (op1) < 16*1024*1024
6058 && INTVAL (op1) >= -16*1024*1024)
6059 return 1;
6062 if (GET_CODE (disp) != CONST)
6063 return 0;
6064 disp = XEXP (disp, 0);
6066 if (TARGET_64BIT)
6068 /* It is unsafe to allow PLUS expressions; this limits the allowed
6069 distance of GOT tables. We should not need these anyway. */
6070 if (GET_CODE (disp) != UNSPEC
6071 || XINT (disp, 1) != UNSPEC_GOTPCREL)
6072 return 0;
6074 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6075 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6076 return 0;
6077 return 1;
6080 saw_plus = false;
6081 if (GET_CODE (disp) == PLUS)
6083 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6084 return 0;
6085 disp = XEXP (disp, 0);
6086 saw_plus = true;
6089 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6090 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6092 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6093 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6094 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6096 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6097 if (! strcmp (sym_name, "<pic base>"))
6098 return 1;
6102 if (GET_CODE (disp) != UNSPEC)
6103 return 0;
6105 switch (XINT (disp, 1))
6107 case UNSPEC_GOT:
6108 if (saw_plus)
6109 return false;
6110 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6111 case UNSPEC_GOTOFF:
6112 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6113 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6114 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6115 return false;
6116 case UNSPEC_GOTTPOFF:
6117 case UNSPEC_GOTNTPOFF:
6118 case UNSPEC_INDNTPOFF:
6119 if (saw_plus)
6120 return false;
6121 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6122 case UNSPEC_NTPOFF:
6123 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6124 case UNSPEC_DTPOFF:
6125 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6128 return 0;
6131 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6132 memory address for an instruction. The MODE argument is the machine mode
6133 for the MEM expression that wants to use this address.
6135 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6136 convert common non-canonical forms to canonical form so that they will
6137 be recognized. */
6140 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6142 struct ix86_address parts;
6143 rtx base, index, disp;
6144 HOST_WIDE_INT scale;
6145 const char *reason = NULL;
6146 rtx reason_rtx = NULL_RTX;
6148 if (TARGET_DEBUG_ADDR)
6150 fprintf (stderr,
6151 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6152 GET_MODE_NAME (mode), strict);
6153 debug_rtx (addr);
6156 if (ix86_decompose_address (addr, &parts) <= 0)
6158 reason = "decomposition failed";
6159 goto report_error;
6162 base = parts.base;
6163 index = parts.index;
6164 disp = parts.disp;
6165 scale = parts.scale;
6167 /* Validate base register.
6169 Don't allow SUBREGs here; they can lead to spill failures when the base
6170 is one word out of a two word structure, which is represented internally
6171 as a DImode int. */
6173 if (base)
6175 reason_rtx = base;
6177 if (GET_CODE (base) != REG)
6179 reason = "base is not a register";
6180 goto report_error;
6183 if (GET_MODE (base) != Pmode)
6185 reason = "base is not in Pmode";
6186 goto report_error;
6189 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6190 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6192 reason = "base is not valid";
6193 goto report_error;
6197 /* Validate index register.
6199 Don't allow SUBREGs here; they can lead to spill failures when the index
6200 is one word out of a two word structure, which is represented internally
6201 as a DImode int. */
6203 if (index)
6205 reason_rtx = index;
6207 if (GET_CODE (index) != REG)
6209 reason = "index is not a register";
6210 goto report_error;
6213 if (GET_MODE (index) != Pmode)
6215 reason = "index is not in Pmode";
6216 goto report_error;
6219 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6220 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6222 reason = "index is not valid";
6223 goto report_error;
6227 /* Validate scale factor. */
6228 if (scale != 1)
6230 reason_rtx = GEN_INT (scale);
6231 if (!index)
6233 reason = "scale without index";
6234 goto report_error;
6237 if (scale != 2 && scale != 4 && scale != 8)
6239 reason = "scale is not a valid multiplier";
6240 goto report_error;
6244 /* Validate displacement. */
6245 if (disp)
6247 reason_rtx = disp;
6249 if (GET_CODE (disp) == CONST
6250 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6251 switch (XINT (XEXP (disp, 0), 1))
6253 case UNSPEC_GOT:
6254 case UNSPEC_GOTOFF:
6255 case UNSPEC_GOTPCREL:
6256 if (!flag_pic)
6257 abort ();
6258 goto is_legitimate_pic;
6260 case UNSPEC_GOTTPOFF:
6261 case UNSPEC_GOTNTPOFF:
6262 case UNSPEC_INDNTPOFF:
6263 case UNSPEC_NTPOFF:
6264 case UNSPEC_DTPOFF:
6265 break;
6267 default:
6268 reason = "invalid address unspec";
6269 goto report_error;
6272 else if (flag_pic && (SYMBOLIC_CONST (disp)
6273 #if TARGET_MACHO
6274 && !machopic_operand_p (disp)
6275 #endif
6278 is_legitimate_pic:
6279 if (TARGET_64BIT && (index || base))
6281 /* foo@dtpoff(%rX) is ok. */
6282 if (GET_CODE (disp) != CONST
6283 || GET_CODE (XEXP (disp, 0)) != PLUS
6284 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6285 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6286 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6287 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6289 reason = "non-constant pic memory reference";
6290 goto report_error;
6293 else if (! legitimate_pic_address_disp_p (disp))
6295 reason = "displacement is an invalid pic construct";
6296 goto report_error;
6299 /* This code used to verify that a symbolic pic displacement
6300 includes the pic_offset_table_rtx register.
6302 While this is a good idea, unfortunately these constructs may
6303 be created by "adds using lea" optimization for incorrect
6304 code like:
6306 int a;
6307 int foo(int i)
6309 return *(&a+i);
6312 This code is nonsensical, but results in addressing the
6313 GOT table with the pic_offset_table_rtx base. We can't
6314 just refuse it easily, since it gets matched by the
6315 "addsi3" pattern, which later gets split to lea in the
6316 case the output register differs from the input. While this
6317 could be handled by a separate addsi pattern for this case
6318 that never results in lea, disabling this test seems to be
6319 the easier and correct fix for the crash. */
6321 else if (GET_CODE (disp) != LABEL_REF
6322 && GET_CODE (disp) != CONST_INT
6323 && (GET_CODE (disp) != CONST
6324 || !legitimate_constant_p (disp))
6325 && (GET_CODE (disp) != SYMBOL_REF
6326 || !legitimate_constant_p (disp)))
6328 reason = "displacement is not constant";
6329 goto report_error;
6331 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6333 reason = "displacement is out of range";
6334 goto report_error;
6338 /* Everything looks valid. */
6339 if (TARGET_DEBUG_ADDR)
6340 fprintf (stderr, "Success.\n");
6341 return TRUE;
6343 report_error:
6344 if (TARGET_DEBUG_ADDR)
6346 fprintf (stderr, "Error: %s\n", reason);
6347 debug_rtx (reason_rtx);
6349 return FALSE;
6352 /* Return a unique alias set for the GOT. */
6354 static HOST_WIDE_INT
6355 ix86_GOT_alias_set (void)
6357 static HOST_WIDE_INT set = -1;
6358 if (set == -1)
6359 set = new_alias_set ();
6360 return set;
6363 /* Return a legitimate reference for ORIG (an address) using the
6364 register REG. If REG is 0, a new pseudo is generated.
6366 There are two types of references that must be handled:
6368 1. Global data references must load the address from the GOT, via
6369 the PIC reg. An insn is emitted to do this load, and the reg is
6370 returned.
6372 2. Static data references, constant pool addresses, and code labels
6373 compute the address as an offset from the GOT, whose base is in
6374 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6375 differentiate them from global data objects. The returned
6376 address is the PIC reg + an unspec constant.
6378 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6379 reg also appears in the address. */
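/* Illustrative sketch of the resulting code, assuming AT&T syntax: a global
   `bar' is loaded on 32-bit targets with
	movl	bar@GOT(%ebx), %reg
   while a file-local `baz' is addressed directly as baz@GOTOFF(%ebx).
   On x86-64 a global instead becomes a load from bar@GOTPCREL(%rip).  */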
6381 static rtx
6382 legitimize_pic_address (rtx orig, rtx reg)
6384 rtx addr = orig;
6385 rtx new = orig;
6386 rtx base;
6388 #if TARGET_MACHO
6389 if (reg == 0)
6390 reg = gen_reg_rtx (Pmode);
6391 /* Use the generic Mach-O PIC machinery. */
6392 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6393 #endif
6395 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6396 new = addr;
6397 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6399 /* This symbol may be referenced via a displacement from the PIC
6400 base address (@GOTOFF). */
6402 if (reload_in_progress)
6403 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6404 if (GET_CODE (addr) == CONST)
6405 addr = XEXP (addr, 0);
6406 if (GET_CODE (addr) == PLUS)
6408 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6409 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6411 else
6412 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6413 new = gen_rtx_CONST (Pmode, new);
6414 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6416 if (reg != 0)
6418 emit_move_insn (reg, new);
6419 new = reg;
6422 else if (GET_CODE (addr) == SYMBOL_REF)
6424 if (TARGET_64BIT)
6426 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6427 new = gen_rtx_CONST (Pmode, new);
6428 new = gen_rtx_MEM (Pmode, new);
6429 RTX_UNCHANGING_P (new) = 1;
6430 set_mem_alias_set (new, ix86_GOT_alias_set ());
6432 if (reg == 0)
6433 reg = gen_reg_rtx (Pmode);
6434 /* Use gen_movsi directly, otherwise the address is loaded
6435 into a register for CSE. We don't want to CSE these addresses;
6436 instead we CSE addresses from the GOT table, so skip this. */
6437 emit_insn (gen_movsi (reg, new));
6438 new = reg;
6440 else
6442 /* This symbol must be referenced via a load from the
6443 Global Offset Table (@GOT). */
6445 if (reload_in_progress)
6446 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6447 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6448 new = gen_rtx_CONST (Pmode, new);
6449 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6450 new = gen_rtx_MEM (Pmode, new);
6451 RTX_UNCHANGING_P (new) = 1;
6452 set_mem_alias_set (new, ix86_GOT_alias_set ());
6454 if (reg == 0)
6455 reg = gen_reg_rtx (Pmode);
6456 emit_move_insn (reg, new);
6457 new = reg;
6460 else
6462 if (GET_CODE (addr) == CONST)
6464 addr = XEXP (addr, 0);
6466 /* We must match stuff we generate before. Assume the only
6467 unspecs that can get here are ours. Not that we could do
6468 anything with them anyway.... */
6469 if (GET_CODE (addr) == UNSPEC
6470 || (GET_CODE (addr) == PLUS
6471 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6472 return orig;
6473 if (GET_CODE (addr) != PLUS)
6474 abort ();
6476 if (GET_CODE (addr) == PLUS)
6478 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6480 /* Check first to see if this is a constant offset from a @GOTOFF
6481 symbol reference. */
6482 if (local_symbolic_operand (op0, Pmode)
6483 && GET_CODE (op1) == CONST_INT)
6485 if (!TARGET_64BIT)
6487 if (reload_in_progress)
6488 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6489 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6490 UNSPEC_GOTOFF);
6491 new = gen_rtx_PLUS (Pmode, new, op1);
6492 new = gen_rtx_CONST (Pmode, new);
6493 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6495 if (reg != 0)
6497 emit_move_insn (reg, new);
6498 new = reg;
6501 else
6503 if (INTVAL (op1) < -16*1024*1024
6504 || INTVAL (op1) >= 16*1024*1024)
6505 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6508 else
6510 base = legitimize_pic_address (XEXP (addr, 0), reg);
6511 new = legitimize_pic_address (XEXP (addr, 1),
6512 base == reg ? NULL_RTX : reg);
6514 if (GET_CODE (new) == CONST_INT)
6515 new = plus_constant (base, INTVAL (new));
6516 else
6518 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6520 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6521 new = XEXP (new, 1);
6523 new = gen_rtx_PLUS (Pmode, base, new);
6528 return new;
6531 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6533 static rtx
6534 get_thread_pointer (int to_reg)
6536 rtx tp, reg, insn;
6538 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6539 if (!to_reg)
6540 return tp;
6542 reg = gen_reg_rtx (Pmode);
6543 insn = gen_rtx_SET (VOIDmode, reg, tp);
6544 insn = emit_insn (insn);
6546 return reg;
6549 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6550 false if we expect this to be used for a memory address and true if
6551 we expect to load the address into a register. */
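/* Very roughly, the TLS models handled below expand to (illustrative only):
   global dynamic:  a call to the TLS resolver (__tls_get_addr and friends)
		    returning the address in %eax / %rax;
   local dynamic:   one such call for the module base plus a @DTPOFF offset;
   initial exec:    a load of the @GOTTPOFF / @GOTNTPOFF GOT entry combined
		    with the thread pointer (the %gs / %fs segment base);
   local exec:      a constant @TPOFF / @NTPOFF offset from the thread
		    pointer, with no call and no GOT access.  */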
6553 static rtx
6554 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6556 rtx dest, base, off, pic;
6557 int type;
6559 switch (model)
6561 case TLS_MODEL_GLOBAL_DYNAMIC:
6562 dest = gen_reg_rtx (Pmode);
6563 if (TARGET_64BIT)
6565 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6567 start_sequence ();
6568 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6569 insns = get_insns ();
6570 end_sequence ();
6572 emit_libcall_block (insns, dest, rax, x);
6574 else
6575 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6576 break;
6578 case TLS_MODEL_LOCAL_DYNAMIC:
6579 base = gen_reg_rtx (Pmode);
6580 if (TARGET_64BIT)
6582 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6584 start_sequence ();
6585 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6586 insns = get_insns ();
6587 end_sequence ();
6589 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6590 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6591 emit_libcall_block (insns, base, rax, note);
6593 else
6594 emit_insn (gen_tls_local_dynamic_base_32 (base));
6596 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6597 off = gen_rtx_CONST (Pmode, off);
6599 return gen_rtx_PLUS (Pmode, base, off);
6601 case TLS_MODEL_INITIAL_EXEC:
6602 if (TARGET_64BIT)
6604 pic = NULL;
6605 type = UNSPEC_GOTNTPOFF;
6607 else if (flag_pic)
6609 if (reload_in_progress)
6610 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6611 pic = pic_offset_table_rtx;
6612 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6614 else if (!TARGET_GNU_TLS)
6616 pic = gen_reg_rtx (Pmode);
6617 emit_insn (gen_set_got (pic));
6618 type = UNSPEC_GOTTPOFF;
6620 else
6622 pic = NULL;
6623 type = UNSPEC_INDNTPOFF;
6626 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6627 off = gen_rtx_CONST (Pmode, off);
6628 if (pic)
6629 off = gen_rtx_PLUS (Pmode, pic, off);
6630 off = gen_rtx_MEM (Pmode, off);
6631 RTX_UNCHANGING_P (off) = 1;
6632 set_mem_alias_set (off, ix86_GOT_alias_set ());
6634 if (TARGET_64BIT || TARGET_GNU_TLS)
6636 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6637 off = force_reg (Pmode, off);
6638 return gen_rtx_PLUS (Pmode, base, off);
6640 else
6642 base = get_thread_pointer (true);
6643 dest = gen_reg_rtx (Pmode);
6644 emit_insn (gen_subsi3 (dest, base, off));
6646 break;
6648 case TLS_MODEL_LOCAL_EXEC:
6649 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6650 (TARGET_64BIT || TARGET_GNU_TLS)
6651 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6652 off = gen_rtx_CONST (Pmode, off);
6654 if (TARGET_64BIT || TARGET_GNU_TLS)
6656 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6657 return gen_rtx_PLUS (Pmode, base, off);
6659 else
6661 base = get_thread_pointer (true);
6662 dest = gen_reg_rtx (Pmode);
6663 emit_insn (gen_subsi3 (dest, base, off));
6665 break;
6667 default:
6668 abort ();
6671 return dest;
6674 /* Try machine-dependent ways of modifying an illegitimate address
6675 to be legitimate. If we find one, return the new, valid address.
6676 This macro is used in only one place: `memory_address' in explow.c.
6678 OLDX is the address as it was before break_out_memory_refs was called.
6679 In some cases it is useful to look at this to decide what needs to be done.
6681 MODE and WIN are passed so that this macro can use
6682 GO_IF_LEGITIMATE_ADDRESS.
6684 It is always safe for this macro to do nothing. It exists to recognize
6685 opportunities to optimize the output.
6687 For the 80386, we handle X+REG by loading X into a register R and
6688 using R+REG. R will go in a general reg and indexing will be used.
6689 However, if REG is a broken-out memory address or multiplication,
6690 nothing needs to be done because REG can certainly go in a general reg.
6692 When -fpic is used, special handling is needed for symbolic references.
6693 See comments by legitimize_pic_address in i386.c for details. */
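/* A small illustration of the canonicalizations below (illustrative only):
   an address such as (plus (reg A) (ashift (reg B) (const_int 2))) is
   rewritten into (plus (mult (reg B) (const_int 4)) (reg A)), matching the
   base + index*scale form that GO_IF_LEGITIMATE_ADDRESS expects.  */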
6696 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6698 int changed = 0;
6699 unsigned log;
6701 if (TARGET_DEBUG_ADDR)
6703 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6704 GET_MODE_NAME (mode));
6705 debug_rtx (x);
6708 log = tls_symbolic_operand (x, mode);
6709 if (log)
6710 return legitimize_tls_address (x, log, false);
6711 if (GET_CODE (x) == CONST
6712 && GET_CODE (XEXP (x, 0)) == PLUS
6713 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6715 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6716 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6719 if (flag_pic && SYMBOLIC_CONST (x))
6720 return legitimize_pic_address (x, 0);
6722 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6723 if (GET_CODE (x) == ASHIFT
6724 && GET_CODE (XEXP (x, 1)) == CONST_INT
6725 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6727 changed = 1;
6728 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6729 GEN_INT (1 << log));
6732 if (GET_CODE (x) == PLUS)
6734 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6736 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6737 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6738 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6740 changed = 1;
6741 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6742 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6743 GEN_INT (1 << log));
6746 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6747 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6748 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6750 changed = 1;
6751 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6752 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6753 GEN_INT (1 << log));
6756 /* Put multiply first if it isn't already. */
6757 if (GET_CODE (XEXP (x, 1)) == MULT)
6759 rtx tmp = XEXP (x, 0);
6760 XEXP (x, 0) = XEXP (x, 1);
6761 XEXP (x, 1) = tmp;
6762 changed = 1;
6765 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6766 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6767 created by virtual register instantiation, register elimination, and
6768 similar optimizations. */
6769 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6771 changed = 1;
6772 x = gen_rtx_PLUS (Pmode,
6773 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6774 XEXP (XEXP (x, 1), 0)),
6775 XEXP (XEXP (x, 1), 1));
6778 /* Canonicalize
6779 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6780 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6781 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6782 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6783 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6784 && CONSTANT_P (XEXP (x, 1)))
6786 rtx constant;
6787 rtx other = NULL_RTX;
6789 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6791 constant = XEXP (x, 1);
6792 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6794 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6796 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6797 other = XEXP (x, 1);
6799 else
6800 constant = 0;
6802 if (constant)
6804 changed = 1;
6805 x = gen_rtx_PLUS (Pmode,
6806 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6807 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6808 plus_constant (other, INTVAL (constant)));
6812 if (changed && legitimate_address_p (mode, x, FALSE))
6813 return x;
6815 if (GET_CODE (XEXP (x, 0)) == MULT)
6817 changed = 1;
6818 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6821 if (GET_CODE (XEXP (x, 1)) == MULT)
6823 changed = 1;
6824 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6827 if (changed
6828 && GET_CODE (XEXP (x, 1)) == REG
6829 && GET_CODE (XEXP (x, 0)) == REG)
6830 return x;
6832 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6834 changed = 1;
6835 x = legitimize_pic_address (x, 0);
6838 if (changed && legitimate_address_p (mode, x, FALSE))
6839 return x;
6841 if (GET_CODE (XEXP (x, 0)) == REG)
6843 rtx temp = gen_reg_rtx (Pmode);
6844 rtx val = force_operand (XEXP (x, 1), temp);
6845 if (val != temp)
6846 emit_move_insn (temp, val);
6848 XEXP (x, 1) = temp;
6849 return x;
6852 else if (GET_CODE (XEXP (x, 1)) == REG)
6854 rtx temp = gen_reg_rtx (Pmode);
6855 rtx val = force_operand (XEXP (x, 0), temp);
6856 if (val != temp)
6857 emit_move_insn (temp, val);
6859 XEXP (x, 0) = temp;
6860 return x;
6864 return x;
6867 /* Print an integer constant expression in assembler syntax. Addition
6868 and subtraction are the only arithmetic that may appear in these
6869 expressions. FILE is the stdio stream to write to, X is the rtx, and
6870 CODE is the operand print code from the output string. */
6872 static void
6873 output_pic_addr_const (FILE *file, rtx x, int code)
6875 char buf[256];
6877 switch (GET_CODE (x))
6879 case PC:
6880 if (flag_pic)
6881 putc ('.', file);
6882 else
6883 abort ();
6884 break;
6886 case SYMBOL_REF:
6887 /* Mark the decl as referenced so that cgraph will output the function. */
6888 if (SYMBOL_REF_DECL (x))
6889 mark_decl_referenced (SYMBOL_REF_DECL (x));
6891 assemble_name (file, XSTR (x, 0));
6892 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6893 fputs ("@PLT", file);
6894 break;
6896 case LABEL_REF:
6897 x = XEXP (x, 0);
6898 /* FALLTHRU */
6899 case CODE_LABEL:
6900 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6901 assemble_name (asm_out_file, buf);
6902 break;
6904 case CONST_INT:
6905 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6906 break;
6908 case CONST:
6909 /* This used to output parentheses around the expression,
6910 but that does not work on the 386 (either ATT or BSD assembler). */
6911 output_pic_addr_const (file, XEXP (x, 0), code);
6912 break;
6914 case CONST_DOUBLE:
6915 if (GET_MODE (x) == VOIDmode)
6917 /* We can use %d if the number is <32 bits and positive. */
6918 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6919 fprintf (file, "0x%lx%08lx",
6920 (unsigned long) CONST_DOUBLE_HIGH (x),
6921 (unsigned long) CONST_DOUBLE_LOW (x));
6922 else
6923 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6925 else
6926 /* We can't handle floating point constants;
6927 PRINT_OPERAND must handle them. */
6928 output_operand_lossage ("floating constant misused");
6929 break;
6931 case PLUS:
6932 /* Some assemblers need integer constants to appear first. */
6933 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6935 output_pic_addr_const (file, XEXP (x, 0), code);
6936 putc ('+', file);
6937 output_pic_addr_const (file, XEXP (x, 1), code);
6939 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6941 output_pic_addr_const (file, XEXP (x, 1), code);
6942 putc ('+', file);
6943 output_pic_addr_const (file, XEXP (x, 0), code);
6945 else
6946 abort ();
6947 break;
6949 case MINUS:
6950 if (!TARGET_MACHO)
6951 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6952 output_pic_addr_const (file, XEXP (x, 0), code);
6953 putc ('-', file);
6954 output_pic_addr_const (file, XEXP (x, 1), code);
6955 if (!TARGET_MACHO)
6956 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6957 break;
6959 case UNSPEC:
6960 if (XVECLEN (x, 0) != 1)
6961 abort ();
6962 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6963 switch (XINT (x, 1))
6965 case UNSPEC_GOT:
6966 fputs ("@GOT", file);
6967 break;
6968 case UNSPEC_GOTOFF:
6969 fputs ("@GOTOFF", file);
6970 break;
6971 case UNSPEC_GOTPCREL:
6972 fputs ("@GOTPCREL(%rip)", file);
6973 break;
6974 case UNSPEC_GOTTPOFF:
6975 /* FIXME: This might be @TPOFF in Sun ld too. */
6976 fputs ("@GOTTPOFF", file);
6977 break;
6978 case UNSPEC_TPOFF:
6979 fputs ("@TPOFF", file);
6980 break;
6981 case UNSPEC_NTPOFF:
6982 if (TARGET_64BIT)
6983 fputs ("@TPOFF", file);
6984 else
6985 fputs ("@NTPOFF", file);
6986 break;
6987 case UNSPEC_DTPOFF:
6988 fputs ("@DTPOFF", file);
6989 break;
6990 case UNSPEC_GOTNTPOFF:
6991 if (TARGET_64BIT)
6992 fputs ("@GOTTPOFF(%rip)", file);
6993 else
6994 fputs ("@GOTNTPOFF", file);
6995 break;
6996 case UNSPEC_INDNTPOFF:
6997 fputs ("@INDNTPOFF", file);
6998 break;
6999 default:
7000 output_operand_lossage ("invalid UNSPEC as operand");
7001 break;
7003 break;
7005 default:
7006 output_operand_lossage ("invalid expression as operand");
7010 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
7011 We need to handle our special PIC relocations. */
7013 void
7014 i386_dwarf_output_addr_const (FILE *file, rtx x)
7016 #ifdef ASM_QUAD
7017 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
7018 #else
7019 if (TARGET_64BIT)
7020 abort ();
7021 fprintf (file, "%s", ASM_LONG);
7022 #endif
7023 if (flag_pic)
7024 output_pic_addr_const (file, x, '\0');
7025 else
7026 output_addr_const (file, x);
7027 fputc ('\n', file);
7030 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
7031 We need to emit DTP-relative relocations. */
7033 void
7034 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7036 fputs (ASM_LONG, file);
7037 output_addr_const (file, x);
7038 fputs ("@DTPOFF", file);
7039 switch (size)
7041 case 4:
7042 break;
7043 case 8:
7044 fputs (", 0", file);
7045 break;
7046 default:
7047 abort ();
7051 /* In the name of slightly smaller debug output, and to cater to
7052 general assembler lossage, recognize PIC+GOTOFF and turn it back
7053 into a direct symbol reference. */
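/* For example (illustrative): (plus (reg %ebx) (const (unspec [foo] GOTOFF)))
   is turned back into the plain symbol `foo', and a load such as
   (mem (plus (reg %ebx) (const (unspec [foo] GOT)))), i.e. foo@GOT(%ebx),
   likewise collapses to `foo'.  */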
7055 static rtx
7056 ix86_delegitimize_address (rtx orig_x)
7058 rtx x = orig_x, y;
7060 if (GET_CODE (x) == MEM)
7061 x = XEXP (x, 0);
7063 if (TARGET_64BIT)
7065 if (GET_CODE (x) != CONST
7066 || GET_CODE (XEXP (x, 0)) != UNSPEC
7067 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7068 || GET_CODE (orig_x) != MEM)
7069 return orig_x;
7070 return XVECEXP (XEXP (x, 0), 0, 0);
7073 if (GET_CODE (x) != PLUS
7074 || GET_CODE (XEXP (x, 1)) != CONST)
7075 return orig_x;
7077 if (GET_CODE (XEXP (x, 0)) == REG
7078 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7079 /* %ebx + GOT/GOTOFF */
7080 y = NULL;
7081 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7083 /* %ebx + %reg * scale + GOT/GOTOFF */
7084 y = XEXP (x, 0);
7085 if (GET_CODE (XEXP (y, 0)) == REG
7086 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7087 y = XEXP (y, 1);
7088 else if (GET_CODE (XEXP (y, 1)) == REG
7089 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7090 y = XEXP (y, 0);
7091 else
7092 return orig_x;
7093 if (GET_CODE (y) != REG
7094 && GET_CODE (y) != MULT
7095 && GET_CODE (y) != ASHIFT)
7096 return orig_x;
7098 else
7099 return orig_x;
7101 x = XEXP (XEXP (x, 1), 0);
7102 if (GET_CODE (x) == UNSPEC
7103 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7104 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7106 if (y)
7107 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7108 return XVECEXP (x, 0, 0);
7111 if (GET_CODE (x) == PLUS
7112 && GET_CODE (XEXP (x, 0)) == UNSPEC
7113 && GET_CODE (XEXP (x, 1)) == CONST_INT
7114 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7115 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7116 && GET_CODE (orig_x) != MEM)))
7118 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7119 if (y)
7120 return gen_rtx_PLUS (Pmode, y, x);
7121 return x;
7124 return orig_x;
7127 static void
7128 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7129 int fp, FILE *file)
7131 const char *suffix;
7133 if (mode == CCFPmode || mode == CCFPUmode)
7135 enum rtx_code second_code, bypass_code;
7136 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7137 if (bypass_code != NIL || second_code != NIL)
7138 abort ();
7139 code = ix86_fp_compare_code_to_integer (code);
7140 mode = CCmode;
7142 if (reverse)
7143 code = reverse_condition (code);
7145 switch (code)
7147 case EQ:
7148 suffix = "e";
7149 break;
7150 case NE:
7151 suffix = "ne";
7152 break;
7153 case GT:
7154 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7155 abort ();
7156 suffix = "g";
7157 break;
7158 case GTU:
7159 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7160 Those same assemblers have the same but opposite lossage on cmov. */
7161 if (mode != CCmode)
7162 abort ();
7163 suffix = fp ? "nbe" : "a";
7164 break;
7165 case LT:
7166 if (mode == CCNOmode || mode == CCGOCmode)
7167 suffix = "s";
7168 else if (mode == CCmode || mode == CCGCmode)
7169 suffix = "l";
7170 else
7171 abort ();
7172 break;
7173 case LTU:
7174 if (mode != CCmode)
7175 abort ();
7176 suffix = "b";
7177 break;
7178 case GE:
7179 if (mode == CCNOmode || mode == CCGOCmode)
7180 suffix = "ns";
7181 else if (mode == CCmode || mode == CCGCmode)
7182 suffix = "ge";
7183 else
7184 abort ();
7185 break;
7186 case GEU:
7187 /* ??? As above. */
7188 if (mode != CCmode)
7189 abort ();
7190 suffix = fp ? "nb" : "ae";
7191 break;
7192 case LE:
7193 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7194 abort ();
7195 suffix = "le";
7196 break;
7197 case LEU:
7198 if (mode != CCmode)
7199 abort ();
7200 suffix = "be";
7201 break;
7202 case UNORDERED:
7203 suffix = fp ? "u" : "p";
7204 break;
7205 case ORDERED:
7206 suffix = fp ? "nu" : "np";
7207 break;
7208 default:
7209 abort ();
7211 fputs (suffix, file);
7214 /* Print the name of register X to FILE based on its machine mode and number.
7215 If CODE is 'w', pretend the mode is HImode.
7216 If CODE is 'b', pretend the mode is QImode.
7217 If CODE is 'k', pretend the mode is SImode.
7218 If CODE is 'q', pretend the mode is DImode.
7219 If CODE is 'h', pretend the reg is the `high' byte register.
7220 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
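/* For instance (illustrative), for hard register 0 in AT&T syntax:
   code 'b' prints %al, 'h' prints %ah, 'w' prints %ax, 'k' prints %eax,
   and on a 64-bit target 'q' prints %rax.  */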
7222 void
7223 print_reg (rtx x, int code, FILE *file)
7225 if (REGNO (x) == ARG_POINTER_REGNUM
7226 || REGNO (x) == FRAME_POINTER_REGNUM
7227 || REGNO (x) == FLAGS_REG
7228 || REGNO (x) == FPSR_REG)
7229 abort ();
7231 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7232 putc ('%', file);
7234 if (code == 'w' || MMX_REG_P (x))
7235 code = 2;
7236 else if (code == 'b')
7237 code = 1;
7238 else if (code == 'k')
7239 code = 4;
7240 else if (code == 'q')
7241 code = 8;
7242 else if (code == 'y')
7243 code = 3;
7244 else if (code == 'h')
7245 code = 0;
7246 else
7247 code = GET_MODE_SIZE (GET_MODE (x));
7249 /* Irritatingly, AMD extended registers use a different naming convention
7250 from the normal registers. */
7251 if (REX_INT_REG_P (x))
7253 if (!TARGET_64BIT)
7254 abort ();
7255 switch (code)
7257 case 0:
7258 error ("extended registers have no high halves");
7259 break;
7260 case 1:
7261 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7262 break;
7263 case 2:
7264 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7265 break;
7266 case 4:
7267 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7268 break;
7269 case 8:
7270 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7271 break;
7272 default:
7273 error ("unsupported operand size for extended register");
7274 break;
7276 return;
7278 switch (code)
7280 case 3:
7281 if (STACK_TOP_P (x))
7283 fputs ("st(0)", file);
7284 break;
7286 /* FALLTHRU */
7287 case 8:
7288 case 4:
7289 case 12:
7290 if (! ANY_FP_REG_P (x))
7291 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7292 /* FALLTHRU */
7293 case 16:
7294 case 2:
7295 normal:
7296 fputs (hi_reg_name[REGNO (x)], file);
7297 break;
7298 case 1:
7299 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7300 goto normal;
7301 fputs (qi_reg_name[REGNO (x)], file);
7302 break;
7303 case 0:
7304 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7305 goto normal;
7306 fputs (qi_high_reg_name[REGNO (x)], file);
7307 break;
7308 default:
7309 abort ();
7313 /* Locate some local-dynamic symbol still in use by this function
7314 so that we can print its name in some tls_local_dynamic_base
7315 pattern. */
7317 static const char *
7318 get_some_local_dynamic_name (void)
7320 rtx insn;
7322 if (cfun->machine->some_ld_name)
7323 return cfun->machine->some_ld_name;
7325 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7326 if (INSN_P (insn)
7327 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7328 return cfun->machine->some_ld_name;
7330 abort ();
7333 static int
7334 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7336 rtx x = *px;
7338 if (GET_CODE (x) == SYMBOL_REF
7339 && local_dynamic_symbolic_operand (x, Pmode))
7341 cfun->machine->some_ld_name = XSTR (x, 0);
7342 return 1;
7345 return 0;
7348 /* Meaning of CODE:
7349 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7350 C -- print opcode suffix for set/cmov insn.
7351 c -- like C, but print reversed condition
7352 F,f -- likewise, but for floating-point.
7353 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7354 otherwise nothing
7355 R -- print the prefix for register names.
7356 z -- print the opcode suffix for the size of the current operand.
7357 * -- print a star (in certain assembler syntax)
7358 A -- print an absolute memory reference.
7359 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7360 s -- print a shift double count, followed by the assembler's argument
7361 delimiter.
7362 b -- print the QImode name of the register for the indicated operand.
7363 %b0 would print %al if operands[0] is reg 0.
7364 w -- likewise, print the HImode name of the register.
7365 k -- likewise, print the SImode name of the register.
7366 q -- likewise, print the DImode name of the register.
7367 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7368 y -- print "st(0)" instead of "st" as a register.
7369 D -- print condition for SSE cmp instruction.
7370 P -- if PIC, print an @PLT suffix.
7371 X -- don't print any sort of PIC '@' suffix for a symbol.
7372 & -- print some in-use local-dynamic symbol name. */
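/* As an illustration (not from the original sources): with a DFmode memory
   operand, the 'z' code makes a template such as "fld%z1" come out as
   "fldl", while an SFmode operand yields "flds".  */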
7375 void
7376 print_operand (FILE *file, rtx x, int code)
7378 if (code)
7380 switch (code)
7382 case '*':
7383 if (ASSEMBLER_DIALECT == ASM_ATT)
7384 putc ('*', file);
7385 return;
7387 case '&':
7388 assemble_name (file, get_some_local_dynamic_name ());
7389 return;
7391 case 'A':
7392 if (ASSEMBLER_DIALECT == ASM_ATT)
7393 putc ('*', file);
7394 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7396 /* Intel syntax. For absolute addresses, registers should not
7397 be surrounded by braces. */
7398 if (GET_CODE (x) != REG)
7400 putc ('[', file);
7401 PRINT_OPERAND (file, x, 0);
7402 putc (']', file);
7403 return;
7406 else
7407 abort ();
7409 PRINT_OPERAND (file, x, 0);
7410 return;
7413 case 'L':
7414 if (ASSEMBLER_DIALECT == ASM_ATT)
7415 putc ('l', file);
7416 return;
7418 case 'W':
7419 if (ASSEMBLER_DIALECT == ASM_ATT)
7420 putc ('w', file);
7421 return;
7423 case 'B':
7424 if (ASSEMBLER_DIALECT == ASM_ATT)
7425 putc ('b', file);
7426 return;
7428 case 'Q':
7429 if (ASSEMBLER_DIALECT == ASM_ATT)
7430 putc ('l', file);
7431 return;
7433 case 'S':
7434 if (ASSEMBLER_DIALECT == ASM_ATT)
7435 putc ('s', file);
7436 return;
7438 case 'T':
7439 if (ASSEMBLER_DIALECT == ASM_ATT)
7440 putc ('t', file);
7441 return;
7443 case 'z':
7444 /* 387 opcodes don't get size suffixes if the operands are
7445 registers. */
7446 if (STACK_REG_P (x))
7447 return;
7449 /* Likewise if using Intel opcodes. */
7450 if (ASSEMBLER_DIALECT == ASM_INTEL)
7451 return;
7453 /* Derive the opcode suffix from the size of the operand. */
7454 switch (GET_MODE_SIZE (GET_MODE (x)))
7456 case 2:
7457 #ifdef HAVE_GAS_FILDS_FISTS
7458 putc ('s', file);
7459 #endif
7460 return;
7462 case 4:
7463 if (GET_MODE (x) == SFmode)
7465 putc ('s', file);
7466 return;
7468 else
7469 putc ('l', file);
7470 return;
7472 case 12:
7473 case 16:
7474 putc ('t', file);
7475 return;
7477 case 8:
7478 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7480 #ifdef GAS_MNEMONICS
7481 putc ('q', file);
7482 #else
7483 putc ('l', file);
7484 putc ('l', file);
7485 #endif
7487 else
7488 putc ('l', file);
7489 return;
7491 default:
7492 abort ();
7495 case 'b':
7496 case 'w':
7497 case 'k':
7498 case 'q':
7499 case 'h':
7500 case 'y':
7501 case 'X':
7502 case 'P':
7503 break;
7505 case 's':
7506 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7508 PRINT_OPERAND (file, x, 0);
7509 putc (',', file);
7511 return;
7513 case 'D':
7514 /* Little bit of braindamage here. The SSE compare instructions
7515 use completely different names for the comparisons than the
7516 fp conditional moves do. */
7517 switch (GET_CODE (x))
7519 case EQ:
7520 case UNEQ:
7521 fputs ("eq", file);
7522 break;
7523 case LT:
7524 case UNLT:
7525 fputs ("lt", file);
7526 break;
7527 case LE:
7528 case UNLE:
7529 fputs ("le", file);
7530 break;
7531 case UNORDERED:
7532 fputs ("unord", file);
7533 break;
7534 case NE:
7535 case LTGT:
7536 fputs ("neq", file);
7537 break;
7538 case UNGE:
7539 case GE:
7540 fputs ("nlt", file);
7541 break;
7542 case UNGT:
7543 case GT:
7544 fputs ("nle", file);
7545 break;
7546 case ORDERED:
7547 fputs ("ord", file);
7548 break;
7549 default:
7550 abort ();
7551 break;
7553 return;
7554 case 'O':
7555 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7556 if (ASSEMBLER_DIALECT == ASM_ATT)
7558 switch (GET_MODE (x))
7560 case HImode: putc ('w', file); break;
7561 case SImode:
7562 case SFmode: putc ('l', file); break;
7563 case DImode:
7564 case DFmode: putc ('q', file); break;
7565 default: abort ();
7567 putc ('.', file);
7569 #endif
7570 return;
7571 case 'C':
7572 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7573 return;
7574 case 'F':
7575 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7576 if (ASSEMBLER_DIALECT == ASM_ATT)
7577 putc ('.', file);
7578 #endif
7579 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7580 return;
7582 /* Like above, but reverse condition */
7583 case 'c':
7584 /* Check to see if argument to %c is really a constant
7585 and not a condition code which needs to be reversed. */
7586 if (!COMPARISON_P (x))
7588 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7589 return;
7591 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7592 return;
7593 case 'f':
7594 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7595 if (ASSEMBLER_DIALECT == ASM_ATT)
7596 putc ('.', file);
7597 #endif
7598 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7599 return;
7600 case '+':
7602 rtx x;
7604 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7605 return;
7607 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7608 if (x)
7610 int pred_val = INTVAL (XEXP (x, 0));
7612 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7613 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7615 int taken = pred_val > REG_BR_PROB_BASE / 2;
7616 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7618 /* Emit hints only when the default branch prediction
7619 heuristics would fail. */
7620 if (taken != cputaken)
7622 /* We use 3e (DS) prefix for taken branches and
7623 2e (CS) prefix for not taken branches. */
7624 if (taken)
7625 fputs ("ds ; ", file);
7626 else
7627 fputs ("cs ; ", file);
7631 return;
7633 default:
7634 output_operand_lossage ("invalid operand code `%c'", code);
7638 if (GET_CODE (x) == REG)
7639 print_reg (x, code, file);
7641 else if (GET_CODE (x) == MEM)
7643 /* No `byte ptr' prefix for call instructions. */
7644 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7646 const char * size;
7647 switch (GET_MODE_SIZE (GET_MODE (x)))
7649 case 1: size = "BYTE"; break;
7650 case 2: size = "WORD"; break;
7651 case 4: size = "DWORD"; break;
7652 case 8: size = "QWORD"; break;
7653 case 12: size = "XWORD"; break;
7654 case 16: size = "XMMWORD"; break;
7655 default:
7656 abort ();
7659 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7660 if (code == 'b')
7661 size = "BYTE";
7662 else if (code == 'w')
7663 size = "WORD";
7664 else if (code == 'k')
7665 size = "DWORD";
7667 fputs (size, file);
7668 fputs (" PTR ", file);
7671 x = XEXP (x, 0);
7672 /* Avoid (%rip) for call operands. */
7673 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7674 && GET_CODE (x) != CONST_INT)
7675 output_addr_const (file, x);
7676 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7677 output_operand_lossage ("invalid constraints for operand");
7678 else
7679 output_address (x);
7682 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7684 REAL_VALUE_TYPE r;
7685 long l;
7687 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7688 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7690 if (ASSEMBLER_DIALECT == ASM_ATT)
7691 putc ('$', file);
7692 fprintf (file, "0x%08lx", l);
7695 /* These float cases don't actually occur as immediate operands. */
7696 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7698 char dstr[30];
7700 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7701 fprintf (file, "%s", dstr);
7704 else if (GET_CODE (x) == CONST_DOUBLE
7705 && GET_MODE (x) == XFmode)
7707 char dstr[30];
7709 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7710 fprintf (file, "%s", dstr);
7713 else
7715 if (code != 'P')
7717 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7719 if (ASSEMBLER_DIALECT == ASM_ATT)
7720 putc ('$', file);
7722 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7723 || GET_CODE (x) == LABEL_REF)
7725 if (ASSEMBLER_DIALECT == ASM_ATT)
7726 putc ('$', file);
7727 else
7728 fputs ("OFFSET FLAT:", file);
7731 if (GET_CODE (x) == CONST_INT)
7732 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7733 else if (flag_pic)
7734 output_pic_addr_const (file, x, code);
7735 else
7736 output_addr_const (file, x);
7740 /* Print a memory operand whose address is ADDR. */
7742 void
7743 print_operand_address (FILE *file, rtx addr)
7745 struct ix86_address parts;
7746 rtx base, index, disp;
7747 int scale;
7749 if (! ix86_decompose_address (addr, &parts))
7750 abort ();
7752 base = parts.base;
7753 index = parts.index;
7754 disp = parts.disp;
7755 scale = parts.scale;
7757 switch (parts.seg)
7759 case SEG_DEFAULT:
7760 break;
7761 case SEG_FS:
7762 case SEG_GS:
7763 if (USER_LABEL_PREFIX[0] == 0)
7764 putc ('%', file);
7765 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7766 break;
7767 default:
7768 abort ();
7771 if (!base && !index)
7773 /* A displacement-only address requires special attention. */
7775 if (GET_CODE (disp) == CONST_INT)
7777 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7779 if (USER_LABEL_PREFIX[0] == 0)
7780 putc ('%', file);
7781 fputs ("ds:", file);
7783 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7785 else if (flag_pic)
7786 output_pic_addr_const (file, disp, 0);
7787 else
7788 output_addr_const (file, disp);
7790 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
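/* Encoding note (a sketch): in 64-bit mode the mod=00/rm=101 ModRM form
   means RIP-relative, so an absolute disp32 address needs an extra SIB
   byte instead; "movl x(%rip), %eax" is therefore one byte shorter than
   the absolute form.  */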
7791 if (TARGET_64BIT
7792 && ((GET_CODE (disp) == SYMBOL_REF
7793 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7794 || GET_CODE (disp) == LABEL_REF
7795 || (GET_CODE (disp) == CONST
7796 && GET_CODE (XEXP (disp, 0)) == PLUS
7797 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7798 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7799 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7800 fputs ("(%rip)", file);
7802 else
7804 if (ASSEMBLER_DIALECT == ASM_ATT)
7806 if (disp)
7808 if (flag_pic)
7809 output_pic_addr_const (file, disp, 0);
7810 else if (GET_CODE (disp) == LABEL_REF)
7811 output_asm_label (disp);
7812 else
7813 output_addr_const (file, disp);
7816 putc ('(', file);
7817 if (base)
7818 print_reg (base, 0, file);
7819 if (index)
7821 putc (',', file);
7822 print_reg (index, 0, file);
7823 if (scale != 1)
7824 fprintf (file, ",%d", scale);
7826 putc (')', file);
7828 else
7830 rtx offset = NULL_RTX;
7832 if (disp)
7834 /* Pull out the offset of a symbol; print any symbol itself. */
7835 if (GET_CODE (disp) == CONST
7836 && GET_CODE (XEXP (disp, 0)) == PLUS
7837 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7839 offset = XEXP (XEXP (disp, 0), 1);
7840 disp = gen_rtx_CONST (VOIDmode,
7841 XEXP (XEXP (disp, 0), 0));
7844 if (flag_pic)
7845 output_pic_addr_const (file, disp, 0);
7846 else if (GET_CODE (disp) == LABEL_REF)
7847 output_asm_label (disp);
7848 else if (GET_CODE (disp) == CONST_INT)
7849 offset = disp;
7850 else
7851 output_addr_const (file, disp);
7854 putc ('[', file);
7855 if (base)
7857 print_reg (base, 0, file);
7858 if (offset)
7860 if (INTVAL (offset) >= 0)
7861 putc ('+', file);
7862 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7865 else if (offset)
7866 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7867 else
7868 putc ('0', file);
7870 if (index)
7872 putc ('+', file);
7873 print_reg (index, 0, file);
7874 if (scale != 1)
7875 fprintf (file, "*%d", scale);
7877 putc (']', file);
7882 bool
7883 output_addr_const_extra (FILE *file, rtx x)
7885 rtx op;
7887 if (GET_CODE (x) != UNSPEC)
7888 return false;
7890 op = XVECEXP (x, 0, 0);
7891 switch (XINT (x, 1))
7893 case UNSPEC_GOTTPOFF:
7894 output_addr_const (file, op);
7895 /* FIXME: This might be @TPOFF in Sun ld. */
7896 fputs ("@GOTTPOFF", file);
7897 break;
7898 case UNSPEC_TPOFF:
7899 output_addr_const (file, op);
7900 fputs ("@TPOFF", file);
7901 break;
7902 case UNSPEC_NTPOFF:
7903 output_addr_const (file, op);
7904 if (TARGET_64BIT)
7905 fputs ("@TPOFF", file);
7906 else
7907 fputs ("@NTPOFF", file);
7908 break;
7909 case UNSPEC_DTPOFF:
7910 output_addr_const (file, op);
7911 fputs ("@DTPOFF", file);
7912 break;
7913 case UNSPEC_GOTNTPOFF:
7914 output_addr_const (file, op);
7915 if (TARGET_64BIT)
7916 fputs ("@GOTTPOFF(%rip)", file);
7917 else
7918 fputs ("@GOTNTPOFF", file);
7919 break;
7920 case UNSPEC_INDNTPOFF:
7921 output_addr_const (file, op);
7922 fputs ("@INDNTPOFF", file);
7923 break;
7925 default:
7926 return false;
7929 return true;
7932 /* Split one or more DImode RTL references into pairs of SImode
7933 references. The RTL can be REG, offsettable MEM, integer constant, or
7934 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7935 split and "num" is its length. lo_half and hi_half are output arrays
7936 that parallel "operands". */
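/* A sketch of the effect on little-endian x86: an offsettable DImode MEM
   at address A becomes SImode MEMs at A (low half) and A+4 (high half);
   a DImode REG or constant becomes the corresponding SImode subregs at
   byte offsets 0 and 4.  */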
7938 void
7939 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7941 while (num--)
7943 rtx op = operands[num];
7945 /* simplify_subreg refuses to split volatile memory addresses,
7946 but we still have to handle them. */
7947 if (GET_CODE (op) == MEM)
7949 lo_half[num] = adjust_address (op, SImode, 0);
7950 hi_half[num] = adjust_address (op, SImode, 4);
7952 else
7954 lo_half[num] = simplify_gen_subreg (SImode, op,
7955 GET_MODE (op) == VOIDmode
7956 ? DImode : GET_MODE (op), 0);
7957 hi_half[num] = simplify_gen_subreg (SImode, op,
7958 GET_MODE (op) == VOIDmode
7959 ? DImode : GET_MODE (op), 4);
7963 /* Split one or more TImode RTL references into pairs of DImode
7964 references. The RTL can be REG, offsettable MEM, integer constant, or
7965 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7966 split and "num" is its length. lo_half and hi_half are output arrays
7967 that parallel "operands". */
7969 void
7970 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7972 while (num--)
7974 rtx op = operands[num];
7976 /* simplify_subreg refuses to split volatile memory addresses, but we
7977 still have to handle them. */
7978 if (GET_CODE (op) == MEM)
7980 lo_half[num] = adjust_address (op, DImode, 0);
7981 hi_half[num] = adjust_address (op, DImode, 8);
7983 else
7985 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7986 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7991 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7992 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7993 is the expression of the binary operation. The output may either be
7994 emitted here, or returned to the caller, like all output_* functions.
7996 There is no guarantee that the operands are the same mode, as they
7997 might be within FLOAT or FLOAT_EXTEND expressions. */
7999 #ifndef SYSV386_COMPAT
8000 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8001 wants to fix the assemblers because that causes incompatibility
8002 with gcc. No-one wants to fix gcc because that causes
8003 incompatibility with assemblers... You can use the option of
8004 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8005 #define SYSV386_COMPAT 1
8006 #endif
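/* A concrete instance of the difference (a sketch): for
   "st(1) = st(0) - st(1); pop" the hardware instruction is fsubrp, but
   with SYSV386_COMPAT the AT&T output spells it fsubp so that SVR3.2-style
   assemblers encode the operation the compiler actually meant; see the
   MINUS/DIV cases below.  */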
8008 const char *
8009 output_387_binary_op (rtx insn, rtx *operands)
8011 static char buf[30];
8012 const char *p;
8013 const char *ssep;
8014 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
8016 #ifdef ENABLE_CHECKING
8017 /* Even if we do not want to check the inputs, this documents the input
8018 constraints, which helps in understanding the following code. */
8019 if (STACK_REG_P (operands[0])
8020 && ((REG_P (operands[1])
8021 && REGNO (operands[0]) == REGNO (operands[1])
8022 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8023 || (REG_P (operands[2])
8024 && REGNO (operands[0]) == REGNO (operands[2])
8025 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8026 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8027 ; /* ok */
8028 else if (!is_sse)
8029 abort ();
8030 #endif
8032 switch (GET_CODE (operands[3]))
8034 case PLUS:
8035 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8036 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8037 p = "fiadd";
8038 else
8039 p = "fadd";
8040 ssep = "add";
8041 break;
8043 case MINUS:
8044 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8045 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8046 p = "fisub";
8047 else
8048 p = "fsub";
8049 ssep = "sub";
8050 break;
8052 case MULT:
8053 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8054 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8055 p = "fimul";
8056 else
8057 p = "fmul";
8058 ssep = "mul";
8059 break;
8061 case DIV:
8062 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8063 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8064 p = "fidiv";
8065 else
8066 p = "fdiv";
8067 ssep = "div";
8068 break;
8070 default:
8071 abort ();
8074 if (is_sse)
8076 strcpy (buf, ssep);
8077 if (GET_MODE (operands[0]) == SFmode)
8078 strcat (buf, "ss\t{%2, %0|%0, %2}");
8079 else
8080 strcat (buf, "sd\t{%2, %0|%0, %2}");
8081 return buf;
8083 strcpy (buf, p);
8085 switch (GET_CODE (operands[3]))
8087 case MULT:
8088 case PLUS:
8089 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8091 rtx temp = operands[2];
8092 operands[2] = operands[1];
8093 operands[1] = temp;
8096 /* We know operands[0] == operands[1]. */
8098 if (GET_CODE (operands[2]) == MEM)
8100 p = "%z2\t%2";
8101 break;
8104 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8106 if (STACK_TOP_P (operands[0]))
8107 /* How is it that we are storing to a dead operand[2]?
8108 Well, presumably operands[1] is dead too. We can't
8109 store the result to st(0) as st(0) gets popped on this
8110 instruction. Instead store to operands[2] (which I
8111 think has to be st(1)). st(1) will be popped later.
8112 gcc <= 2.8.1 didn't have this check and generated
8113 assembly code that the Unixware assembler rejected. */
8114 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8115 else
8116 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8117 break;
8120 if (STACK_TOP_P (operands[0]))
8121 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8122 else
8123 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8124 break;
8126 case MINUS:
8127 case DIV:
8128 if (GET_CODE (operands[1]) == MEM)
8130 p = "r%z1\t%1";
8131 break;
8134 if (GET_CODE (operands[2]) == MEM)
8136 p = "%z2\t%2";
8137 break;
8140 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8142 #if SYSV386_COMPAT
8143 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8144 derived assemblers, confusingly reverse the direction of
8145 the operation for fsub{r} and fdiv{r} when the
8146 destination register is not st(0). The Intel assembler
8147 doesn't have this brain damage. Read !SYSV386_COMPAT to
8148 figure out what the hardware really does. */
8149 if (STACK_TOP_P (operands[0]))
8150 p = "{p\t%0, %2|rp\t%2, %0}";
8151 else
8152 p = "{rp\t%2, %0|p\t%0, %2}";
8153 #else
8154 if (STACK_TOP_P (operands[0]))
8155 /* As above for fmul/fadd, we can't store to st(0). */
8156 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8157 else
8158 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8159 #endif
8160 break;
8163 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8165 #if SYSV386_COMPAT
8166 if (STACK_TOP_P (operands[0]))
8167 p = "{rp\t%0, %1|p\t%1, %0}";
8168 else
8169 p = "{p\t%1, %0|rp\t%0, %1}";
8170 #else
8171 if (STACK_TOP_P (operands[0]))
8172 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8173 else
8174 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8175 #endif
8176 break;
8179 if (STACK_TOP_P (operands[0]))
8181 if (STACK_TOP_P (operands[1]))
8182 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8183 else
8184 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8185 break;
8187 else if (STACK_TOP_P (operands[1]))
8189 #if SYSV386_COMPAT
8190 p = "{\t%1, %0|r\t%0, %1}";
8191 #else
8192 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8193 #endif
8195 else
8197 #if SYSV386_COMPAT
8198 p = "{r\t%2, %0|\t%0, %2}";
8199 #else
8200 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8201 #endif
8203 break;
8205 default:
8206 abort ();
8209 strcat (buf, p);
8210 return buf;
8213 /* Output code to initialize the control word copies used by the
8214 trunc?f?i patterns. NORMAL is set to the current control word, while
8215 ROUND_DOWN is set to a control word that truncates (rounds toward zero). */
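/* Background (a sketch): ORing in 0xc00 -- the insv variant achieves the
   same by writing 0xc into the control word's high byte -- sets the x87
   RC field (bits 10-11) to 11b, i.e. round toward zero, which is what the
   truncating conversions need; NORMAL keeps the unmodified control word
   so it can be restored afterwards.  */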
8216 void
8217 emit_i387_cw_initialization (rtx normal, rtx round_down)
8219 rtx reg = gen_reg_rtx (HImode);
8221 emit_insn (gen_x86_fnstcw_1 (normal));
8222 emit_move_insn (reg, normal);
8223 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8224 && !TARGET_64BIT)
8225 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8226 else
8227 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8228 emit_move_insn (round_down, reg);
8231 /* Output code for INSN to convert a float to a signed int. OPERANDS
8232 are the insn operands. The output may be [HSD]Imode and the input
8233 operand may be [SDX]Fmode. */
8235 const char *
8236 output_fix_trunc (rtx insn, rtx *operands)
8238 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8239 int dimode_p = GET_MODE (operands[0]) == DImode;
8241 /* Jump through a hoop or two for DImode, since the hardware has no
8242 non-popping instruction. We used to do this a different way, but
8243 that was somewhat fragile and broke with post-reload splitters. */
8244 if (dimode_p && !stack_top_dies)
8245 output_asm_insn ("fld\t%y1", operands);
8247 if (!STACK_TOP_P (operands[1]))
8248 abort ();
8250 if (GET_CODE (operands[0]) != MEM)
8251 abort ();
8253 output_asm_insn ("fldcw\t%3", operands);
8254 if (stack_top_dies || dimode_p)
8255 output_asm_insn ("fistp%z0\t%0", operands);
8256 else
8257 output_asm_insn ("fist%z0\t%0", operands);
8258 output_asm_insn ("fldcw\t%2", operands);
8260 return "";
8263 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8264 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8265 when fucom should be used. */
8267 const char *
8268 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8270 int stack_top_dies;
8271 rtx cmp_op0 = operands[0];
8272 rtx cmp_op1 = operands[1];
8273 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8275 if (eflags_p == 2)
8277 cmp_op0 = cmp_op1;
8278 cmp_op1 = operands[2];
8280 if (is_sse)
8282 if (GET_MODE (operands[0]) == SFmode)
8283 if (unordered_p)
8284 return "ucomiss\t{%1, %0|%0, %1}";
8285 else
8286 return "comiss\t{%1, %0|%0, %1}";
8287 else
8288 if (unordered_p)
8289 return "ucomisd\t{%1, %0|%0, %1}";
8290 else
8291 return "comisd\t{%1, %0|%0, %1}";
8294 if (! STACK_TOP_P (cmp_op0))
8295 abort ();
8297 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8299 if (STACK_REG_P (cmp_op1)
8300 && stack_top_dies
8301 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8302 && REGNO (cmp_op1) != FIRST_STACK_REG)
8304 /* If the top of the 387 stack dies, and the other operand
8305 is also a stack register that dies, then this must be a
8306 `fcompp' float compare. */
8308 if (eflags_p == 1)
8310 /* There is no double-popping fcomi variant. Fortunately,
8311 eflags is immune to the fstp's cc clobbering. */
8312 if (unordered_p)
8313 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8314 else
8315 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8316 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8318 else
8320 if (eflags_p == 2)
8322 if (unordered_p)
8323 return "fucompp\n\tfnstsw\t%0";
8324 else
8325 return "fcompp\n\tfnstsw\t%0";
8327 else
8329 if (unordered_p)
8330 return "fucompp";
8331 else
8332 return "fcompp";
8336 else
8338 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8340 static const char * const alt[24] =
8342 "fcom%z1\t%y1",
8343 "fcomp%z1\t%y1",
8344 "fucom%z1\t%y1",
8345 "fucomp%z1\t%y1",
8347 "ficom%z1\t%y1",
8348 "ficomp%z1\t%y1",
8349 NULL,
8350 NULL,
8352 "fcomi\t{%y1, %0|%0, %y1}",
8353 "fcomip\t{%y1, %0|%0, %y1}",
8354 "fucomi\t{%y1, %0|%0, %y1}",
8355 "fucomip\t{%y1, %0|%0, %y1}",
8357 NULL,
8358 NULL,
8359 NULL,
8360 NULL,
8362 "fcom%z2\t%y2\n\tfnstsw\t%0",
8363 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8364 "fucom%z2\t%y2\n\tfnstsw\t%0",
8365 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8367 "ficom%z2\t%y2\n\tfnstsw\t%0",
8368 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8369 NULL,
8370 NULL
8373 int mask;
8374 const char *ret;
8376 mask = eflags_p << 3;
8377 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8378 mask |= unordered_p << 1;
8379 mask |= stack_top_dies;
8381 if (mask >= 24)
8382 abort ();
8383 ret = alt[mask];
8384 if (ret == NULL)
8385 abort ();
8387 return ret;
8391 void
8392 ix86_output_addr_vec_elt (FILE *file, int value)
8394 const char *directive = ASM_LONG;
8396 if (TARGET_64BIT)
8398 #ifdef ASM_QUAD
8399 directive = ASM_QUAD;
8400 #else
8401 abort ();
8402 #endif
8405 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8408 void
8409 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8411 if (TARGET_64BIT)
8412 fprintf (file, "%s%s%d-%s%d\n",
8413 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8414 else if (HAVE_AS_GOTOFF_IN_DATA)
8415 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8416 #if TARGET_MACHO
8417 else if (TARGET_MACHO)
8419 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8420 machopic_output_function_base_name (file);
8421 fprintf(file, "\n");
8423 #endif
8424 else
8425 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8426 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8429 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8430 for the target. */
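/* Why two forms (a sketch): "xor reg, reg" has a shorter encoding than
   "mov $0, reg" but clobbers the flags, which is why the xor expansion
   below is wrapped in a PARALLEL with a flags-register CLOBBER while the
   plain move is not.  */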
8432 void
8433 ix86_expand_clear (rtx dest)
8435 rtx tmp;
8437 /* We play register width games, which are only valid after reload. */
8438 if (!reload_completed)
8439 abort ();
8441 /* Avoid HImode and its attendant prefix byte. */
8442 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8443 dest = gen_rtx_REG (SImode, REGNO (dest));
8445 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8447 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8448 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8450 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8451 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8454 emit_insn (tmp);
8457 /* X is an unchanging MEM. If it is a constant pool reference, return
8458 the constant pool rtx, else NULL. */
8460 static rtx
8461 maybe_get_pool_constant (rtx x)
8463 x = ix86_delegitimize_address (XEXP (x, 0));
8465 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8466 return get_pool_constant (x);
8468 return NULL_RTX;
8471 void
8472 ix86_expand_move (enum machine_mode mode, rtx operands[])
8474 int strict = (reload_in_progress || reload_completed);
8475 rtx op0, op1;
8476 enum tls_model model;
8478 op0 = operands[0];
8479 op1 = operands[1];
8481 model = tls_symbolic_operand (op1, Pmode);
8482 if (model)
8484 op1 = legitimize_tls_address (op1, model, true);
8485 op1 = force_operand (op1, op0);
8486 if (op1 == op0)
8487 return;
8490 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8492 #if TARGET_MACHO
8493 if (MACHOPIC_PURE)
8495 rtx temp = ((reload_in_progress
8496 || ((op0 && GET_CODE (op0) == REG)
8497 && mode == Pmode))
8498 ? op0 : gen_reg_rtx (Pmode));
8499 op1 = machopic_indirect_data_reference (op1, temp);
8500 op1 = machopic_legitimize_pic_address (op1, mode,
8501 temp == op1 ? 0 : temp);
8503 else if (MACHOPIC_INDIRECT)
8504 op1 = machopic_indirect_data_reference (op1, 0);
8505 if (op0 == op1)
8506 return;
8507 #else
8508 if (GET_CODE (op0) == MEM)
8509 op1 = force_reg (Pmode, op1);
8510 else
8511 op1 = legitimize_address (op1, op1, Pmode);
8512 #endif /* TARGET_MACHO */
8514 else
8516 if (GET_CODE (op0) == MEM
8517 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8518 || !push_operand (op0, mode))
8519 && GET_CODE (op1) == MEM)
8520 op1 = force_reg (mode, op1);
8522 if (push_operand (op0, mode)
8523 && ! general_no_elim_operand (op1, mode))
8524 op1 = copy_to_mode_reg (mode, op1);
8526 /* Force large constants in 64-bit compilation into a register
8527 so they get CSEd. */
8528 if (TARGET_64BIT && mode == DImode
8529 && immediate_operand (op1, mode)
8530 && !x86_64_zero_extended_value (op1)
8531 && !register_operand (op0, mode)
8532 && optimize && !reload_completed && !reload_in_progress)
8533 op1 = copy_to_mode_reg (mode, op1);
8535 if (FLOAT_MODE_P (mode))
8537 /* If we are loading a floating point constant to a register,
8538 force the value to memory now, since we'll get better code
8539 out of the back end. */
8541 if (strict)
8543 else if (GET_CODE (op1) == CONST_DOUBLE)
8545 op1 = validize_mem (force_const_mem (mode, op1));
8546 if (!register_operand (op0, mode))
8548 rtx temp = gen_reg_rtx (mode);
8549 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8550 emit_move_insn (op0, temp);
8551 return;
8557 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8560 void
8561 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8563 /* Force constants other than zero into memory. We do not know how
8564 the instructions used to build constants modify the upper 64 bits
8565 of the register; once we have that information we may be able
8566 to handle some of them more efficiently. */
8567 if ((reload_in_progress | reload_completed) == 0
8568 && register_operand (operands[0], mode)
8569 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8570 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8572 /* Make operand1 a register if it isn't already. */
8573 if (!no_new_pseudos
8574 && !register_operand (operands[0], mode)
8575 && !register_operand (operands[1], mode))
8577 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8578 emit_move_insn (operands[0], temp);
8579 return;
8582 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8585 /* Attempt to expand a binary operator. Make the expansion closer to the
8586 actual machine than just general_operand, which would allow 3 separate
8587 memory references (one output, two inputs) in a single insn. */
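/* For example (a sketch): expanding a PLUS where the destination and both
   sources are MEMs, the code below forces one source into a register,
   emits (set (dst) (plus ...)) together with a flags clobber, and copies
   the result back if the destination itself had to be replaced by a
   register temporary.  */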
8589 void
8590 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8591 rtx operands[])
8593 int matching_memory;
8594 rtx src1, src2, dst, op, clob;
8596 dst = operands[0];
8597 src1 = operands[1];
8598 src2 = operands[2];
8600 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8601 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8602 && (rtx_equal_p (dst, src2)
8603 || immediate_operand (src1, mode)))
8605 rtx temp = src1;
8606 src1 = src2;
8607 src2 = temp;
8610 /* If the destination is memory, and we do not have matching source
8611 operands, do things in registers. */
8612 matching_memory = 0;
8613 if (GET_CODE (dst) == MEM)
8615 if (rtx_equal_p (dst, src1))
8616 matching_memory = 1;
8617 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8618 && rtx_equal_p (dst, src2))
8619 matching_memory = 2;
8620 else
8621 dst = gen_reg_rtx (mode);
8624 /* Both source operands cannot be in memory. */
8625 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8627 if (matching_memory != 2)
8628 src2 = force_reg (mode, src2);
8629 else
8630 src1 = force_reg (mode, src1);
8633 /* If the operation is not commutative, source 1 cannot be a constant
8634 or non-matching memory. */
8635 if ((CONSTANT_P (src1)
8636 || (!matching_memory && GET_CODE (src1) == MEM))
8637 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8638 src1 = force_reg (mode, src1);
8640 /* If optimizing, copy to regs to improve CSE */
8641 if (optimize && ! no_new_pseudos)
8643 if (GET_CODE (dst) == MEM)
8644 dst = gen_reg_rtx (mode);
8645 if (GET_CODE (src1) == MEM)
8646 src1 = force_reg (mode, src1);
8647 if (GET_CODE (src2) == MEM)
8648 src2 = force_reg (mode, src2);
8651 /* Emit the instruction. */
8653 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8654 if (reload_in_progress)
8656 /* Reload doesn't know about the flags register, and doesn't know that
8657 it doesn't want to clobber it. We can only do this with PLUS. */
8658 if (code != PLUS)
8659 abort ();
8660 emit_insn (op);
8662 else
8664 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8665 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8668 /* Fix up the destination if needed. */
8669 if (dst != operands[0])
8670 emit_move_insn (operands[0], dst);
8673 /* Return TRUE or FALSE depending on whether the binary operator meets the
8674 appropriate constraints. */
8677 ix86_binary_operator_ok (enum rtx_code code,
8678 enum machine_mode mode ATTRIBUTE_UNUSED,
8679 rtx operands[3])
8681 /* Both source operands cannot be in memory. */
8682 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8683 return 0;
8684 /* If the operation is not commutative, source 1 cannot be a constant. */
8685 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8686 return 0;
8687 /* If the destination is memory, we must have a matching source operand. */
8688 if (GET_CODE (operands[0]) == MEM
8689 && ! (rtx_equal_p (operands[0], operands[1])
8690 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8691 && rtx_equal_p (operands[0], operands[2]))))
8692 return 0;
8693 /* If the operation is not commutative and source 1 is memory, we must
8694 have a matching destination. */
8695 if (GET_CODE (operands[1]) == MEM
8696 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8697 && ! rtx_equal_p (operands[0], operands[1]))
8698 return 0;
8699 return 1;
8702 /* Attempt to expand a unary operator. Make the expansion closer to the
8703 actual machine than just general_operand, which would allow 2 separate
8704 memory references (one output, one input) in a single insn. */
8706 void
8707 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8708 rtx operands[])
8710 int matching_memory;
8711 rtx src, dst, op, clob;
8713 dst = operands[0];
8714 src = operands[1];
8716 /* If the destination is memory, and we do not have matching source
8717 operands, do things in registers. */
8718 matching_memory = 0;
8719 if (GET_CODE (dst) == MEM)
8721 if (rtx_equal_p (dst, src))
8722 matching_memory = 1;
8723 else
8724 dst = gen_reg_rtx (mode);
8727 /* When source operand is memory, destination must match. */
8728 if (!matching_memory && GET_CODE (src) == MEM)
8729 src = force_reg (mode, src);
8731 /* If optimizing, copy to regs to improve CSE */
8732 if (optimize && ! no_new_pseudos)
8734 if (GET_CODE (dst) == MEM)
8735 dst = gen_reg_rtx (mode);
8736 if (GET_CODE (src) == MEM)
8737 src = force_reg (mode, src);
8740 /* Emit the instruction. */
8742 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8743 if (reload_in_progress || code == NOT)
8745 /* Reload doesn't know about the flags register, and doesn't know that
8746 it doesn't want to clobber it. */
8747 if (code != NOT)
8748 abort ();
8749 emit_insn (op);
8751 else
8753 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8754 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8757 /* Fix up the destination if needed. */
8758 if (dst != operands[0])
8759 emit_move_insn (operands[0], dst);
8762 /* Return TRUE or FALSE depending on whether the unary operator meets the
8763 appropriate constraints. */
8766 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8767 enum machine_mode mode ATTRIBUTE_UNUSED,
8768 rtx operands[2] ATTRIBUTE_UNUSED)
8770 /* If one of operands is memory, source and destination must match. */
8771 if ((GET_CODE (operands[0]) == MEM
8772 || GET_CODE (operands[1]) == MEM)
8773 && ! rtx_equal_p (operands[0], operands[1]))
8774 return FALSE;
8775 return TRUE;
8778 /* Return TRUE or FALSE depending on whether the first SET in INSN
8779 has source and destination with matching CC modes, and whether the
8780 CC mode is at least as constrained as REQ_MODE. */
8783 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8785 rtx set;
8786 enum machine_mode set_mode;
8788 set = PATTERN (insn);
8789 if (GET_CODE (set) == PARALLEL)
8790 set = XVECEXP (set, 0, 0);
8791 if (GET_CODE (set) != SET)
8792 abort ();
8793 if (GET_CODE (SET_SRC (set)) != COMPARE)
8794 abort ();
8796 set_mode = GET_MODE (SET_DEST (set));
8797 switch (set_mode)
8799 case CCNOmode:
8800 if (req_mode != CCNOmode
8801 && (req_mode != CCmode
8802 || XEXP (SET_SRC (set), 1) != const0_rtx))
8803 return 0;
8804 break;
8805 case CCmode:
8806 if (req_mode == CCGCmode)
8807 return 0;
8808 /* FALLTHRU */
8809 case CCGCmode:
8810 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8811 return 0;
8812 /* FALLTHRU */
8813 case CCGOCmode:
8814 if (req_mode == CCZmode)
8815 return 0;
8816 /* FALLTHRU */
8817 case CCZmode:
8818 break;
8820 default:
8821 abort ();
8824 return (GET_MODE (SET_SRC (set)) == set_mode);
8827 /* Generate insn patterns to do an integer compare of OPERANDS. */
8829 static rtx
8830 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8832 enum machine_mode cmpmode;
8833 rtx tmp, flags;
8835 cmpmode = SELECT_CC_MODE (code, op0, op1);
8836 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8838 /* This is very simple, but making the interface the same as in the
8839 FP case makes the rest of the code easier. */
8840 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8841 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8843 /* Return the test that should be put into the flags user, i.e.
8844 the bcc, scc, or cmov instruction. */
8845 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8848 /* Figure out whether to use ordered or unordered fp comparisons.
8849 Return the appropriate mode to use. */
8851 enum machine_mode
8852 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8854 /* ??? In order to make all comparisons reversible, we do all comparisons
8855 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8856 all forms of trapping and nontrapping comparisons, we can make inequality
8857 comparisons trapping again, since it results in better code when using
8858 FCOM based compares. */
8859 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8862 enum machine_mode
8863 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8865 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8866 return ix86_fp_compare_mode (code);
8867 switch (code)
8869 /* Only zero flag is needed. */
8870 case EQ: /* ZF=0 */
8871 case NE: /* ZF!=0 */
8872 return CCZmode;
8873 /* Codes needing carry flag. */
8874 case GEU: /* CF=0 */
8875 case GTU: /* CF=0 & ZF=0 */
8876 case LTU: /* CF=1 */
8877 case LEU: /* CF=1 | ZF=1 */
8878 return CCmode;
8879 /* Codes possibly doable only with sign flag when
8880 comparing against zero. */
8881 case GE: /* SF=OF or SF=0 */
8882 case LT: /* SF<>OF or SF=1 */
8883 if (op1 == const0_rtx)
8884 return CCGOCmode;
8885 else
8886 /* For other cases Carry flag is not required. */
8887 return CCGCmode;
8888 /* Codes doable only with the sign flag when comparing
8889 against zero, but for which we lack a jump instruction,
8890 so we need to use relational tests against the overflow
8891 flag, which thus needs to be zero. */
8892 case GT: /* ZF=0 & SF=OF */
8893 case LE: /* ZF=1 | SF<>OF */
8894 if (op1 == const0_rtx)
8895 return CCNOmode;
8896 else
8897 return CCGCmode;
8898 /* The strcmp pattern does a (use flags), and combine may ask us for the
8899 proper mode. */
8900 case USE:
8901 return CCmode;
8902 default:
8903 abort ();
8907 /* Return the fixed registers used for condition codes. */
8909 static bool
8910 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8912 *p1 = FLAGS_REG;
8913 *p2 = FPSR_REG;
8914 return true;
8917 /* If two condition code modes are compatible, return a condition code
8918 mode which is compatible with both. Otherwise, return
8919 VOIDmode. */
8921 static enum machine_mode
8922 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8924 if (m1 == m2)
8925 return m1;
8927 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8928 return VOIDmode;
8930 if ((m1 == CCGCmode && m2 == CCGOCmode)
8931 || (m1 == CCGOCmode && m2 == CCGCmode))
8932 return CCGCmode;
8934 switch (m1)
8936 default:
8937 abort ();
8939 case CCmode:
8940 case CCGCmode:
8941 case CCGOCmode:
8942 case CCNOmode:
8943 case CCZmode:
8944 switch (m2)
8946 default:
8947 return VOIDmode;
8949 case CCmode:
8950 case CCGCmode:
8951 case CCGOCmode:
8952 case CCNOmode:
8953 case CCZmode:
8954 return CCmode;
8957 case CCFPmode:
8958 case CCFPUmode:
8959 /* These are only compatible with themselves, which we already
8960 checked above. */
8961 return VOIDmode;
8965 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8968 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8970 enum rtx_code swapped_code = swap_condition (code);
8971 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8972 || (ix86_fp_comparison_cost (swapped_code)
8973 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8976 /* Swap, force into registers, or otherwise massage the two operands
8977 to a fp comparison. The operands are updated in place; the new
8978 comparison code is returned. */
8980 static enum rtx_code
8981 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8983 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8984 rtx op0 = *pop0, op1 = *pop1;
8985 enum machine_mode op_mode = GET_MODE (op0);
8986 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8988 /* All of the unordered compare instructions only work on registers.
8989 The same is true of the XFmode compare instructions. The same is
8990 true of the fcomi compare instructions. */
8992 if (!is_sse
8993 && (fpcmp_mode == CCFPUmode
8994 || op_mode == XFmode
8995 || ix86_use_fcomi_compare (code)))
8997 op0 = force_reg (op_mode, op0);
8998 op1 = force_reg (op_mode, op1);
9000 else
9002 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9003 things around if they appear profitable, otherwise force op0
9004 into a register. */
9006 if (standard_80387_constant_p (op0) == 0
9007 || (GET_CODE (op0) == MEM
9008 && ! (standard_80387_constant_p (op1) == 0
9009 || GET_CODE (op1) == MEM)))
9011 rtx tmp;
9012 tmp = op0, op0 = op1, op1 = tmp;
9013 code = swap_condition (code);
9016 if (GET_CODE (op0) != REG)
9017 op0 = force_reg (op_mode, op0);
9019 if (CONSTANT_P (op1))
9021 if (standard_80387_constant_p (op1))
9022 op1 = force_reg (op_mode, op1);
9023 else
9024 op1 = validize_mem (force_const_mem (op_mode, op1));
9028 /* Try to rearrange the comparison to make it cheaper. */
9029 if (ix86_fp_comparison_cost (code)
9030 > ix86_fp_comparison_cost (swap_condition (code))
9031 && (GET_CODE (op1) == REG || !no_new_pseudos))
9033 rtx tmp;
9034 tmp = op0, op0 = op1, op1 = tmp;
9035 code = swap_condition (code);
9036 if (GET_CODE (op0) != REG)
9037 op0 = force_reg (op_mode, op0);
9040 *pop0 = op0;
9041 *pop1 = op1;
9042 return code;
9045 /* Convert the comparison codes we use to represent an FP comparison to the
9046 integer code that will result in a proper branch. Return UNKNOWN if no
9047 such code is available. */
9048 static enum rtx_code
9049 ix86_fp_compare_code_to_integer (enum rtx_code code)
9051 switch (code)
9053 case GT:
9054 return GTU;
9055 case GE:
9056 return GEU;
9057 case ORDERED:
9058 case UNORDERED:
9059 return code;
9060 break;
9061 case UNEQ:
9062 return EQ;
9063 break;
9064 case UNLT:
9065 return LTU;
9066 break;
9067 case UNLE:
9068 return LEU;
9069 break;
9070 case LTGT:
9071 return NE;
9072 break;
9073 default:
9074 return UNKNOWN;
9078 /* Split comparison code CODE into comparisons we can do using branch
9079 instructions. BYPASS_CODE is the comparison code for the branch that
9080 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
9081 is not required, its code is set to NIL.
9082 We never require more than two branches. */
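/* For example (a sketch): with TARGET_IEEE_FP an EQ test becomes
   *FIRST_CODE = UNEQ with *BYPASS_CODE = UNORDERED, i.e. branch around
   the equality test when the operands compare unordered; without IEEE
   the bypass and second codes are cleared again at the end.  */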
9083 static void
9084 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9085 enum rtx_code *first_code,
9086 enum rtx_code *second_code)
9088 *first_code = code;
9089 *bypass_code = NIL;
9090 *second_code = NIL;
9092 /* The fcomi comparison sets flags as follows:
9094 cmp ZF PF CF
9095 > 0 0 0
9096 < 0 0 1
9097 = 1 0 0
9098 un 1 1 1 */
9100 switch (code)
9102 case GT: /* GTU - CF=0 & ZF=0 */
9103 case GE: /* GEU - CF=0 */
9104 case ORDERED: /* PF=0 */
9105 case UNORDERED: /* PF=1 */
9106 case UNEQ: /* EQ - ZF=1 */
9107 case UNLT: /* LTU - CF=1 */
9108 case UNLE: /* LEU - CF=1 | ZF=1 */
9109 case LTGT: /* EQ - ZF=0 */
9110 break;
9111 case LT: /* LTU - CF=1 - fails on unordered */
9112 *first_code = UNLT;
9113 *bypass_code = UNORDERED;
9114 break;
9115 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9116 *first_code = UNLE;
9117 *bypass_code = UNORDERED;
9118 break;
9119 case EQ: /* EQ - ZF=1 - fails on unordered */
9120 *first_code = UNEQ;
9121 *bypass_code = UNORDERED;
9122 break;
9123 case NE: /* NE - ZF=0 - fails on unordered */
9124 *first_code = LTGT;
9125 *second_code = UNORDERED;
9126 break;
9127 case UNGE: /* GEU - CF=0 - fails on unordered */
9128 *first_code = GE;
9129 *second_code = UNORDERED;
9130 break;
9131 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9132 *first_code = GT;
9133 *second_code = UNORDERED;
9134 break;
9135 default:
9136 abort ();
9138 if (!TARGET_IEEE_FP)
9140 *second_code = NIL;
9141 *bypass_code = NIL;
9145 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9146 All the following functions use the number of instructions as a cost metric.
9147 In the future this should be tweaked to compute bytes for optimize_size and
9148 take into account the performance of various instructions on various CPUs. */
9149 static int
9150 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9152 if (!TARGET_IEEE_FP)
9153 return 4;
9154 /* The cost of code output by ix86_expand_fp_compare. */
9155 switch (code)
9157 case UNLE:
9158 case UNLT:
9159 case LTGT:
9160 case GT:
9161 case GE:
9162 case UNORDERED:
9163 case ORDERED:
9164 case UNEQ:
9165 return 4;
9166 break;
9167 case LT:
9168 case NE:
9169 case EQ:
9170 case UNGE:
9171 return 5;
9172 break;
9173 case LE:
9174 case UNGT:
9175 return 6;
9176 break;
9177 default:
9178 abort ();
9182 /* Return cost of comparison done using fcomi operation.
9183 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9184 static int
9185 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9187 enum rtx_code bypass_code, first_code, second_code;
9188 /* Return an arbitrarily high cost when the instruction is not supported - this
9189 prevents gcc from using it. */
9190 if (!TARGET_CMOVE)
9191 return 1024;
9192 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9193 return (bypass_code != NIL || second_code != NIL) + 2;
9196 /* Return cost of comparison done using sahf operation.
9197 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9198 static int
9199 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9201 enum rtx_code bypass_code, first_code, second_code;
9202 /* Return an arbitrarily high cost when the instruction is not preferred - this
9203 prevents gcc from using it. */
9204 if (!TARGET_USE_SAHF && !optimize_size)
9205 return 1024;
9206 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9207 return (bypass_code != NIL || second_code != NIL) + 3;
9210 /* Compute cost of the comparison done using any method.
9211 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9212 static int
9213 ix86_fp_comparison_cost (enum rtx_code code)
9215 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9216 int min;
9218 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9219 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9221 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9222 if (min > sahf_cost)
9223 min = sahf_cost;
9224 if (min > fcomi_cost)
9225 min = fcomi_cost;
9226 return min;
9229 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9231 static rtx
9232 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9233 rtx *second_test, rtx *bypass_test)
9235 enum machine_mode fpcmp_mode, intcmp_mode;
9236 rtx tmp, tmp2;
9237 int cost = ix86_fp_comparison_cost (code);
9238 enum rtx_code bypass_code, first_code, second_code;
9240 fpcmp_mode = ix86_fp_compare_mode (code);
9241 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9243 if (second_test)
9244 *second_test = NULL_RTX;
9245 if (bypass_test)
9246 *bypass_test = NULL_RTX;
9248 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9250 /* Do fcomi/sahf based test when profitable. */
9251 if ((bypass_code == NIL || bypass_test)
9252 && (second_code == NIL || second_test)
9253 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9255 if (TARGET_CMOVE)
9257 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9258 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9259 tmp);
9260 emit_insn (tmp);
9262 else
9264 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9265 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9266 if (!scratch)
9267 scratch = gen_reg_rtx (HImode);
9268 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9269 emit_insn (gen_x86_sahf_1 (scratch));
9272 /* The FP codes work out to act like unsigned. */
9273 intcmp_mode = fpcmp_mode;
9274 code = first_code;
9275 if (bypass_code != NIL)
9276 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9277 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9278 const0_rtx);
9279 if (second_code != NIL)
9280 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9281 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9282 const0_rtx);
9284 else
9286 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9287 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9288 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9289 if (!scratch)
9290 scratch = gen_reg_rtx (HImode);
9291 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9293 /* In the unordered case, we have to check C2 for NaN's, which
9294 doesn't happen to work out to anything nice combination-wise.
9295 So do some bit twiddling on the value we've got in AH to come
9296 up with an appropriate set of condition codes. */
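/* Bit layout assumed by the masks below (a sketch): after fnstsw the
   condition bits end up in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40,
   so 0x45 tests C3|C2|C0 and 0x04 alone tests C2, the "unordered" bit.  */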
9298 intcmp_mode = CCNOmode;
9299 switch (code)
9301 case GT:
9302 case UNGT:
9303 if (code == GT || !TARGET_IEEE_FP)
9305 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9306 code = EQ;
9308 else
9310 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9311 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9312 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9313 intcmp_mode = CCmode;
9314 code = GEU;
9316 break;
9317 case LT:
9318 case UNLT:
9319 if (code == LT && TARGET_IEEE_FP)
9321 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9322 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9323 intcmp_mode = CCmode;
9324 code = EQ;
9326 else
9328 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9329 code = NE;
9331 break;
9332 case GE:
9333 case UNGE:
9334 if (code == GE || !TARGET_IEEE_FP)
9336 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9337 code = EQ;
9339 else
9341 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9342 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9343 GEN_INT (0x01)));
9344 code = NE;
9346 break;
9347 case LE:
9348 case UNLE:
9349 if (code == LE && TARGET_IEEE_FP)
9351 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9352 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9353 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9354 intcmp_mode = CCmode;
9355 code = LTU;
9357 else
9359 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9360 code = NE;
9362 break;
9363 case EQ:
9364 case UNEQ:
9365 if (code == EQ && TARGET_IEEE_FP)
9367 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9368 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9369 intcmp_mode = CCmode;
9370 code = EQ;
9372 else
9374 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9375 code = NE;
9376 break;
9378 break;
9379 case NE:
9380 case LTGT:
9381 if (code == NE && TARGET_IEEE_FP)
9383 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9384 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9385 GEN_INT (0x40)));
9386 code = NE;
9388 else
9390 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9391 code = EQ;
9393 break;
9395 case UNORDERED:
9396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9397 code = NE;
9398 break;
9399 case ORDERED:
9400 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9401 code = EQ;
9402 break;
9404 default:
9405 abort ();
9409 /* Return the test that should be put into the flags user, i.e.
9410 the bcc, scc, or cmov instruction. */
9411 return gen_rtx_fmt_ee (code, VOIDmode,
9412 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9413 const0_rtx);
9417 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9419 rtx op0, op1, ret;
9420 op0 = ix86_compare_op0;
9421 op1 = ix86_compare_op1;
9423 if (second_test)
9424 *second_test = NULL_RTX;
9425 if (bypass_test)
9426 *bypass_test = NULL_RTX;
9428 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9429 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9430 second_test, bypass_test);
9431 else
9432 ret = ix86_expand_int_compare (code, op0, op1);
9434 return ret;
9437 /* Return true if the CODE will result in a nontrivial jump sequence. */
9438 bool
9439 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9441 enum rtx_code bypass_code, first_code, second_code;
9442 if (!TARGET_CMOVE)
9443 return true;
9444 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9445 return bypass_code != NIL || second_code != NIL;
9448 void
9449 ix86_expand_branch (enum rtx_code code, rtx label)
9451 rtx tmp;
9453 switch (GET_MODE (ix86_compare_op0))
9455 case QImode:
9456 case HImode:
9457 case SImode:
9458 simple:
9459 tmp = ix86_expand_compare (code, NULL, NULL);
9460 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9461 gen_rtx_LABEL_REF (VOIDmode, label),
9462 pc_rtx);
9463 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9464 return;
9466 case SFmode:
9467 case DFmode:
9468 case XFmode:
9470 rtvec vec;
9471 int use_fcomi;
9472 enum rtx_code bypass_code, first_code, second_code;
9474 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9475 &ix86_compare_op1);
9477 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9479 /* Check whether we will use the natural sequence with one jump. If
9480 so, we can expand the jump early. Otherwise delay expansion by
9481 creating a compound insn so as not to confuse the optimizers. */
9482 if (bypass_code == NIL && second_code == NIL
9483 && TARGET_CMOVE)
9485 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9486 gen_rtx_LABEL_REF (VOIDmode, label),
9487 pc_rtx, NULL_RTX);
9489 else
9491 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9492 ix86_compare_op0, ix86_compare_op1);
9493 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9494 gen_rtx_LABEL_REF (VOIDmode, label),
9495 pc_rtx);
9496 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9498 use_fcomi = ix86_use_fcomi_compare (code);
9499 vec = rtvec_alloc (3 + !use_fcomi);
9500 RTVEC_ELT (vec, 0) = tmp;
9501 RTVEC_ELT (vec, 1)
9502 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9503 RTVEC_ELT (vec, 2)
9504 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9505 if (! use_fcomi)
9506 RTVEC_ELT (vec, 3)
9507 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9509 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9511 return;
9514 case DImode:
9515 if (TARGET_64BIT)
9516 goto simple;
9517 /* Expand DImode branch into multiple compare+branch. */
9519 rtx lo[2], hi[2], label2;
9520 enum rtx_code code1, code2, code3;
9522 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9524 tmp = ix86_compare_op0;
9525 ix86_compare_op0 = ix86_compare_op1;
9526 ix86_compare_op1 = tmp;
9527 code = swap_condition (code);
9529 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9530 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9532 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9533 avoid two branches. This costs one extra insn, so disable when
9534 optimizing for size. */
9536 if ((code == EQ || code == NE)
9537 && (!optimize_size
9538 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9540 rtx xor0, xor1;
9542 xor1 = hi[0];
9543 if (hi[1] != const0_rtx)
9544 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9545 NULL_RTX, 0, OPTAB_WIDEN);
9547 xor0 = lo[0];
9548 if (lo[1] != const0_rtx)
9549 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9550 NULL_RTX, 0, OPTAB_WIDEN);
9552 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9553 NULL_RTX, 0, OPTAB_WIDEN);
9555 ix86_compare_op0 = tmp;
9556 ix86_compare_op1 = const0_rtx;
9557 ix86_expand_branch (code, label);
9558 return;
9561 /* Otherwise, if we are doing a less-than or greater-than-or-equal
9562 comparison, op1 is a constant and the low word is zero, then we can
9563 just examine the high word. */
9565 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9566 switch (code)
9568 case LT: case LTU: case GE: case GEU:
9569 ix86_compare_op0 = hi[0];
9570 ix86_compare_op1 = hi[1];
9571 ix86_expand_branch (code, label);
9572 return;
9573 default:
9574 break;
9577 /* Otherwise, we need two or three jumps. */
9579 label2 = gen_label_rtx ();
9581 code1 = code;
9582 code2 = swap_condition (code);
9583 code3 = unsigned_condition (code);
9585 switch (code)
9587 case LT: case GT: case LTU: case GTU:
9588 break;
9590 case LE: code1 = LT; code2 = GT; break;
9591 case GE: code1 = GT; code2 = LT; break;
9592 case LEU: code1 = LTU; code2 = GTU; break;
9593 case GEU: code1 = GTU; code2 = LTU; break;
9595 case EQ: code1 = NIL; code2 = NE; break;
9596 case NE: code2 = NIL; break;
9598 default:
9599 abort ();
9603 * a < b =>
9604 * if (hi(a) < hi(b)) goto true;
9605 * if (hi(a) > hi(b)) goto false;
9606 * if (lo(a) < lo(b)) goto true;
9607 * false:
9610 ix86_compare_op0 = hi[0];
9611 ix86_compare_op1 = hi[1];
9613 if (code1 != NIL)
9614 ix86_expand_branch (code1, label);
9615 if (code2 != NIL)
9616 ix86_expand_branch (code2, label2);
9618 ix86_compare_op0 = lo[0];
9619 ix86_compare_op1 = lo[1];
9620 ix86_expand_branch (code3, label);
9622 if (code2 != NIL)
9623 emit_label (label2);
9624 return;
9627 default:
9628 abort ();
9632 /* Split branch based on floating point condition. */
9633 void
9634 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9635 rtx target1, rtx target2, rtx tmp)
9637 rtx second, bypass;
9638 rtx label = NULL_RTX;
9639 rtx condition;
9640 int bypass_probability = -1, second_probability = -1, probability = -1;
9641 rtx i;
9643 if (target2 != pc_rtx)
9645 rtx tmp = target2;
9646 code = reverse_condition_maybe_unordered (code);
9647 target2 = target1;
9648 target1 = tmp;
9651 condition = ix86_expand_fp_compare (code, op1, op2,
9652 tmp, &second, &bypass);
9654 if (split_branch_probability >= 0)
9656 /* Distribute the probabilities across the jumps.
9657 Assume that BYPASS and SECOND always test
9658 for UNORDERED. */
9659 probability = split_branch_probability;
9661 /* A value of 1 is low enough that there is no need for the probability
9662 to be updated. Later we may run some experiments and see
9663 if unordered values are more frequent in practice. */
9664 if (bypass)
9665 bypass_probability = 1;
9666 if (second)
9667 second_probability = 1;
9669 if (bypass != NULL_RTX)
9671 label = gen_label_rtx ();
9672 i = emit_jump_insn (gen_rtx_SET
9673 (VOIDmode, pc_rtx,
9674 gen_rtx_IF_THEN_ELSE (VOIDmode,
9675 bypass,
9676 gen_rtx_LABEL_REF (VOIDmode,
9677 label),
9678 pc_rtx)));
9679 if (bypass_probability >= 0)
9680 REG_NOTES (i)
9681 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9682 GEN_INT (bypass_probability),
9683 REG_NOTES (i));
9685 i = emit_jump_insn (gen_rtx_SET
9686 (VOIDmode, pc_rtx,
9687 gen_rtx_IF_THEN_ELSE (VOIDmode,
9688 condition, target1, target2)));
9689 if (probability >= 0)
9690 REG_NOTES (i)
9691 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9692 GEN_INT (probability),
9693 REG_NOTES (i));
9694 if (second != NULL_RTX)
9696 i = emit_jump_insn (gen_rtx_SET
9697 (VOIDmode, pc_rtx,
9698 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9699 target2)));
9700 if (second_probability >= 0)
9701 REG_NOTES (i)
9702 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9703 GEN_INT (second_probability),
9704 REG_NOTES (i));
9706 if (label != NULL_RTX)
9707 emit_label (label);
9711 ix86_expand_setcc (enum rtx_code code, rtx dest)
9713 rtx ret, tmp, tmpreg, equiv;
9714 rtx second_test, bypass_test;
9716 if (GET_MODE (ix86_compare_op0) == DImode
9717 && !TARGET_64BIT)
9718 return 0; /* FAIL */
9720 if (GET_MODE (dest) != QImode)
9721 abort ();
9723 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9724 PUT_MODE (ret, QImode);
9726 tmp = dest;
9727 tmpreg = dest;
9729 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9730 if (bypass_test || second_test)
9732 rtx test = second_test;
9733 int bypass = 0;
9734 rtx tmp2 = gen_reg_rtx (QImode);
9735 if (bypass_test)
9737 if (second_test)
9738 abort ();
9739 test = bypass_test;
9740 bypass = 1;
9741 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9743 PUT_MODE (test, QImode);
9744 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9746 if (bypass)
9747 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9748 else
9749 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9752 /* Attach a REG_EQUAL note describing the comparison result. */
9753 equiv = simplify_gen_relational (code, QImode,
9754 GET_MODE (ix86_compare_op0),
9755 ix86_compare_op0, ix86_compare_op1);
9756 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9758 return 1; /* DONE */
9761 /* Expand a comparison setting or clearing the carry flag. Return true when
9762 successful and set *POP to the comparison operation. */
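/* The idea (a sketch): a comparison that reduces to LTU or GEU can be
   consumed directly by sbb, e.g. "cmp op1, op0; sbb reg, reg" leaves reg
   equal to -1 or 0 according to the carry flag, which the conditional
   move expansion below uses to build branch-free sequences.  */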
9763 static bool
9764 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9766 enum machine_mode mode =
9767 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9769 /* Do not handle DImode compares that go through the special path. Also we
9770 can't deal with FP compares yet, though it would be possible to add that. */
9771 if ((mode == DImode && !TARGET_64BIT))
9772 return false;
9773 if (FLOAT_MODE_P (mode))
9775 rtx second_test = NULL, bypass_test = NULL;
9776 rtx compare_op, compare_seq;
9778 /* Shortcut: the following common codes never translate into carry-flag compares. */
9779 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9780 || code == ORDERED || code == UNORDERED)
9781 return false;
9783 /* These comparisons require the zero flag; swap the operands so they won't. */
9784 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9785 && !TARGET_IEEE_FP)
9787 rtx tmp = op0;
9788 op0 = op1;
9789 op1 = tmp;
9790 code = swap_condition (code);
9793 /* Try to expand the comparison and verify that we end up with a carry-flag
9794 based comparison. This fails to be true only when we decide to expand the
9795 comparison using arithmetic, which is not a common scenario. */
9796 start_sequence ();
9797 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9798 &second_test, &bypass_test);
9799 compare_seq = get_insns ();
9800 end_sequence ();
9802 if (second_test || bypass_test)
9803 return false;
9804 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9805 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9806 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9807 else
9808 code = GET_CODE (compare_op);
9809 if (code != LTU && code != GEU)
9810 return false;
9811 emit_insn (compare_seq);
9812 *pop = compare_op;
9813 return true;
9815 if (!INTEGRAL_MODE_P (mode))
9816 return false;
9817 switch (code)
9819 case LTU:
9820 case GEU:
9821 break;
9823 /* Convert a==0 into (unsigned)a<1. */
9824 case EQ:
9825 case NE:
9826 if (op1 != const0_rtx)
9827 return false;
9828 op1 = const1_rtx;
9829 code = (code == EQ ? LTU : GEU);
9830 break;
9832 /* Convert a>b into b<a or a>=b+1. */
9833 case GTU:
9834 case LEU:
9835 if (GET_CODE (op1) == CONST_INT)
9837 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9838 /* Bail out on overflow.  We can still swap the operands, but that
9839 would force loading of the constant into a register.  */
9840 if (op1 == const0_rtx
9841 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9842 return false;
9843 code = (code == GTU ? GEU : LTU);
9845 else
9847 rtx tmp = op1;
9848 op1 = op0;
9849 op0 = tmp;
9850 code = (code == GTU ? LTU : GEU);
9852 break;
9854 /* Convert a>=0 into (unsigned)a<0x80000000. */
9855 case LT:
9856 case GE:
9857 if (mode == DImode || op1 != const0_rtx)
9858 return false;
9859 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9860 code = (code == LT ? GEU : LTU);
9861 break;
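/* Illustration (editorial note, not in the original source): in SImode,
   "a >= 0" holds exactly when (unsigned) a < 0x80000000, so GE maps to
   LTU and LT maps to GEU against the sign-bit constant built above, and
   the resulting unsigned compare leaves its answer in the carry flag.  */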
9862 case LE:
9863 case GT:
9864 if (mode == DImode || op1 != constm1_rtx)
9865 return false;
9866 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9867 code = (code == LE ? GEU : LTU);
9868 break;
9870 default:
9871 return false;
9873 /* Swapping operands may cause a constant to appear as the first operand. */
9874 if (!nonimmediate_operand (op0, VOIDmode))
9876 if (no_new_pseudos)
9877 return false;
9878 op0 = force_reg (mode, op0);
9880 ix86_compare_op0 = op0;
9881 ix86_compare_op1 = op1;
9882 *pop = ix86_expand_compare (code, NULL, NULL);
9883 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9884 abort ();
9885 return true;
9889 ix86_expand_int_movcc (rtx operands[])
9891 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9892 rtx compare_seq, compare_op;
9893 rtx second_test, bypass_test;
9894 enum machine_mode mode = GET_MODE (operands[0]);
9895 bool sign_bit_compare_p = false;
9897 start_sequence ();
9898 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9899 compare_seq = get_insns ();
9900 end_sequence ();
9902 compare_code = GET_CODE (compare_op);
9904 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9905 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9906 sign_bit_compare_p = true;
9908 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9909 HImode insns, we'd be swallowed in word prefix ops. */
9911 if ((mode != HImode || TARGET_FAST_PREFIX)
9912 && (mode != DImode || TARGET_64BIT)
9913 && GET_CODE (operands[2]) == CONST_INT
9914 && GET_CODE (operands[3]) == CONST_INT)
9916 rtx out = operands[0];
9917 HOST_WIDE_INT ct = INTVAL (operands[2]);
9918 HOST_WIDE_INT cf = INTVAL (operands[3]);
9919 HOST_WIDE_INT diff;
9921 diff = ct - cf;
9922 /* Sign bit compares are better done using shifts than by using
9923 sbb.  */
9924 if (sign_bit_compare_p
9925 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9926 ix86_compare_op1, &compare_op))
9928 /* Detect overlap between destination and compare sources. */
9929 rtx tmp = out;
9931 if (!sign_bit_compare_p)
9933 bool fpcmp = false;
9935 compare_code = GET_CODE (compare_op);
9937 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9938 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9940 fpcmp = true;
9941 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9944 /* To simplify rest of code, restrict to the GEU case. */
9945 if (compare_code == LTU)
9947 HOST_WIDE_INT tmp = ct;
9948 ct = cf;
9949 cf = tmp;
9950 compare_code = reverse_condition (compare_code);
9951 code = reverse_condition (code);
9953 else
9955 if (fpcmp)
9956 PUT_CODE (compare_op,
9957 reverse_condition_maybe_unordered
9958 (GET_CODE (compare_op)));
9959 else
9960 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9962 diff = ct - cf;
9964 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9965 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9966 tmp = gen_reg_rtx (mode);
9968 if (mode == DImode)
9969 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9970 else
9971 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9973 else
9975 if (code == GT || code == GE)
9976 code = reverse_condition (code);
9977 else
9979 HOST_WIDE_INT tmp = ct;
9980 ct = cf;
9981 cf = tmp;
9982 diff = ct - cf;
9984 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9985 ix86_compare_op1, VOIDmode, 0, -1);
9988 if (diff == 1)
9991 * cmpl op0,op1
9992 * sbbl dest,dest
9993 * [addl dest, ct]
9995 * Size 5 - 8.
9997 if (ct)
9998 tmp = expand_simple_binop (mode, PLUS,
9999 tmp, GEN_INT (ct),
10000 copy_rtx (tmp), 1, OPTAB_DIRECT);
10002 else if (cf == -1)
10005 * cmpl op0,op1
10006 * sbbl dest,dest
10007 * orl $ct, dest
10009 * Size 8.
10011 tmp = expand_simple_binop (mode, IOR,
10012 tmp, GEN_INT (ct),
10013 copy_rtx (tmp), 1, OPTAB_DIRECT);
10015 else if (diff == -1 && ct)
10018 * cmpl op0,op1
10019 * sbbl dest,dest
10020 * notl dest
10021 * [addl dest, cf]
10023 * Size 8 - 11.
10025 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10026 if (cf)
10027 tmp = expand_simple_binop (mode, PLUS,
10028 copy_rtx (tmp), GEN_INT (cf),
10029 copy_rtx (tmp), 1, OPTAB_DIRECT);
10031 else
10034 * cmpl op0,op1
10035 * sbbl dest,dest
10036 * [notl dest]
10037 * andl cf - ct, dest
10038 * [addl dest, ct]
10040 * Size 8 - 11.
10043 if (cf == 0)
10045 cf = ct;
10046 ct = 0;
10047 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10050 tmp = expand_simple_binop (mode, AND,
10051 copy_rtx (tmp),
10052 gen_int_mode (cf - ct, mode),
10053 copy_rtx (tmp), 1, OPTAB_DIRECT);
10054 if (ct)
10055 tmp = expand_simple_binop (mode, PLUS,
10056 copy_rtx (tmp), GEN_INT (ct),
10057 copy_rtx (tmp), 1, OPTAB_DIRECT);
10060 if (!rtx_equal_p (tmp, out))
10061 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10063 return 1; /* DONE */
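/* Illustration (editorial note): every sequence above starts from sbb
   producing 0 or -1 in dest.  For the general and/add case, with a
   hypothetical ct = 7 and cf = 3, "and (cf - ct)" turns {0, -1} into
   {0, -4} and the final "add ct" yields {7, 3}, i.e. ct or cf; when
   diff == 1 the single "add ct" already maps {0, -1} to {ct, cf}.  */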
10066 if (diff < 0)
10068 HOST_WIDE_INT tmp;
10069 tmp = ct, ct = cf, cf = tmp;
10070 diff = -diff;
10071 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10073 /* We may be reversing an unordered compare to a normal compare, which
10074 is not valid in general (we may convert a non-trapping condition
10075 to a trapping one); however, on i386 we currently emit all
10076 comparisons unordered.  */
10077 compare_code = reverse_condition_maybe_unordered (compare_code);
10078 code = reverse_condition_maybe_unordered (code);
10080 else
10082 compare_code = reverse_condition (compare_code);
10083 code = reverse_condition (code);
10087 compare_code = NIL;
10088 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10089 && GET_CODE (ix86_compare_op1) == CONST_INT)
10091 if (ix86_compare_op1 == const0_rtx
10092 && (code == LT || code == GE))
10093 compare_code = code;
10094 else if (ix86_compare_op1 == constm1_rtx)
10096 if (code == LE)
10097 compare_code = LT;
10098 else if (code == GT)
10099 compare_code = GE;
10103 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10104 if (compare_code != NIL
10105 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10106 && (cf == -1 || ct == -1))
10108 /* If lea code below could be used, only optimize
10109 if it results in a 2 insn sequence. */
10111 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10112 || diff == 3 || diff == 5 || diff == 9)
10113 || (compare_code == LT && ct == -1)
10114 || (compare_code == GE && cf == -1))
10117 * notl op1 (if necessary)
10118 * sarl $31, op1
10119 * orl cf, op1
10121 if (ct != -1)
10123 cf = ct;
10124 ct = -1;
10125 code = reverse_condition (code);
10128 out = emit_store_flag (out, code, ix86_compare_op0,
10129 ix86_compare_op1, VOIDmode, 0, -1);
10131 out = expand_simple_binop (mode, IOR,
10132 out, GEN_INT (cf),
10133 out, 1, OPTAB_DIRECT);
10134 if (out != operands[0])
10135 emit_move_insn (operands[0], out);
10137 return 1; /* DONE */
10142 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10143 || diff == 3 || diff == 5 || diff == 9)
10144 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10145 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10148 * xorl dest,dest
10149 * cmpl op1,op2
10150 * setcc dest
10151 * lea cf(dest*(ct-cf)),dest
10153 * Size 14.
10155 * This also catches the degenerate setcc-only case.
10158 rtx tmp;
10159 int nops;
10161 out = emit_store_flag (out, code, ix86_compare_op0,
10162 ix86_compare_op1, VOIDmode, 0, 1);
10164 nops = 0;
10165 /* On x86_64 the lea instruction operates on Pmode, so we need
10166 to do the arithmetic in the proper mode to match.  */
10167 if (diff == 1)
10168 tmp = copy_rtx (out);
10169 else
10171 rtx out1;
10172 out1 = copy_rtx (out);
10173 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10174 nops++;
10175 if (diff & 1)
10177 tmp = gen_rtx_PLUS (mode, tmp, out1);
10178 nops++;
10181 if (cf != 0)
10183 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10184 nops++;
10186 if (!rtx_equal_p (tmp, out))
10188 if (nops == 1)
10189 out = force_operand (tmp, copy_rtx (out));
10190 else
10191 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10193 if (!rtx_equal_p (out, operands[0]))
10194 emit_move_insn (operands[0], copy_rtx (out));
10196 return 1; /* DONE */
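/* Illustration (editorial note): with a hypothetical diff = 5 and cf = 2
   (so ct = 7), the setcc above leaves dest in {0, 1} and the lea-style
   expression computes dest*4 + dest + 2, i.e. 2 when the condition is
   false and 7 when it is true.  */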
10200 * General case: Jumpful:
10201 * xorl dest,dest cmpl op1, op2
10202 * cmpl op1, op2 movl ct, dest
10203 * setcc dest jcc 1f
10204 * decl dest movl cf, dest
10205 * andl (cf-ct),dest 1:
10206 * addl ct,dest
10208 * Size 20. Size 14.
10210 * This is reasonably steep, but branch mispredict costs are
10211 * high on modern cpus, so consider failing only if optimizing
10212 * for space.
10215 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10216 && BRANCH_COST >= 2)
10218 if (cf == 0)
10220 cf = ct;
10221 ct = 0;
10222 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10223 /* We may be reversing an unordered compare to a normal compare,
10224 which is not valid in general (we may convert a non-trapping
10225 condition to a trapping one); however, on i386 we currently
10226 emit all comparisons unordered. */
10227 code = reverse_condition_maybe_unordered (code);
10228 else
10230 code = reverse_condition (code);
10231 if (compare_code != NIL)
10232 compare_code = reverse_condition (compare_code);
10236 if (compare_code != NIL)
10238 /* notl op1 (if needed)
10239 sarl $31, op1
10240 andl (cf-ct), op1
10241 addl ct, op1
10243 For x < 0 (resp. x <= -1) there will be no notl,
10244 so if possible swap the constants to get rid of the
10245 complement.
10246 True/false will be -1/0 while code below (store flag
10247 followed by decrement) is 0/-1, so the constants need
10248 to be exchanged once more. */
10250 if (compare_code == GE || !cf)
10252 code = reverse_condition (code);
10253 compare_code = LT;
10255 else
10257 HOST_WIDE_INT tmp = cf;
10258 cf = ct;
10259 ct = tmp;
10262 out = emit_store_flag (out, code, ix86_compare_op0,
10263 ix86_compare_op1, VOIDmode, 0, -1);
10265 else
10267 out = emit_store_flag (out, code, ix86_compare_op0,
10268 ix86_compare_op1, VOIDmode, 0, 1);
10270 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10271 copy_rtx (out), 1, OPTAB_DIRECT);
10274 out = expand_simple_binop (mode, AND, copy_rtx (out),
10275 gen_int_mode (cf - ct, mode),
10276 copy_rtx (out), 1, OPTAB_DIRECT);
10277 if (ct)
10278 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10279 copy_rtx (out), 1, OPTAB_DIRECT);
10280 if (!rtx_equal_p (out, operands[0]))
10281 emit_move_insn (operands[0], copy_rtx (out));
10283 return 1; /* DONE */
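/* Illustration (editorial note): in the jumpless sequence above, setcc
   leaves {0, 1}, the "add -1" turns that into {-1, 0}, "and (cf - ct)"
   gives {cf - ct, 0}, and the final "add ct" produces {cf, ct}; e.g.
   with ct = 7 and cf = 3: {0, 1} -> {-1, 0} -> {-4, 0} -> {3, 7}.  */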
10287 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10289 /* Try a few things more with specific constants and a variable. */
10291 optab op;
10292 rtx var, orig_out, out, tmp;
10294 if (BRANCH_COST <= 2)
10295 return 0; /* FAIL */
10297 /* If one of the two operands is an interesting constant, load a
10298 constant with the above and mask it in with a logical operation. */
10300 if (GET_CODE (operands[2]) == CONST_INT)
10302 var = operands[3];
10303 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10304 operands[3] = constm1_rtx, op = and_optab;
10305 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10306 operands[3] = const0_rtx, op = ior_optab;
10307 else
10308 return 0; /* FAIL */
10310 else if (GET_CODE (operands[3]) == CONST_INT)
10312 var = operands[2];
10313 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10314 operands[2] = constm1_rtx, op = and_optab;
10315 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10316 operands[2] = const0_rtx, op = ior_optab;
10317 else
10318 return 0; /* FAIL */
10320 else
10321 return 0; /* FAIL */
10323 orig_out = operands[0];
10324 tmp = gen_reg_rtx (mode);
10325 operands[0] = tmp;
10327 /* Recurse to get the constant loaded. */
10328 if (ix86_expand_int_movcc (operands) == 0)
10329 return 0; /* FAIL */
10331 /* Mask in the interesting variable. */
10332 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10333 OPTAB_WIDEN);
10334 if (!rtx_equal_p (out, orig_out))
10335 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10337 return 1; /* DONE */
10341 * For comparison with above,
10343 * movl cf,dest
10344 * movl ct,tmp
10345 * cmpl op1,op2
10346 * cmovcc tmp,dest
10348 * Size 15.
10351 if (! nonimmediate_operand (operands[2], mode))
10352 operands[2] = force_reg (mode, operands[2]);
10353 if (! nonimmediate_operand (operands[3], mode))
10354 operands[3] = force_reg (mode, operands[3]);
10356 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10358 rtx tmp = gen_reg_rtx (mode);
10359 emit_move_insn (tmp, operands[3]);
10360 operands[3] = tmp;
10362 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10364 rtx tmp = gen_reg_rtx (mode);
10365 emit_move_insn (tmp, operands[2]);
10366 operands[2] = tmp;
10369 if (! register_operand (operands[2], VOIDmode)
10370 && (mode == QImode
10371 || ! register_operand (operands[3], VOIDmode)))
10372 operands[2] = force_reg (mode, operands[2]);
10374 if (mode == QImode
10375 && ! register_operand (operands[3], VOIDmode))
10376 operands[3] = force_reg (mode, operands[3]);
10378 emit_insn (compare_seq);
10379 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10380 gen_rtx_IF_THEN_ELSE (mode,
10381 compare_op, operands[2],
10382 operands[3])));
10383 if (bypass_test)
10384 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10385 gen_rtx_IF_THEN_ELSE (mode,
10386 bypass_test,
10387 copy_rtx (operands[3]),
10388 copy_rtx (operands[0]))));
10389 if (second_test)
10390 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10391 gen_rtx_IF_THEN_ELSE (mode,
10392 second_test,
10393 copy_rtx (operands[2]),
10394 copy_rtx (operands[0]))));
10396 return 1; /* DONE */
10400 ix86_expand_fp_movcc (rtx operands[])
10402 enum rtx_code code;
10403 rtx tmp;
10404 rtx compare_op, second_test, bypass_test;
10406 /* For SF/DFmode conditional moves based on comparisons
10407 in same mode, we may want to use SSE min/max instructions. */
10408 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10409 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10410 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10411 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10412 && (!TARGET_IEEE_FP
10413 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10414 /* We may be called from the post-reload splitter. */
10415 && (!REG_P (operands[0])
10416 || SSE_REG_P (operands[0])
10417 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10419 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10420 code = GET_CODE (operands[1]);
10422 /* See if we have (cross) match between comparison operands and
10423 conditional move operands. */
10424 if (rtx_equal_p (operands[2], op1))
10426 rtx tmp = op0;
10427 op0 = op1;
10428 op1 = tmp;
10429 code = reverse_condition_maybe_unordered (code);
10431 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10433 /* Check for min operation. */
10434 if (code == LT || code == UNLE)
10436 if (code == UNLE)
10438 rtx tmp = op0;
10439 op0 = op1;
10440 op1 = tmp;
10442 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10443 if (memory_operand (op0, VOIDmode))
10444 op0 = force_reg (GET_MODE (operands[0]), op0);
10445 if (GET_MODE (operands[0]) == SFmode)
10446 emit_insn (gen_minsf3 (operands[0], op0, op1));
10447 else
10448 emit_insn (gen_mindf3 (operands[0], op0, op1));
10449 return 1;
10451 /* Check for max operation. */
10452 if (code == GT || code == UNGE)
10454 if (code == UNGE)
10456 rtx tmp = op0;
10457 op0 = op1;
10458 op1 = tmp;
10460 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10461 if (memory_operand (op0, VOIDmode))
10462 op0 = force_reg (GET_MODE (operands[0]), op0);
10463 if (GET_MODE (operands[0]) == SFmode)
10464 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10465 else
10466 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10467 return 1;
10470 /* Arrange for the condition to be an sse_comparison_operator.  When we are
10471 in non-IEEE mode, try to canonicalize the destination operand
10472 to be first in the comparison - this helps reload avoid extra
10473 moves.  */
10474 if (!sse_comparison_operator (operands[1], VOIDmode)
10475 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10477 rtx tmp = ix86_compare_op0;
10478 ix86_compare_op0 = ix86_compare_op1;
10479 ix86_compare_op1 = tmp;
10480 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10481 VOIDmode, ix86_compare_op0,
10482 ix86_compare_op1);
10484 /* Similarly, try to arrange for the result to be the first operand of the
10485 conditional move.  We also don't support the NE comparison on SSE, so try to
10486 avoid it.  */
10487 if ((rtx_equal_p (operands[0], operands[3])
10488 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10489 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10491 rtx tmp = operands[2];
10492 operands[2] = operands[3];
10493 operands[3] = tmp;
10494 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10495 (GET_CODE (operands[1])),
10496 VOIDmode, ix86_compare_op0,
10497 ix86_compare_op1);
10499 if (GET_MODE (operands[0]) == SFmode)
10500 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10501 operands[2], operands[3],
10502 ix86_compare_op0, ix86_compare_op1));
10503 else
10504 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10505 operands[2], operands[3],
10506 ix86_compare_op0, ix86_compare_op1));
10507 return 1;
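/* Illustration (editorial note): the recognition above turns e.g.
   "x = (a < b) ? a : b" in SFmode into a single minsf3 (SSE minss) and
   "x = (a > b) ? a : b" into maxsf3 (SSE maxss); the UNLE/UNGE forms
   are handled by first swapping the comparison operands.  */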
10510 /* The floating point conditional move instructions don't directly
10511 support conditions resulting from a signed integer comparison. */
10513 code = GET_CODE (operands[1]);
10514 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10516 /* The floating point conditional move instructions don't directly
10517 support signed integer comparisons. */
10519 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10521 if (second_test != NULL || bypass_test != NULL)
10522 abort ();
10523 tmp = gen_reg_rtx (QImode);
10524 ix86_expand_setcc (code, tmp);
10525 code = NE;
10526 ix86_compare_op0 = tmp;
10527 ix86_compare_op1 = const0_rtx;
10528 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10530 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10532 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10533 emit_move_insn (tmp, operands[3]);
10534 operands[3] = tmp;
10536 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10538 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10539 emit_move_insn (tmp, operands[2]);
10540 operands[2] = tmp;
10543 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10544 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10545 compare_op,
10546 operands[2],
10547 operands[3])));
10548 if (bypass_test)
10549 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10550 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10551 bypass_test,
10552 operands[3],
10553 operands[0])));
10554 if (second_test)
10555 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10556 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10557 second_test,
10558 operands[2],
10559 operands[0])));
10561 return 1;
10564 /* Expand conditional increment or decrement using adc/sbb instructions.
10565 The default case using setcc followed by the conditional move can be
10566 done by generic code. */
10568 ix86_expand_int_addcc (rtx operands[])
10570 enum rtx_code code = GET_CODE (operands[1]);
10571 rtx compare_op;
10572 rtx val = const0_rtx;
10573 bool fpcmp = false;
10574 enum machine_mode mode = GET_MODE (operands[0]);
10576 if (operands[3] != const1_rtx
10577 && operands[3] != constm1_rtx)
10578 return 0;
10579 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10580 ix86_compare_op1, &compare_op))
10581 return 0;
10582 code = GET_CODE (compare_op);
10584 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10585 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10587 fpcmp = true;
10588 code = ix86_fp_compare_code_to_integer (code);
10591 if (code != LTU)
10593 val = constm1_rtx;
10594 if (fpcmp)
10595 PUT_CODE (compare_op,
10596 reverse_condition_maybe_unordered
10597 (GET_CODE (compare_op)));
10598 else
10599 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10601 PUT_MODE (compare_op, mode);
10603 /* Construct either adc or sbb insn. */
10604 if ((code == LTU) == (operands[3] == constm1_rtx))
10606 switch (GET_MODE (operands[0]))
10608 case QImode:
10609 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10610 break;
10611 case HImode:
10612 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10613 break;
10614 case SImode:
10615 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10616 break;
10617 case DImode:
10618 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10619 break;
10620 default:
10621 abort ();
10624 else
10626 switch (GET_MODE (operands[0]))
10628 case QImode:
10629 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10630 break;
10631 case HImode:
10632 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10633 break;
10634 case SImode:
10635 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10636 break;
10637 case DImode:
10638 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10639 break;
10640 default:
10641 abort ();
10644 return 1; /* DONE */
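/* Illustration (editorial note): e.g. a conditional increment such as
   "x = y + (a < b)" with an unsigned compare becomes "cmp a, b" followed
   by an add-with-carry of zero (the add*3_carry patterns above add VAL
   plus the carry flag); the conditional decrement case uses the
   sub*3_carry (sbb) patterns symmetrically.  */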
10648 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10649 works for floating point parameters and non-offsettable memories.
10650 For pushes, it returns just stack offsets; the values will be saved
10651 in the right order.  At most three parts are generated. */
10653 static int
10654 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10656 int size;
10658 if (!TARGET_64BIT)
10659 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10660 else
10661 size = (GET_MODE_SIZE (mode) + 4) / 8;
10663 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10664 abort ();
10665 if (size < 2 || size > 3)
10666 abort ();
10668 /* Optimize constant pool reference to immediates. This is used by fp
10669 moves, that force all constants to memory to allow combining. */
10670 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10672 rtx tmp = maybe_get_pool_constant (operand);
10673 if (tmp)
10674 operand = tmp;
10677 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10679 /* The only non-offsettable memories we handle are pushes. */
10680 if (! push_operand (operand, VOIDmode))
10681 abort ();
10683 operand = copy_rtx (operand);
10684 PUT_MODE (operand, Pmode);
10685 parts[0] = parts[1] = parts[2] = operand;
10687 else if (!TARGET_64BIT)
10689 if (mode == DImode)
10690 split_di (&operand, 1, &parts[0], &parts[1]);
10691 else
10693 if (REG_P (operand))
10695 if (!reload_completed)
10696 abort ();
10697 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10698 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10699 if (size == 3)
10700 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10702 else if (offsettable_memref_p (operand))
10704 operand = adjust_address (operand, SImode, 0);
10705 parts[0] = operand;
10706 parts[1] = adjust_address (operand, SImode, 4);
10707 if (size == 3)
10708 parts[2] = adjust_address (operand, SImode, 8);
10710 else if (GET_CODE (operand) == CONST_DOUBLE)
10712 REAL_VALUE_TYPE r;
10713 long l[4];
10715 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10716 switch (mode)
10718 case XFmode:
10719 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10720 parts[2] = gen_int_mode (l[2], SImode);
10721 break;
10722 case DFmode:
10723 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10724 break;
10725 default:
10726 abort ();
10728 parts[1] = gen_int_mode (l[1], SImode);
10729 parts[0] = gen_int_mode (l[0], SImode);
10731 else
10732 abort ();
10735 else
10737 if (mode == TImode)
10738 split_ti (&operand, 1, &parts[0], &parts[1]);
10739 if (mode == XFmode || mode == TFmode)
10741 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10742 if (REG_P (operand))
10744 if (!reload_completed)
10745 abort ();
10746 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10747 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10749 else if (offsettable_memref_p (operand))
10751 operand = adjust_address (operand, DImode, 0);
10752 parts[0] = operand;
10753 parts[1] = adjust_address (operand, upper_mode, 8);
10755 else if (GET_CODE (operand) == CONST_DOUBLE)
10757 REAL_VALUE_TYPE r;
10758 long l[3];
10760 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10761 real_to_target (l, &r, mode);
10762 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10763 if (HOST_BITS_PER_WIDE_INT >= 64)
10764 parts[0]
10765 = gen_int_mode
10766 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10767 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10768 DImode);
10769 else
10770 parts[0] = immed_double_const (l[0], l[1], DImode);
10771 if (upper_mode == SImode)
10772 parts[1] = gen_int_mode (l[2], SImode);
10773 else if (HOST_BITS_PER_WIDE_INT >= 64)
10774 parts[1]
10775 = gen_int_mode
10776 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10777 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10778 DImode);
10779 else
10780 parts[1] = immed_double_const (l[2], l[3], DImode);
10782 else
10783 abort ();
10787 return size;
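/* Illustration (editorial note): per the size computation above, on a
   32-bit target a DFmode value splits into two SImode parts and XFmode
   into three, while on a 64-bit target XFmode/TFmode split into a DImode
   part plus an SImode or DImode upper part.  */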
10790 /* Emit insns to perform a move or push of DI, DF, and XF values.
10791 Return false when normal moves are needed; true when all required
10792 insns have been emitted. Operands 2-4 contain the input values
10793 in the correct order; operands 5-7 contain the output values. */
10795 void
10796 ix86_split_long_move (rtx operands[])
10798 rtx part[2][3];
10799 int nparts;
10800 int push = 0;
10801 int collisions = 0;
10802 enum machine_mode mode = GET_MODE (operands[0]);
10804 /* The DFmode expanders may ask us to move a double.
10805 For a 64-bit target this is a single move.  By hiding that fact
10806 here we simplify the i386.md splitters. */
10807 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10809 /* Optimize constant pool reference to immediates. This is used by
10810 fp moves, that force all constants to memory to allow combining. */
10812 if (GET_CODE (operands[1]) == MEM
10813 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10814 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10815 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10816 if (push_operand (operands[0], VOIDmode))
10818 operands[0] = copy_rtx (operands[0]);
10819 PUT_MODE (operands[0], Pmode);
10821 else
10822 operands[0] = gen_lowpart (DImode, operands[0]);
10823 operands[1] = gen_lowpart (DImode, operands[1]);
10824 emit_move_insn (operands[0], operands[1]);
10825 return;
10828 /* The only non-offsettable memory we handle is push. */
10829 if (push_operand (operands[0], VOIDmode))
10830 push = 1;
10831 else if (GET_CODE (operands[0]) == MEM
10832 && ! offsettable_memref_p (operands[0]))
10833 abort ();
10835 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10836 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10838 /* When emitting a push, take care of source operands on the stack. */
10839 if (push && GET_CODE (operands[1]) == MEM
10840 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10842 if (nparts == 3)
10843 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10844 XEXP (part[1][2], 0));
10845 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10846 XEXP (part[1][1], 0));
10849 /* We need to do the copy in the right order in case an address register
10850 of the source overlaps the destination. */
10851 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10853 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10854 collisions++;
10855 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10856 collisions++;
10857 if (nparts == 3
10858 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10859 collisions++;
10861 /* Collision in the middle part can be handled by reordering. */
10862 if (collisions == 1 && nparts == 3
10863 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10865 rtx tmp;
10866 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10867 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10870 /* If there are more collisions, we can't handle them by reordering.
10871 Do an lea to the last part and use only one colliding move. */
10872 else if (collisions > 1)
10874 rtx base;
10876 collisions = 1;
10878 base = part[0][nparts - 1];
10880 /* Handle the case when the last part isn't valid for lea.
10881 Happens in 64-bit mode storing the 12-byte XFmode. */
10882 if (GET_MODE (base) != Pmode)
10883 base = gen_rtx_REG (Pmode, REGNO (base));
10885 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10886 part[1][0] = replace_equiv_address (part[1][0], base);
10887 part[1][1] = replace_equiv_address (part[1][1],
10888 plus_constant (base, UNITS_PER_WORD));
10889 if (nparts == 3)
10890 part[1][2] = replace_equiv_address (part[1][2],
10891 plus_constant (base, 8));
10895 if (push)
10897 if (!TARGET_64BIT)
10899 if (nparts == 3)
10901 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10902 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10903 emit_move_insn (part[0][2], part[1][2]);
10906 else
10908 /* In 64-bit mode we don't have a 32-bit push available.  In case this is a
10909 register, that is OK - we will just use the larger counterpart.  We also
10910 retype memory - this comes from an attempt to avoid the REX prefix when
10911 moving the second half of a TFmode value. */
10912 if (GET_MODE (part[1][1]) == SImode)
10914 if (GET_CODE (part[1][1]) == MEM)
10915 part[1][1] = adjust_address (part[1][1], DImode, 0);
10916 else if (REG_P (part[1][1]))
10917 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10918 else
10919 abort ();
10920 if (GET_MODE (part[1][0]) == SImode)
10921 part[1][0] = part[1][1];
10924 emit_move_insn (part[0][1], part[1][1]);
10925 emit_move_insn (part[0][0], part[1][0]);
10926 return;
10929 /* Choose the correct order so we do not overwrite the source before it is copied. */
10930 if ((REG_P (part[0][0])
10931 && REG_P (part[1][1])
10932 && (REGNO (part[0][0]) == REGNO (part[1][1])
10933 || (nparts == 3
10934 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10935 || (collisions > 0
10936 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10938 if (nparts == 3)
10940 operands[2] = part[0][2];
10941 operands[3] = part[0][1];
10942 operands[4] = part[0][0];
10943 operands[5] = part[1][2];
10944 operands[6] = part[1][1];
10945 operands[7] = part[1][0];
10947 else
10949 operands[2] = part[0][1];
10950 operands[3] = part[0][0];
10951 operands[5] = part[1][1];
10952 operands[6] = part[1][0];
10955 else
10957 if (nparts == 3)
10959 operands[2] = part[0][0];
10960 operands[3] = part[0][1];
10961 operands[4] = part[0][2];
10962 operands[5] = part[1][0];
10963 operands[6] = part[1][1];
10964 operands[7] = part[1][2];
10966 else
10968 operands[2] = part[0][0];
10969 operands[3] = part[0][1];
10970 operands[5] = part[1][0];
10971 operands[6] = part[1][1];
10974 emit_move_insn (operands[2], operands[5]);
10975 emit_move_insn (operands[3], operands[6]);
10976 if (nparts == 3)
10977 emit_move_insn (operands[4], operands[7]);
10979 return;
10982 void
10983 ix86_split_ashldi (rtx *operands, rtx scratch)
10985 rtx low[2], high[2];
10986 int count;
10988 if (GET_CODE (operands[2]) == CONST_INT)
10990 split_di (operands, 2, low, high);
10991 count = INTVAL (operands[2]) & 63;
10993 if (count >= 32)
10995 emit_move_insn (high[0], low[1]);
10996 emit_move_insn (low[0], const0_rtx);
10998 if (count > 32)
10999 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
11001 else
11003 if (!rtx_equal_p (operands[0], operands[1]))
11004 emit_move_insn (operands[0], operands[1]);
11005 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
11006 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
11009 else
11011 if (!rtx_equal_p (operands[0], operands[1]))
11012 emit_move_insn (operands[0], operands[1]);
11014 split_di (operands, 1, low, high);
11016 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
11017 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
11019 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11021 if (! no_new_pseudos)
11022 scratch = force_reg (SImode, const0_rtx);
11023 else
11024 emit_move_insn (scratch, const0_rtx);
11026 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
11027 scratch));
11029 else
11030 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
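/* Illustration (editorial note): for a constant count, e.g. a 64-bit
   left shift by 40 on a 32-bit target, the code above moves the low
   input word into the high result word, clears the low result word and
   then shifts the high word left by 40 - 32 = 8; counts below 32 use
   the shld/shl pair instead.  */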
11034 void
11035 ix86_split_ashrdi (rtx *operands, rtx scratch)
11037 rtx low[2], high[2];
11038 int count;
11040 if (GET_CODE (operands[2]) == CONST_INT)
11042 split_di (operands, 2, low, high);
11043 count = INTVAL (operands[2]) & 63;
11045 if (count == 63)
11047 emit_move_insn (high[0], high[1]);
11048 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11049 emit_move_insn (low[0], high[0]);
11052 else if (count >= 32)
11054 emit_move_insn (low[0], high[1]);
11056 if (! reload_completed)
11057 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
11058 else
11060 emit_move_insn (high[0], low[0]);
11061 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
11064 if (count > 32)
11065 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
11067 else
11069 if (!rtx_equal_p (operands[0], operands[1]))
11070 emit_move_insn (operands[0], operands[1]);
11071 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11072 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
11075 else
11077 if (!rtx_equal_p (operands[0], operands[1]))
11078 emit_move_insn (operands[0], operands[1]);
11080 split_di (operands, 1, low, high);
11082 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11083 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
11085 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11087 if (! no_new_pseudos)
11088 scratch = gen_reg_rtx (SImode);
11089 emit_move_insn (scratch, high[0]);
11090 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11091 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11092 scratch));
11094 else
11095 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11099 void
11100 ix86_split_lshrdi (rtx *operands, rtx scratch)
11102 rtx low[2], high[2];
11103 int count;
11105 if (GET_CODE (operands[2]) == CONST_INT)
11107 split_di (operands, 2, low, high);
11108 count = INTVAL (operands[2]) & 63;
11110 if (count >= 32)
11112 emit_move_insn (low[0], high[1]);
11113 emit_move_insn (high[0], const0_rtx);
11115 if (count > 32)
11116 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11118 else
11120 if (!rtx_equal_p (operands[0], operands[1]))
11121 emit_move_insn (operands[0], operands[1]);
11122 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11123 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11126 else
11128 if (!rtx_equal_p (operands[0], operands[1]))
11129 emit_move_insn (operands[0], operands[1]);
11131 split_di (operands, 1, low, high);
11133 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11134 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11136 /* Heh. By reversing the arguments, we can reuse this pattern. */
11137 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11139 if (! no_new_pseudos)
11140 scratch = force_reg (SImode, const0_rtx);
11141 else
11142 emit_move_insn (scratch, const0_rtx);
11144 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11145 scratch));
11147 else
11148 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11152 /* Helper function for the string operations below.  Test whether VARIABLE
11153 is aligned to VALUE bytes.  If it is, jump to the returned label. */
11154 static rtx
11155 ix86_expand_aligntest (rtx variable, int value)
11157 rtx label = gen_label_rtx ();
11158 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11159 if (GET_MODE (variable) == DImode)
11160 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11161 else
11162 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11163 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11164 1, label);
11165 return label;
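/* Editorial note: callers emit the unaligned-case fixup (a single byte,
   word or long move/store) right after this call and then emit_label the
   returned label, so operands whose tested alignment bit is already
   clear branch around that fixup.  */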
11168 /* Decrement COUNTREG by VALUE. */
11169 static void
11170 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11172 if (GET_MODE (countreg) == DImode)
11173 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11174 else
11175 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11178 /* Zero extend the possibly SImode EXP to a Pmode register. */
11180 ix86_zero_extend_to_Pmode (rtx exp)
11182 rtx r;
11183 if (GET_MODE (exp) == VOIDmode)
11184 return force_reg (Pmode, exp);
11185 if (GET_MODE (exp) == Pmode)
11186 return copy_to_mode_reg (Pmode, exp);
11187 r = gen_reg_rtx (Pmode);
11188 emit_insn (gen_zero_extendsidi2 (r, exp));
11189 return r;
11192 /* Expand string move (memcpy) operation. Use i386 string operations when
11193 profitable. expand_clrmem contains similar code. */
11195 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11197 rtx srcreg, destreg, countreg, srcexp, destexp;
11198 enum machine_mode counter_mode;
11199 HOST_WIDE_INT align = 0;
11200 unsigned HOST_WIDE_INT count = 0;
11202 if (GET_CODE (align_exp) == CONST_INT)
11203 align = INTVAL (align_exp);
11205 /* Can't use any of this if the user has appropriated esi or edi. */
11206 if (global_regs[4] || global_regs[5])
11207 return 0;
11209 /* This simple hack avoids all inlining code and simplifies code below. */
11210 if (!TARGET_ALIGN_STRINGOPS)
11211 align = 64;
11213 if (GET_CODE (count_exp) == CONST_INT)
11215 count = INTVAL (count_exp);
11216 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11217 return 0;
11220 /* Figure out proper mode for counter. For 32bits it is always SImode,
11221 for 64bits use SImode when possible, otherwise DImode.
11222 Set count to number of bytes copied when known at compile time. */
11223 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11224 || x86_64_zero_extended_value (count_exp))
11225 counter_mode = SImode;
11226 else
11227 counter_mode = DImode;
11229 if (counter_mode != SImode && counter_mode != DImode)
11230 abort ();
11232 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11233 if (destreg != XEXP (dst, 0))
11234 dst = replace_equiv_address_nv (dst, destreg);
11235 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11236 if (srcreg != XEXP (src, 0))
11237 src = replace_equiv_address_nv (src, srcreg);
11239 /* When optimizing for size, emit a simple rep ; movsb instruction for
11240 counts not divisible by 4. */
11242 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11244 emit_insn (gen_cld ());
11245 countreg = ix86_zero_extend_to_Pmode (count_exp);
11246 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11247 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11248 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11249 destexp, srcexp));
11252 /* For constant aligned (or small unaligned) copies use rep movsl
11253 followed by code copying the rest. For PentiumPro ensure 8 byte
11254 alignment to allow rep movsl acceleration. */
11256 else if (count != 0
11257 && (align >= 8
11258 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11259 || optimize_size || count < (unsigned int) 64))
11261 unsigned HOST_WIDE_INT offset = 0;
11262 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11263 rtx srcmem, dstmem;
11265 emit_insn (gen_cld ());
11266 if (count & ~(size - 1))
11268 countreg = copy_to_mode_reg (counter_mode,
11269 GEN_INT ((count >> (size == 4 ? 2 : 3))
11270 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11271 countreg = ix86_zero_extend_to_Pmode (countreg);
11273 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11274 GEN_INT (size == 4 ? 2 : 3));
11275 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11276 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11278 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11279 countreg, destexp, srcexp));
11280 offset = count & ~(size - 1);
11282 if (size == 8 && (count & 0x04))
11284 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11285 offset);
11286 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11287 offset);
11288 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11289 offset += 4;
11291 if (count & 0x02)
11293 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11294 offset);
11295 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11296 offset);
11297 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11298 offset += 2;
11300 if (count & 0x01)
11302 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11303 offset);
11304 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11305 offset);
11306 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11309 /* The generic code based on the glibc implementation:
11310 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11311 allowing accelerated copying there)
11312 - copy the data using rep movsl
11313 - copy the rest. */
11314 else
11316 rtx countreg2;
11317 rtx label = NULL;
11318 rtx srcmem, dstmem;
11319 int desired_alignment = (TARGET_PENTIUMPRO
11320 && (count == 0 || count >= (unsigned int) 260)
11321 ? 8 : UNITS_PER_WORD);
11322 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11323 dst = change_address (dst, BLKmode, destreg);
11324 src = change_address (src, BLKmode, srcreg);
11326 /* In case we don't know anything about the alignment, default to the
11327 library version, since it is usually equally fast and results in
11328 shorter code.
11330 Also emit the call when we know that the count is large and the call
11331 overhead will not be important. */
11332 if (!TARGET_INLINE_ALL_STRINGOPS
11333 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11334 return 0;
11336 if (TARGET_SINGLE_STRINGOP)
11337 emit_insn (gen_cld ());
11339 countreg2 = gen_reg_rtx (Pmode);
11340 countreg = copy_to_mode_reg (counter_mode, count_exp);
11342 /* We don't use loops to align destination and to copy parts smaller
11343 than 4 bytes, because gcc is able to optimize such code better (in
11344 the case the destination or the count really is aligned, gcc is often
11345 able to predict the branches) and also it is friendlier to the
11346 hardware branch prediction.
11348 Using loops is beneficial for the generic case, because we can
11349 handle small counts using the loops. Many CPUs (such as Athlon)
11350 have large REP prefix setup costs.
11352 This is quite costly. Maybe we can revisit this decision later or
11353 add some customizability to this code. */
11355 if (count == 0 && align < desired_alignment)
11357 label = gen_label_rtx ();
11358 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11359 LEU, 0, counter_mode, 1, label);
11361 if (align <= 1)
11363 rtx label = ix86_expand_aligntest (destreg, 1);
11364 srcmem = change_address (src, QImode, srcreg);
11365 dstmem = change_address (dst, QImode, destreg);
11366 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11367 ix86_adjust_counter (countreg, 1);
11368 emit_label (label);
11369 LABEL_NUSES (label) = 1;
11371 if (align <= 2)
11373 rtx label = ix86_expand_aligntest (destreg, 2);
11374 srcmem = change_address (src, HImode, srcreg);
11375 dstmem = change_address (dst, HImode, destreg);
11376 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11377 ix86_adjust_counter (countreg, 2);
11378 emit_label (label);
11379 LABEL_NUSES (label) = 1;
11381 if (align <= 4 && desired_alignment > 4)
11383 rtx label = ix86_expand_aligntest (destreg, 4);
11384 srcmem = change_address (src, SImode, srcreg);
11385 dstmem = change_address (dst, SImode, destreg);
11386 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11387 ix86_adjust_counter (countreg, 4);
11388 emit_label (label);
11389 LABEL_NUSES (label) = 1;
11392 if (label && desired_alignment > 4 && !TARGET_64BIT)
11394 emit_label (label);
11395 LABEL_NUSES (label) = 1;
11396 label = NULL_RTX;
11398 if (!TARGET_SINGLE_STRINGOP)
11399 emit_insn (gen_cld ());
11400 if (TARGET_64BIT)
11402 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11403 GEN_INT (3)));
11404 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11406 else
11408 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11409 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11411 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11412 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11413 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11414 countreg2, destexp, srcexp));
11416 if (label)
11418 emit_label (label);
11419 LABEL_NUSES (label) = 1;
11421 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11423 srcmem = change_address (src, SImode, srcreg);
11424 dstmem = change_address (dst, SImode, destreg);
11425 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11427 if ((align <= 4 || count == 0) && TARGET_64BIT)
11429 rtx label = ix86_expand_aligntest (countreg, 4);
11430 srcmem = change_address (src, SImode, srcreg);
11431 dstmem = change_address (dst, SImode, destreg);
11432 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11433 emit_label (label);
11434 LABEL_NUSES (label) = 1;
11436 if (align > 2 && count != 0 && (count & 2))
11438 srcmem = change_address (src, HImode, srcreg);
11439 dstmem = change_address (dst, HImode, destreg);
11440 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11442 if (align <= 2 || count == 0)
11444 rtx label = ix86_expand_aligntest (countreg, 2);
11445 srcmem = change_address (src, HImode, srcreg);
11446 dstmem = change_address (dst, HImode, destreg);
11447 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11448 emit_label (label);
11449 LABEL_NUSES (label) = 1;
11451 if (align > 1 && count != 0 && (count & 1))
11453 srcmem = change_address (src, QImode, srcreg);
11454 dstmem = change_address (dst, QImode, destreg);
11455 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11457 if (align <= 1 || count == 0)
11459 rtx label = ix86_expand_aligntest (countreg, 1);
11460 srcmem = change_address (src, QImode, srcreg);
11461 dstmem = change_address (dst, QImode, destreg);
11462 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11463 emit_label (label);
11464 LABEL_NUSES (label) = 1;
11468 return 1;
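/* Illustration (editorial note): in the constant-count branch above, a
   hypothetical copy of 23 bytes with size == 4 issues rep movsl for
   23 >> 2 = 5 longwords and then finishes the remaining bytes with one
   HImode and one QImode move, selected by the (count & 2) and
   (count & 1) tests.  */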
11471 /* Expand string clear operation (bzero). Use i386 string operations when
11472 profitable. expand_movmem contains similar code. */
11474 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11476 rtx destreg, zeroreg, countreg, destexp;
11477 enum machine_mode counter_mode;
11478 HOST_WIDE_INT align = 0;
11479 unsigned HOST_WIDE_INT count = 0;
11481 if (GET_CODE (align_exp) == CONST_INT)
11482 align = INTVAL (align_exp);
11484 /* Can't use any of this if the user has appropriated esi. */
11485 if (global_regs[4])
11486 return 0;
11488 /* This simple hack avoids all inlining code and simplifies code below. */
11489 if (!TARGET_ALIGN_STRINGOPS)
11490 align = 32;
11492 if (GET_CODE (count_exp) == CONST_INT)
11494 count = INTVAL (count_exp);
11495 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11496 return 0;
11498 /* Figure out proper mode for counter. For 32bits it is always SImode,
11499 for 64bits use SImode when possible, otherwise DImode.
11500 Set count to number of bytes copied when known at compile time. */
11501 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11502 || x86_64_zero_extended_value (count_exp))
11503 counter_mode = SImode;
11504 else
11505 counter_mode = DImode;
11507 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11508 if (destreg != XEXP (dst, 0))
11509 dst = replace_equiv_address_nv (dst, destreg);
11511 emit_insn (gen_cld ());
11513 /* When optimizing for size, emit a simple rep ; stosb instruction for
11514 counts not divisible by 4. */
11516 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11518 countreg = ix86_zero_extend_to_Pmode (count_exp);
11519 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11520 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11521 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11523 else if (count != 0
11524 && (align >= 8
11525 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11526 || optimize_size || count < (unsigned int) 64))
11528 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11529 unsigned HOST_WIDE_INT offset = 0;
11531 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11532 if (count & ~(size - 1))
11534 countreg = copy_to_mode_reg (counter_mode,
11535 GEN_INT ((count >> (size == 4 ? 2 : 3))
11536 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11537 countreg = ix86_zero_extend_to_Pmode (countreg);
11538 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11539 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11540 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11541 offset = count & ~(size - 1);
11543 if (size == 8 && (count & 0x04))
11545 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11546 offset);
11547 emit_insn (gen_strset (destreg, mem,
11548 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11549 offset += 4;
11551 if (count & 0x02)
11553 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11554 offset);
11555 emit_insn (gen_strset (destreg, mem,
11556 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11557 offset += 2;
11559 if (count & 0x01)
11561 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11562 offset);
11563 emit_insn (gen_strset (destreg, mem,
11564 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11567 else
11569 rtx countreg2;
11570 rtx label = NULL;
11571 /* Compute desired alignment of the string operation. */
11572 int desired_alignment = (TARGET_PENTIUMPRO
11573 && (count == 0 || count >= (unsigned int) 260)
11574 ? 8 : UNITS_PER_WORD);
11576 /* In case we don't know anything about the alignment, default to the
11577 library version, since it is usually equally fast and results in
11578 shorter code.
11580 Also emit the call when we know that the count is large and the call
11581 overhead will not be important. */
11582 if (!TARGET_INLINE_ALL_STRINGOPS
11583 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11584 return 0;
11586 if (TARGET_SINGLE_STRINGOP)
11587 emit_insn (gen_cld ());
11589 countreg2 = gen_reg_rtx (Pmode);
11590 countreg = copy_to_mode_reg (counter_mode, count_exp);
11591 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11592 /* Get rid of MEM_OFFSET, it won't be accurate. */
11593 dst = change_address (dst, BLKmode, destreg);
11595 if (count == 0 && align < desired_alignment)
11597 label = gen_label_rtx ();
11598 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11599 LEU, 0, counter_mode, 1, label);
11601 if (align <= 1)
11603 rtx label = ix86_expand_aligntest (destreg, 1);
11604 emit_insn (gen_strset (destreg, dst,
11605 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11606 ix86_adjust_counter (countreg, 1);
11607 emit_label (label);
11608 LABEL_NUSES (label) = 1;
11610 if (align <= 2)
11612 rtx label = ix86_expand_aligntest (destreg, 2);
11613 emit_insn (gen_strset (destreg, dst,
11614 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11615 ix86_adjust_counter (countreg, 2);
11616 emit_label (label);
11617 LABEL_NUSES (label) = 1;
11619 if (align <= 4 && desired_alignment > 4)
11621 rtx label = ix86_expand_aligntest (destreg, 4);
11622 emit_insn (gen_strset (destreg, dst,
11623 (TARGET_64BIT
11624 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11625 : zeroreg)));
11626 ix86_adjust_counter (countreg, 4);
11627 emit_label (label);
11628 LABEL_NUSES (label) = 1;
11631 if (label && desired_alignment > 4 && !TARGET_64BIT)
11633 emit_label (label);
11634 LABEL_NUSES (label) = 1;
11635 label = NULL_RTX;
11638 if (!TARGET_SINGLE_STRINGOP)
11639 emit_insn (gen_cld ());
11640 if (TARGET_64BIT)
11642 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11643 GEN_INT (3)));
11644 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11646 else
11648 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11649 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11651 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11652 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11654 if (label)
11656 emit_label (label);
11657 LABEL_NUSES (label) = 1;
11660 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11661 emit_insn (gen_strset (destreg, dst,
11662 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11663 if (TARGET_64BIT && (align <= 4 || count == 0))
11665 rtx label = ix86_expand_aligntest (countreg, 4);
11666 emit_insn (gen_strset (destreg, dst,
11667 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11668 emit_label (label);
11669 LABEL_NUSES (label) = 1;
11671 if (align > 2 && count != 0 && (count & 2))
11672 emit_insn (gen_strset (destreg, dst,
11673 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11674 if (align <= 2 || count == 0)
11676 rtx label = ix86_expand_aligntest (countreg, 2);
11677 emit_insn (gen_strset (destreg, dst,
11678 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11679 emit_label (label);
11680 LABEL_NUSES (label) = 1;
11682 if (align > 1 && count != 0 && (count & 1))
11683 emit_insn (gen_strset (destreg, dst,
11684 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11685 if (align <= 1 || count == 0)
11687 rtx label = ix86_expand_aligntest (countreg, 1);
11688 emit_insn (gen_strset (destreg, dst,
11689 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11690 emit_label (label);
11691 LABEL_NUSES (label) = 1;
11694 return 1;
11697 /* Expand strlen. */
11699 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11701 rtx addr, scratch1, scratch2, scratch3, scratch4;
11703 /* The generic case of the strlen expander is long.  Avoid
11704 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
11706 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11707 && !TARGET_INLINE_ALL_STRINGOPS
11708 && !optimize_size
11709 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11710 return 0;
11712 addr = force_reg (Pmode, XEXP (src, 0));
11713 scratch1 = gen_reg_rtx (Pmode);
11715 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11716 && !optimize_size)
11718 /* Well it seems that some optimizer does not combine a call like
11719 foo(strlen(bar), strlen(bar));
11720 when the move and the subtraction are done here. It does calculate
11721 the length just once when these instructions are done inside of
11722 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11723 often used and I use one fewer register for the lifetime of
11724 output_strlen_unroll() this is better. */
11726 emit_move_insn (out, addr);
11728 ix86_expand_strlensi_unroll_1 (out, src, align);
11730 /* strlensi_unroll_1 returns the address of the zero at the end of
11731 the string, like memchr(), so compute the length by subtracting
11732 the start address. */
11733 if (TARGET_64BIT)
11734 emit_insn (gen_subdi3 (out, out, addr));
11735 else
11736 emit_insn (gen_subsi3 (out, out, addr));
11738 else
11740 rtx unspec;
11741 scratch2 = gen_reg_rtx (Pmode);
11742 scratch3 = gen_reg_rtx (Pmode);
11743 scratch4 = force_reg (Pmode, constm1_rtx);
11745 emit_move_insn (scratch3, addr);
11746 eoschar = force_reg (QImode, eoschar);
11748 emit_insn (gen_cld ());
11749 src = replace_equiv_address_nv (src, scratch3);
11751 /* If .md starts supporting :P, this can be done in .md. */
11752 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11753 scratch4), UNSPEC_SCAS);
11754 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11755 if (TARGET_64BIT)
11757 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11758 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11760 else
11762 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11763 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11766 return 1;
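/* Illustration (editorial note): in the scas path above the count
   register starts at -1 (scratch4); after repnz scasb has scanned
   len + 1 bytes (including the terminator) it holds -(len + 2), so the
   one's complement gives len + 1 and the final add of -1 leaves the
   string length in OUT.  */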
11769 /* Expand the appropriate insns for doing strlen if not just doing
11770 repnz; scasb
11772 out = result, initialized with the start address
11773 align_rtx = alignment of the address.
11774 scratch = scratch register, initialized with the start address when
11775 not aligned, otherwise undefined
11777 This is just the body. It needs the initializations mentioned above and
11778 some address computing at the end. These things are done in i386.md. */
11780 static void
11781 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11783 int align;
11784 rtx tmp;
11785 rtx align_2_label = NULL_RTX;
11786 rtx align_3_label = NULL_RTX;
11787 rtx align_4_label = gen_label_rtx ();
11788 rtx end_0_label = gen_label_rtx ();
11789 rtx mem;
11790 rtx tmpreg = gen_reg_rtx (SImode);
11791 rtx scratch = gen_reg_rtx (SImode);
11792 rtx cmp;
11794 align = 0;
11795 if (GET_CODE (align_rtx) == CONST_INT)
11796 align = INTVAL (align_rtx);
11798 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11800 /* Is there a known alignment and is it less than 4? */
11801 if (align < 4)
11803 rtx scratch1 = gen_reg_rtx (Pmode);
11804 emit_move_insn (scratch1, out);
11805 /* Is there a known alignment and is it not 2? */
11806 if (align != 2)
11808 align_3_label = gen_label_rtx (); /* Label used when address is 3 mod 4 */
11809 align_2_label = gen_label_rtx (); /* Label used when address is 2 mod 4 */
11811 /* Leave just the 3 lower bits. */
11812 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11813 NULL_RTX, 0, OPTAB_WIDEN);
11815 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11816 Pmode, 1, align_4_label);
11817 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11818 Pmode, 1, align_2_label);
11819 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11820 Pmode, 1, align_3_label);
11822 else
11824 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11825 check whether the address is 4-byte aligned. */
11827 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11828 NULL_RTX, 0, OPTAB_WIDEN);
11830 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11831 Pmode, 1, align_4_label);
11834 mem = change_address (src, QImode, out);
11836 /* Now compare the bytes. */
11838 /* Compare the first n unaligned bytes one byte at a time. */
11839 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11840 QImode, 1, end_0_label);
11842 /* Increment the address. */
11843 if (TARGET_64BIT)
11844 emit_insn (gen_adddi3 (out, out, const1_rtx));
11845 else
11846 emit_insn (gen_addsi3 (out, out, const1_rtx));
11848 /* Not needed with an alignment of 2 */
11849 if (align != 2)
11851 emit_label (align_2_label);
11853 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11854 end_0_label);
11856 if (TARGET_64BIT)
11857 emit_insn (gen_adddi3 (out, out, const1_rtx));
11858 else
11859 emit_insn (gen_addsi3 (out, out, const1_rtx));
11861 emit_label (align_3_label);
11864 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11865 end_0_label);
11867 if (TARGET_64BIT)
11868 emit_insn (gen_adddi3 (out, out, const1_rtx));
11869 else
11870 emit_insn (gen_addsi3 (out, out, const1_rtx));
11873 /* Generate a loop to check 4 bytes at a time. It is not worthwhile to
11874 align this loop; doing so only enlarges the program and does not
11875 speed it up. */
11876 emit_label (align_4_label);
11878 mem = change_address (src, SImode, out);
11879 emit_move_insn (scratch, mem);
11880 if (TARGET_64BIT)
11881 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11882 else
11883 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11885 /* This formula yields a nonzero result iff one of the bytes is zero.
11886 This saves three branches inside the loop and many cycles. */
11888 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11889 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11890 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11891 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11892 gen_int_mode (0x80808080, SImode)));
11893 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11894 align_4_label);
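/* The four insns above implement the classic "word contains a zero byte"
   test.  As a plain C model (not the exact instruction sequence):

     unsigned int w = *(const unsigned int *) p;              /* scratch */
     unsigned int t = (w - 0x01010101U) & ~w & 0x80808080U;   /* tmpreg  */

   T is nonzero iff at least one of the four bytes of W is zero, and the
   least significant 0x80 bit set in T marks the first such byte.  */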
11896 if (TARGET_CMOVE)
11898 rtx reg = gen_reg_rtx (SImode);
11899 rtx reg2 = gen_reg_rtx (Pmode);
11900 emit_move_insn (reg, tmpreg);
11901 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11903 /* If zero is not in the first two bytes, move two bytes forward. */
11904 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11905 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11906 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11907 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11908 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11909 reg,
11910 tmpreg)));
11911 /* Emit the lea manually to avoid clobbering the flags. */
11912 emit_insn (gen_rtx_SET (SImode, reg2,
11913 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11915 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11916 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11917 emit_insn (gen_rtx_SET (VOIDmode, out,
11918 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11919 reg2,
11920 out)));
11923 else
11925 rtx end_2_label = gen_label_rtx ();
11926 /* Is zero in the first two bytes? */
11928 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11929 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11930 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11931 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11932 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11933 pc_rtx);
11934 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11935 JUMP_LABEL (tmp) = end_2_label;
11937 /* Not in the first two. Move two bytes forward. */
11938 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11939 if (TARGET_64BIT)
11940 emit_insn (gen_adddi3 (out, out, const2_rtx));
11941 else
11942 emit_insn (gen_addsi3 (out, out, const2_rtx));
11944 emit_label (end_2_label);
11948 /* Avoid a branch when fixing up the final byte position. */
11949 tmpreg = gen_lowpart (QImode, tmpreg);
11950 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11951 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11952 if (TARGET_64BIT)
11953 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11954 else
11955 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
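/* At this point OUT points 4 bytes past the 16-bit chunk known to hold the
   zero byte, and bit 7 of the low byte of TMPREG is set iff the zero is the
   first byte of that chunk.  Doubling the low byte (addqi3_cc) copies that
   bit into the carry flag, so the subtract-with-borrow above computes

     out -= 3 + CF;   /* back 4 bytes if the first byte was the zero,
                         back 3 bytes if it was the second */

   leaving OUT at the terminating zero without any branch.  */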
11957 emit_label (end_0_label);
11960 void
11961 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11962 rtx callarg2 ATTRIBUTE_UNUSED,
11963 rtx pop, int sibcall)
11965 rtx use = NULL, call;
11967 if (pop == const0_rtx)
11968 pop = NULL;
11969 if (TARGET_64BIT && pop)
11970 abort ();
11972 #if TARGET_MACHO
11973 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11974 fnaddr = machopic_indirect_call_target (fnaddr);
11975 #else
11976 /* Static functions and indirect calls don't need the pic register. */
11977 if (! TARGET_64BIT && flag_pic
11978 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11979 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11980 use_reg (&use, pic_offset_table_rtx);
11982 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11984 rtx al = gen_rtx_REG (QImode, 0);
11985 emit_move_insn (al, callarg2);
11986 use_reg (&use, al);
11988 #endif /* TARGET_MACHO */
11990 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11992 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11993 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11995 if (sibcall && TARGET_64BIT
11996 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11998 rtx addr;
11999 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12000 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12001 emit_move_insn (fnaddr, addr);
12002 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12005 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12006 if (retval)
12007 call = gen_rtx_SET (VOIDmode, retval, call);
12008 if (pop)
12010 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12011 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12012 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12015 call = emit_call_insn (call);
12016 if (use)
12017 CALL_INSN_FUNCTION_USAGE (call) = use;
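/* Note on CALLARG2 above: for the 64-bit ABI it carries the number of SSE
   registers used to pass arguments to a varargs/unprototyped callee (a
   negative value means no hint is needed), and it is copied into %al before
   the call.  For example, a call like printf ("%f", d) ends up preceded by
   roughly:

     movb  $1, %al        # one SSE register (xmm0) carries an argument
     call  printf
*/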
12021 /* Clear stack slot assignments remembered from previous functions.
12022 This is called from INIT_EXPANDERS once before RTL is emitted for each
12023 function. */
12025 static struct machine_function *
12026 ix86_init_machine_status (void)
12028 struct machine_function *f;
12030 f = ggc_alloc_cleared (sizeof (struct machine_function));
12031 f->use_fast_prologue_epilogue_nregs = -1;
12033 return f;
12036 /* Return a MEM corresponding to a stack slot with mode MODE.
12037 Allocate a new slot if necessary.
12039 The RTL for a function can have several slots available: N is
12040 which slot to use. */
12043 assign_386_stack_local (enum machine_mode mode, int n)
12045 struct stack_local_entry *s;
12047 if (n < 0 || n >= MAX_386_STACK_LOCALS)
12048 abort ();
12050 for (s = ix86_stack_locals; s; s = s->next)
12051 if (s->mode == mode && s->n == n)
12052 return s->rtl;
12054 s = (struct stack_local_entry *)
12055 ggc_alloc (sizeof (struct stack_local_entry));
12056 s->n = n;
12057 s->mode = mode;
12058 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12060 s->next = ix86_stack_locals;
12061 ix86_stack_locals = s;
12062 return s->rtl;
12065 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12067 static GTY(()) rtx ix86_tls_symbol;
12069 ix86_tls_get_addr (void)
12072 if (!ix86_tls_symbol)
12074 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12075 (TARGET_GNU_TLS && !TARGET_64BIT)
12076 ? "___tls_get_addr"
12077 : "__tls_get_addr");
12080 return ix86_tls_symbol;
12083 /* Calculate the length of the memory address in the instruction
12084 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12086 static int
12087 memory_address_length (rtx addr)
12089 struct ix86_address parts;
12090 rtx base, index, disp;
12091 int len;
12093 if (GET_CODE (addr) == PRE_DEC
12094 || GET_CODE (addr) == POST_INC
12095 || GET_CODE (addr) == PRE_MODIFY
12096 || GET_CODE (addr) == POST_MODIFY)
12097 return 0;
12099 if (! ix86_decompose_address (addr, &parts))
12100 abort ();
12102 base = parts.base;
12103 index = parts.index;
12104 disp = parts.disp;
12105 len = 0;
12107 /* Rule of thumb:
12108 - esp as the base always wants an index,
12109 - ebp as the base always wants a displacement. */
12111 /* Register Indirect. */
12112 if (base && !index && !disp)
12114 /* esp (for its index) and ebp (for its displacement) need
12115 the two-byte modrm form. */
12116 if (addr == stack_pointer_rtx
12117 || addr == arg_pointer_rtx
12118 || addr == frame_pointer_rtx
12119 || addr == hard_frame_pointer_rtx)
12120 len = 1;
12123 /* Direct Addressing. */
12124 else if (disp && !base && !index)
12125 len = 4;
12127 else
12129 /* Find the length of the displacement constant. */
12130 if (disp)
12132 if (GET_CODE (disp) == CONST_INT
12133 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12134 && base)
12135 len = 1;
12136 else
12137 len = 4;
12139 /* ebp always wants a displacement. */
12140 else if (base == hard_frame_pointer_rtx)
12141 len = 1;
12143 /* An index requires the two-byte modrm form.... */
12144 if (index
12145 /* ...like esp, which always wants an index. */
12146 || base == stack_pointer_rtx
12147 || base == arg_pointer_rtx
12148 || base == frame_pointer_rtx)
12149 len += 1;
12152 return len;
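/* Worked examples of the length computed above (bytes beyond the opcode,
   modrm and prefixes):

     (%eax)               -> 0    base only
     (%esp)               -> 1    SIB byte required
     8(%ebp)              -> 1    disp8
     foo                  -> 4    disp32, no base or index
     4(%ebx,%esi,2)       -> 2    SIB byte + disp8
     0x1234(%ebx,%esi,2)  -> 5    SIB byte + disp32
*/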
12155 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12156 is set, expect that the insn has an 8-bit immediate alternative. */
12158 ix86_attr_length_immediate_default (rtx insn, int shortform)
12160 int len = 0;
12161 int i;
12162 extract_insn_cached (insn);
12163 for (i = recog_data.n_operands - 1; i >= 0; --i)
12164 if (CONSTANT_P (recog_data.operand[i]))
12166 if (len)
12167 abort ();
12168 if (shortform
12169 && GET_CODE (recog_data.operand[i]) == CONST_INT
12170 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12171 len = 1;
12172 else
12174 switch (get_attr_mode (insn))
12176 case MODE_QI:
12177 len+=1;
12178 break;
12179 case MODE_HI:
12180 len+=2;
12181 break;
12182 case MODE_SI:
12183 len+=4;
12184 break;
12185 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12186 case MODE_DI:
12187 len+=4;
12188 break;
12189 default:
12190 fatal_insn ("unknown insn mode", insn);
12194 return len;
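/* For example (assuming the usual insn attributes): "addl $3, %eax" with
   SHORTFORM set gives 1, because 3 fits the signed 8-bit 'K' constraint,
   while "addl $300, %eax" gives 4; DImode immediates also count as 4 since
   they are encoded as sign-extended 32-bit values.  */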
12196 /* Compute default value for "length_address" attribute. */
12198 ix86_attr_length_address_default (rtx insn)
12200 int i;
12202 if (get_attr_type (insn) == TYPE_LEA)
12204 rtx set = PATTERN (insn);
12205 if (GET_CODE (set) == SET)
12207 else if (GET_CODE (set) == PARALLEL
12208 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12209 set = XVECEXP (set, 0, 0);
12210 else
12212 #ifdef ENABLE_CHECKING
12213 abort ();
12214 #endif
12215 return 0;
12218 return memory_address_length (SET_SRC (set));
12221 extract_insn_cached (insn);
12222 for (i = recog_data.n_operands - 1; i >= 0; --i)
12223 if (GET_CODE (recog_data.operand[i]) == MEM)
12225 return memory_address_length (XEXP (recog_data.operand[i], 0));
12226 break;
12228 return 0;
12231 /* Return the maximum number of instructions a cpu can issue. */
12233 static int
12234 ix86_issue_rate (void)
12236 switch (ix86_tune)
12238 case PROCESSOR_PENTIUM:
12239 case PROCESSOR_K6:
12240 return 2;
12242 case PROCESSOR_PENTIUMPRO:
12243 case PROCESSOR_PENTIUM4:
12244 case PROCESSOR_ATHLON:
12245 case PROCESSOR_K8:
12246 case PROCESSOR_NOCONA:
12247 return 3;
12249 default:
12250 return 1;
12254 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12255 set by DEP_INSN and no other register set by DEP_INSN. */
12257 static int
12258 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12260 rtx set, set2;
12262 /* Simplify the test for uninteresting insns. */
12263 if (insn_type != TYPE_SETCC
12264 && insn_type != TYPE_ICMOV
12265 && insn_type != TYPE_FCMOV
12266 && insn_type != TYPE_IBR)
12267 return 0;
12269 if ((set = single_set (dep_insn)) != 0)
12271 set = SET_DEST (set);
12272 set2 = NULL_RTX;
12274 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12275 && XVECLEN (PATTERN (dep_insn), 0) == 2
12276 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12277 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12279 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12280 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12282 else
12283 return 0;
12285 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12286 return 0;
12288 /* This test is true if the dependent insn reads the flags but
12289 not any other potentially set register. */
12290 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12291 return 0;
12293 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12294 return 0;
12296 return 1;
12299 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12300 address with operands set by DEP_INSN. */
12302 static int
12303 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12305 rtx addr;
12307 if (insn_type == TYPE_LEA
12308 && TARGET_PENTIUM)
12310 addr = PATTERN (insn);
12311 if (GET_CODE (addr) == SET)
12313 else if (GET_CODE (addr) == PARALLEL
12314 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12315 addr = XVECEXP (addr, 0, 0);
12316 else
12317 abort ();
12318 addr = SET_SRC (addr);
12320 else
12322 int i;
12323 extract_insn_cached (insn);
12324 for (i = recog_data.n_operands - 1; i >= 0; --i)
12325 if (GET_CODE (recog_data.operand[i]) == MEM)
12327 addr = XEXP (recog_data.operand[i], 0);
12328 goto found;
12330 return 0;
12331 found:;
12334 return modified_in_p (addr, dep_insn);
12337 static int
12338 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12340 enum attr_type insn_type, dep_insn_type;
12341 enum attr_memory memory;
12342 rtx set, set2;
12343 int dep_insn_code_number;
12345 /* Anti and output dependencies have zero cost on all CPUs. */
12346 if (REG_NOTE_KIND (link) != 0)
12347 return 0;
12349 dep_insn_code_number = recog_memoized (dep_insn);
12351 /* If we can't recognize the insns, we can't really do anything. */
12352 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12353 return cost;
12355 insn_type = get_attr_type (insn);
12356 dep_insn_type = get_attr_type (dep_insn);
12358 switch (ix86_tune)
12360 case PROCESSOR_PENTIUM:
12361 /* Address Generation Interlock adds a cycle of latency. */
12362 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12363 cost += 1;
12365 /* ??? Compares pair with jump/setcc. */
12366 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12367 cost = 0;
12369 /* Floating point stores require the value to be ready one cycle earlier. */
12370 if (insn_type == TYPE_FMOV
12371 && get_attr_memory (insn) == MEMORY_STORE
12372 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12373 cost += 1;
12374 break;
12376 case PROCESSOR_PENTIUMPRO:
12377 memory = get_attr_memory (insn);
12379 /* INT->FP conversion is expensive. */
12380 if (get_attr_fp_int_src (dep_insn))
12381 cost += 5;
12383 /* There is one extra cycle of latency between an FP op and a store. */
12384 if (insn_type == TYPE_FMOV
12385 && (set = single_set (dep_insn)) != NULL_RTX
12386 && (set2 = single_set (insn)) != NULL_RTX
12387 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12388 && GET_CODE (SET_DEST (set2)) == MEM)
12389 cost += 1;
12391 /* Model the ability of the reorder buffer to hide the latency of a load
12392 by executing it in parallel with the previous instruction, provided
12393 that the previous instruction is not needed to compute the address. */
12394 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12395 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12397 /* Claim that moves take one cycle, as the core can issue one load
12398 at a time and the next load can start a cycle later. */
12399 if (dep_insn_type == TYPE_IMOV
12400 || dep_insn_type == TYPE_FMOV)
12401 cost = 1;
12402 else if (cost > 1)
12403 cost--;
12405 break;
12407 case PROCESSOR_K6:
12408 memory = get_attr_memory (insn);
12410 /* The esp dependency is resolved before the instruction is really
12411 finished. */
12412 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12413 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12414 return 1;
12416 /* INT->FP conversion is expensive. */
12417 if (get_attr_fp_int_src (dep_insn))
12418 cost += 5;
12420 /* Model the ability of the reorder buffer to hide the latency of a load
12421 by executing it in parallel with the previous instruction, provided
12422 that the previous instruction is not needed to compute the address. */
12423 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12424 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12426 /* Claim that moves take one cycle, as the core can issue one load
12427 at a time and the next load can start a cycle later. */
12428 if (dep_insn_type == TYPE_IMOV
12429 || dep_insn_type == TYPE_FMOV)
12430 cost = 1;
12431 else if (cost > 2)
12432 cost -= 2;
12433 else
12434 cost = 1;
12436 break;
12438 case PROCESSOR_ATHLON:
12439 case PROCESSOR_K8:
12440 memory = get_attr_memory (insn);
12442 /* Model the ability of the reorder buffer to hide the latency of a load
12443 by executing it in parallel with the previous instruction, provided
12444 that the previous instruction is not needed to compute the address. */
12445 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12446 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12448 enum attr_unit unit = get_attr_unit (insn);
12449 int loadcost = 3;
12451 /* Because of the difference between the length of the integer and
12452 floating unit pipeline preparation stages, the memory operands
12453 for floating point are cheaper.
12455 ??? For Athlon the difference is most probably 2. */
12456 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12457 loadcost = 3;
12458 else
12459 loadcost = TARGET_ATHLON ? 2 : 0;
12461 if (cost >= loadcost)
12462 cost -= loadcost;
12463 else
12464 cost = 0;
12467 default:
12468 break;
12471 return cost;
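/* Example of the PROCESSOR_PENTIUM adjustment above: in a sequence such as

     addl  $4, %esi
     movl  (%esi), %eax

   the load's address depends on the preceding add, so ix86_agi_dependant
   returns true and one cycle is added to model the address generation
   interlock.  */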
12474 /* How many alternative schedules to try. This should be as wide as the
12475 scheduling freedom in the DFA, but no wider. Making this value too
12476 large results in extra work for the scheduler. */
12478 static int
12479 ia32_multipass_dfa_lookahead (void)
12481 if (ix86_tune == PROCESSOR_PENTIUM)
12482 return 2;
12484 if (ix86_tune == PROCESSOR_PENTIUMPRO
12485 || ix86_tune == PROCESSOR_K6)
12486 return 1;
12488 else
12489 return 0;
12493 /* Compute the alignment given to a constant that is being placed in memory.
12494 EXP is the constant and ALIGN is the alignment that the object would
12495 ordinarily have.
12496 The value of this function is used instead of that alignment to align
12497 the object. */
12500 ix86_constant_alignment (tree exp, int align)
12502 if (TREE_CODE (exp) == REAL_CST)
12504 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12505 return 64;
12506 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12507 return 128;
12509 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12510 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12511 return BITS_PER_WORD;
12513 return align;
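/* For example, a DFmode REAL_CST is given at least 64-bit alignment, a
   constant whose mode satisfies ALIGN_MODE_128 at least 128-bit alignment,
   and (unless optimizing for size) longer string constants are aligned to
   a word boundary.  */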
12516 /* Compute the alignment for a static variable.
12517 TYPE is the data type, and ALIGN is the alignment that
12518 the object would ordinarily have. The value of this function is used
12519 instead of that alignment to align the object. */
12522 ix86_data_alignment (tree type, int align)
12524 if (AGGREGATE_TYPE_P (type)
12525 && TYPE_SIZE (type)
12526 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12527 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12528 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12529 return 256;
12531 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12532 to a 16-byte boundary. */
12533 if (TARGET_64BIT)
12535 if (AGGREGATE_TYPE_P (type)
12536 && TYPE_SIZE (type)
12537 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12538 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12539 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12540 return 128;
12543 if (TREE_CODE (type) == ARRAY_TYPE)
12545 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12546 return 64;
12547 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12548 return 128;
12550 else if (TREE_CODE (type) == COMPLEX_TYPE)
12553 if (TYPE_MODE (type) == DCmode && align < 64)
12554 return 64;
12555 if (TYPE_MODE (type) == XCmode && align < 128)
12556 return 128;
12558 else if ((TREE_CODE (type) == RECORD_TYPE
12559 || TREE_CODE (type) == UNION_TYPE
12560 || TREE_CODE (type) == QUAL_UNION_TYPE)
12561 && TYPE_FIELDS (type))
12563 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12564 return 64;
12565 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12566 return 128;
12568 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12569 || TREE_CODE (type) == INTEGER_TYPE)
12571 if (TYPE_MODE (type) == DFmode && align < 64)
12572 return 64;
12573 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12574 return 128;
12577 return align;
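/* For instance, under the rules above a file-scope aggregate of 32 bytes
   or more is given 256-bit alignment, and on x86-64 any aggregate of 16
   bytes or more gets at least the 128-bit alignment the ABI requires for
   such arrays.  */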
12580 /* Compute the alignment for a local variable.
12581 TYPE is the data type, and ALIGN is the alignment that
12582 the object would ordinarily have. The value of this macro is used
12583 instead of that alignment to align the object. */
12586 ix86_local_alignment (tree type, int align)
12588 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12589 to a 16-byte boundary. */
12590 if (TARGET_64BIT)
12592 if (AGGREGATE_TYPE_P (type)
12593 && TYPE_SIZE (type)
12594 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12595 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12596 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12597 return 128;
12599 if (TREE_CODE (type) == ARRAY_TYPE)
12601 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12602 return 64;
12603 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12604 return 128;
12606 else if (TREE_CODE (type) == COMPLEX_TYPE)
12608 if (TYPE_MODE (type) == DCmode && align < 64)
12609 return 64;
12610 if (TYPE_MODE (type) == XCmode && align < 128)
12611 return 128;
12613 else if ((TREE_CODE (type) == RECORD_TYPE
12614 || TREE_CODE (type) == UNION_TYPE
12615 || TREE_CODE (type) == QUAL_UNION_TYPE)
12616 && TYPE_FIELDS (type))
12618 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12619 return 64;
12620 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12621 return 128;
12623 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12624 || TREE_CODE (type) == INTEGER_TYPE)
12627 if (TYPE_MODE (type) == DFmode && align < 64)
12628 return 64;
12629 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12630 return 128;
12632 return align;
12635 /* Emit RTL insns to initialize the variable parts of a trampoline.
12636 FNADDR is an RTX for the address of the function's pure code.
12637 CXT is an RTX for the static chain value for the function. */
12638 void
12639 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12641 if (!TARGET_64BIT)
12643 /* Compute offset from the end of the jmp to the target function. */
12644 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12645 plus_constant (tramp, 10),
12646 NULL_RTX, 1, OPTAB_DIRECT);
12647 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12648 gen_int_mode (0xb9, QImode));
12649 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12650 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12651 gen_int_mode (0xe9, QImode));
12652 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
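/* Byte for byte, the 10-byte 32-bit trampoline written above is:

     offset 0:  b9 <cxt:4>     movl  $cxt, %ecx     ; static chain
     offset 5:  e9 <disp:4>    jmp   fnaddr         ; disp = fnaddr - (tramp + 10)
*/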
12654 else
12656 int offset = 0;
12657 /* Try to load the address using the shorter movl instead of movabs.
12658 We may want to support movq for kernel mode, but the kernel does not
12659 use trampolines at the moment. */
12660 if (x86_64_zero_extended_value (fnaddr))
12662 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12663 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12664 gen_int_mode (0xbb41, HImode));
12665 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12666 gen_lowpart (SImode, fnaddr));
12667 offset += 6;
12669 else
12671 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12672 gen_int_mode (0xbb49, HImode));
12673 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12674 fnaddr);
12675 offset += 10;
12677 /* Load static chain using movabs to r10. */
12678 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12679 gen_int_mode (0xba49, HImode));
12680 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12681 cxt);
12682 offset += 10;
12683 /* Jump to r11. */
12684 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12685 gen_int_mode (0xff49, HImode));
12686 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12687 gen_int_mode (0xe3, QImode));
12688 offset += 3;
12689 if (offset > TRAMPOLINE_SIZE)
12690 abort ();
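/* Byte for byte, the 64-bit trampoline built above is either

     41 bb <fn:4>    movl    $fn, %r11d    ; short form, fn zero-extends
   or
     49 bb <fn:8>    movabsq $fn, %r11

   followed by

     49 ba <cxt:8>   movabsq $cxt, %r10    ; static chain
     49 ff e3        jmp     *%r11

   for a total of at most 23 bytes.  */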
12693 #ifdef ENABLE_EXECUTE_STACK
12694 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12695 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12696 #endif
12699 #define def_builtin(MASK, NAME, TYPE, CODE) \
12700 do { \
12701 if ((MASK) & target_flags \
12702 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12703 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12704 NULL, NULL_TREE); \
12705 } while (0)
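/* As a purely hypothetical usage example, a builtin that should exist only
   when both -msse2 and 64-bit mode are enabled would be registered as:

     def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_example",
                  v2di_ftype_v2di_v2di, IX86_BUILTIN_EXAMPLE);

   MASK_64BIT acts only as an extra gate here: the macro requires
   TARGET_64BIT rather than the bit being present in target_flags.  */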
12707 struct builtin_description
12709 const unsigned int mask;
12710 const enum insn_code icode;
12711 const char *const name;
12712 const enum ix86_builtins code;
12713 const enum rtx_code comparison;
12714 const unsigned int flag;
12717 static const struct builtin_description bdesc_comi[] =
12719 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12720 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12721 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12722 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12723 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12724 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12725 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12726 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12727 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12728 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12729 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12730 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12734 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12735 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12736 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12737 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12738 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12739 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12740 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12741 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12742 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12745 static const struct builtin_description bdesc_2arg[] =
12747 /* SSE */
12748 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12749 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12750 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12751 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12752 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12753 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12754 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12755 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12757 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12758 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12759 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12760 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12761 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12762 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12763 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12764 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12765 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12766 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12767 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12768 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12769 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12770 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12771 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12772 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12773 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12774 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12775 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12776 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12778 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12779 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12780 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12781 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12783 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12784 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12785 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12786 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12788 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12789 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12790 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12791 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12792 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12794 /* MMX */
12795 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12796 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12797 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12798 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12799 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12800 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12801 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12802 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12804 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12805 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12806 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12807 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12808 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12809 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12810 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12811 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12814 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12815 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12817 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12819 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12820 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12822 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12823 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12825 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12826 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12827 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12828 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12829 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12830 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12832 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12833 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12834 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12835 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12837 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12838 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12839 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12840 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12841 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12842 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12844 /* Special. */
12845 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12846 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12847 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12849 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12851 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12853 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12854 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12855 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12856 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12857 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12858 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12860 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12861 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12862 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12863 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12864 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12865 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12867 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12868 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12869 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12870 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12872 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12875 /* SSE2 */
12876 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12886 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12887 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12888 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12889 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12890 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12891 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12892 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12893 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12894 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12895 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12896 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12897 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12898 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12899 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12900 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12901 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12902 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12903 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12904 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12906 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12908 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12920 /* SSE2 MMX */
12921 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12930 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12931 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12932 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12933 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12934 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12935 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12936 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12937 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13003 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13007 /* SSE3 MMX */
13008 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13009 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13010 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13011 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13012 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13013 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13016 static const struct builtin_description bdesc_1arg[] =
13018 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13019 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13021 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13022 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13023 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13025 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13026 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13027 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13028 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13029 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13030 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13052 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13053 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13061 /* SSE3 */
13062 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13063 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13064 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13067 void
13068 ix86_init_builtins (void)
13070 if (TARGET_MMX)
13071 ix86_init_mmx_sse_builtins ();
13074 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13075 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
13076 builtins are defined. */
13077 static void
13078 ix86_init_mmx_sse_builtins (void)
13080 const struct builtin_description * d;
13081 size_t i;
13083 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
13084 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
13085 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
13086 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
13087 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
13088 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
13089 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
13090 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13091 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13092 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13094 tree pchar_type_node = build_pointer_type (char_type_node);
13095 tree pcchar_type_node = build_pointer_type (
13096 build_type_variant (char_type_node, 1, 0));
13097 tree pfloat_type_node = build_pointer_type (float_type_node);
13098 tree pcfloat_type_node = build_pointer_type (
13099 build_type_variant (float_type_node, 1, 0));
13100 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13101 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13102 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13104 /* Comparisons. */
13105 tree int_ftype_v4sf_v4sf
13106 = build_function_type_list (integer_type_node,
13107 V4SF_type_node, V4SF_type_node, NULL_TREE);
13108 tree v4si_ftype_v4sf_v4sf
13109 = build_function_type_list (V4SI_type_node,
13110 V4SF_type_node, V4SF_type_node, NULL_TREE);
13111 /* MMX/SSE/integer conversions. */
13112 tree int_ftype_v4sf
13113 = build_function_type_list (integer_type_node,
13114 V4SF_type_node, NULL_TREE);
13115 tree int64_ftype_v4sf
13116 = build_function_type_list (long_long_integer_type_node,
13117 V4SF_type_node, NULL_TREE);
13118 tree int_ftype_v8qi
13119 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13120 tree v4sf_ftype_v4sf_int
13121 = build_function_type_list (V4SF_type_node,
13122 V4SF_type_node, integer_type_node, NULL_TREE);
13123 tree v4sf_ftype_v4sf_int64
13124 = build_function_type_list (V4SF_type_node,
13125 V4SF_type_node, long_long_integer_type_node,
13126 NULL_TREE);
13127 tree v4sf_ftype_v4sf_v2si
13128 = build_function_type_list (V4SF_type_node,
13129 V4SF_type_node, V2SI_type_node, NULL_TREE);
13130 tree int_ftype_v4hi_int
13131 = build_function_type_list (integer_type_node,
13132 V4HI_type_node, integer_type_node, NULL_TREE);
13133 tree v4hi_ftype_v4hi_int_int
13134 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13135 integer_type_node, integer_type_node,
13136 NULL_TREE);
13137 /* Miscellaneous. */
13138 tree v8qi_ftype_v4hi_v4hi
13139 = build_function_type_list (V8QI_type_node,
13140 V4HI_type_node, V4HI_type_node, NULL_TREE);
13141 tree v4hi_ftype_v2si_v2si
13142 = build_function_type_list (V4HI_type_node,
13143 V2SI_type_node, V2SI_type_node, NULL_TREE);
13144 tree v4sf_ftype_v4sf_v4sf_int
13145 = build_function_type_list (V4SF_type_node,
13146 V4SF_type_node, V4SF_type_node,
13147 integer_type_node, NULL_TREE);
13148 tree v2si_ftype_v4hi_v4hi
13149 = build_function_type_list (V2SI_type_node,
13150 V4HI_type_node, V4HI_type_node, NULL_TREE);
13151 tree v4hi_ftype_v4hi_int
13152 = build_function_type_list (V4HI_type_node,
13153 V4HI_type_node, integer_type_node, NULL_TREE);
13154 tree v4hi_ftype_v4hi_di
13155 = build_function_type_list (V4HI_type_node,
13156 V4HI_type_node, long_long_unsigned_type_node,
13157 NULL_TREE);
13158 tree v2si_ftype_v2si_di
13159 = build_function_type_list (V2SI_type_node,
13160 V2SI_type_node, long_long_unsigned_type_node,
13161 NULL_TREE);
13162 tree void_ftype_void
13163 = build_function_type (void_type_node, void_list_node);
13164 tree void_ftype_unsigned
13165 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13166 tree void_ftype_unsigned_unsigned
13167 = build_function_type_list (void_type_node, unsigned_type_node,
13168 unsigned_type_node, NULL_TREE);
13169 tree void_ftype_pcvoid_unsigned_unsigned
13170 = build_function_type_list (void_type_node, const_ptr_type_node,
13171 unsigned_type_node, unsigned_type_node,
13172 NULL_TREE);
13173 tree unsigned_ftype_void
13174 = build_function_type (unsigned_type_node, void_list_node);
13175 tree di_ftype_void
13176 = build_function_type (long_long_unsigned_type_node, void_list_node);
13177 tree v4sf_ftype_void
13178 = build_function_type (V4SF_type_node, void_list_node);
13179 tree v2si_ftype_v4sf
13180 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13181 /* Loads/stores. */
13182 tree void_ftype_v8qi_v8qi_pchar
13183 = build_function_type_list (void_type_node,
13184 V8QI_type_node, V8QI_type_node,
13185 pchar_type_node, NULL_TREE);
13186 tree v4sf_ftype_pcfloat
13187 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13188 /* @@@ the type is bogus: the memory operand really holds two floats, not a __v2si */
13189 tree v4sf_ftype_v4sf_pv2si
13190 = build_function_type_list (V4SF_type_node,
13191 V4SF_type_node, pv2si_type_node, NULL_TREE);
13192 tree void_ftype_pv2si_v4sf
13193 = build_function_type_list (void_type_node,
13194 pv2si_type_node, V4SF_type_node, NULL_TREE);
13195 tree void_ftype_pfloat_v4sf
13196 = build_function_type_list (void_type_node,
13197 pfloat_type_node, V4SF_type_node, NULL_TREE);
13198 tree void_ftype_pdi_di
13199 = build_function_type_list (void_type_node,
13200 pdi_type_node, long_long_unsigned_type_node,
13201 NULL_TREE);
13202 tree void_ftype_pv2di_v2di
13203 = build_function_type_list (void_type_node,
13204 pv2di_type_node, V2DI_type_node, NULL_TREE);
13205 /* Normal vector unops. */
13206 tree v4sf_ftype_v4sf
13207 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13209 /* Normal vector binops. */
13210 tree v4sf_ftype_v4sf_v4sf
13211 = build_function_type_list (V4SF_type_node,
13212 V4SF_type_node, V4SF_type_node, NULL_TREE);
13213 tree v8qi_ftype_v8qi_v8qi
13214 = build_function_type_list (V8QI_type_node,
13215 V8QI_type_node, V8QI_type_node, NULL_TREE);
13216 tree v4hi_ftype_v4hi_v4hi
13217 = build_function_type_list (V4HI_type_node,
13218 V4HI_type_node, V4HI_type_node, NULL_TREE);
13219 tree v2si_ftype_v2si_v2si
13220 = build_function_type_list (V2SI_type_node,
13221 V2SI_type_node, V2SI_type_node, NULL_TREE);
13222 tree di_ftype_di_di
13223 = build_function_type_list (long_long_unsigned_type_node,
13224 long_long_unsigned_type_node,
13225 long_long_unsigned_type_node, NULL_TREE);
13227 tree v2si_ftype_v2sf
13228 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13229 tree v2sf_ftype_v2si
13230 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13231 tree v2si_ftype_v2si
13232 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13233 tree v2sf_ftype_v2sf
13234 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13235 tree v2sf_ftype_v2sf_v2sf
13236 = build_function_type_list (V2SF_type_node,
13237 V2SF_type_node, V2SF_type_node, NULL_TREE);
13238 tree v2si_ftype_v2sf_v2sf
13239 = build_function_type_list (V2SI_type_node,
13240 V2SF_type_node, V2SF_type_node, NULL_TREE);
13241 tree pint_type_node = build_pointer_type (integer_type_node);
13242 tree pcint_type_node = build_pointer_type (
13243 build_type_variant (integer_type_node, 1, 0));
13244 tree pdouble_type_node = build_pointer_type (double_type_node);
13245 tree pcdouble_type_node = build_pointer_type (
13246 build_type_variant (double_type_node, 1, 0));
13247 tree int_ftype_v2df_v2df
13248 = build_function_type_list (integer_type_node,
13249 V2DF_type_node, V2DF_type_node, NULL_TREE);
13251 tree ti_ftype_void
13252 = build_function_type (intTI_type_node, void_list_node);
13253 tree v2di_ftype_void
13254 = build_function_type (V2DI_type_node, void_list_node);
13255 tree ti_ftype_ti_ti
13256 = build_function_type_list (intTI_type_node,
13257 intTI_type_node, intTI_type_node, NULL_TREE);
13258 tree void_ftype_pcvoid
13259 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13260 tree v2di_ftype_di
13261 = build_function_type_list (V2DI_type_node,
13262 long_long_unsigned_type_node, NULL_TREE);
13263 tree di_ftype_v2di
13264 = build_function_type_list (long_long_unsigned_type_node,
13265 V2DI_type_node, NULL_TREE);
13266 tree v4sf_ftype_v4si
13267 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13268 tree v4si_ftype_v4sf
13269 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13270 tree v2df_ftype_v4si
13271 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13272 tree v4si_ftype_v2df
13273 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13274 tree v2si_ftype_v2df
13275 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13276 tree v4sf_ftype_v2df
13277 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13278 tree v2df_ftype_v2si
13279 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13280 tree v2df_ftype_v4sf
13281 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13282 tree int_ftype_v2df
13283 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13284 tree int64_ftype_v2df
13285 = build_function_type_list (long_long_integer_type_node,
13286 V2DF_type_node, NULL_TREE);
13287 tree v2df_ftype_v2df_int
13288 = build_function_type_list (V2DF_type_node,
13289 V2DF_type_node, integer_type_node, NULL_TREE);
13290 tree v2df_ftype_v2df_int64
13291 = build_function_type_list (V2DF_type_node,
13292 V2DF_type_node, long_long_integer_type_node,
13293 NULL_TREE);
13294 tree v4sf_ftype_v4sf_v2df
13295 = build_function_type_list (V4SF_type_node,
13296 V4SF_type_node, V2DF_type_node, NULL_TREE);
13297 tree v2df_ftype_v2df_v4sf
13298 = build_function_type_list (V2DF_type_node,
13299 V2DF_type_node, V4SF_type_node, NULL_TREE);
13300 tree v2df_ftype_v2df_v2df_int
13301 = build_function_type_list (V2DF_type_node,
13302 V2DF_type_node, V2DF_type_node,
13303 integer_type_node,
13304 NULL_TREE);
13305 tree v2df_ftype_v2df_pv2si
13306 = build_function_type_list (V2DF_type_node,
13307 V2DF_type_node, pv2si_type_node, NULL_TREE);
13308 tree void_ftype_pv2si_v2df
13309 = build_function_type_list (void_type_node,
13310 pv2si_type_node, V2DF_type_node, NULL_TREE);
13311 tree void_ftype_pdouble_v2df
13312 = build_function_type_list (void_type_node,
13313 pdouble_type_node, V2DF_type_node, NULL_TREE);
13314 tree void_ftype_pint_int
13315 = build_function_type_list (void_type_node,
13316 pint_type_node, integer_type_node, NULL_TREE);
13317 tree void_ftype_v16qi_v16qi_pchar
13318 = build_function_type_list (void_type_node,
13319 V16QI_type_node, V16QI_type_node,
13320 pchar_type_node, NULL_TREE);
13321 tree v2df_ftype_pcdouble
13322 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13323 tree v2df_ftype_v2df_v2df
13324 = build_function_type_list (V2DF_type_node,
13325 V2DF_type_node, V2DF_type_node, NULL_TREE);
13326 tree v16qi_ftype_v16qi_v16qi
13327 = build_function_type_list (V16QI_type_node,
13328 V16QI_type_node, V16QI_type_node, NULL_TREE);
13329 tree v8hi_ftype_v8hi_v8hi
13330 = build_function_type_list (V8HI_type_node,
13331 V8HI_type_node, V8HI_type_node, NULL_TREE);
13332 tree v4si_ftype_v4si_v4si
13333 = build_function_type_list (V4SI_type_node,
13334 V4SI_type_node, V4SI_type_node, NULL_TREE);
13335 tree v2di_ftype_v2di_v2di
13336 = build_function_type_list (V2DI_type_node,
13337 V2DI_type_node, V2DI_type_node, NULL_TREE);
13338 tree v2di_ftype_v2df_v2df
13339 = build_function_type_list (V2DI_type_node,
13340 V2DF_type_node, V2DF_type_node, NULL_TREE);
13341 tree v2df_ftype_v2df
13342 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13343 tree v2df_ftype_double
13344 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13345 tree v2df_ftype_double_double
13346 = build_function_type_list (V2DF_type_node,
13347 double_type_node, double_type_node, NULL_TREE);
13348 tree int_ftype_v8hi_int
13349 = build_function_type_list (integer_type_node,
13350 V8HI_type_node, integer_type_node, NULL_TREE);
13351 tree v8hi_ftype_v8hi_int_int
13352 = build_function_type_list (V8HI_type_node,
13353 V8HI_type_node, integer_type_node,
13354 integer_type_node, NULL_TREE);
13355 tree v2di_ftype_v2di_int
13356 = build_function_type_list (V2DI_type_node,
13357 V2DI_type_node, integer_type_node, NULL_TREE);
13358 tree v4si_ftype_v4si_int
13359 = build_function_type_list (V4SI_type_node,
13360 V4SI_type_node, integer_type_node, NULL_TREE);
13361 tree v8hi_ftype_v8hi_int
13362 = build_function_type_list (V8HI_type_node,
13363 V8HI_type_node, integer_type_node, NULL_TREE);
13364 tree v8hi_ftype_v8hi_v2di
13365 = build_function_type_list (V8HI_type_node,
13366 V8HI_type_node, V2DI_type_node, NULL_TREE);
13367 tree v4si_ftype_v4si_v2di
13368 = build_function_type_list (V4SI_type_node,
13369 V4SI_type_node, V2DI_type_node, NULL_TREE);
13370 tree v4si_ftype_v8hi_v8hi
13371 = build_function_type_list (V4SI_type_node,
13372 V8HI_type_node, V8HI_type_node, NULL_TREE);
13373 tree di_ftype_v8qi_v8qi
13374 = build_function_type_list (long_long_unsigned_type_node,
13375 V8QI_type_node, V8QI_type_node, NULL_TREE);
13376 tree di_ftype_v2si_v2si
13377 = build_function_type_list (long_long_unsigned_type_node,
13378 V2SI_type_node, V2SI_type_node, NULL_TREE);
13379 tree v2di_ftype_v16qi_v16qi
13380 = build_function_type_list (V2DI_type_node,
13381 V16QI_type_node, V16QI_type_node, NULL_TREE);
13382 tree v2di_ftype_v4si_v4si
13383 = build_function_type_list (V2DI_type_node,
13384 V4SI_type_node, V4SI_type_node, NULL_TREE);
13385 tree int_ftype_v16qi
13386 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13387 tree v16qi_ftype_pcchar
13388 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13389 tree void_ftype_pchar_v16qi
13390 = build_function_type_list (void_type_node,
13391 pchar_type_node, V16QI_type_node, NULL_TREE);
13392 tree v4si_ftype_pcint
13393 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13394 tree void_ftype_pcint_v4si
13395 = build_function_type_list (void_type_node,
13396 pcint_type_node, V4SI_type_node, NULL_TREE);
13397 tree v2di_ftype_v2di
13398 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13400 tree float80_type;
13401 tree float128_type;
13403 /* The __float80 type. */
13404 if (TYPE_MODE (long_double_type_node) == XFmode)
13405 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13406 "__float80");
13407 else
13409 /* long double is not XFmode here, so build a distinct 80-bit type for __float80. */
13410 float80_type = make_node (REAL_TYPE);
13411 TYPE_PRECISION (float80_type) = 80;
13412 layout_type (float80_type);
13413 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13416 float128_type = make_node (REAL_TYPE);
13417 TYPE_PRECISION (float128_type) = 128;
13418 layout_type (float128_type);
13419 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
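  /* Illustration, assuming the front end exposes the names registered
     above: after this point declarations such as

         __float80  e;    80-bit extended precision
         __float128 q;    128-bit format

     parse as ordinary variable declarations on this target.  */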
13421 /* Add all builtins that are more or less simple operations on two
13422 operands. */
13423 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13425 /* Use one of the operands; the target can have a different mode for
13426 mask-generating compares. */
13427 enum machine_mode mode;
13428 tree type;
13430 if (d->name == 0)
13431 continue;
13432 mode = insn_data[d->icode].operand[1].mode;
13434 switch (mode)
13436 case V16QImode:
13437 type = v16qi_ftype_v16qi_v16qi;
13438 break;
13439 case V8HImode:
13440 type = v8hi_ftype_v8hi_v8hi;
13441 break;
13442 case V4SImode:
13443 type = v4si_ftype_v4si_v4si;
13444 break;
13445 case V2DImode:
13446 type = v2di_ftype_v2di_v2di;
13447 break;
13448 case V2DFmode:
13449 type = v2df_ftype_v2df_v2df;
13450 break;
13451 case TImode:
13452 type = ti_ftype_ti_ti;
13453 break;
13454 case V4SFmode:
13455 type = v4sf_ftype_v4sf_v4sf;
13456 break;
13457 case V8QImode:
13458 type = v8qi_ftype_v8qi_v8qi;
13459 break;
13460 case V4HImode:
13461 type = v4hi_ftype_v4hi_v4hi;
13462 break;
13463 case V2SImode:
13464 type = v2si_ftype_v2si_v2si;
13465 break;
13466 case DImode:
13467 type = di_ftype_di_di;
13468 break;
13470 default:
13471 abort ();
13474 /* Override for comparisons. */
13475 if (d->icode == CODE_FOR_maskcmpv4sf3
13476 || d->icode == CODE_FOR_maskncmpv4sf3
13477 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13478 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13479 type = v4si_ftype_v4sf_v4sf;
13481 if (d->icode == CODE_FOR_maskcmpv2df3
13482 || d->icode == CODE_FOR_maskncmpv2df3
13483 || d->icode == CODE_FOR_vmmaskcmpv2df3
13484 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13485 type = v2di_ftype_v2df_v2df;
13487 def_builtin (d->mask, d->name, type, d->code);
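  /* Illustration (assumed example): the overrides above give the
     mask-generating compares an integer-vector prototype; for instance
     __builtin_ia32_cmpeqps, whose icode is CODE_FOR_maskcmpv4sf3, ends up
     as __v4si (__v4sf, __v4sf) because each result lane is an all-ones or
     all-zeros mask rather than a float.  */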
13490 /* Add the remaining MMX insns with somewhat more complicated types. */
13491 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13492 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13493 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13494 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13495 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13497 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13498 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13499 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13501 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13502 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13504 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13505 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13507 /* comi/ucomi insns. */
13508 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13509 if (d->mask == MASK_SSE2)
13510 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13511 else
13512 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
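  /* Illustration (assumed builtin name): a bdesc_comi entry expands to a
     scalar compare returning int, e.g.

         int lt = __builtin_ia32_comilt (a, b);      with __v4sf a, b

     while the MASK_SSE2 entries (the comisd/ucomisd forms) get the __v2df
     prototype selected above.  */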
13514 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13515 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13516 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13518 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13519 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13520 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13521 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13522 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13523 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13524 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13525 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13526 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13527 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13528 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13530 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13531 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13533 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13535 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13536 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13537 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13538 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13539 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13540 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13542 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13543 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13544 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13545 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13547 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13548 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13549 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13550 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13552 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13554 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13556 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13557 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13558 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13559 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13560 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13561 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13563 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13565 /* Original 3DNow! */
13566 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13567 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13568 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13569 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13570 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13571 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13572 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13573 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13574 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13575 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13576 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13577 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13578 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13579 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13580 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13581 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13582 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13583 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13584 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13585 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13587 /* 3DNow! extension as used in the Athlon CPU. */
13588 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13589 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13590 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13591 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13592 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13593 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13595 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13597 /* SSE2 */
13598 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13614 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13644 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13646 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13647 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13654 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13674 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13678 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13680 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13705 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13709 /* Prescott New Instructions. */
13710 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13711 void_ftype_pcvoid_unsigned_unsigned,
13712 IX86_BUILTIN_MONITOR);
13713 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13714 void_ftype_unsigned_unsigned,
13715 IX86_BUILTIN_MWAIT);
13716 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13717 v4sf_ftype_v4sf,
13718 IX86_BUILTIN_MOVSHDUP);
13719 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13720 v4sf_ftype_v4sf,
13721 IX86_BUILTIN_MOVSLDUP);
13722 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13723 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13724 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13725 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13726 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13727 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
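  /* Illustration (assumed mapping): the SSE3 pair above is what the
     pmmintrin.h wrappers _mm_monitor and _mm_mwait call, e.g.

         __builtin_ia32_monitor (addr, 0, 0);
         __builtin_ia32_mwait (0, 0);

     with addr a const void * naming the address range to arm.  */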
13730 /* Errors in the source file can cause expand_expr to return const0_rtx
13731 where we expect a vector. To avoid crashing, use one of the vector
13732 clear instructions. */
13733 static rtx
13734 safe_vector_operand (rtx x, enum machine_mode mode)
13736 if (x != const0_rtx)
13737 return x;
13738 x = gen_reg_rtx (mode);
13740 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13741 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13742 : gen_rtx_SUBREG (DImode, x, 0)));
13743 else
13744 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13745 : gen_rtx_SUBREG (V4SFmode, x, 0),
13746 CONST0_RTX (V4SFmode)));
13747 return x;
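/* Illustration: if erroneous source made expand_expr hand back const0_rtx
   for a vector operand of, say, __builtin_ia32_addps, the pattern
   generators below would see a scalar zero where a V4SF register is
   required; the clear insn above substitutes a zeroed register of the
   right mode instead.  */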
13750 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13752 static rtx
13753 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13755 rtx pat;
13756 tree arg0 = TREE_VALUE (arglist);
13757 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13758 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13759 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13760 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13761 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13762 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13764 if (VECTOR_MODE_P (mode0))
13765 op0 = safe_vector_operand (op0, mode0);
13766 if (VECTOR_MODE_P (mode1))
13767 op1 = safe_vector_operand (op1, mode1);
13769 if (! target
13770 || GET_MODE (target) != tmode
13771 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13772 target = gen_reg_rtx (tmode);
13774 if (GET_MODE (op1) == SImode && mode1 == TImode)
13776 rtx x = gen_reg_rtx (V4SImode);
13777 emit_insn (gen_sse2_loadd (x, op1));
13778 op1 = gen_lowpart (TImode, x);
13781 /* In case the insn wants input operands in modes different from
13782 the result, abort. */
13783 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13784 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13785 abort ();
13787 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13788 op0 = copy_to_mode_reg (mode0, op0);
13789 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13790 op1 = copy_to_mode_reg (mode1, op1);
13792 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13793 yet one of the two must not be a memory. This is normally enforced
13794 by expanders, but we didn't bother to create one here. */
13795 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13796 op0 = copy_to_mode_reg (mode0, op0);
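  /* Illustration (assumed example): a call such as
     __builtin_ia32_paddw (*p, *q) expands both operands to MEMs; copying
     op0 into a register keeps the two-operand pattern, whose operands are
     both nonimmediate_operand, satisfiable.  */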
13798 pat = GEN_FCN (icode) (target, op0, op1);
13799 if (! pat)
13800 return 0;
13801 emit_insn (pat);
13802 return target;
13805 /* Subroutine of ix86_expand_builtin to take care of stores. */
13807 static rtx
13808 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13810 rtx pat;
13811 tree arg0 = TREE_VALUE (arglist);
13812 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13813 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13814 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13815 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13816 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13818 if (VECTOR_MODE_P (mode1))
13819 op1 = safe_vector_operand (op1, mode1);
13821 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13822 op1 = copy_to_mode_reg (mode1, op1);
13824 pat = GEN_FCN (icode) (op0, op1);
13825 if (pat)
13826 emit_insn (pat);
13827 return 0;
13830 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13832 static rtx
13833 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13834 rtx target, int do_load)
13836 rtx pat;
13837 tree arg0 = TREE_VALUE (arglist);
13838 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13839 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13840 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13842 if (! target
13843 || GET_MODE (target) != tmode
13844 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13845 target = gen_reg_rtx (tmode);
13846 if (do_load)
13847 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13848 else
13850 if (VECTOR_MODE_P (mode0))
13851 op0 = safe_vector_operand (op0, mode0);
13853 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13854 op0 = copy_to_mode_reg (mode0, op0);
13857 pat = GEN_FCN (icode) (target, op0);
13858 if (! pat)
13859 return 0;
13860 emit_insn (pat);
13861 return target;
13864 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13865 sqrtss, rsqrtss, rcpss. */
13867 static rtx
13868 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13870 rtx pat;
13871 tree arg0 = TREE_VALUE (arglist);
13872 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13873 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13874 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13876 if (! target
13877 || GET_MODE (target) != tmode
13878 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13879 target = gen_reg_rtx (tmode);
13881 if (VECTOR_MODE_P (mode0))
13882 op0 = safe_vector_operand (op0, mode0);
13884 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13885 op0 = copy_to_mode_reg (mode0, op0);
13887 op1 = op0;
13888 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13889 op1 = copy_to_mode_reg (mode0, op1);
13891 pat = GEN_FCN (icode) (target, op0, op1);
13892 if (! pat)
13893 return 0;
13894 emit_insn (pat);
13895 return target;
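/* Illustration: the vm* scalar patterns take two inputs, one supplying the
   low element to operate on and one supplying the untouched upper
   elements; __builtin_ia32_sqrtss (x) passes x for both, which is why op1
   is simply set to op0 above.  */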
13898 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13900 static rtx
13901 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13902 rtx target)
13904 rtx pat;
13905 tree arg0 = TREE_VALUE (arglist);
13906 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13907 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13908 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13909 rtx op2;
13910 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13911 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13912 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13913 enum rtx_code comparison = d->comparison;
13915 if (VECTOR_MODE_P (mode0))
13916 op0 = safe_vector_operand (op0, mode0);
13917 if (VECTOR_MODE_P (mode1))
13918 op1 = safe_vector_operand (op1, mode1);
13920 /* Swap operands if we have a comparison that isn't available in
13921 hardware. */
13922 if (d->flag)
13924 rtx tmp = gen_reg_rtx (mode1);
13925 emit_move_insn (tmp, op1);
13926 op1 = op0;
13927 op0 = tmp;
13930 if (! target
13931 || GET_MODE (target) != tmode
13932 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13933 target = gen_reg_rtx (tmode);
13935 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13936 op0 = copy_to_mode_reg (mode0, op0);
13937 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13938 op1 = copy_to_mode_reg (mode1, op1);
13940 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13941 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13942 if (! pat)
13943 return 0;
13944 emit_insn (pat);
13945 return target;
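/* Illustration: the CMPPS/CMPPD predicate set has lt and le but no gt or
   ge, so a builtin such as __builtin_ia32_cmpgtps is tabled with the swap
   flag and reaches this point as the lt comparison with op0 and op1
   exchanged.  */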
13948 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13950 static rtx
13951 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13952 rtx target)
13954 rtx pat;
13955 tree arg0 = TREE_VALUE (arglist);
13956 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13957 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13958 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13959 rtx op2;
13960 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13961 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13962 enum rtx_code comparison = d->comparison;
13964 if (VECTOR_MODE_P (mode0))
13965 op0 = safe_vector_operand (op0, mode0);
13966 if (VECTOR_MODE_P (mode1))
13967 op1 = safe_vector_operand (op1, mode1);
13969 /* Swap operands if we have a comparison that isn't available in
13970 hardware. */
13971 if (d->flag)
13973 rtx tmp = op1;
13974 op1 = op0;
13975 op0 = tmp;
13978 target = gen_reg_rtx (SImode);
13979 emit_move_insn (target, const0_rtx);
13980 target = gen_rtx_SUBREG (QImode, target, 0);
13982 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13983 op0 = copy_to_mode_reg (mode0, op0);
13984 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13985 op1 = copy_to_mode_reg (mode1, op1);
13987 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13988 pat = GEN_FCN (d->icode) (op0, op1);
13989 if (! pat)
13990 return 0;
13991 emit_insn (pat);
13992 emit_insn (gen_rtx_SET (VOIDmode,
13993 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13994 gen_rtx_fmt_ee (comparison, QImode,
13995 SET_DEST (pat),
13996 const0_rtx)));
13998 return SUBREG_REG (target);
14001 /* Expand an expression EXP that calls a built-in function,
14002 with result going to TARGET if that's convenient
14003 (and in mode MODE if that's convenient).
14004 SUBTARGET may be used as the target for computing one of EXP's operands.
14005 IGNORE is nonzero if the value is to be ignored. */
14007 rtx
14008 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14009 enum machine_mode mode ATTRIBUTE_UNUSED,
14010 int ignore ATTRIBUTE_UNUSED)
14012 const struct builtin_description *d;
14013 size_t i;
14014 enum insn_code icode;
14015 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14016 tree arglist = TREE_OPERAND (exp, 1);
14017 tree arg0, arg1, arg2;
14018 rtx op0, op1, op2, pat;
14019 enum machine_mode tmode, mode0, mode1, mode2;
14020 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14022 switch (fcode)
14024 case IX86_BUILTIN_EMMS:
14025 emit_insn (gen_emms ());
14026 return 0;
14028 case IX86_BUILTIN_SFENCE:
14029 emit_insn (gen_sfence ());
14030 return 0;
14032 case IX86_BUILTIN_PEXTRW:
14033 case IX86_BUILTIN_PEXTRW128:
14034 icode = (fcode == IX86_BUILTIN_PEXTRW
14035 ? CODE_FOR_mmx_pextrw
14036 : CODE_FOR_sse2_pextrw);
14037 arg0 = TREE_VALUE (arglist);
14038 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14039 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14040 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14041 tmode = insn_data[icode].operand[0].mode;
14042 mode0 = insn_data[icode].operand[1].mode;
14043 mode1 = insn_data[icode].operand[2].mode;
14045 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14046 op0 = copy_to_mode_reg (mode0, op0);
14047 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14049 error ("selector must be an integer constant in the range 0..%i",
14050 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14051 return gen_reg_rtx (tmode);
14053 if (target == 0
14054 || GET_MODE (target) != tmode
14055 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14056 target = gen_reg_rtx (tmode);
14057 pat = GEN_FCN (icode) (target, op0, op1);
14058 if (! pat)
14059 return 0;
14060 emit_insn (pat);
14061 return target;
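    /* Illustration (assumed example): __builtin_ia32_pextrw (v, 2)
       extracts word 2 of a __v4hi; the selector must be a literal
       constant because the insn encodes it as an immediate, hence the
       error path above for non-constant selectors.  */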
14063 case IX86_BUILTIN_PINSRW:
14064 case IX86_BUILTIN_PINSRW128:
14065 icode = (fcode == IX86_BUILTIN_PINSRW
14066 ? CODE_FOR_mmx_pinsrw
14067 : CODE_FOR_sse2_pinsrw);
14068 arg0 = TREE_VALUE (arglist);
14069 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14070 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14071 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14072 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14073 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14074 tmode = insn_data[icode].operand[0].mode;
14075 mode0 = insn_data[icode].operand[1].mode;
14076 mode1 = insn_data[icode].operand[2].mode;
14077 mode2 = insn_data[icode].operand[3].mode;
14079 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14080 op0 = copy_to_mode_reg (mode0, op0);
14081 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14082 op1 = copy_to_mode_reg (mode1, op1);
14083 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14085 error ("selector must be an integer constant in the range 0..%i",
14086 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14087 return const0_rtx;
14089 if (target == 0
14090 || GET_MODE (target) != tmode
14091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14092 target = gen_reg_rtx (tmode);
14093 pat = GEN_FCN (icode) (target, op0, op1, op2);
14094 if (! pat)
14095 return 0;
14096 emit_insn (pat);
14097 return target;
14099 case IX86_BUILTIN_MASKMOVQ:
14100 case IX86_BUILTIN_MASKMOVDQU:
14101 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14102 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14103 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14104 : CODE_FOR_sse2_maskmovdqu));
14105 /* Note the arg order is different from the operand order. */
14106 arg1 = TREE_VALUE (arglist);
14107 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14108 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14109 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14110 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14111 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14112 mode0 = insn_data[icode].operand[0].mode;
14113 mode1 = insn_data[icode].operand[1].mode;
14114 mode2 = insn_data[icode].operand[2].mode;
14116 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14117 op0 = copy_to_mode_reg (mode0, op0);
14118 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14119 op1 = copy_to_mode_reg (mode1, op1);
14120 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14121 op2 = copy_to_mode_reg (mode2, op2);
14122 pat = GEN_FCN (icode) (op0, op1, op2);
14123 if (! pat)
14124 return 0;
14125 emit_insn (pat);
14126 return 0;
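    /* Illustration: __builtin_ia32_maskmovq (d, m, p) stores the bytes of
       d whose corresponding byte in m has its high bit set to the address
       p; the pointer, although the last source argument, becomes insn
       operand 0, which is why the args are picked up out of order above.  */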
14128 case IX86_BUILTIN_SQRTSS:
14129 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14130 case IX86_BUILTIN_RSQRTSS:
14131 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14132 case IX86_BUILTIN_RCPSS:
14133 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14135 case IX86_BUILTIN_LOADAPS:
14136 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14138 case IX86_BUILTIN_LOADUPS:
14139 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14141 case IX86_BUILTIN_STOREAPS:
14142 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14144 case IX86_BUILTIN_STOREUPS:
14145 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14147 case IX86_BUILTIN_LOADSS:
14148 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14150 case IX86_BUILTIN_STORESS:
14151 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14153 case IX86_BUILTIN_LOADHPS:
14154 case IX86_BUILTIN_LOADLPS:
14155 case IX86_BUILTIN_LOADHPD:
14156 case IX86_BUILTIN_LOADLPD:
14157 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14158 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14159 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14160 : CODE_FOR_sse2_movsd);
14161 arg0 = TREE_VALUE (arglist);
14162 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14163 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14164 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14165 tmode = insn_data[icode].operand[0].mode;
14166 mode0 = insn_data[icode].operand[1].mode;
14167 mode1 = insn_data[icode].operand[2].mode;
14169 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14170 op0 = copy_to_mode_reg (mode0, op0);
14171 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14172 if (target == 0
14173 || GET_MODE (target) != tmode
14174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14175 target = gen_reg_rtx (tmode);
14176 pat = GEN_FCN (icode) (target, op0, op1);
14177 if (! pat)
14178 return 0;
14179 emit_insn (pat);
14180 return target;
14182 case IX86_BUILTIN_STOREHPS:
14183 case IX86_BUILTIN_STORELPS:
14184 case IX86_BUILTIN_STOREHPD:
14185 case IX86_BUILTIN_STORELPD:
14186 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14187 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14188 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14189 : CODE_FOR_sse2_movsd);
14190 arg0 = TREE_VALUE (arglist);
14191 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14192 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14193 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14194 mode0 = insn_data[icode].operand[1].mode;
14195 mode1 = insn_data[icode].operand[2].mode;
14197 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14198 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14199 op1 = copy_to_mode_reg (mode1, op1);
14201 pat = GEN_FCN (icode) (op0, op0, op1);
14202 if (! pat)
14203 return 0;
14204 emit_insn (pat);
14205 return 0;
14207 case IX86_BUILTIN_MOVNTPS:
14208 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14209 case IX86_BUILTIN_MOVNTQ:
14210 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14212 case IX86_BUILTIN_LDMXCSR:
14213 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14214 target = assign_386_stack_local (SImode, 0);
14215 emit_move_insn (target, op0);
14216 emit_insn (gen_ldmxcsr (target));
14217 return 0;
14219 case IX86_BUILTIN_STMXCSR:
14220 target = assign_386_stack_local (SImode, 0);
14221 emit_insn (gen_stmxcsr (target));
14222 return copy_to_mode_reg (SImode, target);
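    /* Illustration (assumed bit value): the pair supports the usual MXCSR
       read-modify-write idiom, e.g.

           unsigned csr = __builtin_ia32_stmxcsr ();
           __builtin_ia32_ldmxcsr (csr | 0x8000);      set the FTZ bit

       the stack slot is used because ldmxcsr/stmxcsr only accept a 32-bit
       memory operand.  */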
14224 case IX86_BUILTIN_SHUFPS:
14225 case IX86_BUILTIN_SHUFPD:
14226 icode = (fcode == IX86_BUILTIN_SHUFPS
14227 ? CODE_FOR_sse_shufps
14228 : CODE_FOR_sse2_shufpd);
14229 arg0 = TREE_VALUE (arglist);
14230 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14231 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14232 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14233 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14234 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14235 tmode = insn_data[icode].operand[0].mode;
14236 mode0 = insn_data[icode].operand[1].mode;
14237 mode1 = insn_data[icode].operand[2].mode;
14238 mode2 = insn_data[icode].operand[3].mode;
14240 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14241 op0 = copy_to_mode_reg (mode0, op0);
14242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14243 op1 = copy_to_mode_reg (mode1, op1);
14244 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14246 /* @@@ better error message */
14247 error ("mask must be an immediate");
14248 return gen_reg_rtx (tmode);
14250 if (target == 0
14251 || GET_MODE (target) != tmode
14252 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14253 target = gen_reg_rtx (tmode);
14254 pat = GEN_FCN (icode) (target, op0, op1, op2);
14255 if (! pat)
14256 return 0;
14257 emit_insn (pat);
14258 return target;
14260 case IX86_BUILTIN_PSHUFW:
14261 case IX86_BUILTIN_PSHUFD:
14262 case IX86_BUILTIN_PSHUFHW:
14263 case IX86_BUILTIN_PSHUFLW:
14264 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14265 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14266 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14267 : CODE_FOR_mmx_pshufw);
14268 arg0 = TREE_VALUE (arglist);
14269 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14270 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14271 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14272 tmode = insn_data[icode].operand[0].mode;
14273 mode1 = insn_data[icode].operand[1].mode;
14274 mode2 = insn_data[icode].operand[2].mode;
14276 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14277 op0 = copy_to_mode_reg (mode1, op0);
14278 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14280 /* @@@ better error message */
14281 error ("mask must be an immediate");
14282 return const0_rtx;
14284 if (target == 0
14285 || GET_MODE (target) != tmode
14286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14287 target = gen_reg_rtx (tmode);
14288 pat = GEN_FCN (icode) (target, op0, op1);
14289 if (! pat)
14290 return 0;
14291 emit_insn (pat);
14292 return target;
14294 case IX86_BUILTIN_PSLLDQI128:
14295 case IX86_BUILTIN_PSRLDQI128:
14296 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14297 : CODE_FOR_sse2_lshrti3);
14298 arg0 = TREE_VALUE (arglist);
14299 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14300 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14301 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14302 tmode = insn_data[icode].operand[0].mode;
14303 mode1 = insn_data[icode].operand[1].mode;
14304 mode2 = insn_data[icode].operand[2].mode;
14306 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14308 op0 = copy_to_reg (op0);
14309 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14311 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14313 error ("shift must be an immediate");
14314 return const0_rtx;
14316 target = gen_reg_rtx (V2DImode);
14317 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14318 if (! pat)
14319 return 0;
14320 emit_insn (pat);
14321 return target;
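    /* Illustration (assumed intrinsic mapping): since the underlying insn
       is a TImode shift, the count here is in bits; the emmintrin.h
       wrappers _mm_slli_si128/_mm_srli_si128 multiply their byte count by
       8 before calling these builtins.  */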
14323 case IX86_BUILTIN_FEMMS:
14324 emit_insn (gen_femms ());
14325 return NULL_RTX;
14327 case IX86_BUILTIN_PAVGUSB:
14328 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14330 case IX86_BUILTIN_PF2ID:
14331 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14333 case IX86_BUILTIN_PFACC:
14334 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14336 case IX86_BUILTIN_PFADD:
14337 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14339 case IX86_BUILTIN_PFCMPEQ:
14340 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14342 case IX86_BUILTIN_PFCMPGE:
14343 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14345 case IX86_BUILTIN_PFCMPGT:
14346 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14348 case IX86_BUILTIN_PFMAX:
14349 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14351 case IX86_BUILTIN_PFMIN:
14352 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14354 case IX86_BUILTIN_PFMUL:
14355 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14357 case IX86_BUILTIN_PFRCP:
14358 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14360 case IX86_BUILTIN_PFRCPIT1:
14361 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14363 case IX86_BUILTIN_PFRCPIT2:
14364 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14366 case IX86_BUILTIN_PFRSQIT1:
14367 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14369 case IX86_BUILTIN_PFRSQRT:
14370 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14372 case IX86_BUILTIN_PFSUB:
14373 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14375 case IX86_BUILTIN_PFSUBR:
14376 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14378 case IX86_BUILTIN_PI2FD:
14379 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14381 case IX86_BUILTIN_PMULHRW:
14382 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14384 case IX86_BUILTIN_PF2IW:
14385 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14387 case IX86_BUILTIN_PFNACC:
14388 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14390 case IX86_BUILTIN_PFPNACC:
14391 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14393 case IX86_BUILTIN_PI2FW:
14394 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14396 case IX86_BUILTIN_PSWAPDSI:
14397 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14399 case IX86_BUILTIN_PSWAPDSF:
14400 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14402 case IX86_BUILTIN_SSE_ZERO:
14403 target = gen_reg_rtx (V4SFmode);
14404 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14405 return target;
14407 case IX86_BUILTIN_MMX_ZERO:
14408 target = gen_reg_rtx (DImode);
14409 emit_insn (gen_mmx_clrdi (target));
14410 return target;
14412 case IX86_BUILTIN_CLRTI:
14413 target = gen_reg_rtx (V2DImode);
14414 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14415 return target;
14418 case IX86_BUILTIN_SQRTSD:
14419 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14420 case IX86_BUILTIN_LOADAPD:
14421 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14422 case IX86_BUILTIN_LOADUPD:
14423 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14425 case IX86_BUILTIN_STOREAPD:
14426 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14427 case IX86_BUILTIN_STOREUPD:
14428 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14430 case IX86_BUILTIN_LOADSD:
14431 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14433 case IX86_BUILTIN_STORESD:
14434 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14436 case IX86_BUILTIN_SETPD1:
14437 target = assign_386_stack_local (DFmode, 0);
14438 arg0 = TREE_VALUE (arglist);
14439 emit_move_insn (adjust_address (target, DFmode, 0),
14440 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14441 op0 = gen_reg_rtx (V2DFmode);
14442 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14443 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14444 return op0;
14446 case IX86_BUILTIN_SETPD:
14447 target = assign_386_stack_local (V2DFmode, 0);
14448 arg0 = TREE_VALUE (arglist);
14449 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14450 emit_move_insn (adjust_address (target, DFmode, 0),
14451 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14452 emit_move_insn (adjust_address (target, DFmode, 8),
14453 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14454 op0 = gen_reg_rtx (V2DFmode);
14455 emit_insn (gen_sse2_movapd (op0, target));
14456 return op0;
14458 case IX86_BUILTIN_LOADRPD:
14459 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14460 gen_reg_rtx (V2DFmode), 1);
14461 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14462 return target;
14464 case IX86_BUILTIN_LOADPD1:
14465 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14466 gen_reg_rtx (V2DFmode), 1);
14467 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14468 return target;
14470 case IX86_BUILTIN_STOREPD1:
14471 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14472 case IX86_BUILTIN_STORERPD:
14473 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14475 case IX86_BUILTIN_CLRPD:
14476 target = gen_reg_rtx (V2DFmode);
14477 emit_insn (gen_sse_clrv2df (target));
14478 return target;
14480 case IX86_BUILTIN_MFENCE:
14481 emit_insn (gen_sse2_mfence ());
14482 return 0;
14483 case IX86_BUILTIN_LFENCE:
14484 emit_insn (gen_sse2_lfence ());
14485 return 0;
14487 case IX86_BUILTIN_CLFLUSH:
14488 arg0 = TREE_VALUE (arglist);
14489 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14490 icode = CODE_FOR_sse2_clflush;
14491 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14492 op0 = copy_to_mode_reg (Pmode, op0);
14494 emit_insn (gen_sse2_clflush (op0));
14495 return 0;
14497 case IX86_BUILTIN_MOVNTPD:
14498 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14499 case IX86_BUILTIN_MOVNTDQ:
14500 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14501 case IX86_BUILTIN_MOVNTI:
14502 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14504 case IX86_BUILTIN_LOADDQA:
14505 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14506 case IX86_BUILTIN_LOADDQU:
14507 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14508 case IX86_BUILTIN_LOADD:
14509 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14511 case IX86_BUILTIN_STOREDQA:
14512 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14513 case IX86_BUILTIN_STOREDQU:
14514 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14515 case IX86_BUILTIN_STORED:
14516 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14518 case IX86_BUILTIN_MONITOR:
14519 arg0 = TREE_VALUE (arglist);
14520 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14521 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14522 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14523 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14524 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14525 if (!REG_P (op0))
14526 op0 = copy_to_mode_reg (SImode, op0);
14527 if (!REG_P (op1))
14528 op1 = copy_to_mode_reg (SImode, op1);
14529 if (!REG_P (op2))
14530 op2 = copy_to_mode_reg (SImode, op2);
14531 emit_insn (gen_monitor (op0, op1, op2));
14532 return 0;
14534 case IX86_BUILTIN_MWAIT:
14535 arg0 = TREE_VALUE (arglist);
14536 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14537 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14538 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14539 if (!REG_P (op0))
14540 op0 = copy_to_mode_reg (SImode, op0);
14541 if (!REG_P (op1))
14542 op1 = copy_to_mode_reg (SImode, op1);
14543 emit_insn (gen_mwait (op0, op1));
14544 return 0;
14546 case IX86_BUILTIN_LOADDDUP:
14547 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14549 case IX86_BUILTIN_LDDQU:
14550 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, 1);
14553 default:
14554 break;
14557 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14558 if (d->code == fcode)
14560 /* Compares are treated specially. */
14561 if (d->icode == CODE_FOR_maskcmpv4sf3
14562 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14563 || d->icode == CODE_FOR_maskncmpv4sf3
14564 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14565 || d->icode == CODE_FOR_maskcmpv2df3
14566 || d->icode == CODE_FOR_vmmaskcmpv2df3
14567 || d->icode == CODE_FOR_maskncmpv2df3
14568 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14569 return ix86_expand_sse_compare (d, arglist, target);
14571 return ix86_expand_binop_builtin (d->icode, arglist, target);
14574 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14575 if (d->code == fcode)
14576 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14578 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14579 if (d->code == fcode)
14580 return ix86_expand_sse_comi (d, arglist, target);
14582 /* @@@ Should really do something sensible here. */
14583 return 0;
14586 /* Store OPERAND to memory after reload is completed. This means
14587 that we can't easily use assign_stack_local. */
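/* Three cases are handled below: with a red zone (64-bit) the value is
   simply stored below the stack pointer, in the 128 bytes the 64-bit ABI
   leaves usable without adjusting %rsp; on 64-bit without a red zone it is
   pushed with a pre-decrement of the stack pointer; on 32-bit it is pushed
   as one or two SImode words. The returned MEM describes the resulting
   stack slot. */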
14589 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14591 rtx result;
14592 if (!reload_completed)
14593 abort ();
14594 if (TARGET_RED_ZONE)
14596 result = gen_rtx_MEM (mode,
14597 gen_rtx_PLUS (Pmode,
14598 stack_pointer_rtx,
14599 GEN_INT (-RED_ZONE_SIZE)));
14600 emit_move_insn (result, operand);
14602 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14604 switch (mode)
14606 case HImode:
14607 case SImode:
14608 operand = gen_lowpart (DImode, operand);
14609 /* FALLTHRU */
14610 case DImode:
14611 emit_insn (
14612 gen_rtx_SET (VOIDmode,
14613 gen_rtx_MEM (DImode,
14614 gen_rtx_PRE_DEC (DImode,
14615 stack_pointer_rtx)),
14616 operand));
14617 break;
14618 default:
14619 abort ();
14621 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14623 else
14625 switch (mode)
14627 case DImode:
14629 rtx operands[2];
14630 split_di (&operand, 1, operands, operands + 1);
14631 emit_insn (
14632 gen_rtx_SET (VOIDmode,
14633 gen_rtx_MEM (SImode,
14634 gen_rtx_PRE_DEC (Pmode,
14635 stack_pointer_rtx)),
14636 operands[1]));
14637 emit_insn (
14638 gen_rtx_SET (VOIDmode,
14639 gen_rtx_MEM (SImode,
14640 gen_rtx_PRE_DEC (Pmode,
14641 stack_pointer_rtx)),
14642 operands[0]));
14644 break;
14645 case HImode:
14646 /* It is better to store HImodes as SImodes. */
14647 if (!TARGET_PARTIAL_REG_STALL)
14648 operand = gen_lowpart (SImode, operand);
14649 /* FALLTHRU */
14650 case SImode:
14651 emit_insn (
14652 gen_rtx_SET (VOIDmode,
14653 gen_rtx_MEM (GET_MODE (operand),
14654 gen_rtx_PRE_DEC (SImode,
14655 stack_pointer_rtx)),
14656 operand));
14657 break;
14658 default:
14659 abort ();
14661 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14663 return result;
14666 /* Free the operand from memory. */
14667 void
14668 ix86_free_from_memory (enum machine_mode mode)
14670 if (!TARGET_RED_ZONE)
14672 int size;
14674 if (mode == DImode || TARGET_64BIT)
14675 size = 8;
14676 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14677 size = 2;
14678 else
14679 size = 4;
14680 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14681 to a pop or add instruction if registers are available. */
14682 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14683 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14684 GEN_INT (size))));
14688 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14689 QImode must go into class Q_REGS.
14690 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
14691 movdf to do mem-to-mem moves through integer regs. */
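/* For example, a CONST_DOUBLE reloaded into an SSE class yields NO_REGS,
   forcing the constant into memory, while for the 387 classes the special
   constants recognized by standard_80387_constant_p (such as 0.0 and 1.0)
   may stay in registers since fldz/fld1 can materialize them. */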
14692 enum reg_class
14693 ix86_preferred_reload_class (rtx x, enum reg_class class)
14695 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14696 return NO_REGS;
14697 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14699 /* SSE can't load any constant directly yet. */
14700 if (SSE_CLASS_P (class))
14701 return NO_REGS;
14702 /* Floats can load 0 and 1. */
14703 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14705 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14706 if (MAYBE_SSE_CLASS_P (class))
14707 return (reg_class_subset_p (class, GENERAL_REGS)
14708 ? GENERAL_REGS : FLOAT_REGS);
14709 else
14710 return class;
14712 /* General regs can load everything. */
14713 if (reg_class_subset_p (class, GENERAL_REGS))
14714 return GENERAL_REGS;
14715 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14716 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14717 return NO_REGS;
14719 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14720 return NO_REGS;
14721 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14722 return Q_REGS;
14723 return class;
14726 /* If we are copying between general and FP registers, we need a memory
14727 location. The same is true for SSE and MMX registers.
14729 The macro can't work reliably when one of the CLASSES is a class containing
14730 registers from multiple units (SSE, MMX, integer). We avoid this by never
14731 combining those units in a single alternative in the machine description.
14732 Ensure that this constraint holds to avoid unexpected surprises.
14734 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14735 enforce these sanity checks. */
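/* For example, a DFmode move between GENERAL_REGS and SSE_REGS must go
   through a stack slot; only SImode (and DImode on 64-bit) values may move
   directly between the units, and even that is avoided unless
   TARGET_INTER_UNIT_MOVES is set or we are optimizing for size. */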
14737 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14738 enum machine_mode mode, int strict)
14740 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14741 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14742 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14743 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14744 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14745 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14747 if (strict)
14748 abort ();
14749 else
14750 return 1;
14752 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14753 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14754 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14755 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14756 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14758 /* Return the cost of moving data from a register in class CLASS1 to
14759 one in class CLASS2.
14761 It is not required that the cost always equal 2 when FROM is the same as TO;
14762 on some machines it is expensive to move between registers if they are not
14763 general registers. */
14765 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14766 enum reg_class class2)
14768 /* In case we require secondary memory, compute the cost of the store followed
14769 by the load. In order to avoid bad register allocation choices, we need
14770 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14772 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14774 int cost = 1;
14776 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14777 MEMORY_MOVE_COST (mode, class1, 1));
14778 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14779 MEMORY_MOVE_COST (mode, class2, 1));
14781 /* In the case of copying from a general purpose register we may emit multiple
14782 stores followed by a single load, causing a memory size mismatch stall.
14783 Count this as an arbitrarily high cost of 20. */
14784 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14785 cost += 20;
14787 /* In the case of FP/MMX moves, the registers actually overlap, and we
14788 have to switch modes in order to treat them differently. */
14789 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14790 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14791 cost += 20;
14793 return cost;
14796 /* Moves between SSE/MMX and integer unit are expensive. */
14797 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14798 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14799 return ix86_cost->mmxsse_to_integer;
14800 if (MAYBE_FLOAT_CLASS_P (class1))
14801 return ix86_cost->fp_move;
14802 if (MAYBE_SSE_CLASS_P (class1))
14803 return ix86_cost->sse_move;
14804 if (MAYBE_MMX_CLASS_P (class1))
14805 return ix86_cost->mmx_move;
14806 return 2;
14809 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
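/* Roughly: the flags register holds only CCmode values, x87 stack
   registers hold only x87 FP modes, XMM registers hold the modes enabled
   by the selected SSE level, MMX registers hold 64-bit vector modes, and
   the general registers hold integer and FP scalars, with QImode on
   32-bit preferring the four registers that have byte subregisters when
   partial register stalls matter. */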
14811 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14813 /* Flags, and only flags, can hold CCmode values. */
14814 if (CC_REGNO_P (regno))
14815 return GET_MODE_CLASS (mode) == MODE_CC;
14816 if (GET_MODE_CLASS (mode) == MODE_CC
14817 || GET_MODE_CLASS (mode) == MODE_RANDOM
14818 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14819 return 0;
14820 if (FP_REGNO_P (regno))
14821 return VALID_FP_MODE_P (mode);
14822 if (SSE_REGNO_P (regno))
14823 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14824 if (MMX_REGNO_P (regno))
14825 return (TARGET_MMX
14826 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14827 /* We handle both integer and float values in the general purpose registers.
14828 In the future we should be able to handle vector modes as well. */
14829 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14830 return 0;
14831 /* Take care with QImode values - they can be in non-QI regs, but then
14832 they do cause partial register stalls. */
14833 if (regno < 4 || mode != QImode || TARGET_64BIT)
14834 return 1;
14835 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14838 /* Return the cost of moving data of mode M between a
14839 register and memory. A value of 2 is the default; this cost is
14840 relative to those in `REGISTER_MOVE_COST'.
14842 If moving between registers and memory is more expensive than
14843 between two registers, you should define this macro to express the
14844 relative cost.
14846 Also model the increased cost of moving QImode registers in non-Q_REGS
14847 classes. */
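/* For example, a DFmode value loaded into FLOAT_REGS costs fp_load[1], an
   SFmode store from SSE_REGS costs sse_store[0], and a QImode load into a
   non-Q class pays the movzbl_load penalty. */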
14850 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14852 if (FLOAT_CLASS_P (class))
14854 int index;
14855 switch (mode)
14857 case SFmode:
14858 index = 0;
14859 break;
14860 case DFmode:
14861 index = 1;
14862 break;
14863 case XFmode:
14864 index = 2;
14865 break;
14866 default:
14867 return 100;
14869 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14871 if (SSE_CLASS_P (class))
14873 int index;
14874 switch (GET_MODE_SIZE (mode))
14876 case 4:
14877 index = 0;
14878 break;
14879 case 8:
14880 index = 1;
14881 break;
14882 case 16:
14883 index = 2;
14884 break;
14885 default:
14886 return 100;
14888 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14890 if (MMX_CLASS_P (class))
14892 int index;
14893 switch (GET_MODE_SIZE (mode))
14895 case 4:
14896 index = 0;
14897 break;
14898 case 8:
14899 index = 1;
14900 break;
14901 default:
14902 return 100;
14904 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14906 switch (GET_MODE_SIZE (mode))
14908 case 1:
14909 if (in)
14910 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14911 : ix86_cost->movzbl_load);
14912 else
14913 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14914 : ix86_cost->int_store[0] + 4);
14915 break;
14916 case 2:
14917 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14918 default:
14919 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
14920 if (mode == TFmode)
14921 mode = XFmode;
14922 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14923 * (((int) GET_MODE_SIZE (mode)
14924 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14928 /* Compute a (partial) cost for rtx X. Return true if the complete
14929 cost has been computed, and false if subexpressions should be
14930 scanned. In either case, *TOTAL contains the cost result. */
14932 static bool
14933 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14935 enum machine_mode mode = GET_MODE (x);
14937 switch (code)
14939 case CONST_INT:
14940 case CONST:
14941 case LABEL_REF:
14942 case SYMBOL_REF:
14943 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14944 *total = 3;
14945 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14946 *total = 2;
14947 else if (flag_pic && SYMBOLIC_CONST (x)
14948 && (!TARGET_64BIT
14949 || (GET_CODE (x) != LABEL_REF
14950 && (GET_CODE (x) != SYMBOL_REF
14951 || !SYMBOL_REF_LOCAL_P (x)))))
14952 *total = 1;
14953 else
14954 *total = 0;
14955 return true;
14957 case CONST_DOUBLE:
14958 if (mode == VOIDmode)
14959 *total = 0;
14960 else
14961 switch (standard_80387_constant_p (x))
14963 case 1: /* 0.0 */
14964 *total = 1;
14965 break;
14966 default: /* Other constants */
14967 *total = 2;
14968 break;
14969 case 0:
14970 case -1:
14971 /* Start with (MEM (SYMBOL_REF)), since that's where
14972 it'll probably end up. Add a penalty for size. */
14973 *total = (COSTS_N_INSNS (1)
14974 + (flag_pic != 0 && !TARGET_64BIT)
14975 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14976 break;
14978 return true;
14980 case ZERO_EXTEND:
14981 /* The zero extension is often completely free on x86_64, so make
14982 it as cheap as possible. */
14983 if (TARGET_64BIT && mode == DImode
14984 && GET_MODE (XEXP (x, 0)) == SImode)
14985 *total = 1;
14986 else if (TARGET_ZERO_EXTEND_WITH_AND)
14987 *total = COSTS_N_INSNS (ix86_cost->add);
14988 else
14989 *total = COSTS_N_INSNS (ix86_cost->movzx);
14990 return false;
14992 case SIGN_EXTEND:
14993 *total = COSTS_N_INSNS (ix86_cost->movsx);
14994 return false;
14996 case ASHIFT:
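/* A left shift by 1 is costed as an add (it can be done as reg + reg),
   and shifts by 2 or 3 can use lea's index scaling when that is no more
   expensive, e.g. "leal 0(,%eax,8), %edx" for a shift by 3. */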
14997 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14998 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15000 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15001 if (value == 1)
15003 *total = COSTS_N_INSNS (ix86_cost->add);
15004 return false;
15006 if ((value == 2 || value == 3)
15007 && ix86_cost->lea <= ix86_cost->shift_const)
15009 *total = COSTS_N_INSNS (ix86_cost->lea);
15010 return false;
15013 /* FALLTHRU */
15015 case ROTATE:
15016 case ASHIFTRT:
15017 case LSHIFTRT:
15018 case ROTATERT:
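/* DImode shifts and rotates on 32-bit targets have no single-instruction
   form; constant counts cost roughly two single shifts and variable counts
   considerably more, as the factors below reflect. */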
15019 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15021 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15023 if (INTVAL (XEXP (x, 1)) > 32)
15024 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15025 else
15026 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15028 else
15030 if (GET_CODE (XEXP (x, 1)) == AND)
15031 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15032 else
15033 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15036 else
15038 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15039 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15040 else
15041 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15043 return false;
15045 case MULT:
15046 if (FLOAT_MODE_P (mode))
15048 *total = COSTS_N_INSNS (ix86_cost->fmul);
15049 return false;
15051 else
15053 rtx op0 = XEXP (x, 0);
15054 rtx op1 = XEXP (x, 1);
15055 int nbits;
15056 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15058 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15059 for (nbits = 0; value != 0; value &= value - 1)
15060 nbits++;
15062 else
15063 /* This is arbitrary. */
15064 nbits = 7;
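/* E.g. a multiply by 10 (binary 1010) gives nbits == 2, so constants with
   few set bits are costed as cheaper multiplies. */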
15066 /* Compute costs correctly for widening multiplication. */
15067 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
15068 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
15069 == GET_MODE_SIZE (mode))
15071 int is_mulwiden = 0;
15072 enum machine_mode inner_mode = GET_MODE (op0);
15074 if (GET_CODE (op0) == GET_CODE (op1))
15075 is_mulwiden = 1, op1 = XEXP (op1, 0);
15076 else if (GET_CODE (op1) == CONST_INT)
15078 if (GET_CODE (op0) == SIGN_EXTEND)
15079 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
15080 == INTVAL (op1);
15081 else
15082 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
15085 if (is_mulwiden)
15086 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
15089 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15090 + nbits * ix86_cost->mult_bit)
15091 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
15093 return true;
15096 case DIV:
15097 case UDIV:
15098 case MOD:
15099 case UMOD:
15100 if (FLOAT_MODE_P (mode))
15101 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15102 else
15103 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15104 return false;
15106 case PLUS:
15107 if (FLOAT_MODE_P (mode))
15108 *total = COSTS_N_INSNS (ix86_cost->fadd);
15109 else if (GET_MODE_CLASS (mode) == MODE_INT
15110 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15112 if (GET_CODE (XEXP (x, 0)) == PLUS
15113 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15114 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15115 && CONSTANT_P (XEXP (x, 1)))
15117 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15118 if (val == 2 || val == 4 || val == 8)
15120 *total = COSTS_N_INSNS (ix86_cost->lea);
15121 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15122 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15123 outer_code);
15124 *total += rtx_cost (XEXP (x, 1), outer_code);
15125 return true;
15128 else if (GET_CODE (XEXP (x, 0)) == MULT
15129 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15131 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15132 if (val == 2 || val == 4 || val == 8)
15134 *total = COSTS_N_INSNS (ix86_cost->lea);
15135 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15136 *total += rtx_cost (XEXP (x, 1), outer_code);
15137 return true;
15140 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15142 *total = COSTS_N_INSNS (ix86_cost->lea);
15143 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15144 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15145 *total += rtx_cost (XEXP (x, 1), outer_code);
15146 return true;
15149 /* FALLTHRU */
15151 case MINUS:
15152 if (FLOAT_MODE_P (mode))
15154 *total = COSTS_N_INSNS (ix86_cost->fadd);
15155 return false;
15157 /* FALLTHRU */
15159 case AND:
15160 case IOR:
15161 case XOR:
15162 if (!TARGET_64BIT && mode == DImode)
15164 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15165 + (rtx_cost (XEXP (x, 0), outer_code)
15166 << (GET_MODE (XEXP (x, 0)) != DImode))
15167 + (rtx_cost (XEXP (x, 1), outer_code)
15168 << (GET_MODE (XEXP (x, 1)) != DImode)));
15169 return true;
15171 /* FALLTHRU */
15173 case NEG:
15174 if (FLOAT_MODE_P (mode))
15176 *total = COSTS_N_INSNS (ix86_cost->fchs);
15177 return false;
15179 /* FALLTHRU */
15181 case NOT:
15182 if (!TARGET_64BIT && mode == DImode)
15183 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15184 else
15185 *total = COSTS_N_INSNS (ix86_cost->add);
15186 return false;
15188 case FLOAT_EXTEND:
15189 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15190 *total = 0;
15191 return false;
15193 case ABS:
15194 if (FLOAT_MODE_P (mode))
15195 *total = COSTS_N_INSNS (ix86_cost->fabs);
15196 return false;
15198 case SQRT:
15199 if (FLOAT_MODE_P (mode))
15200 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15201 return false;
15203 case UNSPEC:
15204 if (XINT (x, 1) == UNSPEC_TP)
15205 *total = 0;
15206 return false;
15208 default:
15209 return false;
15213 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15214 static void
15215 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15217 init_section ();
15218 fputs ("\tpushl $", asm_out_file);
15219 assemble_name (asm_out_file, XSTR (symbol, 0));
15220 fputc ('\n', asm_out_file);
15222 #endif
15224 #if TARGET_MACHO
15226 static int current_machopic_label_num;
15228 /* Given a symbol name and its associated stub, write out the
15229 definition of the stub. */
15231 void
15232 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15234 unsigned int length;
15235 char *binder_name, *symbol_name, lazy_ptr_name[32];
15236 int label = ++current_machopic_label_num;
15238 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15239 symb = (*targetm.strip_name_encoding) (symb);
15241 length = strlen (stub);
15242 binder_name = alloca (length + 32);
15243 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15245 length = strlen (symb);
15246 symbol_name = alloca (length + 32);
15247 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15249 sprintf (lazy_ptr_name, "L%d$lz", label);
15251 if (MACHOPIC_PURE)
15252 machopic_picsymbol_stub_section ();
15253 else
15254 machopic_symbol_stub_section ();
15256 fprintf (file, "%s:\n", stub);
15257 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15259 if (MACHOPIC_PURE)
15261 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15262 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15263 fprintf (file, "\tjmp %%edx\n");
15265 else
15266 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15268 fprintf (file, "%s:\n", binder_name);
15270 if (MACHOPIC_PURE)
15272 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15273 fprintf (file, "\tpushl %%eax\n");
15275 else
15276 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15278 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15280 machopic_lazy_symbol_ptr_section ();
15281 fprintf (file, "%s:\n", lazy_ptr_name);
15282 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15283 fprintf (file, "\t.long %s\n", binder_name);
15285 #endif /* TARGET_MACHO */
15287 /* Order the registers for register allocator. */
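/* Preference order: caller-saved general registers, then call-saved ones,
   then whichever FP unit is doing the math (x87 before SSE by default,
   SSE before x87 with -mfpmath=sse), and finally the MMX registers. */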
15289 void
15290 x86_order_regs_for_local_alloc (void)
15292 int pos = 0;
15293 int i;
15295 /* First allocate the local general purpose registers. */
15296 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15297 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15298 reg_alloc_order [pos++] = i;
15300 /* Global general purpose registers. */
15301 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15302 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15303 reg_alloc_order [pos++] = i;
15305 /* x87 registers come first in case we are doing FP math
15306 using them. */
15307 if (!TARGET_SSE_MATH)
15308 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15309 reg_alloc_order [pos++] = i;
15311 /* SSE registers. */
15312 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15313 reg_alloc_order [pos++] = i;
15314 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15315 reg_alloc_order [pos++] = i;
15317 /* x87 registers. */
15318 if (TARGET_SSE_MATH)
15319 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15320 reg_alloc_order [pos++] = i;
15322 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15323 reg_alloc_order [pos++] = i;
15325 /* Initialize the rest of the array, as some registers are not
15326 allocated at all. */
15327 while (pos < FIRST_PSEUDO_REGISTER)
15328 reg_alloc_order [pos++] = 0;
15331 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15332 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15333 #endif
15335 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15336 struct attribute_spec.handler. */
15337 static tree
15338 ix86_handle_struct_attribute (tree *node, tree name,
15339 tree args ATTRIBUTE_UNUSED,
15340 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15342 tree *type = NULL;
15343 if (DECL_P (*node))
15345 if (TREE_CODE (*node) == TYPE_DECL)
15346 type = &TREE_TYPE (*node);
15348 else
15349 type = node;
15351 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15352 || TREE_CODE (*type) == UNION_TYPE)))
15354 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15355 *no_add_attrs = true;
15358 else if ((is_attribute_p ("ms_struct", name)
15359 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15360 || ((is_attribute_p ("gcc_struct", name)
15361 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15363 warning ("`%s' incompatible attribute ignored",
15364 IDENTIFIER_POINTER (name));
15365 *no_add_attrs = true;
15368 return NULL_TREE;
15371 static bool
15372 ix86_ms_bitfield_layout_p (tree record_type)
15374 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15375 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15376 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15379 /* Returns an expression indicating where the this parameter is
15380 located on entry to the FUNCTION. */
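/* In 64-bit mode `this' arrives in %rdi, or in %rsi when a hidden
   aggregate-return pointer occupies the first register. With regparm it is
   normally in %eax and with fastcall in %ecx; otherwise it lives on the
   stack at 4(%esp), or at 8(%esp) when a hidden return pointer sits at
   4(%esp). */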
15382 static rtx
15383 x86_this_parameter (tree function)
15385 tree type = TREE_TYPE (function);
15387 if (TARGET_64BIT)
15389 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15390 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15393 if (ix86_function_regparm (type, function) > 0)
15395 tree parm;
15397 parm = TYPE_ARG_TYPES (type);
15398 /* Figure out whether or not the function has a variable number of
15399 arguments. */
15400 for (; parm; parm = TREE_CHAIN (parm))
15401 if (TREE_VALUE (parm) == void_type_node)
15402 break;
15403 /* If not, the this parameter is in the first argument. */
15404 if (parm)
15406 int regno = 0;
15407 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15408 regno = 2;
15409 return gen_rtx_REG (SImode, regno);
15413 if (aggregate_value_p (TREE_TYPE (type), type))
15414 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15415 else
15416 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15419 /* Determine whether x86_output_mi_thunk can succeed. */
15421 static bool
15422 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15423 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15424 HOST_WIDE_INT vcall_offset, tree function)
15426 /* 64-bit can handle anything. */
15427 if (TARGET_64BIT)
15428 return true;
15430 /* For 32-bit, everything's fine if we have one free register. */
15431 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15432 return true;
15434 /* Need a free register for vcall_offset. */
15435 if (vcall_offset)
15436 return false;
15438 /* Need a free register for GOT references. */
15439 if (flag_pic && !(*targetm.binds_local_p) (function))
15440 return false;
15442 /* Otherwise ok. */
15443 return true;
15446 /* Output the assembler code for a thunk function. THUNK_DECL is the
15447 declaration for the thunk function itself, FUNCTION is the decl for
15448 the target function. DELTA is an immediate constant offset to be
15449 added to THIS. If VCALL_OFFSET is nonzero, the word at
15450 *(*this + vcall_offset) should be added to THIS. */
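/* The emitted sequence is roughly:
       this += DELTA;
       if (VCALL_OFFSET)
         this += *((*this) + VCALL_OFFSET);   (an adjustment read from the vtable)
       jmp FUNCTION   (directly, or through the GOT / a Mach-O stub when PIC)
   using %eax and %ecx (%r10 and %r11 on 64-bit) as scratch registers. */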
15452 static void
15453 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15454 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15455 HOST_WIDE_INT vcall_offset, tree function)
15457 rtx xops[3];
15458 rtx this = x86_this_parameter (function);
15459 rtx this_reg, tmp;
15461 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15462 pull it in now and let DELTA benefit. */
15463 if (REG_P (this))
15464 this_reg = this;
15465 else if (vcall_offset)
15467 /* Put the this parameter into %eax. */
15468 xops[0] = this;
15469 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15470 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15472 else
15473 this_reg = NULL_RTX;
15475 /* Adjust the this parameter by a fixed constant. */
15476 if (delta)
15478 xops[0] = GEN_INT (delta);
15479 xops[1] = this_reg ? this_reg : this;
15480 if (TARGET_64BIT)
15482 if (!x86_64_general_operand (xops[0], DImode))
15484 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15485 xops[1] = tmp;
15486 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15487 xops[0] = tmp;
15488 xops[1] = this;
15490 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15492 else
15493 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15496 /* Adjust the this parameter by a value stored in the vtable. */
15497 if (vcall_offset)
15499 if (TARGET_64BIT)
15500 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15501 else
15503 int tmp_regno = 2 /* ECX */;
15504 if (lookup_attribute ("fastcall",
15505 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15506 tmp_regno = 0 /* EAX */;
15507 tmp = gen_rtx_REG (SImode, tmp_regno);
15510 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15511 xops[1] = tmp;
15512 if (TARGET_64BIT)
15513 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15514 else
15515 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15517 /* Adjust the this parameter. */
15518 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15519 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15521 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15522 xops[0] = GEN_INT (vcall_offset);
15523 xops[1] = tmp2;
15524 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15525 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15527 xops[1] = this_reg;
15528 if (TARGET_64BIT)
15529 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15530 else
15531 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15534 /* If necessary, drop THIS back to its stack slot. */
15535 if (this_reg && this_reg != this)
15537 xops[0] = this_reg;
15538 xops[1] = this;
15539 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15542 xops[0] = XEXP (DECL_RTL (function), 0);
15543 if (TARGET_64BIT)
15545 if (!flag_pic || (*targetm.binds_local_p) (function))
15546 output_asm_insn ("jmp\t%P0", xops);
15547 else
15549 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15550 tmp = gen_rtx_CONST (Pmode, tmp);
15551 tmp = gen_rtx_MEM (QImode, tmp);
15552 xops[0] = tmp;
15553 output_asm_insn ("jmp\t%A0", xops);
15556 else
15558 if (!flag_pic || (*targetm.binds_local_p) (function))
15559 output_asm_insn ("jmp\t%P0", xops);
15560 else
15561 #if TARGET_MACHO
15562 if (TARGET_MACHO)
15564 rtx sym_ref = XEXP (DECL_RTL (function), 0);
15565 tmp = (gen_rtx_SYMBOL_REF
15566 (Pmode,
15567 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
15568 tmp = gen_rtx_MEM (QImode, tmp);
15569 xops[0] = tmp;
15570 output_asm_insn ("jmp\t%0", xops);
15572 else
15573 #endif /* TARGET_MACHO */
15575 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15576 output_set_got (tmp);
15578 xops[1] = tmp;
15579 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15580 output_asm_insn ("jmp\t{*}%1", xops);
15585 static void
15586 x86_file_start (void)
15588 default_file_start ();
15589 if (X86_FILE_START_VERSION_DIRECTIVE)
15590 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15591 if (X86_FILE_START_FLTUSED)
15592 fputs ("\t.global\t__fltused\n", asm_out_file);
15593 if (ix86_asm_dialect == ASM_INTEL)
15594 fputs ("\t.intel_syntax\n", asm_out_file);
15598 x86_field_alignment (tree field, int computed)
15600 enum machine_mode mode;
15601 tree type = TREE_TYPE (field);
15603 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15604 return computed;
15605 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15606 ? get_inner_array_type (type) : type);
15607 if (mode == DFmode || mode == DCmode
15608 || GET_MODE_CLASS (mode) == MODE_INT
15609 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15610 return MIN (32, computed);
15611 return computed;
15614 /* Output assembler code to FILE to increment profiler label # LABELNO
15615 for profiling a function entry. */
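/* E.g. for non-PIC 32-bit code this emits
       movl $<LPREFIX>P<labelno>, %<PROFILE_COUNT_REGISTER>
       call <MCOUNT_NAME>
   with the counter load omitted when NO_PROFILE_COUNTERS is defined; the
   PIC and 64-bit variants below go through the GOT or use %r11 instead. */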
15616 void
15617 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15619 if (TARGET_64BIT)
15620 if (flag_pic)
15622 #ifndef NO_PROFILE_COUNTERS
15623 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15624 #endif
15625 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15627 else
15629 #ifndef NO_PROFILE_COUNTERS
15630 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15631 #endif
15632 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15634 else if (flag_pic)
15636 #ifndef NO_PROFILE_COUNTERS
15637 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15638 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15639 #endif
15640 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15642 else
15644 #ifndef NO_PROFILE_COUNTERS
15645 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15646 PROFILE_COUNT_REGISTER);
15647 #endif
15648 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15652 /* We don't have exact information about the insn sizes, but we may assume
15653 quite safely that we are informed about all 1 byte insns and memory
15654 address sizes. This is enough to eliminate unnecessary padding in
15655 99% of cases. */
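/* The estimate below is a conservative minimum: direct calls count as
   exactly 5 bytes (opcode plus rel32), other insns count one byte plus
   their address bytes (at least 4 when a symbol is referenced), and
   anything else counts as 2 bytes. */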
15657 static int
15658 min_insn_size (rtx insn)
15660 int l = 0;
15662 if (!INSN_P (insn) || !active_insn_p (insn))
15663 return 0;
15665 /* Discard alignments we've emitted and jump table insns. */
15666 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15667 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15668 return 0;
15669 if (GET_CODE (insn) == JUMP_INSN
15670 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15671 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15672 return 0;
15674 /* Important case - calls are always 5 bytes.
15675 It is common to have many calls in a row. */
15676 if (GET_CODE (insn) == CALL_INSN
15677 && symbolic_reference_mentioned_p (PATTERN (insn))
15678 && !SIBLING_CALL_P (insn))
15679 return 5;
15680 if (get_attr_length (insn) <= 1)
15681 return 1;
15683 /* For normal instructions we may rely on the sizes of addresses
15684 and the presence of a symbol to require 4 bytes of encoding.
15685 This is not the case for jumps, where references are PC-relative. */
15686 if (GET_CODE (insn) != JUMP_INSN)
15688 l = get_attr_length_address (insn);
15689 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15690 l = 4;
15692 if (l)
15693 return 1+l;
15694 else
15695 return 2;
15698 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15699 16-byte window. */
15701 static void
15702 ix86_avoid_jump_misspredicts (void)
15704 rtx insn, start = get_insns ();
15705 int nbytes = 0, njumps = 0;
15706 int isjump = 0;
15708 /* Look for all minimal intervals of instructions containing 4 jumps.
15709 The intervals are bounded by START and INSN. NBYTES is the total
15710 size of the instructions in the interval, including INSN but not
15711 including START. When NBYTES is smaller than 16 bytes, it is possible
15712 that the ends of START and INSN fall in the same 16-byte window.
15714 The smallest offset in the window at which INSN can start is the case
15715 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
15716 We add a p2align to a 16-byte window with maxskip 17 - NBYTES + sizeof (INSN). */
15718 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15721 nbytes += min_insn_size (insn);
15722 if (dump_file)
15723 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
15724 INSN_UID (insn), min_insn_size (insn));
15725 if ((GET_CODE (insn) == JUMP_INSN
15726 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15727 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15728 || GET_CODE (insn) == CALL_INSN)
15729 njumps++;
15730 else
15731 continue;
15733 while (njumps > 3)
15735 start = NEXT_INSN (start);
15736 if ((GET_CODE (start) == JUMP_INSN
15737 && GET_CODE (PATTERN (start)) != ADDR_VEC
15738 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15739 || GET_CODE (start) == CALL_INSN)
15740 njumps--, isjump = 1;
15741 else
15742 isjump = 0;
15743 nbytes -= min_insn_size (start);
15745 if (njumps < 0)
15746 abort ();
15747 if (dump_file)
15748 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15749 INSN_UID (start), INSN_UID (insn), nbytes);
15751 if (njumps == 3 && isjump && nbytes < 16)
15753 int padsize = 15 - nbytes + min_insn_size (insn);
15755 if (dump_file)
15756 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15757 INSN_UID (insn), padsize);
15758 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15763 /* AMD Athlon works faster
15764 when RET is not the destination of a conditional jump and is not directly
15765 preceded by another jump instruction. We avoid the penalty by inserting a
15766 NOP just before the RET instructions in such cases. */
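/* Rather than emitting a separate NOP, the code below replaces the ret
   with the return_internal_long pattern, which is expected to expand to a
   padded (longer) form of the return instruction. */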
15767 static void
15768 ix86_pad_returns (void)
15770 edge e;
15772 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15774 basic_block bb = e->src;
15775 rtx ret = BB_END (bb);
15776 rtx prev;
15777 bool replace = false;
15779 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15780 || !maybe_hot_bb_p (bb))
15781 continue;
15782 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15783 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15784 break;
15785 if (prev && GET_CODE (prev) == CODE_LABEL)
15787 edge e;
15788 for (e = bb->pred; e; e = e->pred_next)
15789 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15790 && !(e->flags & EDGE_FALLTHRU))
15791 replace = true;
15793 if (!replace)
15795 prev = prev_active_insn (ret);
15796 if (prev
15797 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15798 || GET_CODE (prev) == CALL_INSN))
15799 replace = true;
15800 /* Empty functions get a branch mispredict even when the jump destination
15801 is not visible to us. */
15802 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15803 replace = true;
15805 if (replace)
15807 emit_insn_before (gen_return_internal_long (), ret);
15808 delete_insn (ret);
15813 /* Implement machine specific optimizations. We implement padding of returns
15814 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
15815 static void
15816 ix86_reorg (void)
15818 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15819 ix86_pad_returns ();
15820 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15821 ix86_avoid_jump_misspredicts ();
15824 /* Return nonzero when a QImode register that must be represented via a REX
15825 prefix is used. */
15826 bool
15827 x86_extended_QIreg_mentioned_p (rtx insn)
15829 int i;
15830 extract_insn_cached (insn);
15831 for (i = 0; i < recog_data.n_operands; i++)
15832 if (REG_P (recog_data.operand[i])
15833 && REGNO (recog_data.operand[i]) >= 4)
15834 return true;
15835 return false;
15838 /* Return nonzero when P points to a register encoded via a REX prefix.
15839 Called via for_each_rtx. */
15840 static int
15841 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15843 unsigned int regno;
15844 if (!REG_P (*p))
15845 return 0;
15846 regno = REGNO (*p);
15847 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15850 /* Return true when INSN mentions a register that must be encoded using a
15851 REX prefix. */
15852 bool
15853 x86_extended_reg_mentioned_p (rtx insn)
15855 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15858 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15859 optabs would emit if we didn't have TFmode patterns. */
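/* If the value is non-negative a plain signed conversion is used.
   Otherwise the input is halved with the low bit ORed back in (so the
   rounding is unchanged), converted, and then doubled:
       out = (in >= 0) ? (FP) in : 2.0 * (FP) ((in >> 1) | (in & 1)); */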
15861 void
15862 x86_emit_floatuns (rtx operands[2])
15864 rtx neglab, donelab, i0, i1, f0, in, out;
15865 enum machine_mode mode, inmode;
15867 inmode = GET_MODE (operands[1]);
15868 if (inmode != SImode
15869 && inmode != DImode)
15870 abort ();
15872 out = operands[0];
15873 in = force_reg (inmode, operands[1]);
15874 mode = GET_MODE (out);
15875 neglab = gen_label_rtx ();
15876 donelab = gen_label_rtx ();
15877 i1 = gen_reg_rtx (Pmode);
15878 f0 = gen_reg_rtx (mode);
15880 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15882 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15883 emit_jump_insn (gen_jump (donelab));
15884 emit_barrier ();
15886 emit_label (neglab);
15888 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15889 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15890 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15891 expand_float (f0, i0, 0);
15892 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15894 emit_label (donelab);
15897 /* Initialize vector TARGET via VALS. */
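/* Strategy: all-constant vectors are loaded straight from the constant
   pool; vectors where only element 0 is variable are loaded from the pool
   and then patched with movss/movsd; the general V4SF/V2DF case is
   assembled with unpcklps/unpcklpd. */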
15898 void
15899 ix86_expand_vector_init (rtx target, rtx vals)
15901 enum machine_mode mode = GET_MODE (target);
15902 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15903 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15904 int i;
15906 for (i = n_elts - 1; i >= 0; i--)
15907 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15908 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15909 break;
15911 /* A few special cases first...
15912 ... constants are best loaded from the constant pool. */
15913 if (i < 0)
15915 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15916 return;
15919 /* ... values where only the first field is non-constant are best loaded
15920 from the pool and overwritten via a move later. */
15921 if (!i)
15923 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15924 GET_MODE_INNER (mode), 0);
15926 op = force_reg (mode, op);
15927 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15928 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15929 switch (GET_MODE (target))
15931 case V2DFmode:
15932 emit_insn (gen_sse2_movsd (target, target, op));
15933 break;
15934 case V4SFmode:
15935 emit_insn (gen_sse_movss (target, target, op));
15936 break;
15937 default:
15938 break;
15940 return;
15943 /* And the general case, built up with a sequence of unpack operations. */
15944 switch (GET_MODE (target))
15946 case V2DFmode:
15948 rtx vecop0 =
15949 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15950 rtx vecop1 =
15951 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15953 vecop0 = force_reg (V2DFmode, vecop0);
15954 vecop1 = force_reg (V2DFmode, vecop1);
15955 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15957 break;
15958 case V4SFmode:
15960 rtx vecop0 =
15961 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15962 rtx vecop1 =
15963 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15964 rtx vecop2 =
15965 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15966 rtx vecop3 =
15967 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15968 rtx tmp1 = gen_reg_rtx (V4SFmode);
15969 rtx tmp2 = gen_reg_rtx (V4SFmode);
15971 vecop0 = force_reg (V4SFmode, vecop0);
15972 vecop1 = force_reg (V4SFmode, vecop1);
15973 vecop2 = force_reg (V4SFmode, vecop2);
15974 vecop3 = force_reg (V4SFmode, vecop3);
15975 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15976 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15977 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15979 break;
15980 default:
15981 abort ();
15985 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15987 We do this in the new i386 backend to maintain source compatibility
15988 with the old cc0-based compiler. */
15990 static tree
15991 ix86_md_asm_clobbers (tree clobbers)
15993 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15994 clobbers);
15995 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15996 clobbers);
15997 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15998 clobbers);
15999 return clobbers;
16002 /* Worker function for REVERSE_CONDITION. */
16004 enum rtx_code
16005 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
16007 return (mode != CCFPmode && mode != CCFPUmode
16008 ? reverse_condition (code)
16009 : reverse_condition_maybe_unordered (code));
16012 /* Output code to perform an x87 FP register move, from OPERANDS[1]
16013 to OPERANDS[0]. */
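/* A source register that dies here can be popped as it is moved: "ffreep"
   when the destination is %st(0) and TARGET_USE_FFREEP, otherwise "fstp".
   For a live source, "fld" is used when the destination is the top of the
   stack and "fst" otherwise. */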
16015 const char *
16016 output_387_reg_move (rtx insn, rtx *operands)
16018 if (REG_P (operands[1])
16019 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16021 if (REGNO (operands[0]) == FIRST_STACK_REG
16022 && TARGET_USE_FFREEP)
16023 return "ffreep\t%y0";
16024 return "fstp\t%y0";
16026 if (STACK_TOP_P (operands[0]))
16027 return "fld%z1\t%y1";
16028 return "fst\t%y0";
16031 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
16032 the FP status register is set. */
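/* The FPU status word is fetched with fnstsw; with TARGET_USE_SAHF it is
   copied into the flags with sahf and tested as an unordered comparison,
   otherwise bit 0x04 of its high byte (the C2 flag) is tested directly. */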
16034 void
16035 ix86_emit_fp_unordered_jump (rtx label)
16037 rtx reg = gen_reg_rtx (HImode);
16038 rtx temp;
16040 emit_insn (gen_x86_fnstsw_1 (reg));
16042 if (TARGET_USE_SAHF)
16044 emit_insn (gen_x86_sahf_1 (reg));
16046 temp = gen_rtx_REG (CCmode, FLAGS_REG);
16047 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
16049 else
16051 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
16053 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16054 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
16057 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
16058 gen_rtx_LABEL_REF (VOIDmode, label),
16059 pc_rtx);
16060 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
16061 emit_jump_insn (temp);
16064 /* Output code to perform a log1p XFmode calculation. */
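/* fyl2xp1 is only accurate for |x| < 1 - sqrt(2)/2 (about 0.29289), the
   constant compared against below. In that range log1p (x) is computed as
   ln (2) * log2 (1 + x) via fyl2xp1; for larger |x| the explicit sum 1 + x
   is formed and fyl2x is used instead. The ln (2) factor is loaded with
   fldln2. */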
16066 void ix86_emit_i387_log1p (rtx op0, rtx op1)
16068 rtx label1 = gen_label_rtx ();
16069 rtx label2 = gen_label_rtx ();
16071 rtx tmp = gen_reg_rtx (XFmode);
16072 rtx tmp2 = gen_reg_rtx (XFmode);
16074 emit_insn (gen_absxf2 (tmp, op1));
16075 emit_insn (gen_cmpxf (tmp,
16076 CONST_DOUBLE_FROM_REAL_VALUE (
16077 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
16078 XFmode)));
16079 emit_jump_insn (gen_bge (label1));
16081 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16082 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
16083 emit_jump (label2);
16085 emit_label (label1);
16086 emit_move_insn (tmp, CONST1_RTX (XFmode));
16087 emit_insn (gen_addxf3 (tmp, op1, tmp));
16088 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
16089 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
16091 emit_label (label2);
16094 #include "gt-i386.h"