/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tuning for size */
  2, /* cost of an add instruction */
  3, /* cost of a lea instruction */
  2, /* variable shift costs */
  3, /* constant shift costs */
  {3, 3, 3, 3, 5}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {3, 3, 3, 3, 5}, /* cost of a divide/mod */
  3, /* cost of movsx */
  3, /* cost of movzx */
  0, /* "large" insn */
  2, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {2, 2, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 2}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {2, 2, 2}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  3, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {3, 3}, /* cost of storing MMX registers
	     in SImode and DImode */
  3, /* cost of moving SSE register */
  {3, 3, 3}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {3, 3, 3}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  2, /* cost of FADD and FSUB insns.  */
  2, /* cost of FMUL instruction.  */
  2, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  2, /* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  3, /* variable shift costs */
  2, /* constant shift costs */
  {6, 6, 6, 6, 6}, /* cost of starting a multiply */
  1, /* cost of multiply per each bit set */
  {23, 23, 23, 23, 23}, /* cost of a divide/mod */
  3, /* cost of movsx */
  2, /* cost of movzx */
  15, /* "large" insn */
  3, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  23, /* cost of FADD and FSUB insns.  */
  27, /* cost of FMUL instruction.  */
  88, /* cost of FDIV instruction.  */
  22, /* cost of FABS instruction.  */
  24, /* cost of FCHS instruction.  */
  122, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = { /* 486 specific costs */
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  3, /* variable shift costs */
  2, /* constant shift costs */
  {12, 12, 12, 12, 12}, /* cost of starting a multiply */
  1, /* cost of multiply per each bit set */
  {40, 40, 40, 40, 40}, /* cost of a divide/mod */
  3, /* cost of movsx */
  2, /* cost of movzx */
  15, /* "large" insn */
  3, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  1, /* Branch cost */
  8, /* cost of FADD and FSUB insns.  */
  16, /* cost of FMUL instruction.  */
  73, /* cost of FDIV instruction.  */
  3, /* cost of FABS instruction.  */
  3, /* cost of FCHS instruction.  */
  83, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  4, /* variable shift costs */
  1, /* constant shift costs */
  {11, 11, 11, 11, 11}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {25, 25, 25, 25, 25}, /* cost of a divide/mod */
  3, /* cost of movsx */
  2, /* cost of movzx */
  8, /* "large" insn */
  6, /* MOVE_RATIO */
  6, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  8, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
		 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
		 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  2, /* Branch cost */
  3, /* cost of FADD and FSUB insns.  */
  3, /* cost of FMUL instruction.  */
  39, /* cost of FDIV instruction.  */
  1, /* cost of FABS instruction.  */
  1, /* cost of FCHS instruction.  */
  70, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {4, 4, 4, 4, 4}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {17, 17, 17, 17, 17}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  8, /* "large" insn */
  6, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  3, /* cost of FADD and FSUB insns.  */
  5, /* cost of FMUL instruction.  */
  56, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  56, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1, /* cost of an add instruction */
  2, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {3, 3, 3, 3, 3}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {18, 18, 18, 18, 18}, /* cost of a divide/mod */
  2, /* cost of movsx */
  2, /* cost of movzx */
  8, /* "large" insn */
  4, /* MOVE_RATIO */
  3, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  6, /* MMX or SSE register to integer */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  1, /* Branch cost */
  2, /* cost of FADD and FSUB insns.  */
  2, /* cost of FMUL instruction.  */
  56, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  56, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1, /* cost of an add instruction */
  2, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {5, 5, 5, 5, 5}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 6}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  4, /* cost of FADD and FSUB insns.  */
  4, /* cost of FMUL instruction.  */
  24, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  35, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1, /* cost of an add instruction */
  2, /* cost of a lea instruction */
  1, /* variable shift costs */
  1, /* constant shift costs */
  {3, 4, 3, 4, 5}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {18, 26, 42, 74, 74}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  8, /* "large" insn */
  9, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
		 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
	     in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 3, 6}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  4, /* cost of FADD and FSUB insns.  */
  4, /* cost of FMUL instruction.  */
  19, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  35, /* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1, /* cost of an add instruction */
  1, /* cost of a lea instruction */
  4, /* variable shift costs */
  4, /* constant shift costs */
  {15, 15, 15, 15, 15}, /* cost of starting a multiply */
  0, /* cost of multiply per each bit set */
  {56, 56, 56, 56, 56}, /* cost of a divide/mod */
  1, /* cost of movsx */
  1, /* cost of movzx */
  16, /* "large" insn */
  6, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {2, 3, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
		in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
	     in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
	     in SImode and DImode */
  12, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
		   in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  10, /* MMX or SSE register to integer */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  2, /* Branch cost */
  5, /* cost of FADD and FSUB insns.  */
  7, /* cost of FMUL instruction.  */
  43, /* cost of FDIV instruction.  */
  2, /* cost of FABS instruction.  */
  2, /* cost of FCHS instruction.  */
  43, /* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

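/* Editor's note (illustration, not in the original file): cost decisions
   throughout the backend read the selected table through this pointer,
   e.g. a hypothetical query

     int cost = ix86_cost->add + ix86_cost->lea;

   override_options () below repoints ix86_cost at the table matching
   -mcpu=, or at size_cost when optimizing for size.  */
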
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just lower part of
   scalar values in proper format leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

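/* Editor's note (illustration, not in the original file): i386.h is expected
   to turn each bitmask above into a TARGET_* predicate by testing the bit of
   the CPU being tuned for, along the lines of

     #define CPUMASK (1 << ix86_cpu)
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)

   The exact macro spellings are assumed here; CPUMASK itself is used by
   override_options () further down in this file.  */
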
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,          /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,          /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,  /* fp regs */
  -1, -1, -1, -1, -1,              /* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,  /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,  /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,    /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,  /* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,          /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,  /* fp regs */
  -1, 9, -1, -1, -1,               /* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,  /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,  /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,  /* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
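/* Editor's note: assuming the x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, the register save area
   works out to 6 * 8 + 8 * 16 = 176 bytes.  */
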
/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
			)
   [va_arg registers]  (
			> to_allocate	      <- FRAME_POINTER
   [frame]	       (
			)
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;     /* for -mcpu=<xxx> */
const char *ix86_arch_string;    /* for -march=<xxx> */
const char *ix86_fpmath_string;  /* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;

static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
static rtx x86_this_parameter PARAMS ((tree));
static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					 HOST_WIDE_INT, tree));
static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
					     HOST_WIDE_INT, tree));
bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
static int ix86_fntype_regparm PARAMS ((tree));
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

struct gcc_target targetm = TARGET_INITIALIZER;

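/* Editor's note (illustration, not in the original file): generic code
   reaches the routines above through this hook vector, e.g. a call such as

     targetm.asm_out.function_epilogue (file, size);

   resolves to ix86_output_function_epilogue via the define earlier in this
   file.  The hook path name follows the GCC 3.x target vector layout and is
   assumed here.  */
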
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;  /* Processor costs */
      const int target_enable;             /* Target flags to enable.  */
      const int target_disable;            /* Target flags to disable.  */
      const int align_loop;                /* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;  /* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 1;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }
  if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
    x86_prefetch_sse = true;
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
    if (TARGET_64BIT)
      ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
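      /* Editor's note: the option value is log2 of the alignment in bytes,
	 so e.g. -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits,
	 i.e. 16-byte stack alignment.  */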
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}

void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}

/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",    0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",  1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { NULL, 0, 0, false, false, false, NULL }
};

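/* Editor's note (illustration, not in the original file): these attributes
   appear in user code as, e.g.,

     int __attribute__ ((regparm (3))) f (int a, int b, int c);
     int __attribute__ ((stdcall)) g (int x);

   regparm (3) passes the first three integer arguments in registers;
   stdcall makes the callee pop its (non-variadic) arguments.  */
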
1465 /* If PIC, we cannot make sibling calls to global functions
1466 because the PLT requires %ebx live.
1467 If we are returning floats on the register stack, we cannot make
1468 sibling calls to functions that return floats. (The stack adjust
1469 instruction will wind up after the sibcall jump, and not be executed.) */
1471 static bool
1472 ix86_function_ok_for_sibcall (decl, exp)
1473 tree decl;
1474 tree exp;
1476 /* If we are generating position-independent code, we cannot sibcall
1477 optimize any indirect call, or a direct call to a global function,
1478 as the PLT requires %ebx be live. */
1479 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1480 return false;
1482 /* If we are returning floats on the 80387 register stack, we cannot
1483 make a sibcall from a function that doesn't return a float to a
1484 function that does; the necessary stack adjustment will not be
1485 executed. */
1486 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1487 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1488 return false;
1490 /* If this call is indirect, we'll need to be able to use a call-clobbered
1491 register for the address of the target function. Make sure that all
1492 such registers are not used for passing parameters. */
1493 if (!decl && !TARGET_64BIT)
1495 int regparm = ix86_regparm;
1496 tree attr, type;
1498 /* We're looking at the CALL_EXPR; we need the type of the function. */
1499 type = TREE_OPERAND (exp, 0); /* pointer expression */
1500 type = TREE_TYPE (type); /* pointer type */
1501 type = TREE_TYPE (type); /* function type */
1503 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1504 if (attr)
1505 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1507 if (regparm >= 3)
1509 /* ??? Need to count the actual number of registers to be used,
1510 not the possible number of registers. Fix later. */
1511 return false;
1515 /* Otherwise okay. That also includes certain types of indirect calls. */
1516 return true;
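/* A minimal sketch of the PIC restriction above (hypothetical names):

       extern int g (int);
       int h (int x) { return g (x); }

   Compiled with -fPIC on ia32, the call to the global function g must
   go through the PLT, which requires %ebx to hold the GOT pointer, so
   the tail call cannot be converted into a sibcall jump.  */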
1519 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1520 arguments as in struct attribute_spec.handler. */
1521 static tree
1522 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1523 tree *node;
1524 tree name;
1525 tree args ATTRIBUTE_UNUSED;
1526 int flags ATTRIBUTE_UNUSED;
1527 bool *no_add_attrs;
1529 if (TREE_CODE (*node) != FUNCTION_TYPE
1530 && TREE_CODE (*node) != METHOD_TYPE
1531 && TREE_CODE (*node) != FIELD_DECL
1532 && TREE_CODE (*node) != TYPE_DECL)
1534 warning ("`%s' attribute only applies to functions",
1535 IDENTIFIER_POINTER (name));
1536 *no_add_attrs = true;
1538 else
1540 if (is_attribute_p ("fastcall", name))
1542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1544 error ("fastcall and stdcall attributes are not compatible");
1546 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1548 error ("fastcall and regparm attributes are not compatible");
1551 else if (is_attribute_p ("stdcall", name))
1553 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1555 error ("fastcall and stdcall attributes are not compatible");
1560 if (TARGET_64BIT)
1562 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1563 *no_add_attrs = true;
1566 return NULL_TREE;
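/* For instance, a declaration that combines conflicting conventions,
   such as the hypothetical

       void __attribute__ ((fastcall, stdcall)) f (void);

   is diagnosed by the checks above with "fastcall and stdcall
   attributes are not compatible".  */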
1569 /* Handle a "regparm" attribute;
1570 arguments as in struct attribute_spec.handler. */
1571 static tree
1572 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1573 tree *node;
1574 tree name;
1575 tree args;
1576 int flags ATTRIBUTE_UNUSED;
1577 bool *no_add_attrs;
1579 if (TREE_CODE (*node) != FUNCTION_TYPE
1580 && TREE_CODE (*node) != METHOD_TYPE
1581 && TREE_CODE (*node) != FIELD_DECL
1582 && TREE_CODE (*node) != TYPE_DECL)
1584 warning ("`%s' attribute only applies to functions",
1585 IDENTIFIER_POINTER (name));
1586 *no_add_attrs = true;
1588 else
1590 tree cst;
1592 cst = TREE_VALUE (args);
1593 if (TREE_CODE (cst) != INTEGER_CST)
1595 warning ("`%s' attribute requires an integer constant argument",
1596 IDENTIFIER_POINTER (name));
1597 *no_add_attrs = true;
1599 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1601 warning ("argument to `%s' attribute larger than %d",
1602 IDENTIFIER_POINTER (name), REGPARM_MAX);
1603 *no_add_attrs = true;
1606 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1608 error ("fastcall and regparm attributes are not compatible");
1612 return NULL_TREE;
1615 /* Return 0 if the attributes for two types are incompatible, 1 if they
1616 are compatible, and 2 if they are nearly compatible (which causes a
1617 warning to be generated). */
1619 static int
1620 ix86_comp_type_attributes (type1, type2)
1621 tree type1;
1622 tree type2;
1624 /* Check for mismatch of non-default calling convention. */
1625 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1627 if (TREE_CODE (type1) != FUNCTION_TYPE)
1628 return 1;
1630 /* Check for mismatched fastcall types */
1631 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1632 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1633 return 0;
1635 /* Check for mismatched return types (cdecl vs stdcall). */
1636 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1637 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1638 return 0;
1639 return 1;
1642 /* Return the regparm value for a function with the indicated TYPE. */
1644 static int
1645 ix86_fntype_regparm (type)
1646 tree type;
1648 tree attr;
1650 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1651 if (attr)
1652 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1653 else
1654 return ix86_regparm;
1657 /* Value is the number of bytes of arguments automatically
1658 popped when returning from a subroutine call.
1659 FUNDECL is the declaration node of the function (as a tree),
1660 FUNTYPE is the data type of the function (as a tree),
1661 or for a library call it is an identifier node for the subroutine name.
1662 SIZE is the number of bytes of arguments passed on the stack.
1664 On the 80386, the RTD insn may be used to pop them if the number
1665 of args is fixed, but if the number is variable then the caller
1666 must pop them all. RTD can't be used for library calls now
1667 because the library is compiled with the Unix compiler.
1668 Use of RTD is a selectable option, since it is incompatible with
1669 standard Unix calling sequences. If the option is not selected,
1670 the caller must always pop the args.
1672 The attribute stdcall is equivalent to RTD on a per module basis. */
1675 ix86_return_pops_args (fundecl, funtype, size)
1676 tree fundecl;
1677 tree funtype;
1678 int size;
1680 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1682 /* Cdecl functions override -mrtd, and never pop the stack. */
1683 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1685 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1686 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1687 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1688 rtd = 1;
1690 if (rtd
1691 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1692 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1693 == void_type_node)))
1694 return size;
1697 /* Lose any fake structure return argument if it is passed on the stack. */
1698 if (aggregate_value_p (TREE_TYPE (funtype))
1699 && !TARGET_64BIT)
1701 int nregs = ix86_fntype_regparm (funtype);
1703 if (!nregs)
1704 return GET_MODE_SIZE (Pmode);
1707 return 0;
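/* A worked example of the rules above (hypothetical function):

       int __attribute__ ((stdcall)) f (int a, int b);

   has a fixed argument list, so this hook returns SIZE (8 bytes here)
   and f returns with "ret $8"; a variadic version would return 0 and
   leave the stack cleanup to the caller.  */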
1710 /* Argument support functions. */
1712 /* Return true when register may be used to pass function parameters. */
1713 bool
1714 ix86_function_arg_regno_p (regno)
1715 int regno;
1717 int i;
1718 if (!TARGET_64BIT)
1719 return (regno < REGPARM_MAX
1720 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1721 if (SSE_REGNO_P (regno) && TARGET_SSE)
1722 return true;
1723 /* RAX is used as a hidden argument to varargs functions. */
1724 if (!regno)
1725 return true;
1726 for (i = 0; i < REGPARM_MAX; i++)
1727 if (regno == x86_64_int_parameter_registers[i])
1728 return true;
1729 return false;
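/* For reference, x86_64_int_parameter_registers holds the six integer
   argument registers of the x86-64 ABI in order: RDI, RSI, RDX, RCX,
   R8 and R9. So for a hypothetical call f (a, b, c) with integer
   arguments, a, b and c land in RDI, RSI and RDX respectively.  */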
1732 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1733 for a call to a function whose data type is FNTYPE.
1734 For a library call, FNTYPE is 0. */
1736 void
1737 init_cumulative_args (cum, fntype, libname)
1738 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1739 tree fntype; /* tree ptr for function decl */
1740 rtx libname; /* SYMBOL_REF of library name or 0 */
1742 static CUMULATIVE_ARGS zero_cum;
1743 tree param, next_param;
1745 if (TARGET_DEBUG_ARG)
1747 fprintf (stderr, "\ninit_cumulative_args (");
1748 if (fntype)
1749 fprintf (stderr, "fntype code = %s, ret code = %s",
1750 tree_code_name[(int) TREE_CODE (fntype)],
1751 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1752 else
1753 fprintf (stderr, "no fntype");
1755 if (libname)
1756 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1759 *cum = zero_cum;
1761 /* Set up the number of registers to use for passing arguments. */
1762 cum->nregs = ix86_regparm;
1763 cum->sse_nregs = SSE_REGPARM_MAX;
1764 if (fntype && !TARGET_64BIT)
1766 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1768 if (attr)
1769 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1771 cum->maybe_vaarg = false;
1773 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1774 if (fntype && !TARGET_64BIT)
1776 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1778 cum->nregs = 2;
1779 cum->fastcall = 1;
1784 /* Determine if this function has variable arguments. This is
1785 indicated by the last argument being 'void_type_node' if there
1786 are no variable arguments. If there are variable arguments, then
1787 we won't pass anything in registers. */
1789 if (cum->nregs)
1791 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1792 param != 0; param = next_param)
1794 next_param = TREE_CHAIN (param);
1795 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1797 if (!TARGET_64BIT)
1799 cum->nregs = 0;
1800 cum->fastcall = 0;
1802 cum->maybe_vaarg = true;
1806 if ((!fntype && !libname)
1807 || (fntype && !TYPE_ARG_TYPES (fntype)))
1808 cum->maybe_vaarg = 1;
1810 if (TARGET_DEBUG_ARG)
1811 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1813 return;
1816 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1817 of this code is to classify each 8bytes of incoming argument by the register
1818 class and assign registers accordingly. */
1820 /* Return the union class of CLASS1 and CLASS2.
1821 See the x86-64 PS ABI for details. */
1823 static enum x86_64_reg_class
1824 merge_classes (class1, class2)
1825 enum x86_64_reg_class class1, class2;
1827 /* Rule #1: If both classes are equal, this is the resulting class. */
1828 if (class1 == class2)
1829 return class1;
1831 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1832 the other class. */
1833 if (class1 == X86_64_NO_CLASS)
1834 return class2;
1835 if (class2 == X86_64_NO_CLASS)
1836 return class1;
1838 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1839 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1840 return X86_64_MEMORY_CLASS;
1842 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1843 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1844 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1845 return X86_64_INTEGERSI_CLASS;
1846 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1847 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1848 return X86_64_INTEGER_CLASS;
1850 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1851 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1852 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1853 return X86_64_MEMORY_CLASS;
1855 /* Rule #6: Otherwise class SSE is used. */
1856 return X86_64_SSE_CLASS;
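/* A worked example of the merge rules above, for a hypothetical

       struct s { int i; float f; };

   The int classifies as INTEGERSI; the float at offset 4 is not
   8-byte aligned and classifies as SSE; rule #4 then merges the pair
   to INTEGER, so the whole struct travels in one integer register.  */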
1859 /* Classify the argument of type TYPE and mode MODE.
1860 CLASSES will be filled by the register class used to pass each word
1861 of the operand. The number of words is returned. In case the parameter
1862 should be passed in memory, 0 is returned. As a special case for zero
1863 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1865 BIT_OFFSET is used internally for handling records; it specifies the
1866 offset in bits, modulo 256 to avoid overflow cases.
1868 See the x86-64 PS ABI for details.
1871 static int
1872 classify_argument (mode, type, classes, bit_offset)
1873 enum machine_mode mode;
1874 tree type;
1875 enum x86_64_reg_class classes[MAX_CLASSES];
1876 int bit_offset;
1878 int bytes =
1879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1882 /* Variable sized entities are always passed/returned in memory. */
1883 if (bytes < 0)
1884 return 0;
1886 if (type && AGGREGATE_TYPE_P (type))
1888 int i;
1889 tree field;
1890 enum x86_64_reg_class subclasses[MAX_CLASSES];
1892 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1893 if (bytes > 16)
1894 return 0;
1896 for (i = 0; i < words; i++)
1897 classes[i] = X86_64_NO_CLASS;
1899 /* Zero sized arrays or structures are NO_CLASS. Since we return 0 to
1900 signal the memory class, handle this as a special case. */
1901 if (!words)
1903 classes[0] = X86_64_NO_CLASS;
1904 return 1;
1907 /* Classify each field of the record and merge the classes. */
1908 if (TREE_CODE (type) == RECORD_TYPE)
1910 /* For C++ classes, first merge in the fields of the base classes. */
1911 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1913 tree bases = TYPE_BINFO_BASETYPES (type);
1914 int n_bases = TREE_VEC_LENGTH (bases);
1915 int i;
1917 for (i = 0; i < n_bases; ++i)
1919 tree binfo = TREE_VEC_ELT (bases, i);
1920 int num;
1921 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1922 tree type = BINFO_TYPE (binfo);
1924 num = classify_argument (TYPE_MODE (type),
1925 type, subclasses,
1926 (offset + bit_offset) % 256);
1927 if (!num)
1928 return 0;
1929 for (i = 0; i < num; i++)
1931 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1932 classes[i + pos] =
1933 merge_classes (subclasses[i], classes[i + pos]);
1937 /* And now merge the fields of structure. */
1938 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1940 if (TREE_CODE (field) == FIELD_DECL)
1942 int num;
1944 /* Bitfields are always classified as integer. Handle them
1945 early, since later code would consider them to be
1946 misaligned integers. */
1947 if (DECL_BIT_FIELD (field))
1949 for (i = int_bit_position (field) / 8 / 8;
1950 i < (int_bit_position (field)
1951 + tree_low_cst (DECL_SIZE (field), 0)
1952 + 63) / 8 / 8; i++)
1953 classes[i] =
1954 merge_classes (X86_64_INTEGER_CLASS,
1955 classes[i]);
1957 else
1959 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1960 TREE_TYPE (field), subclasses,
1961 (int_bit_position (field)
1962 + bit_offset) % 256);
1963 if (!num)
1964 return 0;
1965 for (i = 0; i < num; i++)
1967 int pos =
1968 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1969 classes[i + pos] =
1970 merge_classes (subclasses[i], classes[i + pos]);
1976 /* Arrays are handled as small records. */
1977 else if (TREE_CODE (type) == ARRAY_TYPE)
1979 int num;
1980 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1981 TREE_TYPE (type), subclasses, bit_offset);
1982 if (!num)
1983 return 0;
1985 /* The partial classes are now full classes. */
1986 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1987 subclasses[0] = X86_64_SSE_CLASS;
1988 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1989 subclasses[0] = X86_64_INTEGER_CLASS;
1991 for (i = 0; i < words; i++)
1992 classes[i] = subclasses[i % num];
1994 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1995 else if (TREE_CODE (type) == UNION_TYPE
1996 || TREE_CODE (type) == QUAL_UNION_TYPE)
1998 /* For C++ classes, first merge in the fields of the base classes. */
1999 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2001 tree bases = TYPE_BINFO_BASETYPES (type);
2002 int n_bases = TREE_VEC_LENGTH (bases);
2003 int i;
2005 for (i = 0; i < n_bases; ++i)
2007 tree binfo = TREE_VEC_ELT (bases, i);
2008 int num;
2009 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2010 tree type = BINFO_TYPE (binfo);
2012 num = classify_argument (TYPE_MODE (type),
2013 type, subclasses,
2014 (offset + (bit_offset % 64)) % 256);
2015 if (!num)
2016 return 0;
2017 for (i = 0; i < num; i++)
2019 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2020 classes[i + pos] =
2021 merge_classes (subclasses[i], classes[i + pos]);
2025 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2027 if (TREE_CODE (field) == FIELD_DECL)
2029 int num;
2030 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2031 TREE_TYPE (field), subclasses,
2032 bit_offset);
2033 if (!num)
2034 return 0;
2035 for (i = 0; i < num; i++)
2036 classes[i] = merge_classes (subclasses[i], classes[i]);
2040 else
2041 abort ();
2043 /* Final merger cleanup. */
2044 for (i = 0; i < words; i++)
2046 /* If one class is MEMORY, everything should be passed in
2047 memory. */
2048 if (classes[i] == X86_64_MEMORY_CLASS)
2049 return 0;
2051 /* The X86_64_SSEUP_CLASS should be always preceded by
2052 X86_64_SSE_CLASS. */
2053 if (classes[i] == X86_64_SSEUP_CLASS
2054 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2055 classes[i] = X86_64_SSE_CLASS;
2057 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2058 if (classes[i] == X86_64_X87UP_CLASS
2059 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2060 classes[i] = X86_64_SSE_CLASS;
2062 return words;
2065 /* Compute the alignment needed. We align all types to their natural
2066 boundaries; XFmode is treated as 128-bit and XCmode as 256-bit aligned. */
2067 if (mode != VOIDmode && mode != BLKmode)
2069 int mode_alignment = GET_MODE_BITSIZE (mode);
2071 if (mode == XFmode)
2072 mode_alignment = 128;
2073 else if (mode == XCmode)
2074 mode_alignment = 256;
2075 /* Misaligned fields are always returned in memory. */
2076 if (bit_offset % mode_alignment)
2077 return 0;
2080 /* Classification of atomic types. */
2081 switch (mode)
2083 case DImode:
2084 case SImode:
2085 case HImode:
2086 case QImode:
2087 case CSImode:
2088 case CHImode:
2089 case CQImode:
2090 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2091 classes[0] = X86_64_INTEGERSI_CLASS;
2092 else
2093 classes[0] = X86_64_INTEGER_CLASS;
2094 return 1;
2095 case CDImode:
2096 case TImode:
2097 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2098 return 2;
2099 case CTImode:
2100 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2101 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2102 return 4;
2103 case SFmode:
2104 if (!(bit_offset % 64))
2105 classes[0] = X86_64_SSESF_CLASS;
2106 else
2107 classes[0] = X86_64_SSE_CLASS;
2108 return 1;
2109 case DFmode:
2110 classes[0] = X86_64_SSEDF_CLASS;
2111 return 1;
2112 case TFmode:
2113 classes[0] = X86_64_X87_CLASS;
2114 classes[1] = X86_64_X87UP_CLASS;
2115 return 2;
2116 case TCmode:
2117 classes[0] = X86_64_X87_CLASS;
2118 classes[1] = X86_64_X87UP_CLASS;
2119 classes[2] = X86_64_X87_CLASS;
2120 classes[3] = X86_64_X87UP_CLASS;
2121 return 4;
2122 case DCmode:
2123 classes[0] = X86_64_SSEDF_CLASS;
2124 classes[1] = X86_64_SSEDF_CLASS;
2125 return 2;
2126 case SCmode:
2127 classes[0] = X86_64_SSE_CLASS;
2128 return 1;
2129 case V4SFmode:
2130 case V4SImode:
2131 case V16QImode:
2132 case V8HImode:
2133 case V2DFmode:
2134 case V2DImode:
2135 classes[0] = X86_64_SSE_CLASS;
2136 classes[1] = X86_64_SSEUP_CLASS;
2137 return 2;
2138 case V2SFmode:
2139 case V2SImode:
2140 case V4HImode:
2141 case V8QImode:
2142 return 0;
2143 case BLKmode:
2144 case VOIDmode:
2145 return 0;
2146 default:
2147 abort ();
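/* Two worked classifications of the above (hypothetical types):

       struct a { double d; double e; };
           16 bytes, two eightbytes: SSEDF + SSEDF, so it is passed
           in two SSE registers.

       struct b { char c[24]; };
           24 bytes, larger than 16: classify_argument returns 0 and
           the struct is passed in memory.  */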
2151 /* Examine the argument and set the number of registers required in each
2152 class. Return 0 iff the parameter should be passed in memory. */
2153 static int
2154 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2155 enum machine_mode mode;
2156 tree type;
2157 int *int_nregs, *sse_nregs;
2158 int in_return;
2160 enum x86_64_reg_class class[MAX_CLASSES];
2161 int n = classify_argument (mode, type, class, 0);
2163 *int_nregs = 0;
2164 *sse_nregs = 0;
2165 if (!n)
2166 return 0;
2167 for (n--; n >= 0; n--)
2168 switch (class[n])
2170 case X86_64_INTEGER_CLASS:
2171 case X86_64_INTEGERSI_CLASS:
2172 (*int_nregs)++;
2173 break;
2174 case X86_64_SSE_CLASS:
2175 case X86_64_SSESF_CLASS:
2176 case X86_64_SSEDF_CLASS:
2177 (*sse_nregs)++;
2178 break;
2179 case X86_64_NO_CLASS:
2180 case X86_64_SSEUP_CLASS:
2181 break;
2182 case X86_64_X87_CLASS:
2183 case X86_64_X87UP_CLASS:
2184 if (!in_return)
2185 return 0;
2186 break;
2187 case X86_64_MEMORY_CLASS:
2188 abort ();
2190 return 1;
2192 /* Construct container for the argument used by GCC interface. See
2193 FUNCTION_ARG for the detailed description. */
2194 static rtx
2195 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2196 enum machine_mode mode;
2197 tree type;
2198 int in_return;
2199 int nintregs, nsseregs;
2200 const int * intreg;
2201 int sse_regno;
2203 enum machine_mode tmpmode;
2204 int bytes =
2205 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2206 enum x86_64_reg_class class[MAX_CLASSES];
2207 int n;
2208 int i;
2209 int nexps = 0;
2210 int needed_sseregs, needed_intregs;
2211 rtx exp[MAX_CLASSES];
2212 rtx ret;
2214 n = classify_argument (mode, type, class, 0);
2215 if (TARGET_DEBUG_ARG)
2217 if (!n)
2218 fprintf (stderr, "Memory class\n");
2219 else
2221 fprintf (stderr, "Classes:");
2222 for (i = 0; i < n; i++)
2224 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2226 fprintf (stderr, "\n");
2229 if (!n)
2230 return NULL;
2231 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2232 return NULL;
2233 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2234 return NULL;
2236 /* First construct simple cases. Avoid SCmode, since we want to use
2237 a single register to pass this type. */
2238 if (n == 1 && mode != SCmode)
2239 switch (class[0])
2241 case X86_64_INTEGER_CLASS:
2242 case X86_64_INTEGERSI_CLASS:
2243 return gen_rtx_REG (mode, intreg[0]);
2244 case X86_64_SSE_CLASS:
2245 case X86_64_SSESF_CLASS:
2246 case X86_64_SSEDF_CLASS:
2247 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2248 case X86_64_X87_CLASS:
2249 return gen_rtx_REG (mode, FIRST_STACK_REG);
2250 case X86_64_NO_CLASS:
2251 /* Zero sized array, struct or class. */
2252 return NULL;
2253 default:
2254 abort ();
2256 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2257 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2258 if (n == 2
2259 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2260 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2261 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2262 && class[1] == X86_64_INTEGER_CLASS
2263 && (mode == CDImode || mode == TImode)
2264 && intreg[0] + 1 == intreg[1])
2265 return gen_rtx_REG (mode, intreg[0]);
2266 if (n == 4
2267 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2268 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2269 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2271 /* Otherwise figure out the entries of the PARALLEL. */
2272 for (i = 0; i < n; i++)
2274 switch (class[i])
2276 case X86_64_NO_CLASS:
2277 break;
2278 case X86_64_INTEGER_CLASS:
2279 case X86_64_INTEGERSI_CLASS:
2280 /* Merge TImodes on aligned occasions here too. */
2281 if (i * 8 + 8 > bytes)
2282 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2283 else if (class[i] == X86_64_INTEGERSI_CLASS)
2284 tmpmode = SImode;
2285 else
2286 tmpmode = DImode;
2287 /* We've requested 24 bytes, for which no mode exists. Use DImode. */
2288 if (tmpmode == BLKmode)
2289 tmpmode = DImode;
2290 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2291 gen_rtx_REG (tmpmode, *intreg),
2292 GEN_INT (i*8));
2293 intreg++;
2294 break;
2295 case X86_64_SSESF_CLASS:
2296 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2297 gen_rtx_REG (SFmode,
2298 SSE_REGNO (sse_regno)),
2299 GEN_INT (i*8));
2300 sse_regno++;
2301 break;
2302 case X86_64_SSEDF_CLASS:
2303 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2304 gen_rtx_REG (DFmode,
2305 SSE_REGNO (sse_regno)),
2306 GEN_INT (i*8));
2307 sse_regno++;
2308 break;
2309 case X86_64_SSE_CLASS:
2310 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2311 tmpmode = TImode;
2312 else
2313 tmpmode = DImode;
2314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2315 gen_rtx_REG (tmpmode,
2316 SSE_REGNO (sse_regno)),
2317 GEN_INT (i*8));
2318 if (tmpmode == TImode)
2319 i++;
2320 sse_regno++;
2321 break;
2322 default:
2323 abort ();
2326 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2327 for (i = 0; i < nexps; i++)
2328 XVECEXP (ret, 0, i) = exp [i];
2329 return ret;
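/* For instance, a hypothetical struct { long l; double d; } classifies
   as INTEGER + SSEDF; none of the simple cases above apply, so the
   loop builds a PARALLEL roughly like

       (parallel [(expr_list (reg:DI rdi) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   handing the integer half to the next free integer register and the
   double to the next free SSE register.  */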
2332 /* Update the data in CUM to advance over an argument
2333 of mode MODE and data type TYPE.
2334 (TYPE is null for libcalls where that information may not be available.) */
2336 void
2337 function_arg_advance (cum, mode, type, named)
2338 CUMULATIVE_ARGS *cum; /* current arg information */
2339 enum machine_mode mode; /* current arg mode */
2340 tree type; /* type of the argument or 0 if lib support */
2341 int named; /* whether or not the argument was named */
2343 int bytes =
2344 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2345 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2347 if (TARGET_DEBUG_ARG)
2348 fprintf (stderr,
2349 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2350 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2351 if (TARGET_64BIT)
2353 int int_nregs, sse_nregs;
2354 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2355 cum->words += words;
2356 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2358 cum->nregs -= int_nregs;
2359 cum->sse_nregs -= sse_nregs;
2360 cum->regno += int_nregs;
2361 cum->sse_regno += sse_nregs;
2363 else
2364 cum->words += words;
2366 else
2368 if (TARGET_SSE && mode == TImode)
2370 cum->sse_words += words;
2371 cum->sse_nregs -= 1;
2372 cum->sse_regno += 1;
2373 if (cum->sse_nregs <= 0)
2375 cum->sse_nregs = 0;
2376 cum->sse_regno = 0;
2379 else
2381 cum->words += words;
2382 cum->nregs -= words;
2383 cum->regno += words;
2385 if (cum->nregs <= 0)
2387 cum->nregs = 0;
2388 cum->regno = 0;
2392 return;
2395 /* Define where to put the arguments to a function.
2396 Value is zero to push the argument on the stack,
2397 or a hard register in which to store the argument.
2399 MODE is the argument's machine mode.
2400 TYPE is the data type of the argument (as a tree).
2401 This is null for libcalls where that information may
2402 not be available.
2403 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2404 the preceding args and about the function being called.
2405 NAMED is nonzero if this argument is a named parameter
2406 (otherwise it is an extra parameter matching an ellipsis). */
2409 function_arg (cum, mode, type, named)
2410 CUMULATIVE_ARGS *cum; /* current arg information */
2411 enum machine_mode mode; /* current arg mode */
2412 tree type; /* type of the argument or 0 if lib support */
2413 int named; /* != 0 for normal args, == 0 for ... args */
2415 rtx ret = NULL_RTX;
2416 int bytes =
2417 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2418 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2420 /* Handle a hidden AL argument containing the number of SSE registers
2421 used by varargs x86-64 functions. For the i386 ABI just return
2422 constm1_rtx to avoid any AL settings. */
2423 if (mode == VOIDmode)
2425 if (TARGET_64BIT)
2426 return GEN_INT (cum->maybe_vaarg
2427 ? (cum->sse_nregs < 0
2428 ? SSE_REGPARM_MAX
2429 : cum->sse_regno)
2430 : -1);
2431 else
2432 return constm1_rtx;
2434 if (TARGET_64BIT)
2435 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2436 &x86_64_int_parameter_registers [cum->regno],
2437 cum->sse_regno);
2438 else
2439 switch (mode)
2441 /* For now, pass fp/complex values on the stack. */
2442 default:
2443 break;
2445 case BLKmode:
2446 case DImode:
2447 case SImode:
2448 case HImode:
2449 case QImode:
2450 if (words <= cum->nregs)
2452 int regno = cum->regno;
2454 /* Fastcall allocates the first two DWORD (SImode) or
2455 smaller arguments to ECX and EDX. */
2456 if (cum->fastcall)
2458 if (mode == BLKmode || mode == DImode)
2459 break;
2461 /* ECX, not EAX, is the first allocated register. */
2462 if (regno == 0)
2463 regno = 2;
2465 ret = gen_rtx_REG (mode, regno);
2467 break;
2468 case TImode:
2469 if (cum->sse_nregs)
2470 ret = gen_rtx_REG (mode, cum->sse_regno);
2471 break;
2474 if (TARGET_DEBUG_ARG)
2476 fprintf (stderr,
2477 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2478 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2480 if (ret)
2481 print_simple_rtl (stderr, ret);
2482 else
2483 fprintf (stderr, ", stack");
2485 fprintf (stderr, " )\n");
2488 return ret;
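/* A short fastcall example of the assignment above (hypothetical
   function):

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   With cum->fastcall set, a is assigned ECX (regno 0 is remapped to 2),
   b is assigned EDX, and c no longer fits in the two fastcall
   registers and is pushed on the stack.  */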
2491 /* A C expression that indicates when an argument must be passed by
2492 reference. If nonzero for an argument, a copy of that argument is
2493 made in memory and a pointer to the argument is passed instead of
2494 the argument itself. The pointer is passed in whatever way is
2495 appropriate for passing a pointer to that type. */
2498 function_arg_pass_by_reference (cum, mode, type, named)
2499 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2500 enum machine_mode mode ATTRIBUTE_UNUSED;
2501 tree type;
2502 int named ATTRIBUTE_UNUSED;
2504 if (!TARGET_64BIT)
2505 return 0;
2507 if (type && int_size_in_bytes (type) == -1)
2509 if (TARGET_DEBUG_ARG)
2510 fprintf (stderr, "function_arg_pass_by_reference\n");
2511 return 1;
2514 return 0;
2517 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2518 and type. */
2521 ix86_function_arg_boundary (mode, type)
2522 enum machine_mode mode;
2523 tree type;
2525 int align;
2526 if (!TARGET_64BIT)
2527 return PARM_BOUNDARY;
2528 if (type)
2529 align = TYPE_ALIGN (type);
2530 else
2531 align = GET_MODE_ALIGNMENT (mode);
2532 if (align < PARM_BOUNDARY)
2533 align = PARM_BOUNDARY;
2534 if (align > 128)
2535 align = 128;
2536 return align;
2539 /* Return true if N is a possible register number of function value. */
2540 bool
2541 ix86_function_value_regno_p (regno)
2542 int regno;
2544 if (!TARGET_64BIT)
2546 return ((regno) == 0
2547 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2548 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2550 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2551 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2552 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2555 /* Define how to find the value returned by a function.
2556 VALTYPE is the data type of the value (as a tree).
2557 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2558 otherwise, FUNC is 0. */
2560 ix86_function_value (valtype)
2561 tree valtype;
2563 if (TARGET_64BIT)
2565 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2566 REGPARM_MAX, SSE_REGPARM_MAX,
2567 x86_64_int_return_registers, 0);
2568 /* For zero sized structures, construct_container returns NULL, but we
2569 need to keep the rest of the compiler happy by returning a meaningful value. */
2570 if (!ret)
2571 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2572 return ret;
2574 else
2575 return gen_rtx_REG (TYPE_MODE (valtype),
2576 ix86_value_regno (TYPE_MODE (valtype)));
2579 /* Return nonzero iff TYPE is returned in memory. */
2581 ix86_return_in_memory (type)
2582 tree type;
2584 int needed_intregs, needed_sseregs;
2585 if (TARGET_64BIT)
2587 return !examine_argument (TYPE_MODE (type), type, 1,
2588 &needed_intregs, &needed_sseregs);
2590 else
2592 if (TYPE_MODE (type) == BLKmode
2593 || (VECTOR_MODE_P (TYPE_MODE (type))
2594 && int_size_in_bytes (type) == 8)
2595 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2596 && TYPE_MODE (type) != TFmode
2597 && !VECTOR_MODE_P (TYPE_MODE (type))))
2598 return 1;
2599 return 0;
2603 /* Define how to find the value returned by a library function
2604 assuming the value has mode MODE. */
2606 ix86_libcall_value (mode)
2607 enum machine_mode mode;
2609 if (TARGET_64BIT)
2611 switch (mode)
2613 case SFmode:
2614 case SCmode:
2615 case DFmode:
2616 case DCmode:
2617 return gen_rtx_REG (mode, FIRST_SSE_REG);
2618 case TFmode:
2619 case TCmode:
2620 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2621 default:
2622 return gen_rtx_REG (mode, 0);
2625 else
2626 return gen_rtx_REG (mode, ix86_value_regno (mode));
2629 /* Given a mode, return the register to use for a return value. */
2631 static int
2632 ix86_value_regno (mode)
2633 enum machine_mode mode;
2635 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2636 return FIRST_FLOAT_REG;
2637 if (mode == TImode || VECTOR_MODE_P (mode))
2638 return FIRST_SSE_REG;
2639 return 0;
2642 /* Create the va_list data type. */
2644 tree
2645 ix86_build_va_list ()
2647 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2649 /* For i386 we use plain pointer to argument area. */
2650 if (!TARGET_64BIT)
2651 return build_pointer_type (char_type_node);
2653 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2654 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2656 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2657 unsigned_type_node);
2658 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2659 unsigned_type_node);
2660 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2661 ptr_type_node);
2662 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2663 ptr_type_node);
2665 DECL_FIELD_CONTEXT (f_gpr) = record;
2666 DECL_FIELD_CONTEXT (f_fpr) = record;
2667 DECL_FIELD_CONTEXT (f_ovf) = record;
2668 DECL_FIELD_CONTEXT (f_sav) = record;
2670 TREE_CHAIN (record) = type_decl;
2671 TYPE_NAME (record) = type_decl;
2672 TYPE_FIELDS (record) = f_gpr;
2673 TREE_CHAIN (f_gpr) = f_fpr;
2674 TREE_CHAIN (f_fpr) = f_ovf;
2675 TREE_CHAIN (f_ovf) = f_sav;
2677 layout_type (record);
2679 /* The correct type is an array type of one element. */
2680 return build_array_type (record, build_index_type (size_zero_node));
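/* In C terms the record built above corresponds to the familiar x86-64
   va_list layout (a sketch matching the field builders above):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */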
2683 /* Perform any actions needed for a function that is receiving a
2684 variable number of arguments.
2686 CUM is as above.
2688 MODE and TYPE are the mode and type of the current parameter.
2690 PRETEND_SIZE is a variable that should be set to the amount of stack
2691 that must be pushed by the prologue to pretend that our caller pushed it.
2694 Normally, this macro will push all remaining incoming registers on the
2695 stack and set PRETEND_SIZE to the length of the registers pushed. */
2697 void
2698 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2699 CUMULATIVE_ARGS *cum;
2700 enum machine_mode mode;
2701 tree type;
2702 int *pretend_size ATTRIBUTE_UNUSED;
2703 int no_rtl;
2706 CUMULATIVE_ARGS next_cum;
2707 rtx save_area = NULL_RTX, mem;
2708 rtx label;
2709 rtx label_ref;
2710 rtx tmp_reg;
2711 rtx nsse_reg;
2712 int set;
2713 tree fntype;
2714 int stdarg_p;
2715 int i;
2717 if (!TARGET_64BIT)
2718 return;
2720 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2721 ix86_save_varrargs_registers = 1;
2723 fntype = TREE_TYPE (current_function_decl);
2724 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2725 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2726 != void_type_node));
2728 /* For varargs, we do not want to skip the dummy va_dcl argument.
2729 For stdargs, we do want to skip the last named argument. */
2730 next_cum = *cum;
2731 if (stdarg_p)
2732 function_arg_advance (&next_cum, mode, type, 1);
2734 if (!no_rtl)
2735 save_area = frame_pointer_rtx;
2737 set = get_varargs_alias_set ();
2739 for (i = next_cum.regno; i < ix86_regparm; i++)
2741 mem = gen_rtx_MEM (Pmode,
2742 plus_constant (save_area, i * UNITS_PER_WORD));
2743 set_mem_alias_set (mem, set);
2744 emit_move_insn (mem, gen_rtx_REG (Pmode,
2745 x86_64_int_parameter_registers[i]));
2748 if (next_cum.sse_nregs)
2750 /* Now emit code to save the SSE registers. The AX parameter contains
2751 the number of SSE parameter registers used to call this function. We
2752 use the sse_prologue_save insn template, which produces a computed
2753 jump across the SSE saves. We need some preparation work to get this working. */
2755 label = gen_label_rtx ();
2756 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2758 /* Compute the address to jump to:
2759 label - 4*eax + nnamed_sse_arguments*4 (the code below scales by 4). */
2760 tmp_reg = gen_reg_rtx (Pmode);
2761 nsse_reg = gen_reg_rtx (Pmode);
2762 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2763 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2764 gen_rtx_MULT (Pmode, nsse_reg,
2765 GEN_INT (4))));
2766 if (next_cum.sse_regno)
2767 emit_move_insn
2768 (nsse_reg,
2769 gen_rtx_CONST (DImode,
2770 gen_rtx_PLUS (DImode,
2771 label_ref,
2772 GEN_INT (next_cum.sse_regno * 4))));
2773 else
2774 emit_move_insn (nsse_reg, label_ref);
2775 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2777 /* Compute the address of the memory block we save into. We always use a
2778 pointer pointing 127 bytes past the first byte to store - this keeps the
2779 instruction size limited to 4 bytes, since a signed 8-bit displacement suffices. */
2780 tmp_reg = gen_reg_rtx (Pmode);
2781 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2782 plus_constant (save_area,
2783 8 * REGPARM_MAX + 127)));
2784 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2785 set_mem_alias_set (mem, set);
2786 set_mem_align (mem, BITS_PER_WORD);
2788 /* And finally do the dirty job! */
2789 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2790 GEN_INT (next_cum.sse_regno), label));
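/* The resulting register save area layout, as assumed by ix86_va_start
   and ix86_va_arg below (byte offsets into the save area):

       0 ..  47    RDI, RSI, RDX, RCX, R8, R9   (8 bytes each)
      48 .. 175    XMM0 .. XMM7                 (16 bytes each)

   which is why fp_offset values start at 8*REGPARM_MAX = 48.  */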
2795 /* Implement va_start. */
2797 void
2798 ix86_va_start (valist, nextarg)
2799 tree valist;
2800 rtx nextarg;
2802 HOST_WIDE_INT words, n_gpr, n_fpr;
2803 tree f_gpr, f_fpr, f_ovf, f_sav;
2804 tree gpr, fpr, ovf, sav, t;
2806 /* Only the 64-bit target needs something special. */
2807 if (!TARGET_64BIT)
2809 std_expand_builtin_va_start (valist, nextarg);
2810 return;
2813 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2814 f_fpr = TREE_CHAIN (f_gpr);
2815 f_ovf = TREE_CHAIN (f_fpr);
2816 f_sav = TREE_CHAIN (f_ovf);
2818 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2819 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2820 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2821 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2822 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2824 /* Count number of gp and fp argument registers used. */
2825 words = current_function_args_info.words;
2826 n_gpr = current_function_args_info.regno;
2827 n_fpr = current_function_args_info.sse_regno;
2829 if (TARGET_DEBUG_ARG)
2830 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2831 (int) words, (int) n_gpr, (int) n_fpr);
2833 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2834 build_int_2 (n_gpr * 8, 0));
2835 TREE_SIDE_EFFECTS (t) = 1;
2836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2838 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2839 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2840 TREE_SIDE_EFFECTS (t) = 1;
2841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2843 /* Find the overflow area. */
2844 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2845 if (words != 0)
2846 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2847 build_int_2 (words * UNITS_PER_WORD, 0));
2848 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2849 TREE_SIDE_EFFECTS (t) = 1;
2850 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2852 /* Find the register save area.
2853 The function prologue saves it right above the stack frame. */
2854 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2855 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2856 TREE_SIDE_EFFECTS (t) = 1;
2857 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
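/* A worked example for a hypothetical int f (int n, ...): one named
   integer argument has been consumed, so va_start stores
   gp_offset = 8, fp_offset = 48 (8*REGPARM_MAX), points
   overflow_arg_area just past the named stack words, and points
   reg_save_area at the block saved by the prologue code above.  */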
2860 /* Implement va_arg. */
2862 ix86_va_arg (valist, type)
2863 tree valist, type;
2865 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2866 tree f_gpr, f_fpr, f_ovf, f_sav;
2867 tree gpr, fpr, ovf, sav, t;
2868 int size, rsize;
2869 rtx lab_false, lab_over = NULL_RTX;
2870 rtx addr_rtx, r;
2871 rtx container;
2872 int indirect_p = 0;
2874 /* Only the 64-bit target needs something special. */
2875 if (!TARGET_64BIT)
2877 return std_expand_builtin_va_arg (valist, type);
2880 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2881 f_fpr = TREE_CHAIN (f_gpr);
2882 f_ovf = TREE_CHAIN (f_fpr);
2883 f_sav = TREE_CHAIN (f_ovf);
2885 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2886 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2887 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2888 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2889 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2891 size = int_size_in_bytes (type);
2892 if (size == -1)
2894 /* Passed by reference. */
2895 indirect_p = 1;
2896 type = build_pointer_type (type);
2897 size = int_size_in_bytes (type);
2899 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2901 container = construct_container (TYPE_MODE (type), type, 0,
2902 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2904 /* Pull the value out of the saved registers ... */
2907 addr_rtx = gen_reg_rtx (Pmode);
2909 if (container)
2911 rtx int_addr_rtx, sse_addr_rtx;
2912 int needed_intregs, needed_sseregs;
2913 int need_temp;
2915 lab_over = gen_label_rtx ();
2916 lab_false = gen_label_rtx ();
2918 examine_argument (TYPE_MODE (type), type, 0,
2919 &needed_intregs, &needed_sseregs);
2922 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2923 || TYPE_ALIGN (type) > 128);
2925 /* In case we are passing a structure, verify that it is a consecutive
2926 block on the register save area. If not, we need to do moves. */
2927 if (!need_temp && !REG_P (container))
2929 /* Verify that all registers are strictly consecutive */
2930 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2932 int i;
2934 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2936 rtx slot = XVECEXP (container, 0, i);
2937 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2938 || INTVAL (XEXP (slot, 1)) != i * 16)
2939 need_temp = 1;
2942 else
2944 int i;
2946 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2948 rtx slot = XVECEXP (container, 0, i);
2949 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2950 || INTVAL (XEXP (slot, 1)) != i * 8)
2951 need_temp = 1;
2955 if (!need_temp)
2957 int_addr_rtx = addr_rtx;
2958 sse_addr_rtx = addr_rtx;
2960 else
2962 int_addr_rtx = gen_reg_rtx (Pmode);
2963 sse_addr_rtx = gen_reg_rtx (Pmode);
2965 /* First ensure that we fit completely in registers. */
2966 if (needed_intregs)
2968 emit_cmp_and_jump_insns (expand_expr
2969 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2970 GEN_INT ((REGPARM_MAX - needed_intregs +
2971 1) * 8), GE, const1_rtx, SImode,
2972 1, lab_false);
2974 if (needed_sseregs)
2976 emit_cmp_and_jump_insns (expand_expr
2977 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2978 GEN_INT ((SSE_REGPARM_MAX -
2979 needed_sseregs + 1) * 16 +
2980 REGPARM_MAX * 8), GE, const1_rtx,
2981 SImode, 1, lab_false);
2984 /* Compute index to start of area used for integer regs. */
2985 if (needed_intregs)
2987 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2988 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2989 if (r != int_addr_rtx)
2990 emit_move_insn (int_addr_rtx, r);
2992 if (needed_sseregs)
2994 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2995 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2996 if (r != sse_addr_rtx)
2997 emit_move_insn (sse_addr_rtx, r);
2999 if (need_temp)
3001 int i;
3002 rtx mem;
3004 /* Never use the memory itself, as it has the alias set. */
3005 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3006 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3007 set_mem_alias_set (mem, get_varargs_alias_set ());
3008 set_mem_align (mem, BITS_PER_UNIT);
3010 for (i = 0; i < XVECLEN (container, 0); i++)
3012 rtx slot = XVECEXP (container, 0, i);
3013 rtx reg = XEXP (slot, 0);
3014 enum machine_mode mode = GET_MODE (reg);
3015 rtx src_addr;
3016 rtx src_mem;
3017 int src_offset;
3018 rtx dest_mem;
3020 if (SSE_REGNO_P (REGNO (reg)))
3022 src_addr = sse_addr_rtx;
3023 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3025 else
3027 src_addr = int_addr_rtx;
3028 src_offset = REGNO (reg) * 8;
3030 src_mem = gen_rtx_MEM (mode, src_addr);
3031 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3032 src_mem = adjust_address (src_mem, mode, src_offset);
3033 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3034 emit_move_insn (dest_mem, src_mem);
3038 if (needed_intregs)
3041 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3042 build_int_2 (needed_intregs * 8, 0));
3043 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3044 TREE_SIDE_EFFECTS (t) = 1;
3045 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3047 if (needed_sseregs)
3050 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3051 build_int_2 (needed_sseregs * 16, 0));
3052 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3053 TREE_SIDE_EFFECTS (t) = 1;
3054 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3057 emit_jump_insn (gen_jump (lab_over));
3058 emit_barrier ();
3059 emit_label (lab_false);
3062 /* ... otherwise out of the overflow area. */
3064 /* Care for on-stack alignment if needed. */
3065 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3066 t = ovf;
3067 else
3069 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3070 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3071 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3073 t = save_expr (t);
3075 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3076 if (r != addr_rtx)
3077 emit_move_insn (addr_rtx, r);
3080 build (PLUS_EXPR, TREE_TYPE (t), t,
3081 build_int_2 (rsize * UNITS_PER_WORD, 0));
3082 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3083 TREE_SIDE_EFFECTS (t) = 1;
3084 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3086 if (container)
3087 emit_label (lab_over);
3089 if (indirect_p)
3091 r = gen_rtx_MEM (Pmode, addr_rtx);
3092 set_mem_alias_set (r, get_varargs_alias_set ());
3093 emit_move_insn (addr_rtx, r);
3096 return addr_rtx;
3099 /* Return nonzero if OP is either a i387 or SSE fp register. */
3101 any_fp_register_operand (op, mode)
3102 rtx op;
3103 enum machine_mode mode ATTRIBUTE_UNUSED;
3105 return ANY_FP_REG_P (op);
3108 /* Return nonzero if OP is an i387 fp register. */
3110 fp_register_operand (op, mode)
3111 rtx op;
3112 enum machine_mode mode ATTRIBUTE_UNUSED;
3114 return FP_REG_P (op);
3117 /* Return nonzero if OP is a non-fp register_operand. */
3119 register_and_not_any_fp_reg_operand (op, mode)
3120 rtx op;
3121 enum machine_mode mode;
3123 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3126 /* Return nonzero if OP is a register operand other than an
3127 i387 fp register. */
3129 register_and_not_fp_reg_operand (op, mode)
3130 rtx op;
3131 enum machine_mode mode;
3133 return register_operand (op, mode) && !FP_REG_P (op);
3136 /* Return nonzero if OP is general operand representable on x86_64. */
3139 x86_64_general_operand (op, mode)
3140 rtx op;
3141 enum machine_mode mode;
3143 if (!TARGET_64BIT)
3144 return general_operand (op, mode);
3145 if (nonimmediate_operand (op, mode))
3146 return 1;
3147 return x86_64_sign_extended_value (op);
3150 /* Return nonzero if OP is general operand representable on x86_64
3151 as either sign extended or zero extended constant. */
3154 x86_64_szext_general_operand (op, mode)
3155 rtx op;
3156 enum machine_mode mode;
3158 if (!TARGET_64BIT)
3159 return general_operand (op, mode);
3160 if (nonimmediate_operand (op, mode))
3161 return 1;
3162 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3165 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3168 x86_64_nonmemory_operand (op, mode)
3169 rtx op;
3170 enum machine_mode mode;
3172 if (!TARGET_64BIT)
3173 return nonmemory_operand (op, mode);
3174 if (register_operand (op, mode))
3175 return 1;
3176 return x86_64_sign_extended_value (op);
3179 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3182 x86_64_movabs_operand (op, mode)
3183 rtx op;
3184 enum machine_mode mode;
3186 if (!TARGET_64BIT || !flag_pic)
3187 return nonmemory_operand (op, mode);
3188 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3189 return 1;
3190 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3191 return 1;
3192 return 0;
3195 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3198 x86_64_szext_nonmemory_operand (op, mode)
3199 rtx op;
3200 enum machine_mode mode;
3202 if (!TARGET_64BIT)
3203 return nonmemory_operand (op, mode);
3204 if (register_operand (op, mode))
3205 return 1;
3206 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3209 /* Return nonzero if OP is immediate operand representable on x86_64. */
3212 x86_64_immediate_operand (op, mode)
3213 rtx op;
3214 enum machine_mode mode;
3216 if (!TARGET_64BIT)
3217 return immediate_operand (op, mode);
3218 return x86_64_sign_extended_value (op);
3221 /* Return nonzero if OP is immediate operand representable on x86_64. */
3224 x86_64_zext_immediate_operand (op, mode)
3225 rtx op;
3226 enum machine_mode mode ATTRIBUTE_UNUSED;
3228 return x86_64_zero_extended_value (op);
3231 /* Return nonzero if OP is (const_int 1), else return zero. */
3234 const_int_1_operand (op, mode)
3235 rtx op;
3236 enum machine_mode mode ATTRIBUTE_UNUSED;
3238 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3241 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3242 for shift & compare patterns, as shifting by 0 does not change flags),
3243 else return zero. */
3246 const_int_1_31_operand (op, mode)
3247 rtx op;
3248 enum machine_mode mode ATTRIBUTE_UNUSED;
3250 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3253 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3254 reference and a constant. */
3257 symbolic_operand (op, mode)
3258 register rtx op;
3259 enum machine_mode mode ATTRIBUTE_UNUSED;
3261 switch (GET_CODE (op))
3263 case SYMBOL_REF:
3264 case LABEL_REF:
3265 return 1;
3267 case CONST:
3268 op = XEXP (op, 0);
3269 if (GET_CODE (op) == SYMBOL_REF
3270 || GET_CODE (op) == LABEL_REF
3271 || (GET_CODE (op) == UNSPEC
3272 && (XINT (op, 1) == UNSPEC_GOT
3273 || XINT (op, 1) == UNSPEC_GOTOFF
3274 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3275 return 1;
3276 if (GET_CODE (op) != PLUS
3277 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3278 return 0;
3280 op = XEXP (op, 0);
3281 if (GET_CODE (op) == SYMBOL_REF
3282 || GET_CODE (op) == LABEL_REF)
3283 return 1;
3284 /* Only @GOTOFF gets offsets. */
3285 if (GET_CODE (op) != UNSPEC
3286 || XINT (op, 1) != UNSPEC_GOTOFF)
3287 return 0;
3289 op = XVECEXP (op, 0, 0);
3290 if (GET_CODE (op) == SYMBOL_REF
3291 || GET_CODE (op) == LABEL_REF)
3292 return 1;
3293 return 0;
3295 default:
3296 return 0;
3300 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3303 pic_symbolic_operand (op, mode)
3304 register rtx op;
3305 enum machine_mode mode ATTRIBUTE_UNUSED;
3307 if (GET_CODE (op) != CONST)
3308 return 0;
3309 op = XEXP (op, 0);
3310 if (TARGET_64BIT)
3312 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3313 return 1;
3315 else
3317 if (GET_CODE (op) == UNSPEC)
3318 return 1;
3319 if (GET_CODE (op) != PLUS
3320 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3321 return 0;
3322 op = XEXP (op, 0);
3323 if (GET_CODE (op) == UNSPEC)
3324 return 1;
3326 return 0;
3329 /* Return true if OP is a symbolic operand that resolves locally. */
3331 static int
3332 local_symbolic_operand (op, mode)
3333 rtx op;
3334 enum machine_mode mode ATTRIBUTE_UNUSED;
3336 if (GET_CODE (op) == CONST
3337 && GET_CODE (XEXP (op, 0)) == PLUS
3338 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3339 op = XEXP (XEXP (op, 0), 0);
3341 if (GET_CODE (op) == LABEL_REF)
3342 return 1;
3344 if (GET_CODE (op) != SYMBOL_REF)
3345 return 0;
3347 /* These we've been told are local by varasm and encode_section_info
3348 respectively. */
3349 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3350 return 1;
3352 /* There is, however, a not insubstantial body of code in the rest of
3353 the compiler that assumes it can just stick the results of
3354 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3355 /* ??? This is a hack. Should update the body of the compiler to
3356 always create a DECL and invoke targetm.encode_section_info. */
3357 if (strncmp (XSTR (op, 0), internal_label_prefix,
3358 internal_label_prefix_len) == 0)
3359 return 1;
3361 return 0;
3364 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3367 tls_symbolic_operand (op, mode)
3368 register rtx op;
3369 enum machine_mode mode ATTRIBUTE_UNUSED;
3371 const char *symbol_str;
3373 if (GET_CODE (op) != SYMBOL_REF)
3374 return 0;
3375 symbol_str = XSTR (op, 0);
3377 if (symbol_str[0] != '%')
3378 return 0;
3379 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3382 static int
3383 tls_symbolic_operand_1 (op, kind)
3384 rtx op;
3385 enum tls_model kind;
3387 const char *symbol_str;
3389 if (GET_CODE (op) != SYMBOL_REF)
3390 return 0;
3391 symbol_str = XSTR (op, 0);
3393 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3397 global_dynamic_symbolic_operand (op, mode)
3398 register rtx op;
3399 enum machine_mode mode ATTRIBUTE_UNUSED;
3401 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3405 local_dynamic_symbolic_operand (op, mode)
3406 register rtx op;
3407 enum machine_mode mode ATTRIBUTE_UNUSED;
3409 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3413 initial_exec_symbolic_operand (op, mode)
3414 register rtx op;
3415 enum machine_mode mode ATTRIBUTE_UNUSED;
3417 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3421 local_exec_symbolic_operand (op, mode)
3422 register rtx op;
3423 enum machine_mode mode ATTRIBUTE_UNUSED;
3425 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3428 /* Test for a valid operand for a call instruction. Don't allow the
3429 arg pointer register or virtual regs since they may decay into
3430 reg + const, which the patterns can't handle. */
3433 call_insn_operand (op, mode)
3434 rtx op;
3435 enum machine_mode mode ATTRIBUTE_UNUSED;
3437 /* Disallow indirect through a virtual register. This leads to
3438 compiler aborts when trying to eliminate them. */
3439 if (GET_CODE (op) == REG
3440 && (op == arg_pointer_rtx
3441 || op == frame_pointer_rtx
3442 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3443 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3444 return 0;
3446 /* Disallow `call 1234'. Due to varying assembler lameness this
3447 gets either rejected or translated to `call .+1234'. */
3448 if (GET_CODE (op) == CONST_INT)
3449 return 0;
3451 /* Explicitly allow SYMBOL_REF even if pic. */
3452 if (GET_CODE (op) == SYMBOL_REF)
3453 return 1;
3455 /* Otherwise we can allow any general_operand in the address. */
3456 return general_operand (op, Pmode);
3459 /* Test for a valid operand for a call instruction. Don't allow the
3460 arg pointer register or virtual regs since they may decay into
3461 reg + const, which the patterns can't handle. */
3464 sibcall_insn_operand (op, mode)
3465 rtx op;
3466 enum machine_mode mode ATTRIBUTE_UNUSED;
3468 /* Disallow indirect through a virtual register. This leads to
3469 compiler aborts when trying to eliminate them. */
3470 if (GET_CODE (op) == REG
3471 && (op == arg_pointer_rtx
3472 || op == frame_pointer_rtx
3473 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3474 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3475 return 0;
3477 /* Explicitly allow SYMBOL_REF even if pic. */
3478 if (GET_CODE (op) == SYMBOL_REF)
3479 return 1;
3481 /* Otherwise we can only allow register operands. */
3482 return register_operand (op, Pmode);
3486 constant_call_address_operand (op, mode)
3487 rtx op;
3488 enum machine_mode mode ATTRIBUTE_UNUSED;
3490 if (GET_CODE (op) == CONST
3491 && GET_CODE (XEXP (op, 0)) == PLUS
3492 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3493 op = XEXP (XEXP (op, 0), 0);
3494 return GET_CODE (op) == SYMBOL_REF;
3497 /* Match exactly zero and one. */
3500 const0_operand (op, mode)
3501 register rtx op;
3502 enum machine_mode mode;
3504 return op == CONST0_RTX (mode);
3508 const1_operand (op, mode)
3509 register rtx op;
3510 enum machine_mode mode ATTRIBUTE_UNUSED;
3512 return op == const1_rtx;
3515 /* Match 2, 4, or 8. Used for leal multiplicands. */
3518 const248_operand (op, mode)
3519 register rtx op;
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3522 return (GET_CODE (op) == CONST_INT
3523 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
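/* These are exactly the scale factors the address generation unit
   accepts; for example the hypothetical

       leal (%eax,%ebx,4), %ecx

   computes ecx = eax + ebx*4 in a single instruction.  */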
3526 /* True if this is a constant appropriate for an increment or decrement. */
3529 incdec_operand (op, mode)
3530 register rtx op;
3531 enum machine_mode mode ATTRIBUTE_UNUSED;
3533 /* On Pentium 4, the inc and dec operations cause an extra dependency on
3534 the flags register, since the carry flag is not updated. */
3535 if (TARGET_PENTIUM4 && !optimize_size)
3536 return 0;
3537 return op == const1_rtx || op == constm1_rtx;
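/* When this predicate rejects +-1 on Pentium 4, the patterns fall back
   to the immediate add/sub forms such as "addl $1, %eax", which write
   the whole flags register and so avoid the partial flags stall that
   "incl %eax" would cause by preserving the carry flag.  */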
3540 /* Return nonzero if OP is acceptable as operand of DImode shift
3541 expander. */
3544 shiftdi_operand (op, mode)
3545 rtx op;
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3548 if (TARGET_64BIT)
3549 return nonimmediate_operand (op, mode);
3550 else
3551 return register_operand (op, mode);
3554 /* Return false if this is the stack pointer, or any other fake
3555 register eliminable to the stack pointer. Otherwise, this is
3556 a register operand.
3558 This is used to prevent esp from being used as an index reg,
3559 which would only happen in pathological cases. */
3562 reg_no_sp_operand (op, mode)
3563 register rtx op;
3564 enum machine_mode mode;
3566 rtx t = op;
3567 if (GET_CODE (t) == SUBREG)
3568 t = SUBREG_REG (t);
3569 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3570 return 0;
3572 return register_operand (op, mode);
3576 mmx_reg_operand (op, mode)
3577 register rtx op;
3578 enum machine_mode mode ATTRIBUTE_UNUSED;
3580 return MMX_REG_P (op);
3583 /* Return false if this is any eliminable register. Otherwise
3584 general_operand. */
3587 general_no_elim_operand (op, mode)
3588 register rtx op;
3589 enum machine_mode mode;
3591 rtx t = op;
3592 if (GET_CODE (t) == SUBREG)
3593 t = SUBREG_REG (t);
3594 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3595 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3596 || t == virtual_stack_dynamic_rtx)
3597 return 0;
3598 if (REG_P (t)
3599 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3600 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3601 return 0;
3603 return general_operand (op, mode);
3606 /* Return false if this is any eliminable register. Otherwise
3607 register_operand or const_int. */
3610 nonmemory_no_elim_operand (op, mode)
3611 register rtx op;
3612 enum machine_mode mode;
3614 rtx t = op;
3615 if (GET_CODE (t) == SUBREG)
3616 t = SUBREG_REG (t);
3617 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3618 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3619 || t == virtual_stack_dynamic_rtx)
3620 return 0;
3622 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3625 /* Return false if this is any eliminable register or stack register,
3626 otherwise work like register_operand. */
3629 index_register_operand (op, mode)
3630 register rtx op;
3631 enum machine_mode mode;
3633 rtx t = op;
3634 if (GET_CODE (t) == SUBREG)
3635 t = SUBREG_REG (t);
3636 if (!REG_P (t))
3637 return 0;
3638 if (t == arg_pointer_rtx
3639 || t == frame_pointer_rtx
3640 || t == virtual_incoming_args_rtx
3641 || t == virtual_stack_vars_rtx
3642 || t == virtual_stack_dynamic_rtx
3643 || REGNO (t) == STACK_POINTER_REGNUM)
3644 return 0;
3646 return general_operand (op, mode);
3649 /* Return true if op is a Q_REGS class register. */
3652 q_regs_operand (op, mode)
3653 register rtx op;
3654 enum machine_mode mode;
3656 if (mode != VOIDmode && GET_MODE (op) != mode)
3657 return 0;
3658 if (GET_CODE (op) == SUBREG)
3659 op = SUBREG_REG (op);
3660 return ANY_QI_REG_P (op);
3663 /* Return true if op is a flags register. */
3666 flags_reg_operand (op, mode)
3667 register rtx op;
3668 enum machine_mode mode;
3670 if (mode != VOIDmode && GET_MODE (op) != mode)
3671 return 0;
3672 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3675 /* Return true if op is a NON_Q_REGS class register. */
3678 non_q_regs_operand (op, mode)
3679 register rtx op;
3680 enum machine_mode mode;
3682 if (mode != VOIDmode && GET_MODE (op) != mode)
3683 return 0;
3684 if (GET_CODE (op) == SUBREG)
3685 op = SUBREG_REG (op);
3686 return NON_QI_REG_P (op);
3690 zero_extended_scalar_load_operand (op, mode)
3691 rtx op;
3692 enum machine_mode mode ATTRIBUTE_UNUSED;
3694 unsigned n_elts;
3695 if (GET_CODE (op) != MEM)
3696 return 0;
3697 op = maybe_get_pool_constant (op);
3698 if (!op)
3699 return 0;
3700 if (GET_CODE (op) != CONST_VECTOR)
3701 return 0;
3702 n_elts =
3703 (GET_MODE_SIZE (GET_MODE (op)) /
3704 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3705 for (n_elts--; n_elts > 0; n_elts--)
3707 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3708 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3709 return 0;
3711 return 1;
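/* I.e. this accepts a constant-pool vector such as
   (const_vector:V4SI [x 0 0 0]); every element but the first must be
   zero. */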
3714 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3715 insns. */
3717 sse_comparison_operator (op, mode)
3718 rtx op;
3719 enum machine_mode mode ATTRIBUTE_UNUSED;
3721 enum rtx_code code = GET_CODE (op);
3722 switch (code)
3724 /* Operations supported directly. */
3725 case EQ:
3726 case LT:
3727 case LE:
3728 case UNORDERED:
3729 case NE:
3730 case UNGE:
3731 case UNGT:
3732 case ORDERED:
3733 return 1;
3734 /* These are equivalent to ones above in non-IEEE comparisons. */
3735 case UNEQ:
3736 case UNLT:
3737 case UNLE:
3738 case LTGT:
3739 case GE:
3740 case GT:
3741 return !TARGET_IEEE_FP;
3742 default:
3743 return 0;
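/* The codes accepted above map onto the eight CMPPS/CMPSS immediate
   encodings: eq, lt, le, unord, neq, nlt (for UNGE), nle (for UNGT)
   and ord. */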
3746 /* Return 1 if OP is a valid comparison operator in valid mode. */
3748 ix86_comparison_operator (op, mode)
3749 register rtx op;
3750 enum machine_mode mode;
3752 enum machine_mode inmode;
3753 enum rtx_code code = GET_CODE (op);
3754 if (mode != VOIDmode && GET_MODE (op) != mode)
3755 return 0;
3756 if (GET_RTX_CLASS (code) != '<')
3757 return 0;
3758 inmode = GET_MODE (XEXP (op, 0));
3760 if (inmode == CCFPmode || inmode == CCFPUmode)
3762 enum rtx_code second_code, bypass_code;
3763 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3764 return (bypass_code == NIL && second_code == NIL);
3766 switch (code)
3768 case EQ: case NE:
3769 return 1;
3770 case LT: case GE:
3771 if (inmode == CCmode || inmode == CCGCmode
3772 || inmode == CCGOCmode || inmode == CCNOmode)
3773 return 1;
3774 return 0;
3775 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3776 if (inmode == CCmode)
3777 return 1;
3778 return 0;
3779 case GT: case LE:
3780 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3781 return 1;
3782 return 0;
3783 default:
3784 return 0;
3788 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3791 fcmov_comparison_operator (op, mode)
3792 register rtx op;
3793 enum machine_mode mode;
3795 enum machine_mode inmode;
3796 enum rtx_code code = GET_CODE (op);
3797 if (mode != VOIDmode && GET_MODE (op) != mode)
3798 return 0;
3799 if (GET_RTX_CLASS (code) != '<')
3800 return 0;
3801 inmode = GET_MODE (XEXP (op, 0));
3802 if (inmode == CCFPmode || inmode == CCFPUmode)
3804 enum rtx_code second_code, bypass_code;
3805 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3806 if (bypass_code != NIL || second_code != NIL)
3807 return 0;
3808 code = ix86_fp_compare_code_to_integer (code);
3810 /* The i387 supports just a limited set of condition codes. */
3811 switch (code)
3813 case LTU: case GTU: case LEU: case GEU:
3814 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3815 return 1;
3816 return 0;
3817 case ORDERED: case UNORDERED:
3818 case EQ: case NE:
3819 return 1;
3820 default:
3821 return 0;
3825 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3828 promotable_binary_operator (op, mode)
3829 register rtx op;
3830 enum machine_mode mode ATTRIBUTE_UNUSED;
3832 switch (GET_CODE (op))
3834 case MULT:
3835 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3836 but the 386 and 486 do HImode multiplies faster. */
3837 return ix86_cpu > PROCESSOR_I486;
3838 case PLUS:
3839 case AND:
3840 case IOR:
3841 case XOR:
3842 case ASHIFT:
3843 return 1;
3844 default:
3845 return 0;
3849 /* Nearly general operand, but accept any const_double, since we wish
3850 to be able to drop them into memory rather than have them get pulled
3851 into registers. */
3854 cmp_fp_expander_operand (op, mode)
3855 register rtx op;
3856 enum machine_mode mode;
3858 if (mode != VOIDmode && mode != GET_MODE (op))
3859 return 0;
3860 if (GET_CODE (op) == CONST_DOUBLE)
3861 return 1;
3862 return general_operand (op, mode);
3865 /* Match an SI or HImode register for a zero_extract. */
3868 ext_register_operand (op, mode)
3869 register rtx op;
3870 enum machine_mode mode ATTRIBUTE_UNUSED;
3872 int regno;
3873 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3874 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3875 return 0;
3877 if (!register_operand (op, VOIDmode))
3878 return 0;
3880 /* Be careful to accept only registers having upper parts. */
3881 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3882 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
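/* Hard registers 0..3 are %eax, %edx, %ecx and %ebx, the only ones
   with addressable high-byte parts (%ah etc.); pseudos above
   LAST_VIRTUAL_REGISTER are also accepted, since they can still be
   allocated to one of those four. */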
3885 /* Return 1 if this is a valid binary floating-point operation.
3886 OP is the expression matched, and MODE is its mode. */
3889 binary_fp_operator (op, mode)
3890 register rtx op;
3891 enum machine_mode mode;
3893 if (mode != VOIDmode && mode != GET_MODE (op))
3894 return 0;
3896 switch (GET_CODE (op))
3898 case PLUS:
3899 case MINUS:
3900 case MULT:
3901 case DIV:
3902 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3904 default:
3905 return 0;
3910 mult_operator (op, mode)
3911 register rtx op;
3912 enum machine_mode mode ATTRIBUTE_UNUSED;
3914 return GET_CODE (op) == MULT;
3918 div_operator (op, mode)
3919 register rtx op;
3920 enum machine_mode mode ATTRIBUTE_UNUSED;
3922 return GET_CODE (op) == DIV;
3926 arith_or_logical_operator (op, mode)
3927 rtx op;
3928 enum machine_mode mode;
3930 return ((mode == VOIDmode || GET_MODE (op) == mode)
3931 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3932 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3935 /* Returns 1 if OP is a memory operand with a displacement. */
3938 memory_displacement_operand (op, mode)
3939 register rtx op;
3940 enum machine_mode mode;
3942 struct ix86_address parts;
3944 if (! memory_operand (op, mode))
3945 return 0;
3947 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3948 abort ();
3950 return parts.disp != NULL_RTX;
3953 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3954 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3956 ??? It seems likely that this will only work because cmpsi is an
3957 expander, and no actual insns use this. */
3960 cmpsi_operand (op, mode)
3961 rtx op;
3962 enum machine_mode mode;
3964 if (nonimmediate_operand (op, mode))
3965 return 1;
3967 if (GET_CODE (op) == AND
3968 && GET_MODE (op) == SImode
3969 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3970 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3971 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3972 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3973 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3974 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3975 return 1;
3977 return 0;
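/* The AND/ZERO_EXTRACT form accepted above is the RTL for masking the
   high byte (bits 8..15) of a register with an immediate, i.e. an
   %ah-style test as re-emitted from testqi_ext_ccno_0. */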
3980 /* Returns 1 if OP is a memory operand that cannot be represented by
3981 the modRM array. */
3984 long_memory_operand (op, mode)
3985 register rtx op;
3986 enum machine_mode mode;
3988 if (! memory_operand (op, mode))
3989 return 0;
3991 return memory_address_length (op) != 0;
3994 /* Return nonzero if the rtx is known to be aligned. */
3997 aligned_operand (op, mode)
3998 rtx op;
3999 enum machine_mode mode;
4001 struct ix86_address parts;
4003 if (!general_operand (op, mode))
4004 return 0;
4006 /* Registers and immediate operands are always "aligned". */
4007 if (GET_CODE (op) != MEM)
4008 return 1;
4010 /* Don't even try to do any aligned optimizations with volatiles. */
4011 if (MEM_VOLATILE_P (op))
4012 return 0;
4014 op = XEXP (op, 0);
4016 /* Pushes and pops are only valid on the stack pointer. */
4017 if (GET_CODE (op) == PRE_DEC
4018 || GET_CODE (op) == POST_INC)
4019 return 1;
4021 /* Decode the address. */
4022 if (! ix86_decompose_address (op, &parts))
4023 abort ();
4025 if (parts.base && GET_CODE (parts.base) == SUBREG)
4026 parts.base = SUBREG_REG (parts.base);
4027 if (parts.index && GET_CODE (parts.index) == SUBREG)
4028 parts.index = SUBREG_REG (parts.index);
4030 /* Look for some component that isn't known to be aligned. */
4031 if (parts.index)
4033 if (parts.scale < 4
4034 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4035 return 0;
4037 if (parts.base)
4039 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4040 return 0;
4042 if (parts.disp)
4044 if (GET_CODE (parts.disp) != CONST_INT
4045 || (INTVAL (parts.disp) & 3) != 0)
4046 return 0;
4049 /* Didn't find one -- this must be an aligned address. */
4050 return 1;
4053 /* Return true if the constant is something that can be loaded with
4054 a special instruction. Only handle 0.0 and 1.0; others are less
4055 worthwhile. */
4058 standard_80387_constant_p (x)
4059 rtx x;
4061 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4062 return -1;
4063 /* Note that the 80387 has other constants, such as pi, that we should
4064 support too. On some machines these are much slower to load as a
4065 standard constant than to load from doubles in memory. */
4066 if (x == CONST0_RTX (GET_MODE (x)))
4067 return 1;
4068 if (x == CONST1_RTX (GET_MODE (x)))
4069 return 2;
4070 return 0;
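/* The returned value tells the output templates which load insn to use:
   1 is emitted as "fldz" and 2 as "fld1". */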
4073 /* Return 1 if X is an FP constant we can load into an SSE register
without using memory. */
4076 standard_sse_constant_p (x)
4077 rtx x;
4079 if (x == const0_rtx)
4080 return 1;
4081 return (x == CONST0_RTX (GET_MODE (x)));
4084 /* Returns 1 if OP contains a symbol reference */
4087 symbolic_reference_mentioned_p (op)
4088 rtx op;
4090 register const char *fmt;
4091 register int i;
4093 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4094 return 1;
4096 fmt = GET_RTX_FORMAT (GET_CODE (op));
4097 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4099 if (fmt[i] == 'E')
4101 register int j;
4103 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4104 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4105 return 1;
4108 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4109 return 1;
4112 return 0;
4115 /* Return 1 if it is appropriate to emit `ret' instructions in the
4116 body of a function. Do this only if the epilogue is simple, needing a
4117 couple of insns. Prior to reloading, we can't tell how many registers
4118 must be saved, so return 0 then. Return 0 if there is no frame
4119 marker to de-allocate.
4121 If NON_SAVING_SETJMP is defined and true, then it is not possible
4122 for the epilogue to be simple, so return 0. This is a special case
4123 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4124 until final, but jump_optimize may need to know sooner if a
4125 `return' is OK. */
4128 ix86_can_use_return_insn_p ()
4130 struct ix86_frame frame;
4132 #ifdef NON_SAVING_SETJMP
4133 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4134 return 0;
4135 #endif
4137 if (! reload_completed || frame_pointer_needed)
4138 return 0;
4140 /* Don't allow more than 32k bytes of arguments to be popped, since
4141 that's all we can do with one instruction. */
4142 if (current_function_pops_args
4143 && current_function_args_size >= 32768)
4144 return 0;
4146 ix86_compute_frame_layout (&frame);
4147 return frame.to_allocate == 0 && frame.nregs == 0;
4150 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4152 x86_64_sign_extended_value (value)
4153 rtx value;
4155 switch (GET_CODE (value))
4157 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4158 to be at least 32 and thus all acceptable constants are
4159 represented as CONST_INT. */
4160 case CONST_INT:
4161 if (HOST_BITS_PER_WIDE_INT == 32)
4162 return 1;
4163 else
4165 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4166 return trunc_int_for_mode (val, SImode) == val;
4168 break;
4170 /* For certain code models, the symbolic references are known to fit.
4171 In the CM_SMALL_PIC model we know a reference fits if it is local to
4172 the shared library. Don't count TLS SYMBOL_REFs here, since they should
4173 fit only when inside an UNSPEC, handled below. */
4174 case SYMBOL_REF:
4175 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4177 /* For certain code models, the code is near as well. */
4178 case LABEL_REF:
4179 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4180 || ix86_cmodel == CM_KERNEL);
4182 /* We also may accept the offsetted memory references in certain special
4183 cases. */
4184 case CONST:
4185 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4186 switch (XINT (XEXP (value, 0), 1))
4188 case UNSPEC_GOTPCREL:
4189 case UNSPEC_DTPOFF:
4190 case UNSPEC_GOTNTPOFF:
4191 case UNSPEC_NTPOFF:
4192 return 1;
4193 default:
4194 break;
4196 if (GET_CODE (XEXP (value, 0)) == PLUS)
4198 rtx op1 = XEXP (XEXP (value, 0), 0);
4199 rtx op2 = XEXP (XEXP (value, 0), 1);
4200 HOST_WIDE_INT offset;
4202 if (ix86_cmodel == CM_LARGE)
4203 return 0;
4204 if (GET_CODE (op2) != CONST_INT)
4205 return 0;
4206 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4207 switch (GET_CODE (op1))
4209 case SYMBOL_REF:
4210 /* For CM_SMALL assume that the latest object is 16MB below the
4211 end of the 31-bit boundary. We may also accept pretty
4212 large negative constants, knowing that all objects are
4213 in the positive half of the address space. */
4214 if (ix86_cmodel == CM_SMALL
4215 && offset < 16*1024*1024
4216 && trunc_int_for_mode (offset, SImode) == offset)
4217 return 1;
4218 /* For CM_KERNEL we know that all objects reside in the
4219 negative half of the 32-bit address space. We must not
4220 accept negative offsets, since they may land just outside the
4221 object, but we may accept pretty large positive ones. */
4222 if (ix86_cmodel == CM_KERNEL
4223 && offset > 0
4224 && trunc_int_for_mode (offset, SImode) == offset)
4225 return 1;
4226 break;
4227 case LABEL_REF:
4228 /* These conditions are similar to SYMBOL_REF ones, just the
4229 constraints for code models differ. */
4230 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4231 && offset < 16*1024*1024
4232 && trunc_int_for_mode (offset, SImode) == offset)
4233 return 1;
4234 if (ix86_cmodel == CM_KERNEL
4235 && offset > 0
4236 && trunc_int_for_mode (offset, SImode) == offset)
4237 return 1;
4238 break;
4239 case UNSPEC:
4240 switch (XINT (op1, 1))
4242 case UNSPEC_DTPOFF:
4243 case UNSPEC_NTPOFF:
4244 if (offset > 0
4245 && trunc_int_for_mode (offset, SImode) == offset)
4246 return 1;
4248 break;
4249 default:
4250 return 0;
4253 return 0;
4254 default:
4255 return 0;
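/* For CONST_INT on a 64-bit host this reduces to a range check:
   e.g. 0x7fffffff is accepted but 0x80000000 is rejected, since the
   latter is not equal to its own 32->64 bit sign extension. */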
4259 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4261 x86_64_zero_extended_value (value)
4262 rtx value;
4264 switch (GET_CODE (value))
4266 case CONST_DOUBLE:
4267 if (HOST_BITS_PER_WIDE_INT == 32)
4268 return (GET_MODE (value) == VOIDmode
4269 && !CONST_DOUBLE_HIGH (value));
4270 else
4271 return 0;
4272 case CONST_INT:
4273 if (HOST_BITS_PER_WIDE_INT == 32)
4274 return INTVAL (value) >= 0;
4275 else
4276 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4277 break;
4279 /* For certain code models, the symbolic references are known to fit. */
4280 case SYMBOL_REF:
4281 return ix86_cmodel == CM_SMALL;
4283 /* For certain code models, the code is near as well. */
4284 case LABEL_REF:
4285 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4287 /* We also may accept the offsetted memory references in certain special
4288 cases. */
4289 case CONST:
4290 if (GET_CODE (XEXP (value, 0)) == PLUS)
4292 rtx op1 = XEXP (XEXP (value, 0), 0);
4293 rtx op2 = XEXP (XEXP (value, 0), 1);
4295 if (ix86_cmodel == CM_LARGE)
4296 return 0;
4297 switch (GET_CODE (op1))
4299 case SYMBOL_REF:
4300 return 0;
4301 /* For small code model we may accept pretty large positive
4302 offsets, since one bit is available for free. Negative
4303 offsets are limited by the size of NULL pointer area
4304 specified by the ABI. */
4305 if (ix86_cmodel == CM_SMALL
4306 && GET_CODE (op2) == CONST_INT
4307 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4308 && (trunc_int_for_mode (INTVAL (op2), SImode)
4309 == INTVAL (op2)))
4310 return 1;
4311 /* ??? For the kernel, we may accept adjustment of
4312 -0x10000000, since we know that it will just convert
4313 negative address space to positive, but perhaps this
4314 is not worthwhile. */
4315 break;
4316 case LABEL_REF:
4317 /* These conditions are similar to SYMBOL_REF ones, just the
4318 constraints for code models differ. */
4319 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4320 && GET_CODE (op2) == CONST_INT
4321 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4322 && (trunc_int_for_mode (INTVAL (op2), SImode)
4323 == INTVAL (op2)))
4324 return 1;
4325 break;
4326 default:
4327 return 0;
4330 return 0;
4331 default:
4332 return 0;
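/* Here the CONST_INT case accepts exactly the values 0 .. 0xffffffff;
   e.g. "movl $0xdeadbeef, %eax" zero-extends the value into %rax. */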
4336 /* Value should be nonzero if functions must have frame pointers.
4337 Zero means the frame pointer need not be set up (and parms may
4338 be accessed via the stack pointer) in functions that seem suitable. */
4341 ix86_frame_pointer_required ()
4343 /* If we accessed previous frames, then the generated code expects
4344 to be able to access the saved ebp value in our frame. */
4345 if (cfun->machine->accesses_prev_frame)
4346 return 1;
4348 /* Several x86 OSes need a frame pointer for other reasons,
4349 usually pertaining to setjmp. */
4350 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4351 return 1;
4353 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4354 the frame pointer by default. Turn it back on now if we've not
4355 got a leaf function. */
4356 if (TARGET_OMIT_LEAF_FRAME_POINTER
4357 && (!current_function_is_leaf))
4358 return 1;
4360 if (current_function_profile)
4361 return 1;
4363 return 0;
4366 /* Record that the current function accesses previous call frames. */
4368 void
4369 ix86_setup_frame_addresses ()
4371 cfun->machine->accesses_prev_frame = 1;
4374 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4375 # define USE_HIDDEN_LINKONCE 1
4376 #else
4377 # define USE_HIDDEN_LINKONCE 0
4378 #endif
4380 static int pic_labels_used;
4382 /* Fills in the label name that should be used for a pc thunk for
4383 the given register. */
4385 static void
4386 get_pc_thunk_name (name, regno)
4387 char name[32];
4388 unsigned int regno;
4390 if (USE_HIDDEN_LINKONCE)
4391 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4392 else
4393 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
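/* E.g. for %ebx this yields "__i686.get_pc_thunk.bx" when hidden
   linkonce sections are available, and an internal "LPR3"-style label
   otherwise. */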
4397 /* This function emits the pc thunks used for -fpic; each thunk loads
4398 its register with the return address of the caller and then returns. */
4400 void
4401 ix86_asm_file_end (file)
4402 FILE *file;
4404 rtx xops[2];
4405 int regno;
4407 for (regno = 0; regno < 8; ++regno)
4409 char name[32];
4411 if (! ((pic_labels_used >> regno) & 1))
4412 continue;
4414 get_pc_thunk_name (name, regno);
4416 if (USE_HIDDEN_LINKONCE)
4418 tree decl;
4420 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4421 error_mark_node);
4422 TREE_PUBLIC (decl) = 1;
4423 TREE_STATIC (decl) = 1;
4424 DECL_ONE_ONLY (decl) = 1;
4426 (*targetm.asm_out.unique_section) (decl, 0);
4427 named_section (decl, NULL, 0);
4429 (*targetm.asm_out.globalize_label) (file, name);
4430 fputs ("\t.hidden\t", file);
4431 assemble_name (file, name);
4432 fputc ('\n', file);
4433 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4435 else
4437 text_section ();
4438 ASM_OUTPUT_LABEL (file, name);
4441 xops[0] = gen_rtx_REG (SImode, regno);
4442 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4443 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4444 output_asm_insn ("ret", xops);
4448 /* Emit code for the SET_GOT patterns. */
4450 const char *
4451 output_set_got (dest)
4452 rtx dest;
4454 rtx xops[3];
4456 xops[0] = dest;
4457 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4459 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4461 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4463 if (!flag_pic)
4464 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4465 else
4466 output_asm_insn ("call\t%a2", xops);
4468 #if TARGET_MACHO
4469 /* Output the "canonical" label name ("Lxx$pb") here too. This
4470 is what will be referred to by the Mach-O PIC subsystem. */
4471 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4472 #endif
4473 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4474 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4476 if (flag_pic)
4477 output_asm_insn ("pop{l}\t%0", xops);
4479 else
4481 char name[32];
4482 get_pc_thunk_name (name, REGNO (dest));
4483 pic_labels_used |= 1 << REGNO (dest);
4485 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4486 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4487 output_asm_insn ("call\t%X2", xops);
4490 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4491 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4492 else if (!TARGET_MACHO)
4493 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4495 return "";
4498 /* Generate an "push" pattern for input ARG. */
4500 static rtx
4501 gen_push (arg)
4502 rtx arg;
4504 return gen_rtx_SET (VOIDmode,
4505 gen_rtx_MEM (Pmode,
4506 gen_rtx_PRE_DEC (Pmode,
4507 stack_pointer_rtx)),
4508 arg);
4511 /* Return >= 0 if there is an unused call-clobbered register available
4512 for the entire function. */
4514 static unsigned int
4515 ix86_select_alt_pic_regnum ()
4517 if (current_function_is_leaf && !current_function_profile)
4519 int i;
4520 for (i = 2; i >= 0; --i)
4521 if (!regs_ever_live[i])
4522 return i;
4525 return INVALID_REGNUM;
4528 /* Return 1 if we need to save REGNO. */
4529 static int
4530 ix86_save_reg (regno, maybe_eh_return)
4531 unsigned int regno;
4532 int maybe_eh_return;
4534 if (pic_offset_table_rtx
4535 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4536 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4537 || current_function_profile
4538 || current_function_calls_eh_return))
4540 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4541 return 0;
4542 return 1;
4545 if (current_function_calls_eh_return && maybe_eh_return)
4547 unsigned i;
4548 for (i = 0; ; i++)
4550 unsigned test = EH_RETURN_DATA_REGNO (i);
4551 if (test == INVALID_REGNUM)
4552 break;
4553 if (test == regno)
4554 return 1;
4558 return (regs_ever_live[regno]
4559 && !call_used_regs[regno]
4560 && !fixed_regs[regno]
4561 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4564 /* Return number of registers to be saved on the stack. */
4566 static int
4567 ix86_nsaved_regs ()
4569 int nregs = 0;
4570 int regno;
4572 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4573 if (ix86_save_reg (regno, true))
4574 nregs++;
4575 return nregs;
4578 /* Return the offset between two registers, one to be eliminated, and the other
4579 its replacement, at the start of a routine. */
4581 HOST_WIDE_INT
4582 ix86_initial_elimination_offset (from, to)
4583 int from;
4584 int to;
4586 struct ix86_frame frame;
4587 ix86_compute_frame_layout (&frame);
4589 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4590 return frame.hard_frame_pointer_offset;
4591 else if (from == FRAME_POINTER_REGNUM
4592 && to == HARD_FRAME_POINTER_REGNUM)
4593 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4594 else
4596 if (to != STACK_POINTER_REGNUM)
4597 abort ();
4598 else if (from == ARG_POINTER_REGNUM)
4599 return frame.stack_pointer_offset;
4600 else if (from != FRAME_POINTER_REGNUM)
4601 abort ();
4602 else
4603 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4607 /* Fill the ix86_frame structure describing the frame of the current function. */
4609 static void
4610 ix86_compute_frame_layout (frame)
4611 struct ix86_frame *frame;
4613 HOST_WIDE_INT total_size;
4614 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4615 int offset;
4616 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4617 HOST_WIDE_INT size = get_frame_size ();
4619 frame->nregs = ix86_nsaved_regs ();
4620 total_size = size;
4622 /* Skip return address and saved base pointer. */
4623 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4625 frame->hard_frame_pointer_offset = offset;
4627 /* Do some sanity checking of stack_alignment_needed and
4628 preferred_alignment, since the i386 port is the only one using these
4629 features and they may break easily. */
4631 if (size && !stack_alignment_needed)
4632 abort ();
4633 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4634 abort ();
4635 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4636 abort ();
4637 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4638 abort ();
4640 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4641 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4643 /* Register save area */
4644 offset += frame->nregs * UNITS_PER_WORD;
4646 /* Va-arg area */
4647 if (ix86_save_varrargs_registers)
4649 offset += X86_64_VARARGS_SIZE;
4650 frame->va_arg_size = X86_64_VARARGS_SIZE;
4652 else
4653 frame->va_arg_size = 0;
4655 /* Align start of frame for local function. */
4656 frame->padding1 = ((offset + stack_alignment_needed - 1)
4657 & -stack_alignment_needed) - offset;
4659 offset += frame->padding1;
4661 /* Frame pointer points here. */
4662 frame->frame_pointer_offset = offset;
4664 offset += size;
4666 /* Add outgoing arguments area. Can be skipped if we eliminated
4667 all the function calls as dead code. */
4668 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4670 offset += current_function_outgoing_args_size;
4671 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4673 else
4674 frame->outgoing_arguments_size = 0;
4676 /* Align stack boundary. Only needed if we're calling another function
4677 or using alloca. */
4678 if (!current_function_is_leaf || current_function_calls_alloca)
4679 frame->padding2 = ((offset + preferred_alignment - 1)
4680 & -preferred_alignment) - offset;
4681 else
4682 frame->padding2 = 0;
4684 offset += frame->padding2;
4686 /* We've reached end of stack frame. */
4687 frame->stack_pointer_offset = offset;
4689 /* Size prologue needs to allocate. */
4690 frame->to_allocate =
4691 (size + frame->padding1 + frame->padding2
4692 + frame->outgoing_arguments_size + frame->va_arg_size);
4694 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4695 && current_function_is_leaf)
4697 frame->red_zone_size = frame->to_allocate;
4698 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4699 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4701 else
4702 frame->red_zone_size = 0;
4703 frame->to_allocate -= frame->red_zone_size;
4704 frame->stack_pointer_offset -= frame->red_zone_size;
4705 #if 0
4706 fprintf (stderr, "nregs: %i\n", frame->nregs);
4707 fprintf (stderr, "size: %i\n", size);
4708 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4709 fprintf (stderr, "padding1: %i\n", frame->padding1);
4710 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4711 fprintf (stderr, "padding2: %i\n", frame->padding2);
4712 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4713 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4714 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4715 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4716 frame->hard_frame_pointer_offset);
4717 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4718 #endif
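/* The layout computed above, from higher to lower addresses, is:
   return address, saved frame pointer (if any), register save area,
   va-arg save area, padding1, local variables (the frame pointer
   offset points just above the locals), outgoing argument area,
   padding2, and finally any red zone carved off stack_pointer_offset
   for x86-64 leaf functions. */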
4721 /* Emit code to save registers in the prologue. */
4723 static void
4724 ix86_emit_save_regs ()
4726 register int regno;
4727 rtx insn;
4729 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4730 if (ix86_save_reg (regno, true))
4732 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4733 RTX_FRAME_RELATED_P (insn) = 1;
4737 /* Emit code to save registers using MOV insns. The first register
4738 is saved at POINTER + OFFSET. */
4739 static void
4740 ix86_emit_save_regs_using_mov (pointer, offset)
4741 rtx pointer;
4742 HOST_WIDE_INT offset;
4744 int regno;
4745 rtx insn;
4747 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4748 if (ix86_save_reg (regno, true))
4750 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4751 Pmode, offset),
4752 gen_rtx_REG (Pmode, regno));
4753 RTX_FRAME_RELATED_P (insn) = 1;
4754 offset += UNITS_PER_WORD;
4758 /* Expand the prologue into a bunch of separate insns. */
4760 void
4761 ix86_expand_prologue ()
4763 rtx insn;
4764 bool pic_reg_used;
4765 struct ix86_frame frame;
4766 int use_mov = 0;
4767 HOST_WIDE_INT allocate;
4769 ix86_compute_frame_layout (&frame);
4770 if (!optimize_size)
4772 int count = frame.nregs;
4774 /* The fast prologue uses moves instead of pushes to save registers. This
4775 is significantly longer, but it also executes faster, as modern hardware
4776 can execute the moves in parallel but can't do that for push/pop.
4778 Be careful about choosing which prologue to emit: when the function takes
4779 many instructions to execute, we may use the slow version, as well as when
4780 the function is known to be outside a hot spot (this is known only with
4781 feedback). Weight the size of the function by the number of registers
4782 to save, as it is cheap to use one or two push instructions but very
4783 slow to use many of them. */
4784 if (count)
4785 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4786 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4787 || (flag_branch_probabilities
4788 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4789 use_fast_prologue_epilogue = 0;
4790 else
4791 use_fast_prologue_epilogue = !expensive_function_p (count);
4792 if (TARGET_PROLOGUE_USING_MOVE)
4793 use_mov = use_fast_prologue_epilogue;
4796 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4797 slower on all targets. Also sdb doesn't like it. */
4799 if (frame_pointer_needed)
4801 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4802 RTX_FRAME_RELATED_P (insn) = 1;
4804 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4805 RTX_FRAME_RELATED_P (insn) = 1;
4808 allocate = frame.to_allocate;
4809 /* In case we are dealing with only a single register and an empty frame,
4810 a push is equivalent to the mov+add sequence. */
4811 if (allocate == 0 && frame.nregs <= 1)
4812 use_mov = 0;
4814 if (!use_mov)
4815 ix86_emit_save_regs ();
4816 else
4817 allocate += frame.nregs * UNITS_PER_WORD;
4819 if (allocate == 0)
4821 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4823 insn = emit_insn (gen_pro_epilogue_adjust_stack
4824 (stack_pointer_rtx, stack_pointer_rtx,
4825 GEN_INT (-allocate)));
4826 RTX_FRAME_RELATED_P (insn) = 1;
4828 else
4830 /* ??? Is this only valid for Win32? */
4832 rtx arg0, sym;
4834 if (TARGET_64BIT)
4835 abort ();
4837 arg0 = gen_rtx_REG (SImode, 0);
4838 emit_move_insn (arg0, GEN_INT (allocate));
4840 sym = gen_rtx_MEM (FUNCTION_MODE,
4841 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4842 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4844 CALL_INSN_FUNCTION_USAGE (insn)
4845 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4846 CALL_INSN_FUNCTION_USAGE (insn));
4848 /* Don't allow scheduling pass to move insns across __alloca
4849 call. */
4850 emit_insn (gen_blockage (const0_rtx));
4852 if (use_mov)
4854 if (!frame_pointer_needed || !frame.to_allocate)
4855 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4856 else
4857 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4858 -frame.nregs * UNITS_PER_WORD);
4861 #ifdef SUBTARGET_PROLOGUE
4862 SUBTARGET_PROLOGUE;
4863 #endif
4865 pic_reg_used = false;
4866 if (pic_offset_table_rtx
4867 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4868 || current_function_profile))
4870 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4872 if (alt_pic_reg_used != INVALID_REGNUM)
4873 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4875 pic_reg_used = true;
4878 if (pic_reg_used)
4880 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4882 /* Even with accurate pre-reload life analysis, we can wind up
4883 deleting all references to the pic register after reload.
4884 Consider if cross-jumping unifies two sides of a branch
4885 controlled by a comparison vs the only read from a global.
4886 In which case, allow the set_got to be deleted, though we're
4887 too late to do anything about the ebx save in the prologue. */
4888 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4891 /* Prevent function calls from being scheduled before the call to mcount.
4892 In the pic_reg_used case, make sure that the got load isn't deleted. */
4893 if (current_function_profile)
4894 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
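/* A typical -m32 prologue produced by the above is:
     push %ebp
     mov %esp, %ebp
     push %ebx
     sub $N, %esp
   where N is frame.to_allocate; on the move path the sub comes first
   and the registers are stored with movs instead of pushes. */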
4897 /* Emit code to restore saved registers using MOV insns. First register
4898 is restored from POINTER + OFFSET. */
4899 static void
4900 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4901 rtx pointer;
4902 int offset;
4903 int maybe_eh_return;
4905 int regno;
4907 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4908 if (ix86_save_reg (regno, maybe_eh_return))
4910 emit_move_insn (gen_rtx_REG (Pmode, regno),
4911 adjust_address (gen_rtx_MEM (Pmode, pointer),
4912 Pmode, offset));
4913 offset += UNITS_PER_WORD;
4917 /* Restore function stack, frame, and registers. */
4919 void
4920 ix86_expand_epilogue (style)
4921 int style;
4923 int regno;
4924 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4925 struct ix86_frame frame;
4926 HOST_WIDE_INT offset;
4928 ix86_compute_frame_layout (&frame);
4930 /* Calculate start of saved registers relative to ebp. Special care
4931 must be taken for the normal return case of a function using
4932 eh_return: the eax and edx registers are marked as saved, but not
4933 restored along this path. */
4934 offset = frame.nregs;
4935 if (current_function_calls_eh_return && style != 2)
4936 offset -= 2;
4937 offset *= -UNITS_PER_WORD;
4939 /* If we're only restoring one register and sp is not valid, then use
4940 a move instruction to restore the register, since it's
4941 less work than reloading sp and popping the register.
4943 The default code results in a stack adjustment using an add/lea insn,
4944 while this code results in a LEAVE instruction (or discrete equivalent),
4945 so it is profitable in some other cases as well, especially when there
4946 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4947 is set and there is exactly one register to pop. This heuristic may need
4948 some tuning in the future. */
4949 if ((!sp_valid && frame.nregs <= 1)
4950 || (TARGET_EPILOGUE_USING_MOVE
4951 && use_fast_prologue_epilogue
4952 && (frame.nregs > 1 || frame.to_allocate))
4953 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4954 || (frame_pointer_needed && TARGET_USE_LEAVE
4955 && use_fast_prologue_epilogue && frame.nregs == 1)
4956 || current_function_calls_eh_return)
4958 /* Restore registers. We can use ebp or esp to address the memory
4959 locations. If both are available, default to ebp, since offsets
4960 are known to be small. The only exception is esp pointing directly to
4961 the end of the block of saved registers, where we may simplify the
4962 addressing mode. */
4964 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4965 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4966 frame.to_allocate, style == 2);
4967 else
4968 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4969 offset, style == 2);
4971 /* eh_return epilogues need %ecx added to the stack pointer. */
4972 if (style == 2)
4974 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4976 if (frame_pointer_needed)
4978 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4979 tmp = plus_constant (tmp, UNITS_PER_WORD);
4980 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4982 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4983 emit_move_insn (hard_frame_pointer_rtx, tmp);
4985 emit_insn (gen_pro_epilogue_adjust_stack
4986 (stack_pointer_rtx, sa, const0_rtx));
4988 else
4990 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4991 tmp = plus_constant (tmp, (frame.to_allocate
4992 + frame.nregs * UNITS_PER_WORD));
4993 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4996 else if (!frame_pointer_needed)
4997 emit_insn (gen_pro_epilogue_adjust_stack
4998 (stack_pointer_rtx, stack_pointer_rtx,
4999 GEN_INT (frame.to_allocate
5000 + frame.nregs * UNITS_PER_WORD)));
5001 /* If not an i386, mov & pop is faster than "leave". */
5002 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5003 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5004 else
5006 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5007 hard_frame_pointer_rtx,
5008 const0_rtx));
5009 if (TARGET_64BIT)
5010 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5011 else
5012 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5015 else
5017 /* First step is to deallocate the stack frame so that we can
5018 pop the registers. */
5019 if (!sp_valid)
5021 if (!frame_pointer_needed)
5022 abort ();
5023 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5024 hard_frame_pointer_rtx,
5025 GEN_INT (offset)));
5027 else if (frame.to_allocate)
5028 emit_insn (gen_pro_epilogue_adjust_stack
5029 (stack_pointer_rtx, stack_pointer_rtx,
5030 GEN_INT (frame.to_allocate)));
5032 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5033 if (ix86_save_reg (regno, false))
5035 if (TARGET_64BIT)
5036 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5037 else
5038 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5040 if (frame_pointer_needed)
5042 /* Leave results in shorter dependency chains on CPUs that are
5043 able to grok it fast. */
5044 if (TARGET_USE_LEAVE)
5045 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5046 else if (TARGET_64BIT)
5047 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5048 else
5049 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5053 /* Sibcall epilogues don't want a return instruction. */
5054 if (style == 0)
5055 return;
5057 if (current_function_pops_args && current_function_args_size)
5059 rtx popc = GEN_INT (current_function_pops_args);
5061 /* i386 can only pop 64K bytes. If asked to pop more, pop
5062 return address, do explicit add, and jump indirectly to the
5063 caller. */
5065 if (current_function_pops_args >= 65536)
5067 rtx ecx = gen_rtx_REG (SImode, 2);
5069 /* There is no "pascal" calling convention in the 64-bit ABI. */
5070 if (TARGET_64BIT)
5071 abort ();
5073 emit_insn (gen_popsi1 (ecx));
5074 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5075 emit_jump_insn (gen_return_indirect_internal (ecx));
5077 else
5078 emit_jump_insn (gen_return_pop_internal (popc));
5080 else
5081 emit_jump_insn (gen_return_internal ());
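/* Correspondingly, a typical -m32 epilogue emitted here is either
     add $N, %esp / pop %ebx / pop %ebp / ret
   or just "leave / ret" when the frame pointer and LEAVE are in use. */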
5084 /* Reset from the function's potential modifications. */
5086 static void
5087 ix86_output_function_epilogue (file, size)
5088 FILE *file ATTRIBUTE_UNUSED;
5089 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5091 if (pic_offset_table_rtx)
5092 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5095 /* Extract the parts of an RTL expression that is a valid memory address
5096 for an instruction. Return 0 if the structure of the address is
5097 grossly off. Return -1 if the address contains ASHIFT, so it is not
5098 strictly valid but is still used for computing the length of a lea instruction. */
5101 static int
5102 ix86_decompose_address (addr, out)
5103 register rtx addr;
5104 struct ix86_address *out;
5106 rtx base = NULL_RTX;
5107 rtx index = NULL_RTX;
5108 rtx disp = NULL_RTX;
5109 HOST_WIDE_INT scale = 1;
5110 rtx scale_rtx = NULL_RTX;
5111 int retval = 1;
5113 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5114 base = addr;
5115 else if (GET_CODE (addr) == PLUS)
5117 rtx op0 = XEXP (addr, 0);
5118 rtx op1 = XEXP (addr, 1);
5119 enum rtx_code code0 = GET_CODE (op0);
5120 enum rtx_code code1 = GET_CODE (op1);
5122 if (code0 == REG || code0 == SUBREG)
5124 if (code1 == REG || code1 == SUBREG)
5125 index = op0, base = op1; /* index + base */
5126 else
5127 base = op0, disp = op1; /* base + displacement */
5129 else if (code0 == MULT)
5131 index = XEXP (op0, 0);
5132 scale_rtx = XEXP (op0, 1);
5133 if (code1 == REG || code1 == SUBREG)
5134 base = op1; /* index*scale + base */
5135 else
5136 disp = op1; /* index*scale + disp */
5138 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5140 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5141 scale_rtx = XEXP (XEXP (op0, 0), 1);
5142 base = XEXP (op0, 1);
5143 disp = op1;
5145 else if (code0 == PLUS)
5147 index = XEXP (op0, 0); /* index + base + disp */
5148 base = XEXP (op0, 1);
5149 disp = op1;
5151 else
5152 return 0;
5154 else if (GET_CODE (addr) == MULT)
5156 index = XEXP (addr, 0); /* index*scale */
5157 scale_rtx = XEXP (addr, 1);
5159 else if (GET_CODE (addr) == ASHIFT)
5161 rtx tmp;
5163 /* We're called for lea too, which implements ashift on occasion. */
5164 index = XEXP (addr, 0);
5165 tmp = XEXP (addr, 1);
5166 if (GET_CODE (tmp) != CONST_INT)
5167 return 0;
5168 scale = INTVAL (tmp);
5169 if ((unsigned HOST_WIDE_INT) scale > 3)
5170 return 0;
5171 scale = 1 << scale;
5172 retval = -1;
5174 else
5175 disp = addr; /* displacement */
5177 /* Extract the integral value of scale. */
5178 if (scale_rtx)
5180 if (GET_CODE (scale_rtx) != CONST_INT)
5181 return 0;
5182 scale = INTVAL (scale_rtx);
5185 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5186 if (base && index && scale == 1
5187 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5188 || index == stack_pointer_rtx))
5190 rtx tmp = base;
5191 base = index;
5192 index = tmp;
5195 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5196 if ((base == hard_frame_pointer_rtx
5197 || base == frame_pointer_rtx
5198 || base == arg_pointer_rtx) && !disp)
5199 disp = const0_rtx;
5201 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5202 Avoid this by transforming to [%esi+0]. */
5203 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5204 && base && !index && !disp
5205 && REG_P (base)
5206 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5207 disp = const0_rtx;
5209 /* Special case: encode reg+reg instead of reg*2. */
5210 if (!base && index && scale && scale == 2)
5211 base = index, scale = 1;
5213 /* Special case: scaling cannot be encoded without base or displacement. */
5214 if (!base && !disp && index && scale != 1)
5215 disp = const0_rtx;
5217 out->base = base;
5218 out->index = index;
5219 out->disp = disp;
5220 out->scale = scale;
5222 return retval;
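/* Example: the address 12(%ebx,%eax,4), i.e.
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12)),
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12. */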
5225 /* Return cost of the memory address x.
5226 For i386, it is better to use a complex address than let gcc copy
5227 the address into a reg and make a new pseudo. But not if the address
5228 requires two regs - that would mean more pseudos with longer
5229 lifetimes. */
5231 ix86_address_cost (x)
5232 rtx x;
5234 struct ix86_address parts;
5235 int cost = 1;
5237 if (!ix86_decompose_address (x, &parts))
5238 abort ();
5240 if (parts.base && GET_CODE (parts.base) == SUBREG)
5241 parts.base = SUBREG_REG (parts.base);
5242 if (parts.index && GET_CODE (parts.index) == SUBREG)
5243 parts.index = SUBREG_REG (parts.index);
5245 /* More complex memory references are better. */
5246 if (parts.disp && parts.disp != const0_rtx)
5247 cost--;
5249 /* Attempt to minimize number of registers in the address. */
5250 if ((parts.base
5251 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5252 || (parts.index
5253 && (!REG_P (parts.index)
5254 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5255 cost++;
5257 if (parts.base
5258 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5259 && parts.index
5260 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5261 && parts.base != parts.index)
5262 cost++;
5264 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5265 since its predecode logic can't detect the length of such instructions
5266 and decoding degenerates to vector decoded. Increase the cost of such
5267 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5268 to split such addresses or even to refuse them entirely.
5270 The following addressing modes are affected:
5271 [base+scale*index]
5272 [scale*index+disp]
5273 [base+index]
5275 The first and last cases may be avoidable by explicitly coding the zero
5276 into the memory address, but I don't have an AMD K6 machine handy to
5277 check this theory. */
5279 if (TARGET_K6
5280 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5281 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5282 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5283 cost += 10;
5285 return cost;
5288 /* If X is a machine specific address (i.e. a symbol or label being
5289 referenced as a displacement from the GOT implemented using an
5290 UNSPEC), then return the base term. Otherwise return X. */
5293 ix86_find_base_term (x)
5294 rtx x;
5296 rtx term;
5298 if (TARGET_64BIT)
5300 if (GET_CODE (x) != CONST)
5301 return x;
5302 term = XEXP (x, 0);
5303 if (GET_CODE (term) == PLUS
5304 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5305 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5306 term = XEXP (term, 0);
5307 if (GET_CODE (term) != UNSPEC
5308 || XINT (term, 1) != UNSPEC_GOTPCREL)
5309 return x;
5311 term = XVECEXP (term, 0, 0);
5313 if (GET_CODE (term) != SYMBOL_REF
5314 && GET_CODE (term) != LABEL_REF)
5315 return x;
5317 return term;
5320 if (GET_CODE (x) != PLUS
5321 || XEXP (x, 0) != pic_offset_table_rtx
5322 || GET_CODE (XEXP (x, 1)) != CONST)
5323 return x;
5325 term = XEXP (XEXP (x, 1), 0);
5327 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5328 term = XEXP (term, 0);
5330 if (GET_CODE (term) != UNSPEC
5331 || XINT (term, 1) != UNSPEC_GOTOFF)
5332 return x;
5334 term = XVECEXP (term, 0, 0);
5336 if (GET_CODE (term) != SYMBOL_REF
5337 && GET_CODE (term) != LABEL_REF)
5338 return x;
5340 return term;
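/* E.g. (plus pic_offset_table_rtx
         (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   yields the bare (symbol_ref "x") as the base term. */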
5343 /* Determine if a given RTX is a valid constant. We already know this
5344 satisfies CONSTANT_P. */
5346 bool
5347 legitimate_constant_p (x)
5348 rtx x;
5350 rtx inner;
5352 switch (GET_CODE (x))
5354 case SYMBOL_REF:
5355 /* TLS symbols are not constant. */
5356 if (tls_symbolic_operand (x, Pmode))
5357 return false;
5358 break;
5360 case CONST:
5361 inner = XEXP (x, 0);
5363 /* Offsets of TLS symbols are never valid.
5364 Discourage CSE from creating them. */
5365 if (GET_CODE (inner) == PLUS
5366 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5367 return false;
5369 /* Only some unspecs are valid as "constants". */
5370 if (GET_CODE (inner) == UNSPEC)
5371 switch (XINT (inner, 1))
5373 case UNSPEC_TPOFF:
5374 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5375 default:
5376 return false;
5378 break;
5380 default:
5381 break;
5384 /* Otherwise we handle everything else in the move patterns. */
5385 return true;
5388 /* Determine if it's legal to put X into the constant pool. This
5389 is not possible for the address of thread-local symbols, which
5390 is checked above. */
5392 static bool
5393 ix86_cannot_force_const_mem (x)
5394 rtx x;
5396 return !legitimate_constant_p (x);
5399 /* Determine if a given RTX is a valid constant address. */
5401 bool
5402 constant_address_p (x)
5403 rtx x;
5405 switch (GET_CODE (x))
5407 case LABEL_REF:
5408 case CONST_INT:
5409 return true;
5411 case CONST_DOUBLE:
5412 return TARGET_64BIT;
5414 case CONST:
5415 /* For Mach-O, really believe the CONST. */
5416 if (TARGET_MACHO)
5417 return true;
5418 /* Otherwise fall through. */
5419 case SYMBOL_REF:
5420 return !flag_pic && legitimate_constant_p (x);
5422 default:
5423 return false;
5427 /* Nonzero if the constant value X is a legitimate general operand
5428 when generating PIC code. It is given that flag_pic is on and
5429 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5431 bool
5432 legitimate_pic_operand_p (x)
5433 rtx x;
5435 rtx inner;
5437 switch (GET_CODE (x))
5439 case CONST:
5440 inner = XEXP (x, 0);
5442 /* Only some unspecs are valid as "constants". */
5443 if (GET_CODE (inner) == UNSPEC)
5444 switch (XINT (inner, 1))
5446 case UNSPEC_TPOFF:
5447 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5448 default:
5449 return false;
5451 /* FALLTHRU */
5453 case SYMBOL_REF:
5454 case LABEL_REF:
5455 return legitimate_pic_address_disp_p (x);
5457 default:
5458 return true;
5462 /* Determine if a given CONST RTX is a valid memory displacement
5463 in PIC mode. */
5466 legitimate_pic_address_disp_p (disp)
5467 register rtx disp;
5469 bool saw_plus;
5471 /* In 64bit mode we can allow direct addresses of symbols and labels
5472 when they are not dynamic symbols. */
5473 if (TARGET_64BIT)
5475 /* TLS references should always be enclosed in UNSPEC. */
5476 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5477 return 0;
5478 if (GET_CODE (disp) == SYMBOL_REF
5479 && ix86_cmodel == CM_SMALL_PIC
5480 && (CONSTANT_POOL_ADDRESS_P (disp)
5481 || SYMBOL_REF_FLAG (disp)))
5482 return 1;
5483 if (GET_CODE (disp) == LABEL_REF)
5484 return 1;
5485 if (GET_CODE (disp) == CONST
5486 && GET_CODE (XEXP (disp, 0)) == PLUS
5487 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5488 && ix86_cmodel == CM_SMALL_PIC
5489 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5490 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5491 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5492 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5493 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5494 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5495 return 1;
5497 if (GET_CODE (disp) != CONST)
5498 return 0;
5499 disp = XEXP (disp, 0);
5501 if (TARGET_64BIT)
5503 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5504 distance of GOT table references. We should not need these anyway. */
5505 if (GET_CODE (disp) != UNSPEC
5506 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5507 return 0;
5509 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5510 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5511 return 0;
5512 return 1;
5515 saw_plus = false;
5516 if (GET_CODE (disp) == PLUS)
5518 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5519 return 0;
5520 disp = XEXP (disp, 0);
5521 saw_plus = true;
5524 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5525 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5527 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5528 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5529 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5531 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5532 if (strstr (sym_name, "$pb") != 0)
5533 return 1;
5537 if (GET_CODE (disp) != UNSPEC)
5538 return 0;
5540 switch (XINT (disp, 1))
5542 case UNSPEC_GOT:
5543 if (saw_plus)
5544 return false;
5545 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5546 case UNSPEC_GOTOFF:
5547 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5548 case UNSPEC_GOTTPOFF:
5549 case UNSPEC_GOTNTPOFF:
5550 case UNSPEC_INDNTPOFF:
5551 if (saw_plus)
5552 return false;
5553 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5554 case UNSPEC_NTPOFF:
5555 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5556 case UNSPEC_DTPOFF:
5557 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5560 return 0;
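/* E.g. (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted for
   a local symbol and corresponds to the "x@GOTOFF" assembler syntax. */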
5563 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5564 memory address for an instruction. The MODE argument is the machine mode
5565 for the MEM expression that wants to use this address.
5567 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5568 convert common non-canonical forms to canonical form so that they will
5569 be recognized. */
5572 legitimate_address_p (mode, addr, strict)
5573 enum machine_mode mode;
5574 register rtx addr;
5575 int strict;
5577 struct ix86_address parts;
5578 rtx base, index, disp;
5579 HOST_WIDE_INT scale;
5580 const char *reason = NULL;
5581 rtx reason_rtx = NULL_RTX;
5583 if (TARGET_DEBUG_ADDR)
5585 fprintf (stderr,
5586 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5587 GET_MODE_NAME (mode), strict);
5588 debug_rtx (addr);
5591 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5593 if (TARGET_DEBUG_ADDR)
5594 fprintf (stderr, "Success.\n");
5595 return TRUE;
5598 if (ix86_decompose_address (addr, &parts) <= 0)
5600 reason = "decomposition failed";
5601 goto report_error;
5604 base = parts.base;
5605 index = parts.index;
5606 disp = parts.disp;
5607 scale = parts.scale;
5609 /* Validate base register.
5611 Don't allow SUBREGs here; it can lead to spill failures when the base
5612 is one word out of a two word structure, which is represented internally
5613 as a DImode int. */
5615 if (base)
5617 rtx reg;
5618 reason_rtx = base;
5620 if (GET_CODE (base) == SUBREG)
5621 reg = SUBREG_REG (base);
5622 else
5623 reg = base;
5625 if (GET_CODE (reg) != REG)
5627 reason = "base is not a register";
5628 goto report_error;
5631 if (GET_MODE (base) != Pmode)
5633 reason = "base is not in Pmode";
5634 goto report_error;
5637 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5638 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5640 reason = "base is not valid";
5641 goto report_error;
5645 /* Validate index register.
5647 Don't allow SUBREGs here; they can lead to spill failures when the index
5648 is one word out of a two-word structure, which is represented internally
5649 as a DImode int. */
5651 if (index)
5653 rtx reg;
5654 reason_rtx = index;
5656 if (GET_CODE (index) == SUBREG)
5657 reg = SUBREG_REG (index);
5658 else
5659 reg = index;
5661 if (GET_CODE (reg) != REG)
5663 reason = "index is not a register";
5664 goto report_error;
5667 if (GET_MODE (index) != Pmode)
5669 reason = "index is not in Pmode";
5670 goto report_error;
5673 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5674 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5676 reason = "index is not valid";
5677 goto report_error;
5681 /* Validate scale factor. */
5682 if (scale != 1)
5684 reason_rtx = GEN_INT (scale);
5685 if (!index)
5687 reason = "scale without index";
5688 goto report_error;
5691 if (scale != 2 && scale != 4 && scale != 8)
5693 reason = "scale is not a valid multiplier";
5694 goto report_error;
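/* The restriction to 1, 2, 4 and 8 mirrors the hardware: the SIB byte
   encodes the scale as a two-bit log2 of the multiplier.  */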
5698 /* Validate displacement. */
5699 if (disp)
5701 reason_rtx = disp;
5703 if (GET_CODE (disp) == CONST
5704 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5705 switch (XINT (XEXP (disp, 0), 1))
5707 case UNSPEC_GOT:
5708 case UNSPEC_GOTOFF:
5709 case UNSPEC_GOTPCREL:
5710 if (!flag_pic)
5711 abort ();
5712 goto is_legitimate_pic;
5714 case UNSPEC_GOTTPOFF:
5715 case UNSPEC_GOTNTPOFF:
5716 case UNSPEC_INDNTPOFF:
5717 case UNSPEC_NTPOFF:
5718 case UNSPEC_DTPOFF:
5719 break;
5721 default:
5722 reason = "invalid address unspec";
5723 goto report_error;
5726 else if (flag_pic && (SYMBOLIC_CONST (disp)
5727 #if TARGET_MACHO
5728 && !machopic_operand_p (disp)
5729 #endif
5732 is_legitimate_pic:
5733 if (TARGET_64BIT && (index || base))
5735 /* foo@dtpoff(%rX) is ok. */
5736 if (GET_CODE (disp) != CONST
5737 || GET_CODE (XEXP (disp, 0)) != PLUS
5738 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5739 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5740 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5741 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5743 reason = "non-constant pic memory reference";
5744 goto report_error;
5747 else if (! legitimate_pic_address_disp_p (disp))
5749 reason = "displacement is an invalid pic construct";
5750 goto report_error;
5753 /* This code used to verify that a symbolic pic displacement
5754 includes the pic_offset_table_rtx register.
5756 While this is a good idea, unfortunately these constructs may
5757 be created by the "adds using lea" optimization for incorrect
5758 code like:
5760 int a;
5761 int foo(int i)
5763 return *(&a+i);
5766 This code is nonsensical, but results in addressing the
5767 GOT table with a pic_offset_table_rtx base. We can't
5768 just reject it easily, since it gets matched by the
5769 "addsi3" pattern, which later gets split to an lea when the
5770 output register differs from the input. While this
5771 could be handled by a separate addsi pattern for this case
5772 that never results in an lea, disabling this test seems to be
5773 the easier and correct fix for the crash. */
5775 else if (!CONSTANT_ADDRESS_P (disp))
5777 reason = "displacement is not constant";
5778 goto report_error;
5780 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5782 reason = "displacement is out of range";
5783 goto report_error;
5785 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5787 reason = "displacement is a const_double";
5788 goto report_error;
5792 /* Everything looks valid. */
5793 if (TARGET_DEBUG_ADDR)
5794 fprintf (stderr, "Success.\n");
5795 return TRUE;
5797 report_error:
5798 if (TARGET_DEBUG_ADDR)
5800 fprintf (stderr, "Error: %s\n", reason);
5801 debug_rtx (reason_rtx);
5803 return FALSE;
5806 /* Return a unique alias set for the GOT. */
5808 static HOST_WIDE_INT
5809 ix86_GOT_alias_set ()
5811 static HOST_WIDE_INT set = -1;
5812 if (set == -1)
5813 set = new_alias_set ();
5814 return set;
5817 /* Return a legitimate reference for ORIG (an address) using the
5818 register REG. If REG is 0, a new pseudo is generated.
5820 There are two types of references that must be handled:
5822 1. Global data references must load the address from the GOT, via
5823 the PIC reg. An insn is emitted to do this load, and the reg is
5824 returned.
5826 2. Static data references, constant pool addresses, and code labels
5827 compute the address as an offset from the GOT, whose base is in
5828 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5829 differentiate them from global data objects. The returned
5830 address is the PIC reg + an unspec constant.
5832 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5833 reg also appears in the address. */
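/* An illustrative sketch of the two cases, assuming %ebx holds the PIC
   base (the symbol names are examples, not from the sources):

	movl	global@GOT(%ebx), %eax		# case 1: load address from GOT slot
	leal	local@GOTOFF(%ebx), %eax	# case 2: PIC reg + unspec offset

   The @GOT form dereferences a GOT entry; the @GOTOFF form is a plain
   displacement from the GOT base and needs no extra memory load.  */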
5836 legitimize_pic_address (orig, reg)
5837 rtx orig;
5838 rtx reg;
5840 rtx addr = orig;
5841 rtx new = orig;
5842 rtx base;
5844 #if TARGET_MACHO
5845 if (reg == 0)
5846 reg = gen_reg_rtx (Pmode);
5847 /* Use the generic Mach-O PIC machinery. */
5848 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5849 #endif
5851 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5852 new = addr;
5853 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5855 /* This symbol may be referenced via a displacement from the PIC
5856 base address (@GOTOFF). */
5858 if (reload_in_progress)
5859 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5860 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5861 new = gen_rtx_CONST (Pmode, new);
5862 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5864 if (reg != 0)
5866 emit_move_insn (reg, new);
5867 new = reg;
5870 else if (GET_CODE (addr) == SYMBOL_REF)
5872 if (TARGET_64BIT)
5874 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5875 new = gen_rtx_CONST (Pmode, new);
5876 new = gen_rtx_MEM (Pmode, new);
5877 RTX_UNCHANGING_P (new) = 1;
5878 set_mem_alias_set (new, ix86_GOT_alias_set ());
5880 if (reg == 0)
5881 reg = gen_reg_rtx (Pmode);
5882 /* Use gen_movsi directly; otherwise the address is loaded
5883 into a register for CSE. We don't want to CSE these addresses;
5884 instead we CSE addresses from the GOT table, so skip this. */
5885 emit_insn (gen_movsi (reg, new));
5886 new = reg;
5888 else
5890 /* This symbol must be referenced via a load from the
5891 Global Offset Table (@GOT). */
5893 if (reload_in_progress)
5894 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5895 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5896 new = gen_rtx_CONST (Pmode, new);
5897 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5898 new = gen_rtx_MEM (Pmode, new);
5899 RTX_UNCHANGING_P (new) = 1;
5900 set_mem_alias_set (new, ix86_GOT_alias_set ());
5902 if (reg == 0)
5903 reg = gen_reg_rtx (Pmode);
5904 emit_move_insn (reg, new);
5905 new = reg;
5908 else
5910 if (GET_CODE (addr) == CONST)
5912 addr = XEXP (addr, 0);
5914 /* We must match stuff we generated earlier. Assume the only
5915 unspecs that can get here are ours. Not that we could do
5916 anything with them anyway... */
5917 if (GET_CODE (addr) == UNSPEC
5918 || (GET_CODE (addr) == PLUS
5919 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5920 return orig;
5921 if (GET_CODE (addr) != PLUS)
5922 abort ();
5924 if (GET_CODE (addr) == PLUS)
5926 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5928 /* Check first to see if this is a constant offset from a @GOTOFF
5929 symbol reference. */
5930 if (local_symbolic_operand (op0, Pmode)
5931 && GET_CODE (op1) == CONST_INT)
5933 if (!TARGET_64BIT)
5935 if (reload_in_progress)
5936 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5937 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5938 UNSPEC_GOTOFF);
5939 new = gen_rtx_PLUS (Pmode, new, op1);
5940 new = gen_rtx_CONST (Pmode, new);
5941 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5943 if (reg != 0)
5945 emit_move_insn (reg, new);
5946 new = reg;
5949 else
5951 if (INTVAL (op1) < -16*1024*1024
5952 || INTVAL (op1) >= 16*1024*1024)
5953 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5956 else
5958 base = legitimize_pic_address (XEXP (addr, 0), reg);
5959 new = legitimize_pic_address (XEXP (addr, 1),
5960 base == reg ? NULL_RTX : reg);
5962 if (GET_CODE (new) == CONST_INT)
5963 new = plus_constant (base, INTVAL (new));
5964 else
5966 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5968 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5969 new = XEXP (new, 1);
5971 new = gen_rtx_PLUS (Pmode, base, new);
5976 return new;
5979 static void
5980 ix86_encode_section_info (decl, first)
5981 tree decl;
5982 int first ATTRIBUTE_UNUSED;
5984 bool local_p = (*targetm.binds_local_p) (decl);
5985 rtx rtl, symbol;
5987 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5988 if (GET_CODE (rtl) != MEM)
5989 return;
5990 symbol = XEXP (rtl, 0);
5991 if (GET_CODE (symbol) != SYMBOL_REF)
5992 return;
5994 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5995 symbol so that we may access it directly relative to the GOT base. */
5997 if (flag_pic)
5998 SYMBOL_REF_FLAG (symbol) = local_p;
6000 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6001 "local dynamic", "initial exec" or "local exec" TLS models
6002 respectively. */
6004 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6006 const char *symbol_str;
6007 char *newstr;
6008 size_t len;
6009 enum tls_model kind = decl_tls_model (decl);
6011 if (TARGET_64BIT && ! flag_pic)
6013 /* x86-64 doesn't allow non-pic code for shared libraries,
6014 so don't generate GD/LD TLS models for non-pic code. */
6015 switch (kind)
6017 case TLS_MODEL_GLOBAL_DYNAMIC:
6018 kind = TLS_MODEL_INITIAL_EXEC; break;
6019 case TLS_MODEL_LOCAL_DYNAMIC:
6020 kind = TLS_MODEL_LOCAL_EXEC; break;
6021 default:
6022 break;
6026 symbol_str = XSTR (symbol, 0);
6028 if (symbol_str[0] == '%')
6030 if (symbol_str[1] == tls_model_chars[kind])
6031 return;
6032 symbol_str += 2;
6034 len = strlen (symbol_str) + 1;
6035 newstr = alloca (len + 2);
6037 newstr[0] = '%';
6038 newstr[1] = tls_model_chars[kind];
6039 memcpy (newstr + 2, symbol_str, len);
6041 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
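/* For example, a thread-local variable "foo" using the initial-exec
   model is renamed to "%ifoo" here; ix86_strip_name_encoding below
   drops the two-character prefix again before the name is emitted.  */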
6045 /* Undo the above when printing symbol names. */
6047 static const char *
6048 ix86_strip_name_encoding (str)
6049 const char *str;
6051 if (str[0] == '%')
6052 str += 2;
6053 if (str [0] == '*')
6054 str += 1;
6055 return str;
6058 /* Load the thread pointer into a register. */
6060 static rtx
6061 get_thread_pointer ()
6063 rtx tp;
6065 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6066 tp = gen_rtx_MEM (Pmode, tp);
6067 RTX_UNCHANGING_P (tp) = 1;
6068 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6069 tp = force_reg (Pmode, tp);
6071 return tp;
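/* The UNSPEC_TP address built above is printed by print_operand_address
   as "%gs:0" (ia32) or "%fs:0" (x86-64), so the resulting load is
   e.g. "movl %gs:0, %eax".  */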
6074 /* Try machine-dependent ways of modifying an illegitimate address
6075 to be legitimate. If we find one, return the new, valid address.
6076 This macro is used in only one place: `memory_address' in explow.c.
6078 OLDX is the address as it was before break_out_memory_refs was called.
6079 In some cases it is useful to look at this to decide what needs to be done.
6081 MODE and WIN are passed so that this macro can use
6082 GO_IF_LEGITIMATE_ADDRESS.
6084 It is always safe for this macro to do nothing. It exists to recognize
6085 opportunities to optimize the output.
6087 For the 80386, we handle X+REG by loading X into a register R and
6088 using R+REG. R will go in a general reg and indexing will be used.
6089 However, if REG is a broken-out memory address or multiplication,
6090 nothing needs to be done because REG can certainly go in a general reg.
6092 When -fpic is used, special handling is needed for symbolic references.
6093 See comments by legitimize_pic_address in i386.c for details. */
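/* Two of the rewrites performed below, for illustration:

     (plus (ashift (reg) (const_int 2)) (reg))
       --> (plus (mult (reg) (const_int 4)) (reg))

     (plus (symbol_ref "x") (reg))
       --> the symbol is loaded into a fresh register R, giving
	   (plus (reg R) (reg))

   both of which are canonical forms accepted by legitimate_address_p.  */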
6096 legitimize_address (x, oldx, mode)
6097 register rtx x;
6098 register rtx oldx ATTRIBUTE_UNUSED;
6099 enum machine_mode mode;
6101 int changed = 0;
6102 unsigned log;
6104 if (TARGET_DEBUG_ADDR)
6106 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6107 GET_MODE_NAME (mode));
6108 debug_rtx (x);
6111 log = tls_symbolic_operand (x, mode);
6112 if (log)
6114 rtx dest, base, off, pic;
6115 int type;
6117 switch (log)
6119 case TLS_MODEL_GLOBAL_DYNAMIC:
6120 dest = gen_reg_rtx (Pmode);
6121 if (TARGET_64BIT)
6123 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6125 start_sequence ();
6126 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6127 insns = get_insns ();
6128 end_sequence ();
6130 emit_libcall_block (insns, dest, rax, x);
6132 else
6133 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6134 break;
6136 case TLS_MODEL_LOCAL_DYNAMIC:
6137 base = gen_reg_rtx (Pmode);
6138 if (TARGET_64BIT)
6140 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6142 start_sequence ();
6143 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6144 insns = get_insns ();
6145 end_sequence ();
6147 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6148 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6149 emit_libcall_block (insns, base, rax, note);
6151 else
6152 emit_insn (gen_tls_local_dynamic_base_32 (base));
6154 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6155 off = gen_rtx_CONST (Pmode, off);
6157 return gen_rtx_PLUS (Pmode, base, off);
6159 case TLS_MODEL_INITIAL_EXEC:
6160 if (TARGET_64BIT)
6162 pic = NULL;
6163 type = UNSPEC_GOTNTPOFF;
6165 else if (flag_pic)
6167 if (reload_in_progress)
6168 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6169 pic = pic_offset_table_rtx;
6170 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6172 else if (!TARGET_GNU_TLS)
6174 pic = gen_reg_rtx (Pmode);
6175 emit_insn (gen_set_got (pic));
6176 type = UNSPEC_GOTTPOFF;
6178 else
6180 pic = NULL;
6181 type = UNSPEC_INDNTPOFF;
6184 base = get_thread_pointer ();
6186 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6187 off = gen_rtx_CONST (Pmode, off);
6188 if (pic)
6189 off = gen_rtx_PLUS (Pmode, pic, off);
6190 off = gen_rtx_MEM (Pmode, off);
6191 RTX_UNCHANGING_P (off) = 1;
6192 set_mem_alias_set (off, ix86_GOT_alias_set ());
6193 dest = gen_reg_rtx (Pmode);
6195 if (TARGET_64BIT || TARGET_GNU_TLS)
6197 emit_move_insn (dest, off);
6198 return gen_rtx_PLUS (Pmode, base, dest);
6200 else
6201 emit_insn (gen_subsi3 (dest, base, off));
6202 break;
6204 case TLS_MODEL_LOCAL_EXEC:
6205 base = get_thread_pointer ();
6207 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6208 (TARGET_64BIT || TARGET_GNU_TLS)
6209 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6210 off = gen_rtx_CONST (Pmode, off);
6212 if (TARGET_64BIT || TARGET_GNU_TLS)
6213 return gen_rtx_PLUS (Pmode, base, off);
6214 else
6216 dest = gen_reg_rtx (Pmode);
6217 emit_insn (gen_subsi3 (dest, base, off));
6219 break;
6221 default:
6222 abort ();
6225 return dest;
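/* As an illustration of the local-exec case with GNU TLS on ia32, the
   returned (plus tp (const (unspec [x] UNSPEC_NTPOFF))) typically
   assembles to the usual sequence

	movl	%gs:0, %ecx
	leal	x@NTPOFF(%ecx), %eax  */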
6228 if (flag_pic && SYMBOLIC_CONST (x))
6229 return legitimize_pic_address (x, 0);
6231 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6232 if (GET_CODE (x) == ASHIFT
6233 && GET_CODE (XEXP (x, 1)) == CONST_INT
6234 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6236 changed = 1;
6237 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6238 GEN_INT (1 << log));
6241 if (GET_CODE (x) == PLUS)
6243 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6245 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6246 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6247 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6249 changed = 1;
6250 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6251 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6252 GEN_INT (1 << log));
6255 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6256 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6257 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6259 changed = 1;
6260 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6261 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6262 GEN_INT (1 << log));
6265 /* Put multiply first if it isn't already. */
6266 if (GET_CODE (XEXP (x, 1)) == MULT)
6268 rtx tmp = XEXP (x, 0);
6269 XEXP (x, 0) = XEXP (x, 1);
6270 XEXP (x, 1) = tmp;
6271 changed = 1;
6274 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6275 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6276 created by virtual register instantiation, register elimination, and
6277 similar optimizations. */
6278 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6280 changed = 1;
6281 x = gen_rtx_PLUS (Pmode,
6282 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6283 XEXP (XEXP (x, 1), 0)),
6284 XEXP (XEXP (x, 1), 1));
6287 /* Canonicalize
6288 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6289 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6290 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6291 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6292 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6293 && CONSTANT_P (XEXP (x, 1)))
6295 rtx constant;
6296 rtx other = NULL_RTX;
6298 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6300 constant = XEXP (x, 1);
6301 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6303 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6305 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6306 other = XEXP (x, 1);
6308 else
6309 constant = 0;
6311 if (constant)
6313 changed = 1;
6314 x = gen_rtx_PLUS (Pmode,
6315 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6316 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6317 plus_constant (other, INTVAL (constant)));
6321 if (changed && legitimate_address_p (mode, x, FALSE))
6322 return x;
6324 if (GET_CODE (XEXP (x, 0)) == MULT)
6326 changed = 1;
6327 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6330 if (GET_CODE (XEXP (x, 1)) == MULT)
6332 changed = 1;
6333 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6336 if (changed
6337 && GET_CODE (XEXP (x, 1)) == REG
6338 && GET_CODE (XEXP (x, 0)) == REG)
6339 return x;
6341 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6343 changed = 1;
6344 x = legitimize_pic_address (x, 0);
6347 if (changed && legitimate_address_p (mode, x, FALSE))
6348 return x;
6350 if (GET_CODE (XEXP (x, 0)) == REG)
6352 register rtx temp = gen_reg_rtx (Pmode);
6353 register rtx val = force_operand (XEXP (x, 1), temp);
6354 if (val != temp)
6355 emit_move_insn (temp, val);
6357 XEXP (x, 1) = temp;
6358 return x;
6361 else if (GET_CODE (XEXP (x, 1)) == REG)
6363 register rtx temp = gen_reg_rtx (Pmode);
6364 register rtx val = force_operand (XEXP (x, 0), temp);
6365 if (val != temp)
6366 emit_move_insn (temp, val);
6368 XEXP (x, 0) = temp;
6369 return x;
6373 return x;
6376 /* Print an integer constant expression in assembler syntax. Addition
6377 and subtraction are the only arithmetic that may appear in these
6378 expressions. FILE is the stdio stream to write to, X is the rtx, and
6379 CODE is the operand print code from the output string. */
6381 static void
6382 output_pic_addr_const (file, x, code)
6383 FILE *file;
6384 rtx x;
6385 int code;
6387 char buf[256];
6389 switch (GET_CODE (x))
6391 case PC:
6392 if (flag_pic)
6393 putc ('.', file);
6394 else
6395 abort ();
6396 break;
6398 case SYMBOL_REF:
6399 assemble_name (file, XSTR (x, 0));
6400 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6401 fputs ("@PLT", file);
6402 break;
6404 case LABEL_REF:
6405 x = XEXP (x, 0);
6406 /* FALLTHRU */
6407 case CODE_LABEL:
6408 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6409 assemble_name (asm_out_file, buf);
6410 break;
6412 case CONST_INT:
6413 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6414 break;
6416 case CONST:
6417 /* This used to output parentheses around the expression,
6418 but that does not work on the 386 (either ATT or BSD assembler). */
6419 output_pic_addr_const (file, XEXP (x, 0), code);
6420 break;
6422 case CONST_DOUBLE:
6423 if (GET_MODE (x) == VOIDmode)
6425 /* We can use %d if the number is <32 bits and positive. */
6426 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6427 fprintf (file, "0x%lx%08lx",
6428 (unsigned long) CONST_DOUBLE_HIGH (x),
6429 (unsigned long) CONST_DOUBLE_LOW (x));
6430 else
6431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6433 else
6434 /* We can't handle floating point constants;
6435 PRINT_OPERAND must handle them. */
6436 output_operand_lossage ("floating constant misused");
6437 break;
6439 case PLUS:
6440 /* Some assemblers need integer constants to appear first. */
6441 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6443 output_pic_addr_const (file, XEXP (x, 0), code);
6444 putc ('+', file);
6445 output_pic_addr_const (file, XEXP (x, 1), code);
6447 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6449 output_pic_addr_const (file, XEXP (x, 1), code);
6450 putc ('+', file);
6451 output_pic_addr_const (file, XEXP (x, 0), code);
6453 else
6454 abort ();
6455 break;
6457 case MINUS:
6458 if (!TARGET_MACHO)
6459 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6460 output_pic_addr_const (file, XEXP (x, 0), code);
6461 putc ('-', file);
6462 output_pic_addr_const (file, XEXP (x, 1), code);
6463 if (!TARGET_MACHO)
6464 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6465 break;
6467 case UNSPEC:
6468 if (XVECLEN (x, 0) != 1)
6469 abort ();
6470 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6471 switch (XINT (x, 1))
6473 case UNSPEC_GOT:
6474 fputs ("@GOT", file);
6475 break;
6476 case UNSPEC_GOTOFF:
6477 fputs ("@GOTOFF", file);
6478 break;
6479 case UNSPEC_GOTPCREL:
6480 fputs ("@GOTPCREL(%rip)", file);
6481 break;
6482 case UNSPEC_GOTTPOFF:
6483 /* FIXME: This might be @TPOFF in Sun ld too. */
6484 fputs ("@GOTTPOFF", file);
6485 break;
6486 case UNSPEC_TPOFF:
6487 fputs ("@TPOFF", file);
6488 break;
6489 case UNSPEC_NTPOFF:
6490 if (TARGET_64BIT)
6491 fputs ("@TPOFF", file);
6492 else
6493 fputs ("@NTPOFF", file);
6494 break;
6495 case UNSPEC_DTPOFF:
6496 fputs ("@DTPOFF", file);
6497 break;
6498 case UNSPEC_GOTNTPOFF:
6499 if (TARGET_64BIT)
6500 fputs ("@GOTTPOFF(%rip)", file);
6501 else
6502 fputs ("@GOTNTPOFF", file);
6503 break;
6504 case UNSPEC_INDNTPOFF:
6505 fputs ("@INDNTPOFF", file);
6506 break;
6507 default:
6508 output_operand_lossage ("invalid UNSPEC as operand");
6509 break;
6511 break;
6513 default:
6514 output_operand_lossage ("invalid expression as operand");
6518 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6519 We need to handle our special PIC relocations. */
6521 void
6522 i386_dwarf_output_addr_const (file, x)
6523 FILE *file;
6524 rtx x;
6526 #ifdef ASM_QUAD
6527 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6528 #else
6529 if (TARGET_64BIT)
6530 abort ();
6531 fprintf (file, "%s", ASM_LONG);
6532 #endif
6533 if (flag_pic)
6534 output_pic_addr_const (file, x, '\0');
6535 else
6536 output_addr_const (file, x);
6537 fputc ('\n', file);
6540 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6541 We need to emit DTP-relative relocations. */
6543 void
6544 i386_output_dwarf_dtprel (file, size, x)
6545 FILE *file;
6546 int size;
6547 rtx x;
6549 fputs (ASM_LONG, file);
6550 output_addr_const (file, x);
6551 fputs ("@DTPOFF", file);
6552 switch (size)
6554 case 4:
6555 break;
6556 case 8:
6557 fputs (", 0", file);
6558 break;
6559 default:
6560 abort ();
6564 /* In the name of slightly smaller debug output, and to cater to
6565 general assembler lossage, recognize PIC+GOTOFF and turn it back
6566 into a direct symbol reference. */
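/* For example, (plus (reg %ebx) (const (unspec [(symbol_ref "x")]
   UNSPEC_GOTOFF))) is turned back into the bare (symbol_ref "x") for
   the debug output.  */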
6569 i386_simplify_dwarf_addr (orig_x)
6570 rtx orig_x;
6572 rtx x = orig_x, y;
6574 if (GET_CODE (x) == MEM)
6575 x = XEXP (x, 0);
6577 if (TARGET_64BIT)
6579 if (GET_CODE (x) != CONST
6580 || GET_CODE (XEXP (x, 0)) != UNSPEC
6581 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6582 || GET_CODE (orig_x) != MEM)
6583 return orig_x;
6584 return XVECEXP (XEXP (x, 0), 0, 0);
6587 if (GET_CODE (x) != PLUS
6588 || GET_CODE (XEXP (x, 1)) != CONST)
6589 return orig_x;
6591 if (GET_CODE (XEXP (x, 0)) == REG
6592 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6593 /* %ebx + GOT/GOTOFF */
6594 y = NULL;
6595 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6597 /* %ebx + %reg * scale + GOT/GOTOFF */
6598 y = XEXP (x, 0);
6599 if (GET_CODE (XEXP (y, 0)) == REG
6600 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6601 y = XEXP (y, 1);
6602 else if (GET_CODE (XEXP (y, 1)) == REG
6603 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6604 y = XEXP (y, 0);
6605 else
6606 return orig_x;
6607 if (GET_CODE (y) != REG
6608 && GET_CODE (y) != MULT
6609 && GET_CODE (y) != ASHIFT)
6610 return orig_x;
6612 else
6613 return orig_x;
6615 x = XEXP (XEXP (x, 1), 0);
6616 if (GET_CODE (x) == UNSPEC
6617 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6618 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6620 if (y)
6621 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6622 return XVECEXP (x, 0, 0);
6625 if (GET_CODE (x) == PLUS
6626 && GET_CODE (XEXP (x, 0)) == UNSPEC
6627 && GET_CODE (XEXP (x, 1)) == CONST_INT
6628 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6629 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6630 && GET_CODE (orig_x) != MEM)))
6632 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6633 if (y)
6634 return gen_rtx_PLUS (Pmode, y, x);
6635 return x;
6638 return orig_x;
6641 static void
6642 put_condition_code (code, mode, reverse, fp, file)
6643 enum rtx_code code;
6644 enum machine_mode mode;
6645 int reverse, fp;
6646 FILE *file;
6648 const char *suffix;
6650 if (mode == CCFPmode || mode == CCFPUmode)
6652 enum rtx_code second_code, bypass_code;
6653 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6654 if (bypass_code != NIL || second_code != NIL)
6655 abort ();
6656 code = ix86_fp_compare_code_to_integer (code);
6657 mode = CCmode;
6659 if (reverse)
6660 code = reverse_condition (code);
6662 switch (code)
6664 case EQ:
6665 suffix = "e";
6666 break;
6667 case NE:
6668 suffix = "ne";
6669 break;
6670 case GT:
6671 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6672 abort ();
6673 suffix = "g";
6674 break;
6675 case GTU:
6676 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6677 Those same assemblers have the same but opposite lossage on cmov. */
6678 if (mode != CCmode)
6679 abort ();
6680 suffix = fp ? "nbe" : "a";
6681 break;
6682 case LT:
6683 if (mode == CCNOmode || mode == CCGOCmode)
6684 suffix = "s";
6685 else if (mode == CCmode || mode == CCGCmode)
6686 suffix = "l";
6687 else
6688 abort ();
6689 break;
6690 case LTU:
6691 if (mode != CCmode)
6692 abort ();
6693 suffix = "b";
6694 break;
6695 case GE:
6696 if (mode == CCNOmode || mode == CCGOCmode)
6697 suffix = "ns";
6698 else if (mode == CCmode || mode == CCGCmode)
6699 suffix = "ge";
6700 else
6701 abort ();
6702 break;
6703 case GEU:
6704 /* ??? As above. */
6705 if (mode != CCmode)
6706 abort ();
6707 suffix = fp ? "nb" : "ae";
6708 break;
6709 case LE:
6710 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6711 abort ();
6712 suffix = "le";
6713 break;
6714 case LEU:
6715 if (mode != CCmode)
6716 abort ();
6717 suffix = "be";
6718 break;
6719 case UNORDERED:
6720 suffix = fp ? "u" : "p";
6721 break;
6722 case ORDERED:
6723 suffix = fp ? "nu" : "np";
6724 break;
6725 default:
6726 abort ();
6728 fputs (suffix, file);
6731 void
6732 print_reg (x, code, file)
6733 rtx x;
6734 int code;
6735 FILE *file;
6737 if (REGNO (x) == ARG_POINTER_REGNUM
6738 || REGNO (x) == FRAME_POINTER_REGNUM
6739 || REGNO (x) == FLAGS_REG
6740 || REGNO (x) == FPSR_REG)
6741 abort ();
6743 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6744 putc ('%', file);
6746 if (code == 'w' || MMX_REG_P (x))
6747 code = 2;
6748 else if (code == 'b')
6749 code = 1;
6750 else if (code == 'k')
6751 code = 4;
6752 else if (code == 'q')
6753 code = 8;
6754 else if (code == 'y')
6755 code = 3;
6756 else if (code == 'h')
6757 code = 0;
6758 else
6759 code = GET_MODE_SIZE (GET_MODE (x));
6761 /* Irritatingly, the AMD extended registers use a different naming convention
6762 from the normal registers. */
6763 if (REX_INT_REG_P (x))
6765 if (!TARGET_64BIT)
6766 abort ();
6767 switch (code)
6769 case 0:
6770 error ("extended registers have no high halves");
6771 break;
6772 case 1:
6773 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6774 break;
6775 case 2:
6776 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6777 break;
6778 case 4:
6779 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6780 break;
6781 case 8:
6782 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6783 break;
6784 default:
6785 error ("unsupported operand size for extended register");
6786 break;
6788 return;
6790 switch (code)
6792 case 3:
6793 if (STACK_TOP_P (x))
6795 fputs ("st(0)", file);
6796 break;
6798 /* FALLTHRU */
6799 case 8:
6800 case 4:
6801 case 12:
6802 if (! ANY_FP_REG_P (x))
6803 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6804 /* FALLTHRU */
6805 case 16:
6806 case 2:
6807 fputs (hi_reg_name[REGNO (x)], file);
6808 break;
6809 case 1:
6810 fputs (qi_reg_name[REGNO (x)], file);
6811 break;
6812 case 0:
6813 fputs (qi_high_reg_name[REGNO (x)], file);
6814 break;
6815 default:
6816 abort ();
6820 /* Locate some local-dynamic symbol still in use by this function
6821 so that we can print its name in some tls_local_dynamic_base
6822 pattern. */
6824 static const char *
6825 get_some_local_dynamic_name ()
6827 rtx insn;
6829 if (cfun->machine->some_ld_name)
6830 return cfun->machine->some_ld_name;
6832 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6833 if (INSN_P (insn)
6834 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6835 return cfun->machine->some_ld_name;
6837 abort ();
6840 static int
6841 get_some_local_dynamic_name_1 (px, data)
6842 rtx *px;
6843 void *data ATTRIBUTE_UNUSED;
6845 rtx x = *px;
6847 if (GET_CODE (x) == SYMBOL_REF
6848 && local_dynamic_symbolic_operand (x, Pmode))
6850 cfun->machine->some_ld_name = XSTR (x, 0);
6851 return 1;
6854 return 0;
6857 /* Meaning of CODE:
6858 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6859 C -- print opcode suffix for set/cmov insn.
6860 c -- like C, but print reversed condition
6861 F,f -- likewise, but for floating-point.
6862 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6863 nothing
6864 R -- print the prefix for register names.
6865 z -- print the opcode suffix for the size of the current operand.
6866 * -- print a star (in certain assembler syntax)
6867 A -- print an absolute memory reference.
6868 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6869 s -- print a shift double count, followed by the assembler's argument
6870 delimiter.
6871 b -- print the QImode name of the register for the indicated operand.
6872 %b0 would print %al if operands[0] is reg 0.
6873 w -- likewise, print the HImode name of the register.
6874 k -- likewise, print the SImode name of the register.
6875 q -- likewise, print the DImode name of the register.
6876 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6877 y -- print "st(0)" instead of "st" as a register.
6878 D -- print condition for SSE cmp instruction.
6879 P -- if PIC, print an @PLT suffix.
6880 X -- don't print any sort of PIC '@' suffix for a symbol.
6881 & -- print some in-use local-dynamic symbol name.
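   For example, if operands[0] is (reg:SI 0), i.e. %eax, then "%k0"
   prints "%eax", "%w0" prints "%ax", "%b0" prints "%al", "%h0" prints
   "%ah" and, on x86-64, "%q0" prints "%rax".  */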
6884 void
6885 print_operand (file, x, code)
6886 FILE *file;
6887 rtx x;
6888 int code;
6890 if (code)
6892 switch (code)
6894 case '*':
6895 if (ASSEMBLER_DIALECT == ASM_ATT)
6896 putc ('*', file);
6897 return;
6899 case '&':
6900 assemble_name (file, get_some_local_dynamic_name ());
6901 return;
6903 case 'A':
6904 if (ASSEMBLER_DIALECT == ASM_ATT)
6905 putc ('*', file);
6906 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6908 /* Intel syntax. For absolute addresses, registers should not
6909 be surrounded by brackets. */
6910 if (GET_CODE (x) != REG)
6912 putc ('[', file);
6913 PRINT_OPERAND (file, x, 0);
6914 putc (']', file);
6915 return;
6918 else
6919 abort ();
6921 PRINT_OPERAND (file, x, 0);
6922 return;
6925 case 'L':
6926 if (ASSEMBLER_DIALECT == ASM_ATT)
6927 putc ('l', file);
6928 return;
6930 case 'W':
6931 if (ASSEMBLER_DIALECT == ASM_ATT)
6932 putc ('w', file);
6933 return;
6935 case 'B':
6936 if (ASSEMBLER_DIALECT == ASM_ATT)
6937 putc ('b', file);
6938 return;
6940 case 'Q':
6941 if (ASSEMBLER_DIALECT == ASM_ATT)
6942 putc ('l', file);
6943 return;
6945 case 'S':
6946 if (ASSEMBLER_DIALECT == ASM_ATT)
6947 putc ('s', file);
6948 return;
6950 case 'T':
6951 if (ASSEMBLER_DIALECT == ASM_ATT)
6952 putc ('t', file);
6953 return;
6955 case 'z':
6956 /* 387 opcodes don't get size suffixes if the operands are
6957 registers. */
6958 if (STACK_REG_P (x))
6959 return;
6961 /* Likewise if using Intel opcodes. */
6962 if (ASSEMBLER_DIALECT == ASM_INTEL)
6963 return;
6965 /* This is the size of op from size of operand. */
6966 switch (GET_MODE_SIZE (GET_MODE (x)))
6968 case 2:
6969 #ifdef HAVE_GAS_FILDS_FISTS
6970 putc ('s', file);
6971 #endif
6972 return;
6974 case 4:
6975 if (GET_MODE (x) == SFmode)
6977 putc ('s', file);
6978 return;
6980 else
6981 putc ('l', file);
6982 return;
6984 case 12:
6985 case 16:
6986 putc ('t', file);
6987 return;
6989 case 8:
6990 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6992 #ifdef GAS_MNEMONICS
6993 putc ('q', file);
6994 #else
6995 putc ('l', file);
6996 putc ('l', file);
6997 #endif
6999 else
7000 putc ('l', file);
7001 return;
7003 default:
7004 abort ();
7007 case 'b':
7008 case 'w':
7009 case 'k':
7010 case 'q':
7011 case 'h':
7012 case 'y':
7013 case 'X':
7014 case 'P':
7015 break;
7017 case 's':
7018 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7020 PRINT_OPERAND (file, x, 0);
7021 putc (',', file);
7023 return;
7025 case 'D':
7026 /* A little bit of braindamage here. The SSE compare instructions
7027 use completely different names for the comparisons than the
7028 fp conditional moves do. */
7029 switch (GET_CODE (x))
7031 case EQ:
7032 case UNEQ:
7033 fputs ("eq", file);
7034 break;
7035 case LT:
7036 case UNLT:
7037 fputs ("lt", file);
7038 break;
7039 case LE:
7040 case UNLE:
7041 fputs ("le", file);
7042 break;
7043 case UNORDERED:
7044 fputs ("unord", file);
7045 break;
7046 case NE:
7047 case LTGT:
7048 fputs ("neq", file);
7049 break;
7050 case UNGE:
7051 case GE:
7052 fputs ("nlt", file);
7053 break;
7054 case UNGT:
7055 case GT:
7056 fputs ("nle", file);
7057 break;
7058 case ORDERED:
7059 fputs ("ord", file);
7060 break;
7061 default:
7062 abort ();
7063 break;
7065 return;
7066 case 'O':
7067 #ifdef CMOV_SUN_AS_SYNTAX
7068 if (ASSEMBLER_DIALECT == ASM_ATT)
7070 switch (GET_MODE (x))
7072 case HImode: putc ('w', file); break;
7073 case SImode:
7074 case SFmode: putc ('l', file); break;
7075 case DImode:
7076 case DFmode: putc ('q', file); break;
7077 default: abort ();
7079 putc ('.', file);
7081 #endif
7082 return;
7083 case 'C':
7084 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7085 return;
7086 case 'F':
7087 #ifdef CMOV_SUN_AS_SYNTAX
7088 if (ASSEMBLER_DIALECT == ASM_ATT)
7089 putc ('.', file);
7090 #endif
7091 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7092 return;
7094 /* Like above, but reverse condition */
7095 case 'c':
7096 /* Check to see if argument to %c is really a constant
7097 and not a condition code which needs to be reversed. */
7098 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7100 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7101 return;
7103 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7104 return;
7105 case 'f':
7106 #ifdef CMOV_SUN_AS_SYNTAX
7107 if (ASSEMBLER_DIALECT == ASM_ATT)
7108 putc ('.', file);
7109 #endif
7110 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7111 return;
7112 case '+':
7114 rtx x;
7116 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7117 return;
7119 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7120 if (x)
7122 int pred_val = INTVAL (XEXP (x, 0));
7124 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7125 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7127 int taken = pred_val > REG_BR_PROB_BASE / 2;
7128 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7130 /* Emit hints only in the case default branch prediction
7131 heuristics would fail. */
7132 if (taken != cputaken)
7134 /* We use the 0x3e (DS) prefix for taken branches and the
7135 0x2e (CS) prefix for not taken branches. */
7136 if (taken)
7137 fputs ("ds ; ", file);
7138 else
7139 fputs ("cs ; ", file);
7143 return;
7145 default:
7146 output_operand_lossage ("invalid operand code `%c'", code);
7150 if (GET_CODE (x) == REG)
7152 PRINT_REG (x, code, file);
7155 else if (GET_CODE (x) == MEM)
7157 /* No `byte ptr' prefix for call instructions. */
7158 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7160 const char * size;
7161 switch (GET_MODE_SIZE (GET_MODE (x)))
7163 case 1: size = "BYTE"; break;
7164 case 2: size = "WORD"; break;
7165 case 4: size = "DWORD"; break;
7166 case 8: size = "QWORD"; break;
7167 case 12: size = "XWORD"; break;
7168 case 16: size = "XMMWORD"; break;
7169 default:
7170 abort ();
7173 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7174 if (code == 'b')
7175 size = "BYTE";
7176 else if (code == 'w')
7177 size = "WORD";
7178 else if (code == 'k')
7179 size = "DWORD";
7181 fputs (size, file);
7182 fputs (" PTR ", file);
7185 x = XEXP (x, 0);
7186 if (flag_pic && CONSTANT_ADDRESS_P (x))
7187 output_pic_addr_const (file, x, code);
7188 /* Avoid (%rip) for call operands. */
7189 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7190 && GET_CODE (x) != CONST_INT)
7191 output_addr_const (file, x);
7192 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7193 output_operand_lossage ("invalid constraints for operand");
7194 else
7195 output_address (x);
7198 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7200 REAL_VALUE_TYPE r;
7201 long l;
7203 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7204 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7206 if (ASSEMBLER_DIALECT == ASM_ATT)
7207 putc ('$', file);
7208 fprintf (file, "0x%lx", l);
7211 /* These float cases don't actually occur as immediate operands. */
7212 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7214 char dstr[30];
7216 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7217 fprintf (file, "%s", dstr);
7220 else if (GET_CODE (x) == CONST_DOUBLE
7221 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7223 char dstr[30];
7225 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7226 fprintf (file, "%s", dstr);
7229 else
7231 if (code != 'P')
7233 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7235 if (ASSEMBLER_DIALECT == ASM_ATT)
7236 putc ('$', file);
7238 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7239 || GET_CODE (x) == LABEL_REF)
7241 if (ASSEMBLER_DIALECT == ASM_ATT)
7242 putc ('$', file);
7243 else
7244 fputs ("OFFSET FLAT:", file);
7247 if (GET_CODE (x) == CONST_INT)
7248 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7249 else if (flag_pic)
7250 output_pic_addr_const (file, x, code);
7251 else
7252 output_addr_const (file, x);
7256 /* Print a memory operand whose address is ADDR. */
7258 void
7259 print_operand_address (file, addr)
7260 FILE *file;
7261 register rtx addr;
7263 struct ix86_address parts;
7264 rtx base, index, disp;
7265 int scale;
7267 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7269 if (ASSEMBLER_DIALECT == ASM_INTEL)
7270 fputs ("DWORD PTR ", file);
7271 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7272 putc ('%', file);
7273 if (TARGET_64BIT)
7274 fputs ("fs:0", file);
7275 else
7276 fputs ("gs:0", file);
7277 return;
7280 if (! ix86_decompose_address (addr, &parts))
7281 abort ();
7283 base = parts.base;
7284 index = parts.index;
7285 disp = parts.disp;
7286 scale = parts.scale;
7288 if (!base && !index)
7290 /* A displacement-only address requires special attention. */
7292 if (GET_CODE (disp) == CONST_INT)
7294 if (ASSEMBLER_DIALECT == ASM_INTEL)
7296 if (USER_LABEL_PREFIX[0] == 0)
7297 putc ('%', file);
7298 fputs ("ds:", file);
7300 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7302 else if (flag_pic)
7303 output_pic_addr_const (file, addr, 0);
7304 else
7305 output_addr_const (file, addr);
7307 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7308 if (TARGET_64BIT
7309 && ((GET_CODE (addr) == SYMBOL_REF
7310 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7311 || GET_CODE (addr) == LABEL_REF
7312 || (GET_CODE (addr) == CONST
7313 && GET_CODE (XEXP (addr, 0)) == PLUS
7314 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7315 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7316 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7317 fputs ("(%rip)", file);
7319 else
7321 if (ASSEMBLER_DIALECT == ASM_ATT)
7323 if (disp)
7325 if (flag_pic)
7326 output_pic_addr_const (file, disp, 0);
7327 else if (GET_CODE (disp) == LABEL_REF)
7328 output_asm_label (disp);
7329 else
7330 output_addr_const (file, disp);
7333 putc ('(', file);
7334 if (base)
7335 PRINT_REG (base, 0, file);
7336 if (index)
7338 putc (',', file);
7339 PRINT_REG (index, 0, file);
7340 if (scale != 1)
7341 fprintf (file, ",%d", scale);
7343 putc (')', file);
7345 else
7347 rtx offset = NULL_RTX;
7349 if (disp)
7351 /* Pull out the offset of a symbol; print any symbol itself. */
7352 if (GET_CODE (disp) == CONST
7353 && GET_CODE (XEXP (disp, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7356 offset = XEXP (XEXP (disp, 0), 1);
7357 disp = gen_rtx_CONST (VOIDmode,
7358 XEXP (XEXP (disp, 0), 0));
7361 if (flag_pic)
7362 output_pic_addr_const (file, disp, 0);
7363 else if (GET_CODE (disp) == LABEL_REF)
7364 output_asm_label (disp);
7365 else if (GET_CODE (disp) == CONST_INT)
7366 offset = disp;
7367 else
7368 output_addr_const (file, disp);
7371 putc ('[', file);
7372 if (base)
7374 PRINT_REG (base, 0, file);
7375 if (offset)
7377 if (INTVAL (offset) >= 0)
7378 putc ('+', file);
7379 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7382 else if (offset)
7383 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7384 else
7385 putc ('0', file);
7387 if (index)
7389 putc ('+', file);
7390 PRINT_REG (index, 0, file);
7391 if (scale != 1)
7392 fprintf (file, "*%d", scale);
7394 putc (']', file);
7399 bool
7400 output_addr_const_extra (file, x)
7401 FILE *file;
7402 rtx x;
7404 rtx op;
7406 if (GET_CODE (x) != UNSPEC)
7407 return false;
7409 op = XVECEXP (x, 0, 0);
7410 switch (XINT (x, 1))
7412 case UNSPEC_GOTTPOFF:
7413 output_addr_const (file, op);
7414 /* FIXME: This might be @TPOFF in Sun ld. */
7415 fputs ("@GOTTPOFF", file);
7416 break;
7417 case UNSPEC_TPOFF:
7418 output_addr_const (file, op);
7419 fputs ("@TPOFF", file);
7420 break;
7421 case UNSPEC_NTPOFF:
7422 output_addr_const (file, op);
7423 if (TARGET_64BIT)
7424 fputs ("@TPOFF", file);
7425 else
7426 fputs ("@NTPOFF", file);
7427 break;
7428 case UNSPEC_DTPOFF:
7429 output_addr_const (file, op);
7430 fputs ("@DTPOFF", file);
7431 break;
7432 case UNSPEC_GOTNTPOFF:
7433 output_addr_const (file, op);
7434 if (TARGET_64BIT)
7435 fputs ("@GOTTPOFF(%rip)", file);
7436 else
7437 fputs ("@GOTNTPOFF", file);
7438 break;
7439 case UNSPEC_INDNTPOFF:
7440 output_addr_const (file, op);
7441 fputs ("@INDNTPOFF", file);
7442 break;
7444 default:
7445 return false;
7448 return true;
7451 /* Split one or more DImode RTL references into pairs of SImode
7452 references. The RTL can be REG, offsettable MEM, integer constant, or
7453 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7454 split and "num" is its length. lo_half and hi_half are output arrays
7455 that parallel "operands". */
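/* For example, (mem:DI addr) splits into lo = (mem:SI addr) and
   hi = (mem:SI addr+4), and the constant 0x100000002 splits into
   lo = (const_int 2) and hi = (const_int 1), the low word coming
   first on this little-endian target.  */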
7457 void
7458 split_di (operands, num, lo_half, hi_half)
7459 rtx operands[];
7460 int num;
7461 rtx lo_half[], hi_half[];
7463 while (num--)
7465 rtx op = operands[num];
7467 /* simplify_subreg refuses to split volatile memory references,
7468 but we still have to handle them. */
7469 if (GET_CODE (op) == MEM)
7471 lo_half[num] = adjust_address (op, SImode, 0);
7472 hi_half[num] = adjust_address (op, SImode, 4);
7474 else
7476 lo_half[num] = simplify_gen_subreg (SImode, op,
7477 GET_MODE (op) == VOIDmode
7478 ? DImode : GET_MODE (op), 0);
7479 hi_half[num] = simplify_gen_subreg (SImode, op,
7480 GET_MODE (op) == VOIDmode
7481 ? DImode : GET_MODE (op), 4);
7485 /* Split one or more TImode RTL references into pairs of DImode
7486 references. The RTL can be REG, offsettable MEM, integer constant, or
7487 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7488 split and "num" is its length. lo_half and hi_half are output arrays
7489 that parallel "operands". */
7491 void
7492 split_ti (operands, num, lo_half, hi_half)
7493 rtx operands[];
7494 int num;
7495 rtx lo_half[], hi_half[];
7497 while (num--)
7499 rtx op = operands[num];
7501 /* simplify_subreg refuses to split volatile memory references, but we
7502 still have to handle them. */
7503 if (GET_CODE (op) == MEM)
7505 lo_half[num] = adjust_address (op, DImode, 0);
7506 hi_half[num] = adjust_address (op, DImode, 8);
7508 else
7510 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7511 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7516 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7517 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7518 is the expression of the binary operation. The output may either be
7519 emitted here, or returned to the caller, like all output_* functions.
7521 There is no guarantee that the operands are the same mode, as they
7522 might be within FLOAT or FLOAT_EXTEND expressions. */
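/* A note on the templates below: the "{att|intel}" braces are the
   standard two-dialect output syntax, selected by ASSEMBLER_DIALECT.
   E.g. the SSE branch returns "addss\t{%2, %0|%0, %2}", printed as
   "addss %2, %0" for AT&T assemblers and "addss %0, %2" for Intel.  */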
7524 #ifndef SYSV386_COMPAT
7525 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7526 wants to fix the assemblers because that causes incompatibility
7527 with gcc. No-one wants to fix gcc because that causes
7528 incompatibility with assemblers... You can use the option of
7529 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7530 #define SYSV386_COMPAT 1
7531 #endif
7533 const char *
7534 output_387_binary_op (insn, operands)
7535 rtx insn;
7536 rtx *operands;
7538 static char buf[30];
7539 const char *p;
7540 const char *ssep;
7541 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7543 #ifdef ENABLE_CHECKING
7544 /* Even if we do not want to check the inputs, this documents input
7545 constraints, which helps in understanding the following code. */
7546 if (STACK_REG_P (operands[0])
7547 && ((REG_P (operands[1])
7548 && REGNO (operands[0]) == REGNO (operands[1])
7549 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7550 || (REG_P (operands[2])
7551 && REGNO (operands[0]) == REGNO (operands[2])
7552 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7553 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7554 ; /* ok */
7555 else if (!is_sse)
7556 abort ();
7557 #endif
7559 switch (GET_CODE (operands[3]))
7561 case PLUS:
7562 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7563 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7564 p = "fiadd";
7565 else
7566 p = "fadd";
7567 ssep = "add";
7568 break;
7570 case MINUS:
7571 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7572 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7573 p = "fisub";
7574 else
7575 p = "fsub";
7576 ssep = "sub";
7577 break;
7579 case MULT:
7580 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7581 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7582 p = "fimul";
7583 else
7584 p = "fmul";
7585 ssep = "mul";
7586 break;
7588 case DIV:
7589 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7590 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7591 p = "fidiv";
7592 else
7593 p = "fdiv";
7594 ssep = "div";
7595 break;
7597 default:
7598 abort ();
7601 if (is_sse)
7603 strcpy (buf, ssep);
7604 if (GET_MODE (operands[0]) == SFmode)
7605 strcat (buf, "ss\t{%2, %0|%0, %2}");
7606 else
7607 strcat (buf, "sd\t{%2, %0|%0, %2}");
7608 return buf;
7610 strcpy (buf, p);
7612 switch (GET_CODE (operands[3]))
7614 case MULT:
7615 case PLUS:
7616 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7618 rtx temp = operands[2];
7619 operands[2] = operands[1];
7620 operands[1] = temp;
7623 /* We know operands[0] == operands[1]. */
7625 if (GET_CODE (operands[2]) == MEM)
7627 p = "%z2\t%2";
7628 break;
7631 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7633 if (STACK_TOP_P (operands[0]))
7634 /* How is it that we are storing to a dead operand[2]?
7635 Well, presumably operands[1] is dead too. We can't
7636 store the result to st(0) as st(0) gets popped on this
7637 instruction. Instead store to operands[2] (which I
7638 think has to be st(1)). st(1) will be popped later.
7639 gcc <= 2.8.1 didn't have this check and generated
7640 assembly code that the Unixware assembler rejected. */
7641 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7642 else
7643 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7644 break;
7647 if (STACK_TOP_P (operands[0]))
7648 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7649 else
7650 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7651 break;
7653 case MINUS:
7654 case DIV:
7655 if (GET_CODE (operands[1]) == MEM)
7657 p = "r%z1\t%1";
7658 break;
7661 if (GET_CODE (operands[2]) == MEM)
7663 p = "%z2\t%2";
7664 break;
7667 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7669 #if SYSV386_COMPAT
7670 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7671 derived assemblers, confusingly reverse the direction of
7672 the operation for fsub{r} and fdiv{r} when the
7673 destination register is not st(0). The Intel assembler
7674 doesn't have this brain damage. Read !SYSV386_COMPAT to
7675 figure out what the hardware really does. */
7676 if (STACK_TOP_P (operands[0]))
7677 p = "{p\t%0, %2|rp\t%2, %0}";
7678 else
7679 p = "{rp\t%2, %0|p\t%0, %2}";
7680 #else
7681 if (STACK_TOP_P (operands[0]))
7682 /* As above for fmul/fadd, we can't store to st(0). */
7683 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7684 else
7685 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7686 #endif
7687 break;
7690 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7692 #if SYSV386_COMPAT
7693 if (STACK_TOP_P (operands[0]))
7694 p = "{rp\t%0, %1|p\t%1, %0}";
7695 else
7696 p = "{p\t%1, %0|rp\t%0, %1}";
7697 #else
7698 if (STACK_TOP_P (operands[0]))
7699 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7700 else
7701 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7702 #endif
7703 break;
7706 if (STACK_TOP_P (operands[0]))
7708 if (STACK_TOP_P (operands[1]))
7709 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7710 else
7711 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7712 break;
7714 else if (STACK_TOP_P (operands[1]))
7716 #if SYSV386_COMPAT
7717 p = "{\t%1, %0|r\t%0, %1}";
7718 #else
7719 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7720 #endif
7722 else
7724 #if SYSV386_COMPAT
7725 p = "{r\t%2, %0|\t%0, %2}";
7726 #else
7727 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7728 #endif
7730 break;
7732 default:
7733 abort ();
7736 strcat (buf, p);
7737 return buf;
7740 /* Output code to initialize control word copies used by
7741 trunc?f?i patterns. NORMAL is set to the current control word, while
7742 ROUND_DOWN is set to a copy whose rounding control is forced to truncation. */
7743 void
7744 emit_i387_cw_initialization (normal, round_down)
7745 rtx normal, round_down;
7747 rtx reg = gen_reg_rtx (HImode);
7749 emit_insn (gen_x86_fnstcw_1 (normal));
7750 emit_move_insn (reg, normal);
7751 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7752 && !TARGET_64BIT)
7753 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7754 else
7755 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
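/* 0xc00 sets both rounding-control bits (bits 10 and 11) of the i387
   control word, i.e. round toward zero as required by C conversions
   to integer.  The movsi_insv_1 alternative above writes 0x0c into
   bits 8..15 of the copy instead — the same effect — and is used only
   when partial register writes do not stall and we are not optimizing
   for size.  */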
7756 emit_move_insn (round_down, reg);
7759 /* Output code for INSN to convert a float to a signed int. OPERANDS
7760 are the insn operands. The output may be [HSD]Imode and the input
7761 operand may be [SDX]Fmode. */
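/* For illustration, the typical sequence emitted below is

	fldcw	%3		# switch to the truncating control word
	fistp%z0	%0	# store (and pop) the integer result
	fldcw	%2		# restore the original control word

   with an extra "fld %y1" first when a DImode result forces the
   popping store but the input value must stay live.  */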
7763 const char *
7764 output_fix_trunc (insn, operands)
7765 rtx insn;
7766 rtx *operands;
7768 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7769 int dimode_p = GET_MODE (operands[0]) == DImode;
7771 /* Jump through a hoop or two for DImode, since the hardware has no
7772 non-popping instruction. We used to do this a different way, but
7773 that was somewhat fragile and broke with post-reload splitters. */
7774 if (dimode_p && !stack_top_dies)
7775 output_asm_insn ("fld\t%y1", operands);
7777 if (!STACK_TOP_P (operands[1]))
7778 abort ();
7780 if (GET_CODE (operands[0]) != MEM)
7781 abort ();
7783 output_asm_insn ("fldcw\t%3", operands);
7784 if (stack_top_dies || dimode_p)
7785 output_asm_insn ("fistp%z0\t%0", operands);
7786 else
7787 output_asm_insn ("fist%z0\t%0", operands);
7788 output_asm_insn ("fldcw\t%2", operands);
7790 return "";
7793 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7794 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7795 when fucom should be used. */
7797 const char *
7798 output_fp_compare (insn, operands, eflags_p, unordered_p)
7799 rtx insn;
7800 rtx *operands;
7801 int eflags_p, unordered_p;
7803 int stack_top_dies;
7804 rtx cmp_op0 = operands[0];
7805 rtx cmp_op1 = operands[1];
7806 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7808 if (eflags_p == 2)
7810 cmp_op0 = cmp_op1;
7811 cmp_op1 = operands[2];
7813 if (is_sse)
7815 if (GET_MODE (operands[0]) == SFmode)
7816 if (unordered_p)
7817 return "ucomiss\t{%1, %0|%0, %1}";
7818 else
7819 return "comiss\t{%1, %0|%0, %1}";
7820 else
7821 if (unordered_p)
7822 return "ucomisd\t{%1, %0|%0, %1}";
7823 else
7824 return "comisd\t{%1, %0|%0, %1}";
7827 if (! STACK_TOP_P (cmp_op0))
7828 abort ();
7830 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7832 if (STACK_REG_P (cmp_op1)
7833 && stack_top_dies
7834 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7835 && REGNO (cmp_op1) != FIRST_STACK_REG)
7837 /* If the top of the 387 stack dies, and the other operand
7838 is also a stack register that dies, then this must be a
7839 `fcompp' float compare. */
7841 if (eflags_p == 1)
7843 /* There is no double popping fcomi variant. Fortunately,
7844 eflags is immune from the fstp's cc clobbering. */
7845 if (unordered_p)
7846 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7847 else
7848 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7849 return "fstp\t%y0";
7851 else
7853 if (eflags_p == 2)
7855 if (unordered_p)
7856 return "fucompp\n\tfnstsw\t%0";
7857 else
7858 return "fcompp\n\tfnstsw\t%0";
7860 else
7862 if (unordered_p)
7863 return "fucompp";
7864 else
7865 return "fcompp";
7869 else
7871 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7873 static const char * const alt[24] =
7875 "fcom%z1\t%y1",
7876 "fcomp%z1\t%y1",
7877 "fucom%z1\t%y1",
7878 "fucomp%z1\t%y1",
7880 "ficom%z1\t%y1",
7881 "ficomp%z1\t%y1",
7882 NULL,
7883 NULL,
7885 "fcomi\t{%y1, %0|%0, %y1}",
7886 "fcomip\t{%y1, %0|%0, %y1}",
7887 "fucomi\t{%y1, %0|%0, %y1}",
7888 "fucomip\t{%y1, %0|%0, %y1}",
7890 NULL,
7891 NULL,
7892 NULL,
7893 NULL,
7895 "fcom%z2\t%y2\n\tfnstsw\t%0",
7896 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7897 "fucom%z2\t%y2\n\tfnstsw\t%0",
7898 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7900 "ficom%z2\t%y2\n\tfnstsw\t%0",
7901 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7902 NULL,
7903 NULL
7906 int mask;
7907 const char *ret;
7909 mask = eflags_p << 3;
7910 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7911 mask |= unordered_p << 1;
7912 mask |= stack_top_dies;
7914 if (mask >= 24)
7915 abort ();
7916 ret = alt[mask];
7917 if (ret == NULL)
7918 abort ();
7920 return ret;
7924 void
7925 ix86_output_addr_vec_elt (file, value)
7926 FILE *file;
7927 int value;
7929 const char *directive = ASM_LONG;
7931 if (TARGET_64BIT)
7933 #ifdef ASM_QUAD
7934 directive = ASM_QUAD;
7935 #else
7936 abort ();
7937 #endif
7940 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7943 void
7944 ix86_output_addr_diff_elt (file, value, rel)
7945 FILE *file;
7946 int value, rel;
7948 if (TARGET_64BIT)
7949 fprintf (file, "%s%s%d-%s%d\n",
7950 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7951 else if (HAVE_AS_GOTOFF_IN_DATA)
7952 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7953 #if TARGET_MACHO
7954 else if (TARGET_MACHO)
7955 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7956 machopic_function_base_name () + 1);
7957 #endif
7958 else
7959 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7960 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7963 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7964 for the target. */
7966 void
7967 ix86_expand_clear (dest)
7968 rtx dest;
7970 rtx tmp;
7972 /* We play register width games, which are only valid after reload. */
7973 if (!reload_completed)
7974 abort ();
7976 /* Avoid HImode and its attendant prefix byte. */
7977 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7978 dest = gen_rtx_REG (SImode, REGNO (dest));
7980 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7982 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7983 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7985 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7986 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7989 emit_insn (tmp);
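/* The net effect is "xor reg, reg" (with an explicit flags clobber in
   the pattern) unless TARGET_USE_MOV0 requests "mov $0, reg" and we
   are not optimizing for size.  */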
7992 /* X is an unchanging MEM. If it is a constant pool reference, return
7993 the constant pool rtx, else NULL. */
7995 static rtx
7996 maybe_get_pool_constant (x)
7997 rtx x;
7999 x = XEXP (x, 0);
8001 if (flag_pic && ! TARGET_64BIT)
8003 if (GET_CODE (x) != PLUS)
8004 return NULL_RTX;
8005 if (XEXP (x, 0) != pic_offset_table_rtx)
8006 return NULL_RTX;
8007 x = XEXP (x, 1);
8008 if (GET_CODE (x) != CONST)
8009 return NULL_RTX;
8010 x = XEXP (x, 0);
8011 if (GET_CODE (x) != UNSPEC)
8012 return NULL_RTX;
8013 if (XINT (x, 1) != UNSPEC_GOTOFF)
8014 return NULL_RTX;
8015 x = XVECEXP (x, 0, 0);
8018 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8019 return get_pool_constant (x);
8021 return NULL_RTX;
8024 void
8025 ix86_expand_move (mode, operands)
8026 enum machine_mode mode;
8027 rtx operands[];
8029 int strict = (reload_in_progress || reload_completed);
8030 rtx insn, op0, op1, tmp;
8032 op0 = operands[0];
8033 op1 = operands[1];
8035 if (tls_symbolic_operand (op1, Pmode))
8037 op1 = legitimize_address (op1, op1, VOIDmode);
8038 if (GET_CODE (op0) == MEM)
8040 tmp = gen_reg_rtx (mode);
8041 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8042 op1 = tmp;
8045 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8047 #if TARGET_MACHO
8048 if (MACHOPIC_PURE)
8050 rtx temp = ((reload_in_progress
8051 || ((op0 && GET_CODE (op0) == REG)
8052 && mode == Pmode))
8053 ? op0 : gen_reg_rtx (Pmode));
8054 op1 = machopic_indirect_data_reference (op1, temp);
8055 op1 = machopic_legitimize_pic_address (op1, mode,
8056 temp == op1 ? 0 : temp);
8058 else
8060 if (MACHOPIC_INDIRECT)
8061 op1 = machopic_indirect_data_reference (op1, 0);
8063 if (op0 != op1)
8065 insn = gen_rtx_SET (VOIDmode, op0, op1);
8066 emit_insn (insn);
8068 return;
8069 #endif /* TARGET_MACHO */
8070 if (GET_CODE (op0) == MEM)
8071 op1 = force_reg (Pmode, op1);
8072 else
8074 rtx temp = op0;
8075 if (GET_CODE (temp) != REG)
8076 temp = gen_reg_rtx (Pmode);
8077 temp = legitimize_pic_address (op1, temp);
8078 if (temp == op0)
8079 return;
8080 op1 = temp;
8083 else
8085 if (GET_CODE (op0) == MEM
8086 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8087 || !push_operand (op0, mode))
8088 && GET_CODE (op1) == MEM)
8089 op1 = force_reg (mode, op1);
8091 if (push_operand (op0, mode)
8092 && ! general_no_elim_operand (op1, mode))
8093 op1 = copy_to_mode_reg (mode, op1);
8095 /* Force large constants in 64-bit compilation into a register
8096 to get them CSEed.  */
8097 if (TARGET_64BIT && mode == DImode
8098 && immediate_operand (op1, mode)
8099 && !x86_64_zero_extended_value (op1)
8100 && !register_operand (op0, mode)
8101 && optimize && !reload_completed && !reload_in_progress)
8102 op1 = copy_to_mode_reg (mode, op1);
8104 if (FLOAT_MODE_P (mode))
8106 /* If we are loading a floating point constant to a register,
8107 force the value to memory now, since we'll get better code
8108 out the back end. */
8110 if (strict)
8112 else if (GET_CODE (op1) == CONST_DOUBLE
8113 && register_operand (op0, mode))
8114 op1 = validize_mem (force_const_mem (mode, op1));
8118 insn = gen_rtx_SET (VOIDmode, op0, op1);
8120 emit_insn (insn);
8123 void
8124 ix86_expand_vector_move (mode, operands)
8125 enum machine_mode mode;
8126 rtx operands[];
8128 /* Force constants other than zero into memory.  We do not know how
8129 the instructions used to build constants modify the upper 64 bits
8130 of the register; once we have that information, we may be able
8131 to handle some of them more efficiently.  */
8132 if ((reload_in_progress | reload_completed) == 0
8133 && register_operand (operands[0], mode)
8134 && CONSTANT_P (operands[1]))
8135 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8137 /* Make operand1 a register if it isn't already. */
8138 if (!no_new_pseudos
8139 && !register_operand (operands[0], mode)
8140 && !register_operand (operands[1], mode))
8142 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8143 emit_move_insn (operands[0], temp);
8144 return;
8147 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8150 /* Attempt to expand a binary operator. Make the expansion closer to the
8151 actual machine than just general_operand, which will allow 3 separate
8152 memory references (one output, two input) in a single insn. */
8154 void
8155 ix86_expand_binary_operator (code, mode, operands)
8156 enum rtx_code code;
8157 enum machine_mode mode;
8158 rtx operands[];
8160 int matching_memory;
8161 rtx src1, src2, dst, op, clob;
8163 dst = operands[0];
8164 src1 = operands[1];
8165 src2 = operands[2];
8167 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8168 if (GET_RTX_CLASS (code) == 'c'
8169 && (rtx_equal_p (dst, src2)
8170 || immediate_operand (src1, mode)))
8172 rtx temp = src1;
8173 src1 = src2;
8174 src2 = temp;
8177 /* If the destination is memory, and we do not have matching source
8178 operands, do things in registers. */
8179 matching_memory = 0;
8180 if (GET_CODE (dst) == MEM)
8182 if (rtx_equal_p (dst, src1))
8183 matching_memory = 1;
8184 else if (GET_RTX_CLASS (code) == 'c'
8185 && rtx_equal_p (dst, src2))
8186 matching_memory = 2;
8187 else
8188 dst = gen_reg_rtx (mode);
8191 /* Both source operands cannot be in memory. */
8192 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8194 if (matching_memory != 2)
8195 src2 = force_reg (mode, src2);
8196 else
8197 src1 = force_reg (mode, src1);
8200 /* If the operation is not commutable, source 1 cannot be a constant
8201 or non-matching memory. */
8202 if ((CONSTANT_P (src1)
8203 || (!matching_memory && GET_CODE (src1) == MEM))
8204 && GET_RTX_CLASS (code) != 'c')
8205 src1 = force_reg (mode, src1);
8207 /* If optimizing, copy to regs to improve CSE */
8208 if (optimize && ! no_new_pseudos)
8210 if (GET_CODE (dst) == MEM)
8211 dst = gen_reg_rtx (mode);
8212 if (GET_CODE (src1) == MEM)
8213 src1 = force_reg (mode, src1);
8214 if (GET_CODE (src2) == MEM)
8215 src2 = force_reg (mode, src2);
8218 /* Emit the instruction. */
8220 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8221 if (reload_in_progress)
8223 /* Reload doesn't know about the flags register, and doesn't know that
8224 it doesn't want to clobber it. We can only do this with PLUS. */
8225 if (code != PLUS)
8226 abort ();
8227 emit_insn (op);
8229 else
8231 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8232 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8235 /* Fix up the destination if needed. */
8236 if (dst != operands[0])
8237 emit_move_insn (operands[0], dst);
8240 /* Return TRUE or FALSE depending on whether the binary operator meets the
8241 appropriate constraints. */
8244 ix86_binary_operator_ok (code, mode, operands)
8245 enum rtx_code code;
8246 enum machine_mode mode ATTRIBUTE_UNUSED;
8247 rtx operands[3];
8249 /* Both source operands cannot be in memory. */
8250 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8251 return 0;
8252 /* If the operation is not commutable, source 1 cannot be a constant. */
8253 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8254 return 0;
8255 /* If the destination is memory, we must have a matching source operand. */
8256 if (GET_CODE (operands[0]) == MEM
8257 && ! (rtx_equal_p (operands[0], operands[1])
8258 || (GET_RTX_CLASS (code) == 'c'
8259 && rtx_equal_p (operands[0], operands[2]))))
8260 return 0;
8261 /* If the operation is not commutable and the source 1 is memory, we must
8262 have a matching destination. */
8263 if (GET_CODE (operands[1]) == MEM
8264 && GET_RTX_CLASS (code) != 'c'
8265 && ! rtx_equal_p (operands[0], operands[1]))
8266 return 0;
8267 return 1;
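/* For instance, "reg = mem1 + mem2" fails the two-memory test above, and
   "mem1 = reg + mem2" fails the matching-destination test, so the
   expander must have copied an operand into a register beforehand.  */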
8270 /* Attempt to expand a unary operator. Make the expansion closer to the
8271 actual machine than just general_operand, which will allow 2 separate
8272 memory references (one output, one input) in a single insn. */
8274 void
8275 ix86_expand_unary_operator (code, mode, operands)
8276 enum rtx_code code;
8277 enum machine_mode mode;
8278 rtx operands[];
8280 int matching_memory;
8281 rtx src, dst, op, clob;
8283 dst = operands[0];
8284 src = operands[1];
8286 /* If the destination is memory, and we do not have matching source
8287 operands, do things in registers. */
8288 matching_memory = 0;
8289 if (GET_CODE (dst) == MEM)
8291 if (rtx_equal_p (dst, src))
8292 matching_memory = 1;
8293 else
8294 dst = gen_reg_rtx (mode);
8297 /* When the source operand is in memory, the destination must match. */
8298 if (!matching_memory && GET_CODE (src) == MEM)
8299 src = force_reg (mode, src);
8301 /* If optimizing, copy to regs to improve CSE */
8302 if (optimize && ! no_new_pseudos)
8304 if (GET_CODE (dst) == MEM)
8305 dst = gen_reg_rtx (mode);
8306 if (GET_CODE (src) == MEM)
8307 src = force_reg (mode, src);
8310 /* Emit the instruction. */
8312 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8313 if (reload_in_progress || code == NOT)
8315 /* Reload doesn't know about the flags register, and doesn't know that
8316 it doesn't want to clobber it. */
8317 if (code != NOT)
8318 abort ();
8319 emit_insn (op);
8321 else
8323 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8324 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8327 /* Fix up the destination if needed. */
8328 if (dst != operands[0])
8329 emit_move_insn (operands[0], dst);
8332 /* Return TRUE or FALSE depending on whether the unary operator meets the
8333 appropriate constraints. */
8336 ix86_unary_operator_ok (code, mode, operands)
8337 enum rtx_code code ATTRIBUTE_UNUSED;
8338 enum machine_mode mode ATTRIBUTE_UNUSED;
8339 rtx operands[2] ATTRIBUTE_UNUSED;
8341 /* If one of operands is memory, source and destination must match. */
8342 if ((GET_CODE (operands[0]) == MEM
8343 || GET_CODE (operands[1]) == MEM)
8344 && ! rtx_equal_p (operands[0], operands[1]))
8345 return FALSE;
8346 return TRUE;
8349 /* Return TRUE or FALSE depending on whether the first SET in INSN
8350 has source and destination with matching CC modes and whether the
8351 CC mode is at least as constrained as REQ_MODE.  */
8354 ix86_match_ccmode (insn, req_mode)
8355 rtx insn;
8356 enum machine_mode req_mode;
8358 rtx set;
8359 enum machine_mode set_mode;
8361 set = PATTERN (insn);
8362 if (GET_CODE (set) == PARALLEL)
8363 set = XVECEXP (set, 0, 0);
8364 if (GET_CODE (set) != SET)
8365 abort ();
8366 if (GET_CODE (SET_SRC (set)) != COMPARE)
8367 abort ();
8369 set_mode = GET_MODE (SET_DEST (set));
8370 switch (set_mode)
8372 case CCNOmode:
8373 if (req_mode != CCNOmode
8374 && (req_mode != CCmode
8375 || XEXP (SET_SRC (set), 1) != const0_rtx))
8376 return 0;
8377 break;
8378 case CCmode:
8379 if (req_mode == CCGCmode)
8380 return 0;
8381 /* FALLTHRU */
8382 case CCGCmode:
8383 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8384 return 0;
8385 /* FALLTHRU */
8386 case CCGOCmode:
8387 if (req_mode == CCZmode)
8388 return 0;
8389 /* FALLTHRU */
8390 case CCZmode:
8391 break;
8393 default:
8394 abort ();
8397 return (GET_MODE (SET_SRC (set)) == set_mode);
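/* The fall-through chain above orders the modes by how constrained they
   are: a CCZmode result satisfies any request, CCGOCmode everything but
   CCZmode, CCGCmode only CCGCmode and CCmode, and a plain CCmode result
   only a CCmode request; CCNOmode is handled separately.  */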
8400 /* Generate insn patterns to do an integer compare of OPERANDS. */
8402 static rtx
8403 ix86_expand_int_compare (code, op0, op1)
8404 enum rtx_code code;
8405 rtx op0, op1;
8407 enum machine_mode cmpmode;
8408 rtx tmp, flags;
8410 cmpmode = SELECT_CC_MODE (code, op0, op1);
8411 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8413 /* This is very simple, but making the interface the same as in the
8414 FP case makes the rest of the code easier. */
8415 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8416 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8418 /* Return the test that should be put into the flags user, i.e.
8419 the bcc, scc, or cmov instruction. */
8420 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8423 /* Figure out whether to use ordered or unordered fp comparisons.
8424 Return the appropriate mode to use. */
8426 enum machine_mode
8427 ix86_fp_compare_mode (code)
8428 enum rtx_code code ATTRIBUTE_UNUSED;
8430 /* ??? In order to make all comparisons reversible, we do all comparisons
8431 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8432 all forms of trapping and nontrapping comparisons, we can make inequality
8433 comparisons trapping again, since it results in better code when using
8434 FCOM based compares. */
8435 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8438 enum machine_mode
8439 ix86_cc_mode (code, op0, op1)
8440 enum rtx_code code;
8441 rtx op0, op1;
8443 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8444 return ix86_fp_compare_mode (code);
8445 switch (code)
8447 /* Only zero flag is needed. */
8448 case EQ: /* ZF=0 */
8449 case NE: /* ZF!=0 */
8450 return CCZmode;
8451 /* Codes needing carry flag. */
8452 case GEU: /* CF=0 */
8453 case GTU: /* CF=0 & ZF=0 */
8454 case LTU: /* CF=1 */
8455 case LEU: /* CF=1 | ZF=1 */
8456 return CCmode;
8457 /* Codes possibly doable only with sign flag when
8458 comparing against zero. */
8459 case GE: /* SF=OF or SF=0 */
8460 case LT: /* SF<>OF or SF=1 */
8461 if (op1 == const0_rtx)
8462 return CCGOCmode;
8463 else
8464 /* For other cases Carry flag is not required. */
8465 return CCGCmode;
8466 /* Codes doable only with the sign flag when comparing
8467 against zero, but for which we lack a jump instruction,
8468 so we need to use relational tests against overflow,
8469 which thus needs to be zero.  */
8470 case GT: /* ZF=0 & SF=OF */
8471 case LE: /* ZF=1 | SF<>OF */
8472 if (op1 == const0_rtx)
8473 return CCNOmode;
8474 else
8475 return CCGCmode;
8476 /* The strcmp pattern does (use flags), and combine may ask us for the
8477 proper mode.  */
8478 case USE:
8479 return CCmode;
8480 default:
8481 abort ();
8485 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8488 ix86_use_fcomi_compare (code)
8489 enum rtx_code code ATTRIBUTE_UNUSED;
8491 enum rtx_code swapped_code = swap_condition (code);
8492 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8493 || (ix86_fp_comparison_cost (swapped_code)
8494 == ix86_fp_comparison_fcomi_cost (swapped_code)));
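/* In other words, prefer fcomi whenever it ties the minimum comparison
   cost in either operand order.  */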
8497 /* Swap, force into registers, or otherwise massage the two operands
8498 to an fp comparison.  The operands are updated in place; the new
8499 comparison code is returned. */
8501 static enum rtx_code
8502 ix86_prepare_fp_compare_args (code, pop0, pop1)
8503 enum rtx_code code;
8504 rtx *pop0, *pop1;
8506 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8507 rtx op0 = *pop0, op1 = *pop1;
8508 enum machine_mode op_mode = GET_MODE (op0);
8509 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8511 /* All of the unordered compare instructions only work on registers.
8512 The same is true of the XFmode compare instructions. The same is
8513 true of the fcomi compare instructions. */
8515 if (!is_sse
8516 && (fpcmp_mode == CCFPUmode
8517 || op_mode == XFmode
8518 || op_mode == TFmode
8519 || ix86_use_fcomi_compare (code)))
8521 op0 = force_reg (op_mode, op0);
8522 op1 = force_reg (op_mode, op1);
8524 else
8526 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8527 things around if they appear profitable, otherwise force op0
8528 into a register. */
8530 if (standard_80387_constant_p (op0) == 0
8531 || (GET_CODE (op0) == MEM
8532 && ! (standard_80387_constant_p (op1) == 0
8533 || GET_CODE (op1) == MEM)))
8535 rtx tmp;
8536 tmp = op0, op0 = op1, op1 = tmp;
8537 code = swap_condition (code);
8540 if (GET_CODE (op0) != REG)
8541 op0 = force_reg (op_mode, op0);
8543 if (CONSTANT_P (op1))
8545 if (standard_80387_constant_p (op1))
8546 op1 = force_reg (op_mode, op1);
8547 else
8548 op1 = validize_mem (force_const_mem (op_mode, op1));
8552 /* Try to rearrange the comparison to make it cheaper. */
8553 if (ix86_fp_comparison_cost (code)
8554 > ix86_fp_comparison_cost (swap_condition (code))
8555 && (GET_CODE (op1) == REG || !no_new_pseudos))
8557 rtx tmp;
8558 tmp = op0, op0 = op1, op1 = tmp;
8559 code = swap_condition (code);
8560 if (GET_CODE (op0) != REG)
8561 op0 = force_reg (op_mode, op0);
8564 *pop0 = op0;
8565 *pop1 = op1;
8566 return code;
8572 /* Convert the comparison codes we use to represent an FP comparison into
8573 the integer code that will result in a proper branch.  Return UNKNOWN if
8574 no such code is available.  */
8572 static enum rtx_code
8573 ix86_fp_compare_code_to_integer (code)
8574 enum rtx_code code;
8576 switch (code)
8578 case GT:
8579 return GTU;
8580 case GE:
8581 return GEU;
8582 case ORDERED:
8583 case UNORDERED:
8584 return code;
8586 case UNEQ:
8587 return EQ;
8589 case UNLT:
8590 return LTU;
8592 case UNLE:
8593 return LEU;
8595 case LTGT:
8596 return NE;
8598 default:
8599 return UNKNOWN;
8603 /* Split comparison code CODE into comparisons we can do using branch
8604 instructions. BYPASS_CODE is comparison code for branch that will
8605 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8606 is not required, its code is set to NIL.
8607 We never require more than two branches. */
8608 static void
8609 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8610 enum rtx_code code, *bypass_code, *first_code, *second_code;
8612 *first_code = code;
8613 *bypass_code = NIL;
8614 *second_code = NIL;
8616 /* The fcomi comparison sets flags as follows:
8618 cmp ZF PF CF
8619 > 0 0 0
8620 < 0 0 1
8621 = 1 0 0
8622 un 1 1 1 */
8624 switch (code)
8626 case GT: /* GTU - CF=0 & ZF=0 */
8627 case GE: /* GEU - CF=0 */
8628 case ORDERED: /* PF=0 */
8629 case UNORDERED: /* PF=1 */
8630 case UNEQ: /* EQ - ZF=1 */
8631 case UNLT: /* LTU - CF=1 */
8632 case UNLE: /* LEU - CF=1 | ZF=1 */
8633 case LTGT: /* EQ - ZF=0 */
8634 break;
8635 case LT: /* LTU - CF=1 - fails on unordered */
8636 *first_code = UNLT;
8637 *bypass_code = UNORDERED;
8638 break;
8639 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8640 *first_code = UNLE;
8641 *bypass_code = UNORDERED;
8642 break;
8643 case EQ: /* EQ - ZF=1 - fails on unordered */
8644 *first_code = UNEQ;
8645 *bypass_code = UNORDERED;
8646 break;
8647 case NE: /* NE - ZF=0 - fails on unordered */
8648 *first_code = LTGT;
8649 *second_code = UNORDERED;
8650 break;
8651 case UNGE: /* GEU - CF=0 - fails on unordered */
8652 *first_code = GE;
8653 *second_code = UNORDERED;
8654 break;
8655 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8656 *first_code = GT;
8657 *second_code = UNORDERED;
8658 break;
8659 default:
8660 abort ();
8662 if (!TARGET_IEEE_FP)
8664 *second_code = NIL;
8665 *bypass_code = NIL;
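/* A worked example: for CODE == EQ under TARGET_IEEE_FP this yields
   first_code = UNEQ with bypass_code = UNORDERED -- branch around the
   equality test whenever the operands compare unordered; without
   IEEE compliance the extra branches are dropped just above.  */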
8669 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8670 All of the following functions use the number of instructions as the cost metric.
8671 In the future this should be tweaked to compute bytes for optimize_size and
8672 to take into account the performance of various instructions on various CPUs.  */
8673 static int
8674 ix86_fp_comparison_arithmetics_cost (code)
8675 enum rtx_code code;
8677 if (!TARGET_IEEE_FP)
8678 return 4;
8679 /* The cost of code output by ix86_expand_fp_compare. */
8680 switch (code)
8682 case UNLE:
8683 case UNLT:
8684 case LTGT:
8685 case GT:
8686 case GE:
8687 case UNORDERED:
8688 case ORDERED:
8689 case UNEQ:
8690 return 4;
8692 case LT:
8693 case NE:
8694 case EQ:
8695 case UNGE:
8696 return 5;
8698 case LE:
8699 case UNGT:
8700 return 6;
8702 default:
8703 abort ();
8707 /* Return cost of comparison done using fcomi operation.
8708 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8709 static int
8710 ix86_fp_comparison_fcomi_cost (code)
8711 enum rtx_code code;
8713 enum rtx_code bypass_code, first_code, second_code;
8714 /* Return an arbitrarily high cost when the instruction is not supported - this
8715 prevents gcc from using it.  */
8716 if (!TARGET_CMOVE)
8717 return 1024;
8718 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8719 return (bypass_code != NIL || second_code != NIL) + 2;
8722 /* Return cost of comparison done using sahf operation.
8723 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8724 static int
8725 ix86_fp_comparison_sahf_cost (code)
8726 enum rtx_code code;
8728 enum rtx_code bypass_code, first_code, second_code;
8729 /* Return an arbitrarily high cost when the instruction is not preferred - this
8730 prevents gcc from using it.  */
8731 if (!TARGET_USE_SAHF && !optimize_size)
8732 return 1024;
8733 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8734 return (bypass_code != NIL || second_code != NIL) + 3;
8737 /* Compute cost of the comparison done using any method.
8738 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8739 static int
8740 ix86_fp_comparison_cost (code)
8741 enum rtx_code code;
8743 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8744 int min;
8746 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8747 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8749 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8750 if (min > sahf_cost)
8751 min = sahf_cost;
8752 if (min > fcomi_cost)
8753 min = fcomi_cost;
8754 return min;
8757 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8759 static rtx
8760 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8761 enum rtx_code code;
8762 rtx op0, op1, scratch;
8763 rtx *second_test;
8764 rtx *bypass_test;
8766 enum machine_mode fpcmp_mode, intcmp_mode;
8767 rtx tmp, tmp2;
8768 int cost = ix86_fp_comparison_cost (code);
8769 enum rtx_code bypass_code, first_code, second_code;
8771 fpcmp_mode = ix86_fp_compare_mode (code);
8772 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8774 if (second_test)
8775 *second_test = NULL_RTX;
8776 if (bypass_test)
8777 *bypass_test = NULL_RTX;
8779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8781 /* Do fcomi/sahf based test when profitable. */
8782 if ((bypass_code == NIL || bypass_test)
8783 && (second_code == NIL || second_test)
8784 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8786 if (TARGET_CMOVE)
8788 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8789 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8790 tmp);
8791 emit_insn (tmp);
8793 else
8795 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8796 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8797 if (!scratch)
8798 scratch = gen_reg_rtx (HImode);
8799 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8800 emit_insn (gen_x86_sahf_1 (scratch));
8803 /* The FP codes work out to act like unsigned. */
8804 intcmp_mode = fpcmp_mode;
8805 code = first_code;
8806 if (bypass_code != NIL)
8807 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8808 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8809 const0_rtx);
8810 if (second_code != NIL)
8811 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8812 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8813 const0_rtx);
8815 else
8817 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8818 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8819 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8820 if (!scratch)
8821 scratch = gen_reg_rtx (HImode);
8822 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8824 /* In the unordered case, we have to check C2 for NaN's, which
8825 doesn't happen to work out to anything nice combination-wise.
8826 So do some bit twiddling on the value we've got in AH to come
8827 up with an appropriate set of condition codes. */
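/* After the fnstsw, the FPU condition bits land in AH as C0 -> 0x01,
   C2 -> 0x04 and C3 -> 0x40, so the 0x45 masks used below select
   C3|C2|C0; C2 is the "unordered" bit that flags NaN operands.  */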
8829 intcmp_mode = CCNOmode;
8830 switch (code)
8832 case GT:
8833 case UNGT:
8834 if (code == GT || !TARGET_IEEE_FP)
8836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8837 code = EQ;
8839 else
8841 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8842 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8843 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8844 intcmp_mode = CCmode;
8845 code = GEU;
8847 break;
8848 case LT:
8849 case UNLT:
8850 if (code == LT && TARGET_IEEE_FP)
8852 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8853 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8854 intcmp_mode = CCmode;
8855 code = EQ;
8857 else
8859 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8860 code = NE;
8862 break;
8863 case GE:
8864 case UNGE:
8865 if (code == GE || !TARGET_IEEE_FP)
8867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8868 code = EQ;
8870 else
8872 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8873 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8874 GEN_INT (0x01)));
8875 code = NE;
8877 break;
8878 case LE:
8879 case UNLE:
8880 if (code == LE && TARGET_IEEE_FP)
8882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8883 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8884 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8885 intcmp_mode = CCmode;
8886 code = LTU;
8888 else
8890 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8891 code = NE;
8893 break;
8894 case EQ:
8895 case UNEQ:
8896 if (code == EQ && TARGET_IEEE_FP)
8898 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8899 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8900 intcmp_mode = CCmode;
8901 code = EQ;
8903 else
8905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8906 code = NE;
8907 break;
8909 break;
8910 case NE:
8911 case LTGT:
8912 if (code == NE && TARGET_IEEE_FP)
8914 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8915 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8916 GEN_INT (0x40)));
8917 code = NE;
8919 else
8921 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8922 code = EQ;
8924 break;
8926 case UNORDERED:
8927 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8928 code = NE;
8929 break;
8930 case ORDERED:
8931 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8932 code = EQ;
8933 break;
8935 default:
8936 abort ();
8940 /* Return the test that should be put into the flags user, i.e.
8941 the bcc, scc, or cmov instruction. */
8942 return gen_rtx_fmt_ee (code, VOIDmode,
8943 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8944 const0_rtx);
8948 ix86_expand_compare (code, second_test, bypass_test)
8949 enum rtx_code code;
8950 rtx *second_test, *bypass_test;
8952 rtx op0, op1, ret;
8953 op0 = ix86_compare_op0;
8954 op1 = ix86_compare_op1;
8956 if (second_test)
8957 *second_test = NULL_RTX;
8958 if (bypass_test)
8959 *bypass_test = NULL_RTX;
8961 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8962 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8963 second_test, bypass_test);
8964 else
8965 ret = ix86_expand_int_compare (code, op0, op1);
8967 return ret;
8970 /* Return true if the CODE will result in a nontrivial jump sequence. */
8971 bool
8972 ix86_fp_jump_nontrivial_p (code)
8973 enum rtx_code code;
8975 enum rtx_code bypass_code, first_code, second_code;
8976 if (!TARGET_CMOVE)
8977 return true;
8978 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8979 return bypass_code != NIL || second_code != NIL;
8982 void
8983 ix86_expand_branch (code, label)
8984 enum rtx_code code;
8985 rtx label;
8987 rtx tmp;
8989 switch (GET_MODE (ix86_compare_op0))
8991 case QImode:
8992 case HImode:
8993 case SImode:
8994 simple:
8995 tmp = ix86_expand_compare (code, NULL, NULL);
8996 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8997 gen_rtx_LABEL_REF (VOIDmode, label),
8998 pc_rtx);
8999 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9000 return;
9002 case SFmode:
9003 case DFmode:
9004 case XFmode:
9005 case TFmode:
9007 rtvec vec;
9008 int use_fcomi;
9009 enum rtx_code bypass_code, first_code, second_code;
9011 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9012 &ix86_compare_op1);
9014 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9016 /* Check whether we will use the natural sequence with one jump.  If
9017 so, we can expand the jump early.  Otherwise delay expansion by
9018 creating a compound insn so as not to confuse the optimizers.  */
9019 if (bypass_code == NIL && second_code == NIL
9020 && TARGET_CMOVE)
9022 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9023 gen_rtx_LABEL_REF (VOIDmode, label),
9024 pc_rtx, NULL_RTX);
9026 else
9028 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9029 ix86_compare_op0, ix86_compare_op1);
9030 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9031 gen_rtx_LABEL_REF (VOIDmode, label),
9032 pc_rtx);
9033 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9035 use_fcomi = ix86_use_fcomi_compare (code);
9036 vec = rtvec_alloc (3 + !use_fcomi);
9037 RTVEC_ELT (vec, 0) = tmp;
9038 RTVEC_ELT (vec, 1)
9039 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9040 RTVEC_ELT (vec, 2)
9041 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9042 if (! use_fcomi)
9043 RTVEC_ELT (vec, 3)
9044 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9046 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9048 return;
9051 case DImode:
9052 if (TARGET_64BIT)
9053 goto simple;
9054 /* Expand DImode branch into multiple compare+branch. */
9056 rtx lo[2], hi[2], label2;
9057 enum rtx_code code1, code2, code3;
9059 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9061 tmp = ix86_compare_op0;
9062 ix86_compare_op0 = ix86_compare_op1;
9063 ix86_compare_op1 = tmp;
9064 code = swap_condition (code);
9066 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9067 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9069 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9070 avoid two branches. This costs one extra insn, so disable when
9071 optimizing for size. */
9073 if ((code == EQ || code == NE)
9074 && (!optimize_size
9075 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9077 rtx xor0, xor1;
9079 xor1 = hi[0];
9080 if (hi[1] != const0_rtx)
9081 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9082 NULL_RTX, 0, OPTAB_WIDEN);
9084 xor0 = lo[0];
9085 if (lo[1] != const0_rtx)
9086 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9087 NULL_RTX, 0, OPTAB_WIDEN);
9089 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9090 NULL_RTX, 0, OPTAB_WIDEN);
9092 ix86_compare_op0 = tmp;
9093 ix86_compare_op1 = const0_rtx;
9094 ix86_expand_branch (code, label);
9095 return;
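/* E.g. a DImode "a == b" on a 32-bit target becomes two xors and an
   ior feeding a single compare against zero, instead of a pair of
   compare-and-branch sequences.  */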
9098 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
9099 op1 is a constant, and the low word is zero, then we can just
9100 examine the high word.  */
9102 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9103 switch (code)
9105 case LT: case LTU: case GE: case GEU:
9106 ix86_compare_op0 = hi[0];
9107 ix86_compare_op1 = hi[1];
9108 ix86_expand_branch (code, label);
9109 return;
9110 default:
9111 break;
9114 /* Otherwise, we need two or three jumps. */
9116 label2 = gen_label_rtx ();
9118 code1 = code;
9119 code2 = swap_condition (code);
9120 code3 = unsigned_condition (code);
9122 switch (code)
9124 case LT: case GT: case LTU: case GTU:
9125 break;
9127 case LE: code1 = LT; code2 = GT; break;
9128 case GE: code1 = GT; code2 = LT; break;
9129 case LEU: code1 = LTU; code2 = GTU; break;
9130 case GEU: code1 = GTU; code2 = LTU; break;
9132 case EQ: code1 = NIL; code2 = NE; break;
9133 case NE: code2 = NIL; break;
9135 default:
9136 abort ();
9140 * a < b =>
9141 * if (hi(a) < hi(b)) goto true;
9142 * if (hi(a) > hi(b)) goto false;
9143 * if (lo(a) < lo(b)) goto true;
9144 * false:
9147 ix86_compare_op0 = hi[0];
9148 ix86_compare_op1 = hi[1];
9150 if (code1 != NIL)
9151 ix86_expand_branch (code1, label);
9152 if (code2 != NIL)
9153 ix86_expand_branch (code2, label2);
9155 ix86_compare_op0 = lo[0];
9156 ix86_compare_op1 = lo[1];
9157 ix86_expand_branch (code3, label);
9159 if (code2 != NIL)
9160 emit_label (label2);
9161 return;
9164 default:
9165 abort ();
9169 /* Split branch based on floating point condition. */
9170 void
9171 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9172 enum rtx_code code;
9173 rtx op1, op2, target1, target2, tmp;
9175 rtx second, bypass;
9176 rtx label = NULL_RTX;
9177 rtx condition;
9178 int bypass_probability = -1, second_probability = -1, probability = -1;
9179 rtx i;
9181 if (target2 != pc_rtx)
9183 rtx tmp = target2;
9184 code = reverse_condition_maybe_unordered (code);
9185 target2 = target1;
9186 target1 = tmp;
9189 condition = ix86_expand_fp_compare (code, op1, op2,
9190 tmp, &second, &bypass);
9192 if (split_branch_probability >= 0)
9194 /* Distribute the probabilities across the jumps.
9195 Assume that BYPASS and SECOND always test
9196 for UNORDERED.  */
9197 probability = split_branch_probability;
9199 /* A value of 1 is low enough that the probability needs no
9200 update.  Later we may run some experiments and see
9201 if unordered values are more frequent in practice.  */
9202 if (bypass)
9203 bypass_probability = 1;
9204 if (second)
9205 second_probability = 1;
9207 if (bypass != NULL_RTX)
9209 label = gen_label_rtx ();
9210 i = emit_jump_insn (gen_rtx_SET
9211 (VOIDmode, pc_rtx,
9212 gen_rtx_IF_THEN_ELSE (VOIDmode,
9213 bypass,
9214 gen_rtx_LABEL_REF (VOIDmode,
9215 label),
9216 pc_rtx)));
9217 if (bypass_probability >= 0)
9218 REG_NOTES (i)
9219 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9220 GEN_INT (bypass_probability),
9221 REG_NOTES (i));
9223 i = emit_jump_insn (gen_rtx_SET
9224 (VOIDmode, pc_rtx,
9225 gen_rtx_IF_THEN_ELSE (VOIDmode,
9226 condition, target1, target2)));
9227 if (probability >= 0)
9228 REG_NOTES (i)
9229 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9230 GEN_INT (probability),
9231 REG_NOTES (i));
9232 if (second != NULL_RTX)
9234 i = emit_jump_insn (gen_rtx_SET
9235 (VOIDmode, pc_rtx,
9236 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9237 target2)));
9238 if (second_probability >= 0)
9239 REG_NOTES (i)
9240 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9241 GEN_INT (second_probability),
9242 REG_NOTES (i));
9244 if (label != NULL_RTX)
9245 emit_label (label);
9249 ix86_expand_setcc (code, dest)
9250 enum rtx_code code;
9251 rtx dest;
9253 rtx ret, tmp, tmpreg;
9254 rtx second_test, bypass_test;
9256 if (GET_MODE (ix86_compare_op0) == DImode
9257 && !TARGET_64BIT)
9258 return 0; /* FAIL */
9260 if (GET_MODE (dest) != QImode)
9261 abort ();
9263 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9264 PUT_MODE (ret, QImode);
9266 tmp = dest;
9267 tmpreg = dest;
9269 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9270 if (bypass_test || second_test)
9272 rtx test = second_test;
9273 int bypass = 0;
9274 rtx tmp2 = gen_reg_rtx (QImode);
9275 if (bypass_test)
9277 if (second_test)
9278 abort ();
9279 test = bypass_test;
9280 bypass = 1;
9281 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9283 PUT_MODE (test, QImode);
9284 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9286 if (bypass)
9287 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9288 else
9289 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9292 return 1; /* DONE */
9295 /* Expand a comparison setting or clearing the carry flag.  Return true when
9296 successful, and set *POP to the comparison for the operation.  */
9297 bool
9298 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9299 rtx op0, op1, *pop;
9300 enum rtx_code code;
9302 enum machine_mode mode =
9303 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9305 /* Do not handle DImode compares that go through the special path.  Also we
9306 cannot deal with FP compares yet; this would be possible to add.  */
9307 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
9308 return false;
9309 switch (code)
9311 case LTU:
9312 case GEU:
9313 break;
9315 /* Convert a==0 into (unsigned)a<1. */
9316 case EQ:
9317 case NE:
9318 if (op1 != const0_rtx)
9319 return false;
9320 op1 = const1_rtx;
9321 code = (code == EQ ? LTU : GEU);
9322 break;
9324 /* Convert a>b into b<a or a>=b+1. */
9325 case GTU:
9326 case LEU:
9327 if (GET_CODE (op1) == CONST_INT)
9329 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9330 /* Bail out on overflow.  We could still swap the operands, but that
9331 would force loading the constant into a register.  */
9332 if (op1 == const0_rtx
9333 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9334 return false;
9335 code = (code == GTU ? GEU : LTU);
9337 else
9339 rtx tmp = op1;
9340 op1 = op0;
9341 op0 = tmp;
9342 code = (code == GTU ? LTU : GEU);
9344 break;
9346 /* Convert a>=0 into (unsigned)a<0x80000000. */
9347 case LT:
9348 case GE:
9349 if (mode == DImode || op1 != const0_rtx)
9350 return false;
9351 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9352 code = (code == LT ? GEU : LTU);
9353 break;
9354 case LE:
9355 case GT:
9356 if (mode == DImode || op1 != constm1_rtx)
9357 return false;
9358 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9359 code = (code == LE ? GEU : LTU);
9360 break;
9362 default:
9363 return false;
9365 ix86_compare_op0 = op0;
9366 ix86_compare_op1 = op1;
9367 *pop = ix86_expand_compare (code, NULL, NULL);
9368 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9369 abort ();
9370 return true;
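/* Example: "a > 5U" (GTU against a constant) is rewritten above as
   "a >= 6U" (GEU), which a single cmp can feed through the carry flag;
   EQ/NE against zero similarly become LTU/GEU against 1.  */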
9374 ix86_expand_int_movcc (operands)
9375 rtx operands[];
9377 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9378 rtx compare_seq, compare_op;
9379 rtx second_test, bypass_test;
9380 enum machine_mode mode = GET_MODE (operands[0]);
9381 bool sign_bit_compare_p = false;
9383 start_sequence ();
9384 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9385 compare_seq = get_insns ();
9386 end_sequence ();
9388 compare_code = GET_CODE (compare_op);
9390 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9391 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9392 sign_bit_compare_p = true;
9394 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9395 HImode insns, we'd be swallowed in word prefix ops. */
9397 if ((mode != HImode || TARGET_FAST_PREFIX)
9398 && (mode != DImode || TARGET_64BIT)
9399 && GET_CODE (operands[2]) == CONST_INT
9400 && GET_CODE (operands[3]) == CONST_INT)
9402 rtx out = operands[0];
9403 HOST_WIDE_INT ct = INTVAL (operands[2]);
9404 HOST_WIDE_INT cf = INTVAL (operands[3]);
9405 HOST_WIDE_INT diff;
9407 diff = ct - cf;
9408 /* Sign bit compares are better done using shifts than by using
9409 sbb.  */
9410 if (sign_bit_compare_p
9411 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9412 ix86_compare_op1, &compare_op))
9414 /* Detect overlap between destination and compare sources. */
9415 rtx tmp = out;
9417 if (!sign_bit_compare_p)
9419 compare_code = GET_CODE (compare_op);
9421 /* To simplify rest of code, restrict to the GEU case. */
9422 if (compare_code == LTU)
9424 HOST_WIDE_INT tmp = ct;
9425 ct = cf;
9426 cf = tmp;
9427 compare_code = reverse_condition (compare_code);
9428 code = reverse_condition (code);
9430 diff = ct - cf;
9432 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9433 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9434 tmp = gen_reg_rtx (mode);
9436 if (mode == DImode)
9437 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9438 else
9439 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9441 else
9443 if (code == GT || code == GE)
9444 code = reverse_condition (code);
9445 else
9447 HOST_WIDE_INT tmp = ct;
9448 ct = cf;
9449 cf = tmp;
9451 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9452 ix86_compare_op1, VOIDmode, 0, -1);
9455 if (diff == 1)
9458 * cmpl op0,op1
9459 * sbbl dest,dest
9460 * [addl dest, ct]
9462 * Size 5 - 8.
9464 if (ct)
9465 tmp = expand_simple_binop (mode, PLUS,
9466 tmp, GEN_INT (ct),
9467 copy_rtx (tmp), 1, OPTAB_DIRECT);
9469 else if (cf == -1)
9472 * cmpl op0,op1
9473 * sbbl dest,dest
9474 * orl $ct, dest
9476 * Size 8.
9478 tmp = expand_simple_binop (mode, IOR,
9479 tmp, GEN_INT (ct),
9480 copy_rtx (tmp), 1, OPTAB_DIRECT);
9482 else if (diff == -1 && ct)
9485 * cmpl op0,op1
9486 * sbbl dest,dest
9487 * notl dest
9488 * [addl dest, cf]
9490 * Size 8 - 11.
9492 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9493 if (cf)
9494 tmp = expand_simple_binop (mode, PLUS,
9495 copy_rtx (tmp), GEN_INT (cf),
9496 copy_rtx (tmp), 1, OPTAB_DIRECT);
9498 else
9501 * cmpl op0,op1
9502 * sbbl dest,dest
9503 * [notl dest]
9504 * andl cf - ct, dest
9505 * [addl dest, ct]
9507 * Size 8 - 11.
9510 if (cf == 0)
9512 cf = ct;
9513 ct = 0;
9514 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9517 tmp = expand_simple_binop (mode, AND,
9518 copy_rtx (tmp),
9519 gen_int_mode (cf - ct, mode),
9520 copy_rtx (tmp), 1, OPTAB_DIRECT);
9521 if (ct)
9522 tmp = expand_simple_binop (mode, PLUS,
9523 copy_rtx (tmp), GEN_INT (ct),
9524 copy_rtx (tmp), 1, OPTAB_DIRECT);
9527 if (!rtx_equal_p (tmp, out))
9528 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9530 return 1; /* DONE */
9533 if (diff < 0)
9535 HOST_WIDE_INT tmp;
9536 tmp = ct, ct = cf, cf = tmp;
9537 diff = -diff;
9538 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9540 /* We may be reversing an unordered compare to a normal compare, which
9541 is not valid in general (we may convert a non-trapping condition
9542 to a trapping one); however, on i386 we currently emit all
9543 comparisons unordered.  */
9544 compare_code = reverse_condition_maybe_unordered (compare_code);
9545 code = reverse_condition_maybe_unordered (code);
9547 else
9549 compare_code = reverse_condition (compare_code);
9550 code = reverse_condition (code);
9554 compare_code = NIL;
9555 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9556 && GET_CODE (ix86_compare_op1) == CONST_INT)
9558 if (ix86_compare_op1 == const0_rtx
9559 && (code == LT || code == GE))
9560 compare_code = code;
9561 else if (ix86_compare_op1 == constm1_rtx)
9563 if (code == LE)
9564 compare_code = LT;
9565 else if (code == GT)
9566 compare_code = GE;
9570 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9571 if (compare_code != NIL
9572 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9573 && (cf == -1 || ct == -1))
9575 /* If lea code below could be used, only optimize
9576 if it results in a 2 insn sequence. */
9578 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9579 || diff == 3 || diff == 5 || diff == 9)
9580 || (compare_code == LT && ct == -1)
9581 || (compare_code == GE && cf == -1))
9584 * notl op1 (if necessary)
9585 * sarl $31, op1
9586 * orl cf, op1
9588 if (ct != -1)
9590 cf = ct;
9591 ct = -1;
9592 code = reverse_condition (code);
9595 out = emit_store_flag (out, code, ix86_compare_op0,
9596 ix86_compare_op1, VOIDmode, 0, -1);
9598 out = expand_simple_binop (mode, IOR,
9599 out, GEN_INT (cf),
9600 out, 1, OPTAB_DIRECT);
9601 if (out != operands[0])
9602 emit_move_insn (operands[0], out);
9604 return 1; /* DONE */
9609 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9610 || diff == 3 || diff == 5 || diff == 9)
9611 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9612 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9615 * xorl dest,dest
9616 * cmpl op1,op2
9617 * setcc dest
9618 * lea cf(dest*(ct-cf)),dest
9620 * Size 14.
9622 * This also catches the degenerate setcc-only case.
9625 rtx tmp;
9626 int nops;
9628 out = emit_store_flag (out, code, ix86_compare_op0,
9629 ix86_compare_op1, VOIDmode, 0, 1);
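/* OUT is now 0 or 1, so cf + OUT * (ct - cf) below evaluates to cf or
   ct respectively; for the diff values admitted above the multiply and
   add collapse into a single lea (sketch of the intent).  */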
9631 nops = 0;
9632 /* On x86_64 the lea instruction operates on Pmode, so we need
9633 to get the arithmetic done in the proper mode to match.  */
9634 if (diff == 1)
9635 tmp = copy_rtx (out);
9636 else
9638 rtx out1;
9639 out1 = copy_rtx (out);
9640 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9641 nops++;
9642 if (diff & 1)
9644 tmp = gen_rtx_PLUS (mode, tmp, out1);
9645 nops++;
9648 if (cf != 0)
9650 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9651 nops++;
9653 if (!rtx_equal_p (tmp, out))
9655 if (nops == 1)
9656 out = force_operand (tmp, copy_rtx (out));
9657 else
9658 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9660 if (!rtx_equal_p (out, operands[0]))
9661 emit_move_insn (operands[0], copy_rtx (out));
9663 return 1; /* DONE */
9667 * General case: Jumpful:
9668 * xorl dest,dest cmpl op1, op2
9669 * cmpl op1, op2 movl ct, dest
9670 * setcc dest jcc 1f
9671 * decl dest movl cf, dest
9672 * andl (cf-ct),dest 1:
9673 * addl ct,dest
9675 * Size 20. Size 14.
9677 * This is reasonably steep, but branch mispredict costs are
9678 * high on modern CPUs, so consider failing only if optimizing
9679 * for space.
9682 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9683 && BRANCH_COST >= 2)
9685 if (cf == 0)
9687 cf = ct;
9688 ct = 0;
9689 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9690 /* We may be reversing an unordered compare to a normal compare,
9691 which is not valid in general (we may convert a non-trapping
9692 condition to a trapping one); however, on i386 we currently
9693 emit all comparisons unordered.  */
9694 code = reverse_condition_maybe_unordered (code);
9695 else
9697 code = reverse_condition (code);
9698 if (compare_code != NIL)
9699 compare_code = reverse_condition (compare_code);
9703 if (compare_code != NIL)
9705 /* notl op1 (if needed)
9706 sarl $31, op1
9707 andl (cf-ct), op1
9708 addl ct, op1
9710 For x < 0 (resp. x <= -1) there will be no notl,
9711 so if possible swap the constants to get rid of the
9712 complement.
9713 True/false will be -1/0 while code below (store flag
9714 followed by decrement) is 0/-1, so the constants need
9715 to be exchanged once more. */
9717 if (compare_code == GE || !cf)
9719 code = reverse_condition (code);
9720 compare_code = LT;
9722 else
9724 HOST_WIDE_INT tmp = cf;
9725 cf = ct;
9726 ct = tmp;
9729 out = emit_store_flag (out, code, ix86_compare_op0,
9730 ix86_compare_op1, VOIDmode, 0, -1);
9732 else
9734 out = emit_store_flag (out, code, ix86_compare_op0,
9735 ix86_compare_op1, VOIDmode, 0, 1);
9737 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9738 copy_rtx (out), 1, OPTAB_DIRECT);
9741 out = expand_simple_binop (mode, AND, copy_rtx (out),
9742 gen_int_mode (cf - ct, mode),
9743 copy_rtx (out), 1, OPTAB_DIRECT);
9744 if (ct)
9745 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9746 copy_rtx (out), 1, OPTAB_DIRECT);
9747 if (!rtx_equal_p (out, operands[0]))
9748 emit_move_insn (operands[0], copy_rtx (out));
9750 return 1; /* DONE */
9754 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9756 /* Try a few more things with specific constants and a variable. */
9758 optab op;
9759 rtx var, orig_out, out, tmp;
9761 if (BRANCH_COST <= 2)
9762 return 0; /* FAIL */
9764 /* If one of the two operands is an interesting constant, load a
9765 constant with the above and mask it in with a logical operation. */
9767 if (GET_CODE (operands[2]) == CONST_INT)
9769 var = operands[3];
9770 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9771 operands[3] = constm1_rtx, op = and_optab;
9772 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9773 operands[3] = const0_rtx, op = ior_optab;
9774 else
9775 return 0; /* FAIL */
9777 else if (GET_CODE (operands[3]) == CONST_INT)
9779 var = operands[2];
9780 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9781 operands[2] = constm1_rtx, op = and_optab;
9782 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
9783 operands[2] = const0_rtx, op = ior_optab;
9784 else
9785 return 0; /* FAIL */
9787 else
9788 return 0; /* FAIL */
9790 orig_out = operands[0];
9791 tmp = gen_reg_rtx (mode);
9792 operands[0] = tmp;
9794 /* Recurse to get the constant loaded. */
9795 if (ix86_expand_int_movcc (operands) == 0)
9796 return 0; /* FAIL */
9798 /* Mask in the interesting variable. */
9799 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9800 OPTAB_WIDEN);
9801 if (!rtx_equal_p (out, orig_out))
9802 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9804 return 1; /* DONE */
9808 * For comparison with above,
9810 * movl cf,dest
9811 * movl ct,tmp
9812 * cmpl op1,op2
9813 * cmovcc tmp,dest
9815 * Size 15.
9818 if (! nonimmediate_operand (operands[2], mode))
9819 operands[2] = force_reg (mode, operands[2]);
9820 if (! nonimmediate_operand (operands[3], mode))
9821 operands[3] = force_reg (mode, operands[3]);
9823 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9825 rtx tmp = gen_reg_rtx (mode);
9826 emit_move_insn (tmp, operands[3]);
9827 operands[3] = tmp;
9829 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9831 rtx tmp = gen_reg_rtx (mode);
9832 emit_move_insn (tmp, operands[2]);
9833 operands[2] = tmp;
9836 if (! register_operand (operands[2], VOIDmode)
9837 && (mode == QImode
9838 || ! register_operand (operands[3], VOIDmode)))
9839 operands[2] = force_reg (mode, operands[2]);
9841 if (mode == QImode
9842 && ! register_operand (operands[3], VOIDmode))
9843 operands[3] = force_reg (mode, operands[3]);
9845 emit_insn (compare_seq);
9846 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9847 gen_rtx_IF_THEN_ELSE (mode,
9848 compare_op, operands[2],
9849 operands[3])));
9850 if (bypass_test)
9851 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9852 gen_rtx_IF_THEN_ELSE (mode,
9853 bypass_test,
9854 copy_rtx (operands[3]),
9855 copy_rtx (operands[0]))));
9856 if (second_test)
9857 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9858 gen_rtx_IF_THEN_ELSE (mode,
9859 second_test,
9860 copy_rtx (operands[2]),
9861 copy_rtx (operands[0]))));
9863 return 1; /* DONE */
9867 ix86_expand_fp_movcc (operands)
9868 rtx operands[];
9870 enum rtx_code code;
9871 rtx tmp;
9872 rtx compare_op, second_test, bypass_test;
9874 /* For SF/DFmode conditional moves based on comparisons
9875 in the same mode, we may want to use SSE min/max instructions.  */
9876 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9877 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9878 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9879 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9880 && (!TARGET_IEEE_FP
9881 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9882 /* We may be called from the post-reload splitter. */
9883 && (!REG_P (operands[0])
9884 || SSE_REG_P (operands[0])
9885 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9887 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9888 code = GET_CODE (operands[1]);
9890 /* See if we have (cross) match between comparison operands and
9891 conditional move operands. */
9892 if (rtx_equal_p (operands[2], op1))
9894 rtx tmp = op0;
9895 op0 = op1;
9896 op1 = tmp;
9897 code = reverse_condition_maybe_unordered (code);
9899 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9901 /* Check for min operation. */
9902 if (code == LT || code == UNLE)
9904 if (code == UNLE)
9906 rtx tmp = op0;
9907 op0 = op1;
9908 op1 = tmp;
9910 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9911 if (memory_operand (op0, VOIDmode))
9912 op0 = force_reg (GET_MODE (operands[0]), op0);
9913 if (GET_MODE (operands[0]) == SFmode)
9914 emit_insn (gen_minsf3 (operands[0], op0, op1));
9915 else
9916 emit_insn (gen_mindf3 (operands[0], op0, op1));
9917 return 1;
9919 /* Check for max operation. */
9920 if (code == GT || code == UNGE)
9922 if (code == UNGE)
9924 rtx tmp = op0;
9925 op0 = op1;
9926 op1 = tmp;
9928 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9929 if (memory_operand (op0, VOIDmode))
9930 op0 = force_reg (GET_MODE (operands[0]), op0);
9931 if (GET_MODE (operands[0]) == SFmode)
9932 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9933 else
9934 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9935 return 1;
9938 /* Massage the condition into an sse_comparison_operator.  When we are
9939 in non-IEEE mode, try to canonicalize the destination operand
9940 to be first in the comparison - this helps reload to avoid extra
9941 moves.  */
9942 if (!sse_comparison_operator (operands[1], VOIDmode)
9943 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9945 rtx tmp = ix86_compare_op0;
9946 ix86_compare_op0 = ix86_compare_op1;
9947 ix86_compare_op1 = tmp;
9948 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9949 VOIDmode, ix86_compare_op0,
9950 ix86_compare_op1);
9952 /* Similarly, try to arrange for the result to be the first operand of the
9953 conditional move.  We also do not support the NE comparison on SSE,
9954 so try to avoid it.  */
9955 if ((rtx_equal_p (operands[0], operands[3])
9956 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9957 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9959 rtx tmp = operands[2];
9960 operands[2] = operands[3];
9961 operands[3] = tmp;
9962 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9963 (GET_CODE (operands[1])),
9964 VOIDmode, ix86_compare_op0,
9965 ix86_compare_op1);
9967 if (GET_MODE (operands[0]) == SFmode)
9968 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9969 operands[2], operands[3],
9970 ix86_compare_op0, ix86_compare_op1));
9971 else
9972 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9973 operands[2], operands[3],
9974 ix86_compare_op0, ix86_compare_op1));
9975 return 1;
9978 /* The floating point conditional move instructions don't directly
9979 support conditions resulting from a signed integer comparison. */
9981 code = GET_CODE (operands[1]);
9982 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9987 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9989 if (second_test != NULL || bypass_test != NULL)
9990 abort ();
9991 tmp = gen_reg_rtx (QImode);
9992 ix86_expand_setcc (code, tmp);
9993 code = NE;
9994 ix86_compare_op0 = tmp;
9995 ix86_compare_op1 = const0_rtx;
9996 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9998 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10000 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10001 emit_move_insn (tmp, operands[3]);
10002 operands[3] = tmp;
10004 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10006 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10007 emit_move_insn (tmp, operands[2]);
10008 operands[2] = tmp;
10011 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10012 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10013 compare_op,
10014 operands[2],
10015 operands[3])));
10016 if (bypass_test)
10017 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10018 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10019 bypass_test,
10020 operands[3],
10021 operands[0])));
10022 if (second_test)
10023 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10024 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10025 second_test,
10026 operands[2],
10027 operands[0])));
10029 return 1;
10032 /* Expand a conditional increment or decrement using adc/sbb instructions.
10033 The default case using setcc followed by the conditional move can be
10034 done by generic code.  */
10036 ix86_expand_int_addcc (operands)
10037 rtx operands[];
10039 enum rtx_code code = GET_CODE (operands[1]);
10040 rtx compare_op;
10041 rtx val = const0_rtx;
10043 if (operands[3] != const1_rtx
10044 && operands[3] != constm1_rtx)
10045 return 0;
10046 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10047 ix86_compare_op1, &compare_op))
10048 return 0;
10049 if (GET_CODE (compare_op) != LTU)
10050 val = constm1_rtx;
10051 if ((GET_CODE (compare_op) == LTU) == (operands[3] == constm1_rtx))
10053 switch (GET_MODE (operands[0]))
10055 case QImode:
10056 emit_insn (gen_subqi3_carry (operands[0], operands[2], val));
10057 break;
10058 case HImode:
10059 emit_insn (gen_subhi3_carry (operands[0], operands[2], val));
10060 break;
10061 case SImode:
10062 emit_insn (gen_subsi3_carry (operands[0], operands[2], val));
10063 break;
10064 case DImode:
10065 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val));
10066 break;
10067 default:
10068 abort ();
10071 else
10073 switch (GET_MODE (operands[0]))
10075 case QImode:
10076 emit_insn (gen_addqi3_carry (operands[0], operands[2], val));
10077 break;
10078 case HImode:
10079 emit_insn (gen_addhi3_carry (operands[0], operands[2], val));
10080 break;
10081 case SImode:
10082 emit_insn (gen_addsi3_carry (operands[0], operands[2], val));
10083 break;
10084 case DImode:
10085 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val));
10086 break;
10087 default:
10088 abort ();
10091 return 1; /* DONE */
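/* Editor's note: a minimal sketch (not part of this file) of the
   transformation ix86_expand_int_addcc performs above.  A compare that
   leaves its result in the carry flag can feed adc/sbb directly, so no
   setcc and no branch are needed:

       if (a < b) x++;     becomes     cmpl %ebx, %eax   ; CF = (a < b)
                                       adcl $0, %ecx     ; x += CF

   The C equivalent of the emitted sequence is simply:  */

static unsigned int
addcc_sketch (unsigned int x, unsigned int a, unsigned int b)
{
  /* The unsigned comparison materializes the carry flag; adc adds it.  */
  return x + (a < b);
}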
10095 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10096 works for floating point parameters and non-offsettable memories.
10097 For pushes, it returns just stack offsets; the values will be saved
10098 in the right order. At most three parts are generated. */
10100 static int
10101 ix86_split_to_parts (operand, parts, mode)
10102 rtx operand;
10103 rtx *parts;
10104 enum machine_mode mode;
10106 int size;
10108 if (!TARGET_64BIT)
10109 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10110 else
10111 size = (GET_MODE_SIZE (mode) + 4) / 8;
10113 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10114 abort ();
10115 if (size < 2 || size > 3)
10116 abort ();
10118 /* Optimize constant pool references to immediates. This is used by fp
10119 moves, which force all constants to memory to allow combining. */
10120 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10122 rtx tmp = maybe_get_pool_constant (operand);
10123 if (tmp)
10124 operand = tmp;
10127 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10129 /* The only non-offsettable memories we handle are pushes. */
10130 if (! push_operand (operand, VOIDmode))
10131 abort ();
10133 operand = copy_rtx (operand);
10134 PUT_MODE (operand, Pmode);
10135 parts[0] = parts[1] = parts[2] = operand;
10137 else if (!TARGET_64BIT)
10139 if (mode == DImode)
10140 split_di (&operand, 1, &parts[0], &parts[1]);
10141 else
10143 if (REG_P (operand))
10145 if (!reload_completed)
10146 abort ();
10147 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10148 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10149 if (size == 3)
10150 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10152 else if (offsettable_memref_p (operand))
10154 operand = adjust_address (operand, SImode, 0);
10155 parts[0] = operand;
10156 parts[1] = adjust_address (operand, SImode, 4);
10157 if (size == 3)
10158 parts[2] = adjust_address (operand, SImode, 8);
10160 else if (GET_CODE (operand) == CONST_DOUBLE)
10162 REAL_VALUE_TYPE r;
10163 long l[4];
10165 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10166 switch (mode)
10168 case XFmode:
10169 case TFmode:
10170 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10171 parts[2] = gen_int_mode (l[2], SImode);
10172 break;
10173 case DFmode:
10174 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10175 break;
10176 default:
10177 abort ();
10179 parts[1] = gen_int_mode (l[1], SImode);
10180 parts[0] = gen_int_mode (l[0], SImode);
10182 else
10183 abort ();
10186 else
10188 if (mode == TImode)
10189 split_ti (&operand, 1, &parts[0], &parts[1]);
10190 if (mode == XFmode || mode == TFmode)
10192 if (REG_P (operand))
10194 if (!reload_completed)
10195 abort ();
10196 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10197 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10199 else if (offsettable_memref_p (operand))
10201 operand = adjust_address (operand, DImode, 0);
10202 parts[0] = operand;
10203 parts[1] = adjust_address (operand, SImode, 8);
10205 else if (GET_CODE (operand) == CONST_DOUBLE)
10207 REAL_VALUE_TYPE r;
10208 long l[3];
10210 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10211 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10212 /* Avoid a shift by 32, which provokes a warning on 32-bit hosts. */
10213 if (HOST_BITS_PER_WIDE_INT >= 64)
10214 parts[0]
10215 = gen_int_mode
10216 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10217 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10218 DImode);
10219 else
10220 parts[0] = immed_double_const (l[0], l[1], DImode);
10221 parts[1] = gen_int_mode (l[2], SImode);
10223 else
10224 abort ();
10228 return size;
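/* Editor's note: an illustrative sketch (not from this file) of what
   "splitting to parts" means for a register or offsettable DImode value
   on a 32-bit target: the value is viewed as two SImode words, low part
   first, matching the adjust_address calls with offsets 0 and 4 above.  */

#include <stdint.h>

static void
split_di_sketch (uint64_t value, uint32_t parts[2])
{
  parts[0] = (uint32_t) value;          /* low word, offset 0 */
  parts[1] = (uint32_t) (value >> 32);  /* high word, offset 4 */
}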
10231 /* Emit insns to perform a move or push of DI, DF, and XF values.
10232 All required insns are emitted here. Operands 2-4 receive the
10233 input values in the correct order; operands 5-7 receive the
10234 output values. */
10236 void
10237 ix86_split_long_move (operands)
10238 rtx operands[];
10240 rtx part[2][3];
10241 int nparts;
10242 int push = 0;
10243 int collisions = 0;
10244 enum machine_mode mode = GET_MODE (operands[0]);
10246 /* The DFmode expanders may ask us to move a double. For a 64-bit
10247 target this is a single move; by handling it here we simplify the
10248 i386.md splitters. */
10249 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10251 /* Optimize constant pool references to immediates. This is used by
10252 fp moves, which force all constants to memory to allow combining. */
10254 if (GET_CODE (operands[1]) == MEM
10255 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10256 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10257 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10258 if (push_operand (operands[0], VOIDmode))
10260 operands[0] = copy_rtx (operands[0]);
10261 PUT_MODE (operands[0], Pmode);
10263 else
10264 operands[0] = gen_lowpart (DImode, operands[0]);
10265 operands[1] = gen_lowpart (DImode, operands[1]);
10266 emit_move_insn (operands[0], operands[1]);
10267 return;
10270 /* The only non-offsettable memory we handle is push. */
10271 if (push_operand (operands[0], VOIDmode))
10272 push = 1;
10273 else if (GET_CODE (operands[0]) == MEM
10274 && ! offsettable_memref_p (operands[0]))
10275 abort ();
10277 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10278 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10280 /* When emitting a push, take care of source operands on the stack. */
10281 if (push && GET_CODE (operands[1]) == MEM
10282 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10284 if (nparts == 3)
10285 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10286 XEXP (part[1][2], 0));
10287 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10288 XEXP (part[1][1], 0));
10291 /* We need to do the copy in the right order in case an address
10292 register of the source overlaps the destination. */
10293 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10295 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10296 collisions++;
10297 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10298 collisions++;
10299 if (nparts == 3
10300 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10301 collisions++;
10303 /* Collision in the middle part can be handled by reordering. */
10304 if (collisions == 1 && nparts == 3
10305 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10307 rtx tmp;
10308 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10309 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10312 /* If there are more collisions, we can't handle it by reordering.
10313 Do an lea to the last part and use only one colliding move. */
10314 else if (collisions > 1)
10316 collisions = 1;
10317 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10318 XEXP (part[1][0], 0)));
10319 part[1][0] = change_address (part[1][0],
10320 TARGET_64BIT ? DImode : SImode,
10321 part[0][nparts - 1]);
10322 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10323 if (nparts == 3)
10324 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10328 if (push)
10330 if (!TARGET_64BIT)
10332 if (nparts == 3)
10334 /* We use only the first 12 bytes of a TFmode value, but for a push
10335 we are required to adjust the stack as if we were pushing a real
10336 16-byte value. */
10337 if (mode == TFmode && !TARGET_64BIT)
10338 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10339 GEN_INT (-4)));
10340 emit_move_insn (part[0][2], part[1][2]);
10343 else
10345 /* In 64-bit mode we don't have a 32-bit push available. If this is
10346 a register, that is OK - we simply use the larger counterpart. We also
10347 retype the memory - this comes from an attempt to avoid a REX prefix
10348 on moving the second half of a TFmode value. */
10349 if (GET_MODE (part[1][1]) == SImode)
10351 if (GET_CODE (part[1][1]) == MEM)
10352 part[1][1] = adjust_address (part[1][1], DImode, 0);
10353 else if (REG_P (part[1][1]))
10354 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10355 else
10356 abort ();
10357 if (GET_MODE (part[1][0]) == SImode)
10358 part[1][0] = part[1][1];
10361 emit_move_insn (part[0][1], part[1][1]);
10362 emit_move_insn (part[0][0], part[1][0]);
10363 return;
10366 /* Choose the correct order so we do not overwrite the source before it is copied. */
10367 if ((REG_P (part[0][0])
10368 && REG_P (part[1][1])
10369 && (REGNO (part[0][0]) == REGNO (part[1][1])
10370 || (nparts == 3
10371 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10372 || (collisions > 0
10373 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10375 if (nparts == 3)
10377 operands[2] = part[0][2];
10378 operands[3] = part[0][1];
10379 operands[4] = part[0][0];
10380 operands[5] = part[1][2];
10381 operands[6] = part[1][1];
10382 operands[7] = part[1][0];
10384 else
10386 operands[2] = part[0][1];
10387 operands[3] = part[0][0];
10388 operands[5] = part[1][1];
10389 operands[6] = part[1][0];
10392 else
10394 if (nparts == 3)
10396 operands[2] = part[0][0];
10397 operands[3] = part[0][1];
10398 operands[4] = part[0][2];
10399 operands[5] = part[1][0];
10400 operands[6] = part[1][1];
10401 operands[7] = part[1][2];
10403 else
10405 operands[2] = part[0][0];
10406 operands[3] = part[0][1];
10407 operands[5] = part[1][0];
10408 operands[6] = part[1][1];
10411 emit_move_insn (operands[2], operands[5]);
10412 emit_move_insn (operands[3], operands[6]);
10413 if (nparts == 3)
10414 emit_move_insn (operands[4], operands[7]);
10416 return;
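/* Editor's note: a hypothetical example (not from this file) of the
   collision that the ordering code above avoids.  Loading a DImode
   value from memory addressed by a register that is also the low half
   of the destination must copy the high word first:

       movl 4(%eax), %edx   ; high word while %eax still holds the address
       movl (%eax), %eax    ; low word last, clobbering the address

   Copying the low word first would destroy %eax before the second load.  */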
10419 void
10420 ix86_split_ashldi (operands, scratch)
10421 rtx *operands, scratch;
10423 rtx low[2], high[2];
10424 int count;
10426 if (GET_CODE (operands[2]) == CONST_INT)
10428 split_di (operands, 2, low, high);
10429 count = INTVAL (operands[2]) & 63;
10431 if (count >= 32)
10433 emit_move_insn (high[0], low[1]);
10434 emit_move_insn (low[0], const0_rtx);
10436 if (count > 32)
10437 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10439 else
10441 if (!rtx_equal_p (operands[0], operands[1]))
10442 emit_move_insn (operands[0], operands[1]);
10443 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10444 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10447 else
10449 if (!rtx_equal_p (operands[0], operands[1]))
10450 emit_move_insn (operands[0], operands[1]);
10452 split_di (operands, 1, low, high);
10454 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10455 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10457 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10459 if (! no_new_pseudos)
10460 scratch = force_reg (SImode, const0_rtx);
10461 else
10462 emit_move_insn (scratch, const0_rtx);
10464 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10465 scratch));
10467 else
10468 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
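/* Editor's note: a C-level sketch (not from this file) of the
   constant-count case handled above.  SHLD shifts bits from the low
   word into the high word; counts of 32 or more move the low word up
   wholesale and clear the low word.  The ashrdi/lshrdi splitters below
   mirror this with shrd plus sign or zero fill.  */

#include <stdint.h>

static void
ashldi_sketch (uint32_t *lo, uint32_t *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);   /* count == 32 is a plain move */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));  /* the shld step */
      *lo <<= count;
    }
}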
10472 void
10473 ix86_split_ashrdi (operands, scratch)
10474 rtx *operands, scratch;
10476 rtx low[2], high[2];
10477 int count;
10479 if (GET_CODE (operands[2]) == CONST_INT)
10481 split_di (operands, 2, low, high);
10482 count = INTVAL (operands[2]) & 63;
10484 if (count >= 32)
10486 emit_move_insn (low[0], high[1]);
10488 if (! reload_completed)
10489 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10490 else
10492 emit_move_insn (high[0], low[0]);
10493 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10496 if (count > 32)
10497 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10499 else
10501 if (!rtx_equal_p (operands[0], operands[1]))
10502 emit_move_insn (operands[0], operands[1]);
10503 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10504 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10507 else
10509 if (!rtx_equal_p (operands[0], operands[1]))
10510 emit_move_insn (operands[0], operands[1]);
10512 split_di (operands, 1, low, high);
10514 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10515 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10517 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10519 if (! no_new_pseudos)
10520 scratch = gen_reg_rtx (SImode);
10521 emit_move_insn (scratch, high[0]);
10522 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10523 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10524 scratch));
10526 else
10527 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10531 void
10532 ix86_split_lshrdi (operands, scratch)
10533 rtx *operands, scratch;
10535 rtx low[2], high[2];
10536 int count;
10538 if (GET_CODE (operands[2]) == CONST_INT)
10540 split_di (operands, 2, low, high);
10541 count = INTVAL (operands[2]) & 63;
10543 if (count >= 32)
10545 emit_move_insn (low[0], high[1]);
10546 emit_move_insn (high[0], const0_rtx);
10548 if (count > 32)
10549 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10551 else
10553 if (!rtx_equal_p (operands[0], operands[1]))
10554 emit_move_insn (operands[0], operands[1]);
10555 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10556 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10559 else
10561 if (!rtx_equal_p (operands[0], operands[1]))
10562 emit_move_insn (operands[0], operands[1]);
10564 split_di (operands, 1, low, high);
10566 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10567 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10569 /* Heh. By reversing the arguments, we can reuse this pattern. */
10570 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10572 if (! no_new_pseudos)
10573 scratch = force_reg (SImode, const0_rtx);
10574 else
10575 emit_move_insn (scratch, const0_rtx);
10577 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10578 scratch));
10580 else
10581 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10585 /* Helper function for the string operations below. Test whether VARIABLE
10586 is aligned to VALUE bytes; if so, emit a jump to the returned label. */
10587 static rtx
10588 ix86_expand_aligntest (variable, value)
10589 rtx variable;
10590 int value;
10592 rtx label = gen_label_rtx ();
10593 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10594 if (GET_MODE (variable) == DImode)
10595 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10596 else
10597 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10598 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10599 1, label);
10600 return label;
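/* Editor's note: in C terms (a sketch, not from this file) the test
   emitted above is

       if ((variable & value) == 0)
         goto label;   /* the tested alignment bit is clear */

   so the caller emits the fix-up code for the unaligned case right
   after the call and uses the returned label as the aligned join
   point.  */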
10603 /* Decrease COUNTREG by VALUE. */
10604 static void
10605 ix86_adjust_counter (countreg, value)
10606 rtx countreg;
10607 HOST_WIDE_INT value;
10609 if (GET_MODE (countreg) == DImode)
10610 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10611 else
10612 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10615 /* Zero-extend EXP, which may be SImode, into a Pmode register. */
10616 rtx
10617 ix86_zero_extend_to_Pmode (exp)
10618 rtx exp;
10620 rtx r;
10621 if (GET_MODE (exp) == VOIDmode)
10622 return force_reg (Pmode, exp);
10623 if (GET_MODE (exp) == Pmode)
10624 return copy_to_mode_reg (Pmode, exp);
10625 r = gen_reg_rtx (Pmode);
10626 emit_insn (gen_zero_extendsidi2 (r, exp));
10627 return r;
10630 /* Expand string move (memcpy) operation. Use i386 string operations when
10631 profitable. expand_clrstr contains similar code. */
10632 int
10633 ix86_expand_movstr (dst, src, count_exp, align_exp)
10634 rtx dst, src, count_exp, align_exp;
10636 rtx srcreg, destreg, countreg;
10637 enum machine_mode counter_mode;
10638 HOST_WIDE_INT align = 0;
10639 unsigned HOST_WIDE_INT count = 0;
10640 rtx insns;
10643 if (GET_CODE (align_exp) == CONST_INT)
10644 align = INTVAL (align_exp);
10646 /* This simple hack avoids all inlining code and simplifies code below. */
10647 if (!TARGET_ALIGN_STRINGOPS)
10648 align = 64;
10650 if (GET_CODE (count_exp) == CONST_INT)
10652 count = INTVAL (count_exp);
10653 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10654 return 0;
10657 /* Figure out the proper mode for the counter. For 32-bit targets it is
10658 always SImode; for 64-bit targets use SImode when possible, otherwise
10659 DImode. Count is the number of bytes copied when known at compile time. */
10660 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10661 || x86_64_zero_extended_value (count_exp))
10662 counter_mode = SImode;
10663 else
10664 counter_mode = DImode;
10666 start_sequence ();
10668 if (counter_mode != SImode && counter_mode != DImode)
10669 abort ();
10671 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10672 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10674 emit_insn (gen_cld ());
10676 /* When optimizing for size, emit a simple rep ; movsb instruction for
10677 counts not divisible by 4. */
10679 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10681 countreg = ix86_zero_extend_to_Pmode (count_exp);
10682 if (TARGET_64BIT)
10683 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10684 destreg, srcreg, countreg));
10685 else
10686 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10687 destreg, srcreg, countreg));
10690 /* For constant aligned (or small unaligned) copies use rep movsl
10691 followed by code copying the rest. For PentiumPro ensure 8 byte
10692 alignment to allow rep movsl acceleration. */
10694 else if (count != 0
10695 && (align >= 8
10696 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10697 || optimize_size || count < (unsigned int) 64))
10699 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10700 if (count & ~(size - 1))
10702 countreg = copy_to_mode_reg (counter_mode,
10703 GEN_INT ((count >> (size == 4 ? 2 : 3))
10704 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10705 countreg = ix86_zero_extend_to_Pmode (countreg);
10706 if (size == 4)
10708 if (TARGET_64BIT)
10709 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10710 destreg, srcreg, countreg));
10711 else
10712 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10713 destreg, srcreg, countreg));
10715 else
10716 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10717 destreg, srcreg, countreg));
10719 if (size == 8 && (count & 0x04))
10720 emit_insn (gen_strmovsi (destreg, srcreg));
10721 if (count & 0x02)
10722 emit_insn (gen_strmovhi (destreg, srcreg));
10723 if (count & 0x01)
10724 emit_insn (gen_strmovqi (destreg, srcreg));
10726 /* The generic code based on the glibc implementation:
10727 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10728 allowing accelerated copying there)
10729 - copy the data using rep movsl
10730 - copy the rest. */
10731 else
10733 rtx countreg2;
10734 rtx label = NULL;
10735 int desired_alignment = (TARGET_PENTIUMPRO
10736 && (count == 0 || count >= (unsigned int) 260)
10737 ? 8 : UNITS_PER_WORD);
10739 /* In case we don't know anything about the alignment, default to the
10740 library version, since it is usually equally fast and results in
10741 shorter code.
10743 Also emit the call when we know that the count is large and the call
10744 overhead will not be important. */
10745 if (!TARGET_INLINE_ALL_STRINGOPS
10746 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10748 end_sequence ();
10749 return 0;
10752 if (TARGET_SINGLE_STRINGOP)
10753 emit_insn (gen_cld ());
10755 countreg2 = gen_reg_rtx (Pmode);
10756 countreg = copy_to_mode_reg (counter_mode, count_exp);
10758 /* We don't use loops to align the destination or to copy parts smaller
10759 than 4 bytes, because gcc is able to optimize such code better (in
10760 the case the destination or the count really is aligned, gcc is often
10761 able to predict the branches) and also it is friendlier to the
10762 hardware branch prediction.
10764 Using loops is beneficial for the generic case, because we can
10765 handle small counts using the loops. Many CPUs (such as Athlon)
10766 have large REP prefix setup costs.
10768 This is quite costly. Maybe we can revisit this decision later or
10769 add some customizability to this code. */
10771 if (count == 0 && align < desired_alignment)
10773 label = gen_label_rtx ();
10774 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10775 LEU, 0, counter_mode, 1, label);
10777 if (align <= 1)
10779 rtx label = ix86_expand_aligntest (destreg, 1);
10780 emit_insn (gen_strmovqi (destreg, srcreg));
10781 ix86_adjust_counter (countreg, 1);
10782 emit_label (label);
10783 LABEL_NUSES (label) = 1;
10785 if (align <= 2)
10787 rtx label = ix86_expand_aligntest (destreg, 2);
10788 emit_insn (gen_strmovhi (destreg, srcreg));
10789 ix86_adjust_counter (countreg, 2);
10790 emit_label (label);
10791 LABEL_NUSES (label) = 1;
10793 if (align <= 4 && desired_alignment > 4)
10795 rtx label = ix86_expand_aligntest (destreg, 4);
10796 emit_insn (gen_strmovsi (destreg, srcreg));
10797 ix86_adjust_counter (countreg, 4);
10798 emit_label (label);
10799 LABEL_NUSES (label) = 1;
10802 if (label && desired_alignment > 4 && !TARGET_64BIT)
10804 emit_label (label);
10805 LABEL_NUSES (label) = 1;
10806 label = NULL_RTX;
10808 if (!TARGET_SINGLE_STRINGOP)
10809 emit_insn (gen_cld ());
10810 if (TARGET_64BIT)
10812 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10813 GEN_INT (3)));
10814 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10815 destreg, srcreg, countreg2));
10817 else
10819 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10820 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10821 destreg, srcreg, countreg2));
10824 if (label)
10826 emit_label (label);
10827 LABEL_NUSES (label) = 1;
10829 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10830 emit_insn (gen_strmovsi (destreg, srcreg));
10831 if ((align <= 4 || count == 0) && TARGET_64BIT)
10833 rtx label = ix86_expand_aligntest (countreg, 4);
10834 emit_insn (gen_strmovsi (destreg, srcreg));
10835 emit_label (label);
10836 LABEL_NUSES (label) = 1;
10838 if (align > 2 && count != 0 && (count & 2))
10839 emit_insn (gen_strmovhi (destreg, srcreg));
10840 if (align <= 2 || count == 0)
10842 rtx label = ix86_expand_aligntest (countreg, 2);
10843 emit_insn (gen_strmovhi (destreg, srcreg));
10844 emit_label (label);
10845 LABEL_NUSES (label) = 1;
10847 if (align > 1 && count != 0 && (count & 1))
10848 emit_insn (gen_strmovqi (destreg, srcreg));
10849 if (align <= 1 || count == 0)
10851 rtx label = ix86_expand_aligntest (countreg, 1);
10852 emit_insn (gen_strmovqi (destreg, srcreg));
10853 emit_label (label);
10854 LABEL_NUSES (label) = 1;
10858 insns = get_insns ();
10859 end_sequence ();
10861 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10862 emit_insn (insns);
10863 return 1;
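/* Editor's note: a simplified C model (not from this file) of the
   inline-memcpy strategy above, for the 32-bit known-count case: copy
   the bulk with word-sized moves (rep movsl in the real code), then
   mop up the 2- and 1-byte tail just as the strmovhi/strmovqi insns
   do.  The word-sized accesses assume x86-style unaligned loads and
   stores are acceptable.  */

#include <stddef.h>
#include <stdint.h>

static void
movstr_sketch (char *dst, const char *src, size_t count)
{
  size_t words = count >> 2;   /* the rep movsl iteration count */

  while (words--)
    {
      *(uint32_t *) dst = *(const uint32_t *) src;  /* ok on x86 */
      dst += 4;
      src += 4;
    }
  if (count & 2)
    {
      *(uint16_t *) dst = *(const uint16_t *) src;
      dst += 2;
      src += 2;
    }
  if (count & 1)
    *dst = *src;
}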
10866 /* Expand string clear operation (bzero). Use i386 string operations when
10867 profitable. expand_movstr contains similar code. */
10868 int
10869 ix86_expand_clrstr (src, count_exp, align_exp)
10870 rtx src, count_exp, align_exp;
10872 rtx destreg, zeroreg, countreg;
10873 enum machine_mode counter_mode;
10874 HOST_WIDE_INT align = 0;
10875 unsigned HOST_WIDE_INT count = 0;
10877 if (GET_CODE (align_exp) == CONST_INT)
10878 align = INTVAL (align_exp);
10880 /* This simple hack avoids all inlining code and simplifies code below. */
10881 if (!TARGET_ALIGN_STRINGOPS)
10882 align = 32;
10884 if (GET_CODE (count_exp) == CONST_INT)
10886 count = INTVAL (count_exp);
10887 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10888 return 0;
10890 /* Figure out the proper mode for the counter. For 32-bit targets it is
10891 always SImode; for 64-bit targets use SImode when possible, otherwise
10892 DImode. Count is the number of bytes cleared when known at compile time. */
10893 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10894 || x86_64_zero_extended_value (count_exp))
10895 counter_mode = SImode;
10896 else
10897 counter_mode = DImode;
10899 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10901 emit_insn (gen_cld ());
10903 /* When optimizing for size, emit a simple rep ; stosb instruction for
10904 counts not divisible by 4. */
10906 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10908 countreg = ix86_zero_extend_to_Pmode (count_exp);
10909 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10910 if (TARGET_64BIT)
10911 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10912 destreg, countreg));
10913 else
10914 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10915 destreg, countreg));
10917 else if (count != 0
10918 && (align >= 8
10919 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10920 || optimize_size || count < (unsigned int) 64))
10922 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10923 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10924 if (count & ~(size - 1))
10926 countreg = copy_to_mode_reg (counter_mode,
10927 GEN_INT ((count >> (size == 4 ? 2 : 3))
10928 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10929 countreg = ix86_zero_extend_to_Pmode (countreg);
10930 if (size == 4)
10932 if (TARGET_64BIT)
10933 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10934 destreg, countreg));
10935 else
10936 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10937 destreg, countreg));
10939 else
10940 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10941 destreg, countreg));
10943 if (size == 8 && (count & 0x04))
10944 emit_insn (gen_strsetsi (destreg,
10945 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10946 if (count & 0x02)
10947 emit_insn (gen_strsethi (destreg,
10948 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10949 if (count & 0x01)
10950 emit_insn (gen_strsetqi (destreg,
10951 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10953 else
10955 rtx countreg2;
10956 rtx label = NULL;
10957 /* Compute desired alignment of the string operation. */
10958 int desired_alignment = (TARGET_PENTIUMPRO
10959 && (count == 0 || count >= (unsigned int) 260)
10960 ? 8 : UNITS_PER_WORD);
10962 /* In case we don't know anything about the alignment, default to the
10963 library version, since it is usually equally fast and results in
10964 shorter code.
10966 Also emit the call when we know that the count is large and the call
10967 overhead will not be important. */
10968 if (!TARGET_INLINE_ALL_STRINGOPS
10969 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10970 return 0;
10972 if (TARGET_SINGLE_STRINGOP)
10973 emit_insn (gen_cld ());
10975 countreg2 = gen_reg_rtx (Pmode);
10976 countreg = copy_to_mode_reg (counter_mode, count_exp);
10977 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10979 if (count == 0 && align < desired_alignment)
10981 label = gen_label_rtx ();
10982 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10983 LEU, 0, counter_mode, 1, label);
10985 if (align <= 1)
10987 rtx label = ix86_expand_aligntest (destreg, 1);
10988 emit_insn (gen_strsetqi (destreg,
10989 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10990 ix86_adjust_counter (countreg, 1);
10991 emit_label (label);
10992 LABEL_NUSES (label) = 1;
10994 if (align <= 2)
10996 rtx label = ix86_expand_aligntest (destreg, 2);
10997 emit_insn (gen_strsethi (destreg,
10998 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10999 ix86_adjust_counter (countreg, 2);
11000 emit_label (label);
11001 LABEL_NUSES (label) = 1;
11003 if (align <= 4 && desired_alignment > 4)
11005 rtx label = ix86_expand_aligntest (destreg, 4);
11006 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11007 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11008 : zeroreg)));
11009 ix86_adjust_counter (countreg, 4);
11010 emit_label (label);
11011 LABEL_NUSES (label) = 1;
11014 if (label && desired_alignment > 4 && !TARGET_64BIT)
11016 emit_label (label);
11017 LABEL_NUSES (label) = 1;
11018 label = NULL_RTX;
11021 if (!TARGET_SINGLE_STRINGOP)
11022 emit_insn (gen_cld ());
11023 if (TARGET_64BIT)
11025 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11026 GEN_INT (3)));
11027 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11028 destreg, countreg2));
11030 else
11032 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11033 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11034 destreg, countreg2));
11036 if (label)
11038 emit_label (label);
11039 LABEL_NUSES (label) = 1;
11042 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11043 emit_insn (gen_strsetsi (destreg,
11044 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11045 if (TARGET_64BIT && (align <= 4 || count == 0))
11047 rtx label = ix86_expand_aligntest (countreg, 4);
11048 emit_insn (gen_strsetsi (destreg,
11049 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11050 emit_label (label);
11051 LABEL_NUSES (label) = 1;
11053 if (align > 2 && count != 0 && (count & 2))
11054 emit_insn (gen_strsethi (destreg,
11055 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11056 if (align <= 2 || count == 0)
11058 rtx label = ix86_expand_aligntest (countreg, 2);
11059 emit_insn (gen_strsethi (destreg,
11060 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11061 emit_label (label);
11062 LABEL_NUSES (label) = 1;
11064 if (align > 1 && count != 0 && (count & 1))
11065 emit_insn (gen_strsetqi (destreg,
11066 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11067 if (align <= 1 || count == 0)
11069 rtx label = ix86_expand_aligntest (countreg, 1);
11070 emit_insn (gen_strsetqi (destreg,
11071 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11072 emit_label (label);
11073 LABEL_NUSES (label) = 1;
11076 return 1;
11078 /* Expand strlen. */
11079 int
11080 ix86_expand_strlen (out, src, eoschar, align)
11081 rtx out, src, eoschar, align;
11083 rtx addr, scratch1, scratch2, scratch3, scratch4;
11085 /* The generic case of the strlen expander is long. Avoid expanding
11086 it unless TARGET_INLINE_ALL_STRINGOPS. */
11088 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11089 && !TARGET_INLINE_ALL_STRINGOPS
11090 && !optimize_size
11091 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11092 return 0;
11094 addr = force_reg (Pmode, XEXP (src, 0));
11095 scratch1 = gen_reg_rtx (Pmode);
11097 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11098 && !optimize_size)
11100 /* Well, it seems that some optimizer does not combine a call like
11101 foo(strlen(bar), strlen(bar));
11102 when the move and the subtraction are done here. It does calculate
11103 the length just once when these instructions are done inside of
11104 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11105 often used and I use one fewer register for the lifetime of
11106 output_strlen_unroll(), this is better. */
11108 emit_move_insn (out, addr);
11110 ix86_expand_strlensi_unroll_1 (out, align);
11112 /* strlensi_unroll_1 returns the address of the zero at the end of
11113 the string, like memchr(), so compute the length by subtracting
11114 the start address. */
11115 if (TARGET_64BIT)
11116 emit_insn (gen_subdi3 (out, out, addr));
11117 else
11118 emit_insn (gen_subsi3 (out, out, addr));
11120 else
11122 scratch2 = gen_reg_rtx (Pmode);
11123 scratch3 = gen_reg_rtx (Pmode);
11124 scratch4 = force_reg (Pmode, constm1_rtx);
11126 emit_move_insn (scratch3, addr);
11127 eoschar = force_reg (QImode, eoschar);
11129 emit_insn (gen_cld ());
11130 if (TARGET_64BIT)
11132 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11133 align, scratch4, scratch3));
11134 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11135 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11137 else
11139 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11140 align, scratch4, scratch3));
11141 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11142 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11145 return 1;
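/* Editor's note: the arithmetic behind the one_cmpl/constm1 sequence
   above, spelled out (not from this file).  repne scasb starts with
   the count register at -1 and decrements it once per byte scanned,
   including the terminating zero, so for a string of length n:

       count          = -1 - (n + 1) = -(n + 2)
       ~count         = n + 1
       ~count + (-1)  = n        <- the value stored into OUT above  */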
11148 /* Expand the appropriate insns for doing strlen if not just doing
11149 repnz; scasb
11151 out = result, initialized with the start address
11152 align_rtx = alignment of the address.
11153 scratch = scratch register, initialized with the start address when
11154 not aligned, otherwise undefined
11156 This is just the body. It needs the initialisations mentioned above and
11157 some address computing at the end. These things are done in i386.md. */
11159 static void
11160 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11161 rtx out, align_rtx;
11163 int align;
11164 rtx tmp;
11165 rtx align_2_label = NULL_RTX;
11166 rtx align_3_label = NULL_RTX;
11167 rtx align_4_label = gen_label_rtx ();
11168 rtx end_0_label = gen_label_rtx ();
11169 rtx mem;
11170 rtx tmpreg = gen_reg_rtx (SImode);
11171 rtx scratch = gen_reg_rtx (SImode);
11173 align = 0;
11174 if (GET_CODE (align_rtx) == CONST_INT)
11175 align = INTVAL (align_rtx);
11177 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11179 /* Is there a known alignment and is it less than 4? */
11180 if (align < 4)
11182 rtx scratch1 = gen_reg_rtx (Pmode);
11183 emit_move_insn (scratch1, out);
11184 /* Is there a known alignment and is it not 2? */
11185 if (align != 2)
11187 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11188 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11190 /* Leave just the 3 lower bits. */
11191 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11192 NULL_RTX, 0, OPTAB_WIDEN);
11194 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11195 Pmode, 1, align_4_label);
11196 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11197 Pmode, 1, align_2_label);
11198 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11199 Pmode, 1, align_3_label);
11201 else
11203 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11204 check whether it is aligned to a 4-byte boundary. */
11206 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11207 NULL_RTX, 0, OPTAB_WIDEN);
11209 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11210 Pmode, 1, align_4_label);
11213 mem = gen_rtx_MEM (QImode, out);
11215 /* Now compare the bytes. */
11217 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
11218 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11219 QImode, 1, end_0_label);
11221 /* Increment the address. */
11222 if (TARGET_64BIT)
11223 emit_insn (gen_adddi3 (out, out, const1_rtx));
11224 else
11225 emit_insn (gen_addsi3 (out, out, const1_rtx));
11227 /* Not needed with an alignment of 2 */
11228 if (align != 2)
11230 emit_label (align_2_label);
11232 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11233 end_0_label);
11235 if (TARGET_64BIT)
11236 emit_insn (gen_adddi3 (out, out, const1_rtx));
11237 else
11238 emit_insn (gen_addsi3 (out, out, const1_rtx));
11240 emit_label (align_3_label);
11243 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11244 end_0_label);
11246 if (TARGET_64BIT)
11247 emit_insn (gen_adddi3 (out, out, const1_rtx));
11248 else
11249 emit_insn (gen_addsi3 (out, out, const1_rtx));
11252 /* Generate a loop to check 4 bytes at a time. Aligning this loop is
11253 not a good idea: it only makes programs bigger and does not make
11254 them any faster. */
11255 emit_label (align_4_label);
11257 mem = gen_rtx_MEM (SImode, out);
11258 emit_move_insn (scratch, mem);
11259 if (TARGET_64BIT)
11260 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11261 else
11262 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11264 /* This formula yields a nonzero result iff one of the bytes is zero.
11265 This saves three branches inside the loop and many cycles. */
11267 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11268 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11269 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11270 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11271 gen_int_mode (0x80808080, SImode)));
11272 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11273 align_4_label);
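/* Editor's note: a worked instance (not from this file) of the test
   above, which computes (x - 0x01010101) & ~x & 0x80808080.  Take
   x = 0x12340078, whose byte 1 (second-lowest, i.e. the second string
   character on little-endian x86) is zero:

       x - 0x01010101  = 0x1132FF77
       ~x              = 0xEDCBFF87
       and of the two  = 0x0102FF07
       & 0x80808080    = 0x00008000   nonzero: bit 15 flags byte 1

   The ~x factor masks bytes whose own top bit was already set, which
   removes the false positives the subtraction alone would produce.  */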
11275 if (TARGET_CMOVE)
11277 rtx reg = gen_reg_rtx (SImode);
11278 rtx reg2 = gen_reg_rtx (Pmode);
11279 emit_move_insn (reg, tmpreg);
11280 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11282 /* If zero is not in the first two bytes, move two bytes forward. */
11283 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11284 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11285 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11286 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11287 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11288 reg,
11289 tmpreg)));
11290 /* Emit the lea manually to avoid clobbering the flags. */
11291 emit_insn (gen_rtx_SET (SImode, reg2,
11292 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11294 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11295 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11296 emit_insn (gen_rtx_SET (VOIDmode, out,
11297 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11298 reg2,
11299 out)));
11302 else
11304 rtx end_2_label = gen_label_rtx ();
11305 /* Is zero in the first two bytes? */
11307 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11308 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11309 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11310 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11311 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11312 pc_rtx);
11313 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11314 JUMP_LABEL (tmp) = end_2_label;
11316 /* Not in the first two. Move two bytes forward. */
11317 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11318 if (TARGET_64BIT)
11319 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11320 else
11321 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11323 emit_label (end_2_label);
11327 /* Avoid a branch when fixing up the final byte. */
11328 tmpreg = gen_lowpart (QImode, tmpreg);
11329 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11330 if (TARGET_64BIT)
11331 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11332 else
11333 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
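/* Editor's note (not from this file): how the add/sbb pair just above
   fixes up OUT.  Here OUT is 4 bytes past the start of the 16-bit pair
   that contains the zero, and bit 7 of TMPREG's low byte says whether
   the first byte of that pair is the zero.  "add tmpreg, tmpreg"
   copies that bit into the carry flag, and "sbb out, 3" subtracts
   3 + CF: 4 when the first byte of the pair is zero, 3 when the
   second is, leaving OUT pointing exactly at the zero byte.  */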
11335 emit_label (end_0_label);
11338 void
11339 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11340 rtx retval, fnaddr, callarg1, callarg2, pop;
11341 int sibcall;
11343 rtx use = NULL, call;
11345 if (pop == const0_rtx)
11346 pop = NULL;
11347 if (TARGET_64BIT && pop)
11348 abort ();
11350 #if TARGET_MACHO
11351 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11352 fnaddr = machopic_indirect_call_target (fnaddr);
11353 #else
11354 /* Static functions and indirect calls don't need the pic register. */
11355 if (! TARGET_64BIT && flag_pic
11356 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11357 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11358 use_reg (&use, pic_offset_table_rtx);
11360 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11362 rtx al = gen_rtx_REG (QImode, 0);
11363 emit_move_insn (al, callarg2);
11364 use_reg (&use, al);
11366 #endif /* TARGET_MACHO */
11368 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11370 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11371 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11373 if (sibcall && TARGET_64BIT
11374 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11376 rtx addr;
11377 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11378 fnaddr = gen_rtx_REG (Pmode, 40);
11379 emit_move_insn (fnaddr, addr);
11380 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11383 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11384 if (retval)
11385 call = gen_rtx_SET (VOIDmode, retval, call);
11386 if (pop)
11388 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11389 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11390 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11393 call = emit_call_insn (call);
11394 if (use)
11395 CALL_INSN_FUNCTION_USAGE (call) = use;
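/* Editor's note (a schematic, not from this file): the POP parallel
   built above is how callee-popped arguments (e.g. stdcall) are
   represented.  For a call that pops 8 bytes the emitted RTL is,
   roughly:

       (parallel [(set (reg:SI eax) (call ...))
                  (set (reg:SI esp)
                       (plus:SI (reg:SI esp) (const_int 8)))])

   recording on the caller side that the callee's "ret $8" adjusts the
   stack pointer.  */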
11399 /* Clear stack slot assignments remembered from previous functions.
11400 This is called from INIT_EXPANDERS once before RTL is emitted for each
11401 function. */
11403 static struct machine_function *
11404 ix86_init_machine_status ()
11406 return ggc_alloc_cleared (sizeof (struct machine_function));
11409 /* Return a MEM corresponding to a stack slot with mode MODE.
11410 Allocate a new slot if necessary.
11412 The RTL for a function can have several slots available: N is
11413 which slot to use. */
11415 rtx
11416 assign_386_stack_local (mode, n)
11417 enum machine_mode mode;
11418 int n;
11420 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11421 abort ();
11423 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11424 ix86_stack_locals[(int) mode][n]
11425 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11427 return ix86_stack_locals[(int) mode][n];
11430 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11432 static GTY(()) rtx ix86_tls_symbol;
11433 rtx
11434 ix86_tls_get_addr ()
11437 if (!ix86_tls_symbol)
11439 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11440 (TARGET_GNU_TLS && !TARGET_64BIT)
11441 ? "___tls_get_addr"
11442 : "__tls_get_addr");
11445 return ix86_tls_symbol;
11448 /* Calculate the length of the memory address in the instruction
11449 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11451 static int
11452 memory_address_length (addr)
11453 rtx addr;
11455 struct ix86_address parts;
11456 rtx base, index, disp;
11457 int len;
11459 if (GET_CODE (addr) == PRE_DEC
11460 || GET_CODE (addr) == POST_INC
11461 || GET_CODE (addr) == PRE_MODIFY
11462 || GET_CODE (addr) == POST_MODIFY)
11463 return 0;
11465 if (! ix86_decompose_address (addr, &parts))
11466 abort ();
11468 base = parts.base;
11469 index = parts.index;
11470 disp = parts.disp;
11471 len = 0;
11473 /* Register Indirect. */
11474 if (base && !index && !disp)
11476 /* Special cases: ebp and esp need the two-byte modrm form. */
11477 if (addr == stack_pointer_rtx
11478 || addr == arg_pointer_rtx
11479 || addr == frame_pointer_rtx
11480 || addr == hard_frame_pointer_rtx)
11481 len = 1;
11484 /* Direct Addressing. */
11485 else if (disp && !base && !index)
11486 len = 4;
11488 else
11490 /* Find the length of the displacement constant. */
11491 if (disp)
11493 if (GET_CODE (disp) == CONST_INT
11494 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11495 len = 1;
11496 else
11497 len = 4;
11500 /* An index requires the two-byte modrm form. */
11501 if (index)
11502 len += 1;
11505 return len;
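/* Editor's note: a few illustrative data points (not from this file)
   for the computation above:

       movl (%eax), %ebx          -> base only:           0
       movl 8(%ebx,%ecx,4), %eax  -> disp8 + SIB byte:    1 + 1 = 2
       movl foo(,%ecx,4), %eax    -> disp32 + SIB byte:   4 + 1 = 5

   The one-byte modrm, opcode and prefixes are counted elsewhere, as
   the comment above notes.  */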
11508 /* Compute the default value for the "length_immediate" attribute. When
11509 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
11510 int
11511 ix86_attr_length_immediate_default (insn, shortform)
11512 rtx insn;
11513 int shortform;
11515 int len = 0;
11516 int i;
11517 extract_insn_cached (insn);
11518 for (i = recog_data.n_operands - 1; i >= 0; --i)
11519 if (CONSTANT_P (recog_data.operand[i]))
11521 if (len)
11522 abort ();
11523 if (shortform
11524 && GET_CODE (recog_data.operand[i]) == CONST_INT
11525 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11526 len = 1;
11527 else
11529 switch (get_attr_mode (insn))
11531 case MODE_QI:
11532 len+=1;
11533 break;
11534 case MODE_HI:
11535 len+=2;
11536 break;
11537 case MODE_SI:
11538 len+=4;
11539 break;
11540 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11541 case MODE_DI:
11542 len+=4;
11543 break;
11544 default:
11545 fatal_insn ("unknown insn mode", insn);
11549 return len;
11551 /* Compute the default value for the "length_address" attribute. */
11552 int
11553 ix86_attr_length_address_default (insn)
11554 rtx insn;
11556 int i;
11557 extract_insn_cached (insn);
11558 for (i = recog_data.n_operands - 1; i >= 0; --i)
11559 if (GET_CODE (recog_data.operand[i]) == MEM)
11561 return memory_address_length (XEXP (recog_data.operand[i], 0));
11562 break;
11564 return 0;
11567 /* Return the maximum number of instructions a cpu can issue. */
11569 static int
11570 ix86_issue_rate ()
11572 switch (ix86_cpu)
11574 case PROCESSOR_PENTIUM:
11575 case PROCESSOR_K6:
11576 return 2;
11578 case PROCESSOR_PENTIUMPRO:
11579 case PROCESSOR_PENTIUM4:
11580 case PROCESSOR_ATHLON:
11581 case PROCESSOR_K8:
11582 return 3;
11584 default:
11585 return 1;
11589 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11590 set by DEP_INSN and nothing else set by DEP_INSN. */
11592 static int
11593 ix86_flags_dependant (insn, dep_insn, insn_type)
11594 rtx insn, dep_insn;
11595 enum attr_type insn_type;
11597 rtx set, set2;
11599 /* Simplify the test for uninteresting insns. */
11600 if (insn_type != TYPE_SETCC
11601 && insn_type != TYPE_ICMOV
11602 && insn_type != TYPE_FCMOV
11603 && insn_type != TYPE_IBR)
11604 return 0;
11606 if ((set = single_set (dep_insn)) != 0)
11608 set = SET_DEST (set);
11609 set2 = NULL_RTX;
11611 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11612 && XVECLEN (PATTERN (dep_insn), 0) == 2
11613 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11614 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11616 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11617 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11619 else
11620 return 0;
11622 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11623 return 0;
11625 /* This test is true if the dependent insn reads the flags but
11626 not any other potentially set register. */
11627 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11628 return 0;
11630 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11631 return 0;
11633 return 1;
11636 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11637 address with operands set by DEP_INSN. */
11639 static int
11640 ix86_agi_dependant (insn, dep_insn, insn_type)
11641 rtx insn, dep_insn;
11642 enum attr_type insn_type;
11644 rtx addr;
11646 if (insn_type == TYPE_LEA
11647 && TARGET_PENTIUM)
11649 addr = PATTERN (insn);
11650 if (GET_CODE (addr) == SET)
11652 else if (GET_CODE (addr) == PARALLEL
11653 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11654 addr = XVECEXP (addr, 0, 0);
11655 else
11656 abort ();
11657 addr = SET_SRC (addr);
11659 else
11661 int i;
11662 extract_insn_cached (insn);
11663 for (i = recog_data.n_operands - 1; i >= 0; --i)
11664 if (GET_CODE (recog_data.operand[i]) == MEM)
11666 addr = XEXP (recog_data.operand[i], 0);
11667 goto found;
11669 return 0;
11670 found:;
11673 return modified_in_p (addr, dep_insn);
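/* Editor's note: a concrete AGI pair (not from this file).  On the
   Pentium, an instruction that computes a register used in the very
   next instruction's address stalls address generation for a cycle:

       addl $4, %eax       ; DEP_INSN writes %eax
       movl (%eax), %ebx   ; INSN needs %eax to form its address

   ix86_adjust_cost below adds one cycle of latency for this case.  */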
11676 static int
11677 ix86_adjust_cost (insn, link, dep_insn, cost)
11678 rtx insn, link, dep_insn;
11679 int cost;
11681 enum attr_type insn_type, dep_insn_type;
11682 enum attr_memory memory, dep_memory;
11683 rtx set, set2;
11684 int dep_insn_code_number;
11686 /* Anti and output dependencies have zero cost on all CPUs. */
11687 if (REG_NOTE_KIND (link) != 0)
11688 return 0;
11690 dep_insn_code_number = recog_memoized (dep_insn);
11692 /* If we can't recognize the insns, we can't really do anything. */
11693 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11694 return cost;
11696 insn_type = get_attr_type (insn);
11697 dep_insn_type = get_attr_type (dep_insn);
11699 switch (ix86_cpu)
11701 case PROCESSOR_PENTIUM:
11702 /* Address Generation Interlock adds a cycle of latency. */
11703 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11704 cost += 1;
11706 /* ??? Compares pair with jump/setcc. */
11707 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11708 cost = 0;
11710 /* Floating point stores require value to be ready one cycle earlier. */
11711 if (insn_type == TYPE_FMOV
11712 && get_attr_memory (insn) == MEMORY_STORE
11713 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11714 cost += 1;
11715 break;
11717 case PROCESSOR_PENTIUMPRO:
11718 memory = get_attr_memory (insn);
11719 dep_memory = get_attr_memory (dep_insn);
11721 /* Since we can't represent delayed latencies of load+operation,
11722 increase the cost here for non-imov insns. */
11723 if (dep_insn_type != TYPE_IMOV
11724 && dep_insn_type != TYPE_FMOV
11725 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11726 cost += 1;
11728 /* INT->FP conversion is expensive. */
11729 if (get_attr_fp_int_src (dep_insn))
11730 cost += 5;
11732 /* There is one cycle extra latency between an FP op and a store. */
11733 if (insn_type == TYPE_FMOV
11734 && (set = single_set (dep_insn)) != NULL_RTX
11735 && (set2 = single_set (insn)) != NULL_RTX
11736 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11737 && GET_CODE (SET_DEST (set2)) == MEM)
11738 cost += 1;
11740 /* Model the ability of the reorder buffer to hide the latency of a load
11741 by executing it in parallel with the previous instruction, when the
11742 previous instruction is not needed to compute the address. */
11743 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11744 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11746 /* Claim that moves take one cycle, as the core can issue one load
11747 at a time and the next load can start a cycle later. */
11748 if (dep_insn_type == TYPE_IMOV
11749 || dep_insn_type == TYPE_FMOV)
11750 cost = 1;
11751 else if (cost > 1)
11752 cost--;
11754 break;
11756 case PROCESSOR_K6:
11757 memory = get_attr_memory (insn);
11758 dep_memory = get_attr_memory (dep_insn);
11759 /* The esp dependency is resolved before the instruction is really
11760 finished. */
11761 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11762 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11763 return 1;
11765 /* Since we can't represent delayed latencies of load+operation,
11766 increase the cost here for non-imov insns. */
11767 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11768 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11770 /* INT->FP conversion is expensive. */
11771 if (get_attr_fp_int_src (dep_insn))
11772 cost += 5;
11774 /* Model the ability of the reorder buffer to hide the latency of a load
11775 by executing it in parallel with the previous instruction, when the
11776 previous instruction is not needed to compute the address. */
11777 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11778 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11780 /* Claim that moves take one cycle, as the core can issue one load
11781 at a time and the next load can start a cycle later. */
11782 if (dep_insn_type == TYPE_IMOV
11783 || dep_insn_type == TYPE_FMOV)
11784 cost = 1;
11785 else if (cost > 2)
11786 cost -= 2;
11787 else
11788 cost = 1;
11790 break;
11792 case PROCESSOR_ATHLON:
11793 case PROCESSOR_K8:
11794 memory = get_attr_memory (insn);
11795 dep_memory = get_attr_memory (dep_insn);
11797 /* Model the ability of the reorder buffer to hide the latency of a load
11798 by executing it in parallel with the previous instruction, when the
11799 previous instruction is not needed to compute the address. */
11800 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11801 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11803 /* Claim that moves take one cycle, as the core can issue one load
11804 at a time and the next load can start a cycle later. */
11805 if (dep_insn_type == TYPE_IMOV
11806 || dep_insn_type == TYPE_FMOV)
11807 cost = 0;
11808 else if (cost >= 3)
11809 cost -= 3;
11810 else
11811 cost = 0;
11814 default:
11815 break;
11818 return cost;
11821 static union
11823 struct ppro_sched_data
11825 rtx decode[3];
11826 int issued_this_cycle;
11827 } ppro;
11828 } ix86_sched_data;
11830 static enum attr_ppro_uops
11831 ix86_safe_ppro_uops (insn)
11832 rtx insn;
11834 if (recog_memoized (insn) >= 0)
11835 return get_attr_ppro_uops (insn);
11836 else
11837 return PPRO_UOPS_MANY;
11840 static void
11841 ix86_dump_ppro_packet (dump)
11842 FILE *dump;
11844 if (ix86_sched_data.ppro.decode[0])
11846 fprintf (dump, "PPRO packet: %d",
11847 INSN_UID (ix86_sched_data.ppro.decode[0]));
11848 if (ix86_sched_data.ppro.decode[1])
11849 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11850 if (ix86_sched_data.ppro.decode[2])
11851 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11852 fputc ('\n', dump);
11856 /* We're beginning a new block. Initialize data structures as necessary. */
11858 static void
11859 ix86_sched_init (dump, sched_verbose, veclen)
11860 FILE *dump ATTRIBUTE_UNUSED;
11861 int sched_verbose ATTRIBUTE_UNUSED;
11862 int veclen ATTRIBUTE_UNUSED;
11864 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11867 /* Shift INSN to SLOT, and shift everything else down. */
11869 static void
11870 ix86_reorder_insn (insnp, slot)
11871 rtx *insnp, *slot;
11873 if (insnp != slot)
11875 rtx insn = *insnp;
11877 insnp[0] = insnp[1];
11878 while (++insnp != slot);
11879 *insnp = insn;
11883 static void
11884 ix86_sched_reorder_ppro (ready, e_ready)
11885 rtx *ready;
11886 rtx *e_ready;
11888 rtx decode[3];
11889 enum attr_ppro_uops cur_uops;
11890 int issued_this_cycle;
11891 rtx *insnp;
11892 int i;
11894 /* At this point .ppro.decode contains the state of the three
11895 decoders from last "cycle". That is, those insns that were
11896 actually independent. But here we're scheduling for the
11897 decoder, and we may find things that are decodable in the
11898 same cycle. */
11900 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11901 issued_this_cycle = 0;
11903 insnp = e_ready;
11904 cur_uops = ix86_safe_ppro_uops (*insnp);
11906 /* If the decoders are empty and we have a complex insn at the
11907 head of the priority queue, let it issue without complaint. */
11908 if (decode[0] == NULL)
11910 if (cur_uops == PPRO_UOPS_MANY)
11912 decode[0] = *insnp;
11913 goto ppro_done;
11916 /* Otherwise, search for a 2-4 uop insn to issue. */
11917 while (cur_uops != PPRO_UOPS_FEW)
11919 if (insnp == ready)
11920 break;
11921 cur_uops = ix86_safe_ppro_uops (*--insnp);
11924 /* If so, move it to the head of the line. */
11925 if (cur_uops == PPRO_UOPS_FEW)
11926 ix86_reorder_insn (insnp, e_ready);
11928 /* Issue the head of the queue. */
11929 issued_this_cycle = 1;
11930 decode[0] = *e_ready--;
11933 /* Look for simple insns to fill in the other two slots. */
11934 for (i = 1; i < 3; ++i)
11935 if (decode[i] == NULL)
11937 if (ready > e_ready)
11938 goto ppro_done;
11940 insnp = e_ready;
11941 cur_uops = ix86_safe_ppro_uops (*insnp);
11942 while (cur_uops != PPRO_UOPS_ONE)
11944 if (insnp == ready)
11945 break;
11946 cur_uops = ix86_safe_ppro_uops (*--insnp);
11949 /* Found one. Move it to the head of the queue and issue it. */
11950 if (cur_uops == PPRO_UOPS_ONE)
11952 ix86_reorder_insn (insnp, e_ready);
11953 decode[i] = *e_ready--;
11954 issued_this_cycle++;
11955 continue;
11958 /* ??? Didn't find one. Ideally, here we would do a lazy split
11959 of 2-uop insns, issue one and queue the other. */
11962 ppro_done:
11963 if (issued_this_cycle == 0)
11964 issued_this_cycle = 1;
11965 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11968 /* We are about to begin issuing insns for this clock cycle.
11969 Override the default sort algorithm to better slot instructions. */
11970 static int
11971 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11972 FILE *dump ATTRIBUTE_UNUSED;
11973 int sched_verbose ATTRIBUTE_UNUSED;
11974 rtx *ready;
11975 int *n_readyp;
11976 int clock_var ATTRIBUTE_UNUSED;
11978 int n_ready = *n_readyp;
11979 rtx *e_ready = ready + n_ready - 1;
11981 /* Make sure to go ahead and initialize key items in
11982 ix86_sched_data if we are not going to bother trying to
11983 reorder the ready queue. */
11984 if (n_ready < 2)
11986 ix86_sched_data.ppro.issued_this_cycle = 1;
11987 goto out;
11990 switch (ix86_cpu)
11992 default:
11993 break;
11995 case PROCESSOR_PENTIUMPRO:
11996 ix86_sched_reorder_ppro (ready, e_ready);
11997 break;
12000 out:
12001 return ix86_issue_rate ();
12004 /* We are about to issue INSN. Return the number of insns left on the
12005 ready queue that can be issued this cycle. */
12007 static int
12008 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12009 FILE *dump;
12010 int sched_verbose;
12011 rtx insn;
12012 int can_issue_more;
12014 int i;
12015 switch (ix86_cpu)
12017 default:
12018 return can_issue_more - 1;
12020 case PROCESSOR_PENTIUMPRO:
12022 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12024 if (uops == PPRO_UOPS_MANY)
12026 if (sched_verbose)
12027 ix86_dump_ppro_packet (dump);
12028 ix86_sched_data.ppro.decode[0] = insn;
12029 ix86_sched_data.ppro.decode[1] = NULL;
12030 ix86_sched_data.ppro.decode[2] = NULL;
12031 if (sched_verbose)
12032 ix86_dump_ppro_packet (dump);
12033 ix86_sched_data.ppro.decode[0] = NULL;
12035 else if (uops == PPRO_UOPS_FEW)
12037 if (sched_verbose)
12038 ix86_dump_ppro_packet (dump);
12039 ix86_sched_data.ppro.decode[0] = insn;
12040 ix86_sched_data.ppro.decode[1] = NULL;
12041 ix86_sched_data.ppro.decode[2] = NULL;
12043 else
12045 for (i = 0; i < 3; ++i)
12046 if (ix86_sched_data.ppro.decode[i] == NULL)
12048 ix86_sched_data.ppro.decode[i] = insn;
12049 break;
12051 if (i == 3)
12052 abort ();
12053 if (i == 2)
12055 if (sched_verbose)
12056 ix86_dump_ppro_packet (dump);
12057 ix86_sched_data.ppro.decode[0] = NULL;
12058 ix86_sched_data.ppro.decode[1] = NULL;
12059 ix86_sched_data.ppro.decode[2] = NULL;
12063 return --ix86_sched_data.ppro.issued_this_cycle;
12067 static int
12068 ia32_use_dfa_pipeline_interface ()
12070 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12071 return 1;
12072 return 0;
12075 /* How many alternative schedules to try. This should be as wide as the
12076 scheduling freedom in the DFA, but no wider. Making this value too
12077 large results in extra work for the scheduler. */
12079 static int
12080 ia32_multipass_dfa_lookahead ()
12082 if (ix86_cpu == PROCESSOR_PENTIUM)
12083 return 2;
12084 else
12085 return 0;
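/* Rationale sketch (assumed; not stated in the original): the
   in-order Pentium issues at most two insns per cycle through its
   U and V pipes, so a lookahead of 2 is enough to weigh the pairing
   alternatives; other CPUs take the default.  */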
12089 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12090 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
12091 appropriate. */
12093 void
12094 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12095 rtx insns;
12096 rtx dstref, srcref, dstreg, srcreg;
12098 rtx insn;
12100 for (insn = insns; insn != 0; insn = NEXT_INSN (insn))
12101 if (INSN_P (insn))
12102 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12103 dstreg, srcreg);
12106 /* Subroutine of above to actually do the updating by recursively walking
12107 the rtx. */
12109 static void
12110 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12111 rtx x;
12112 rtx dstref, srcref, dstreg, srcreg;
12114 enum rtx_code code = GET_CODE (x);
12115 const char *format_ptr = GET_RTX_FORMAT (code);
12116 int i, j;
12118 if (code == MEM && XEXP (x, 0) == dstreg)
12119 MEM_COPY_ATTRIBUTES (x, dstref);
12120 else if (code == MEM && XEXP (x, 0) == srcreg)
12121 MEM_COPY_ATTRIBUTES (x, srcref);
12123 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12125 if (*format_ptr == 'e')
12126 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12127 dstreg, srcreg);
12128 else if (*format_ptr == 'E')
12129 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12130 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12131 dstreg, srcreg);
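/* Usage sketch (DSTMEM/SRCMEM are hypothetical operands): after a
   block move has been expanded through fresh address registers, a
   call such as
     ix86_set_move_mem_attrs (insns, dstmem, srcmem, dstreg, srcreg);
   walks INSNS and copies the memory attributes (alias set,
   alignment, ...) of DSTMEM and SRCMEM onto every (mem DSTREG) and
   (mem SRCREG) found in the insn patterns.  */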
12135 /* Compute the alignment given to a constant that is being placed in memory.
12136 EXP is the constant and ALIGN is the alignment that the object would
12137 ordinarily have.
12138 The value of this function is used instead of that alignment to align
12139 the object. */
12141 int
12142 ix86_constant_alignment (exp, align)
12143 tree exp;
12144 int align;
12146 if (TREE_CODE (exp) == REAL_CST)
12148 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12149 return 64;
12150 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12151 return 128;
12153 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12154 && align < 256)
12155 return 256;
12157 return align;
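/* Worked example (illustrative): a DFmode REAL_CST whose natural
   alignment is 32 bits comes back as 64, and a 40-character
   STRING_CST comes back as 256, so such constants can be accessed
   with wider aligned loads.  */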
12160 /* Compute the alignment for a static variable.
12161 TYPE is the data type, and ALIGN is the alignment that
12162 the object would ordinarily have. The value of this function is used
12163 instead of that alignment to align the object. */
12165 int
12166 ix86_data_alignment (type, align)
12167 tree type;
12168 int align;
12170 if (AGGREGATE_TYPE_P (type)
12171 && TYPE_SIZE (type)
12172 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12173 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12174 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12175 return 256;
12177 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12178 to a 16-byte boundary. */
12179 if (TARGET_64BIT)
12181 if (AGGREGATE_TYPE_P (type)
12182 && TYPE_SIZE (type)
12183 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12184 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12185 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12186 return 128;
12189 if (TREE_CODE (type) == ARRAY_TYPE)
12191 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12192 return 64;
12193 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12194 return 128;
12196 else if (TREE_CODE (type) == COMPLEX_TYPE)
12199 if (TYPE_MODE (type) == DCmode && align < 64)
12200 return 64;
12201 if (TYPE_MODE (type) == XCmode && align < 128)
12202 return 128;
12204 else if ((TREE_CODE (type) == RECORD_TYPE
12205 || TREE_CODE (type) == UNION_TYPE
12206 || TREE_CODE (type) == QUAL_UNION_TYPE)
12207 && TYPE_FIELDS (type))
12209 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12210 return 64;
12211 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12212 return 128;
12214 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12215 || TREE_CODE (type) == INTEGER_TYPE)
12217 if (TYPE_MODE (type) == DFmode && align < 64)
12218 return 64;
12219 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12220 return 128;
12223 return align;
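/* Worked example (illustrative): under -m64 a static  char buf[16];
   is 128 bits wide, so the aggregate test above raises its alignment
   to 128 bits and it can be accessed with movdqa/movaps.  */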
12226 /* Compute the alignment for a local variable.
12227 TYPE is the data type, and ALIGN is the alignment that
12228 the object would ordinarily have. The value of this function is used
12229 instead of that alignment to align the object. */
12231 int
12232 ix86_local_alignment (type, align)
12233 tree type;
12234 int align;
12236 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12237 to a 16-byte boundary. */
12238 if (TARGET_64BIT)
12240 if (AGGREGATE_TYPE_P (type)
12241 && TYPE_SIZE (type)
12242 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12243 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12244 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12245 return 128;
12247 if (TREE_CODE (type) == ARRAY_TYPE)
12249 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12250 return 64;
12251 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12252 return 128;
12254 else if (TREE_CODE (type) == COMPLEX_TYPE)
12256 if (TYPE_MODE (type) == DCmode && align < 64)
12257 return 64;
12258 if (TYPE_MODE (type) == XCmode && align < 128)
12259 return 128;
12261 else if ((TREE_CODE (type) == RECORD_TYPE
12262 || TREE_CODE (type) == UNION_TYPE
12263 || TREE_CODE (type) == QUAL_UNION_TYPE)
12264 && TYPE_FIELDS (type))
12266 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12267 return 64;
12268 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12269 return 128;
12271 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12272 || TREE_CODE (type) == INTEGER_TYPE)
12275 if (TYPE_MODE (type) == DFmode && align < 64)
12276 return 64;
12277 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12278 return 128;
12280 return align;
12283 /* Emit RTL insns to initialize the variable parts of a trampoline.
12284 FNADDR is an RTX for the address of the function's pure code.
12285 CXT is an RTX for the static chain value for the function. */
12286 void
12287 x86_initialize_trampoline (tramp, fnaddr, cxt)
12288 rtx tramp, fnaddr, cxt;
12290 if (!TARGET_64BIT)
12292 /* Compute offset from the end of the jmp to the target function. */
12293 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12294 plus_constant (tramp, 10),
12295 NULL_RTX, 1, OPTAB_DIRECT);
12296 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12297 gen_int_mode (0xb9, QImode));
12298 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12299 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12300 gen_int_mode (0xe9, QImode));
12301 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12303 else
12305 int offset = 0;
12306 /* Try to load the address using the shorter movl instead of movabs.
12307 We may want to support movq for kernel mode, but the kernel does
12308 not use trampolines at the moment. */
12309 if (x86_64_zero_extended_value (fnaddr))
12311 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12312 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12313 gen_int_mode (0xbb41, HImode));
12314 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12315 gen_lowpart (SImode, fnaddr));
12316 offset += 6;
12318 else
12320 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12321 gen_int_mode (0xbb49, HImode));
12322 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12323 fnaddr);
12324 offset += 10;
12326 /* Load static chain using movabs to r10. */
12327 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12328 gen_int_mode (0xba49, HImode));
12329 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12330 cxt);
12331 offset += 10;
12332 /* Jump to r11. */
12333 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12334 gen_int_mode (0xff49, HImode));
12335 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset + 2)),
12336 gen_int_mode (0xe3, QImode));
12337 offset += 3;
12338 if (offset > TRAMPOLINE_SIZE)
12339 abort ();
12342 #ifdef TRANSFER_FROM_TRAMPOLINE
12343 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12344 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12345 #endif
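/* Byte layout emitted above, decoded from the opcodes (illustration):
   32-bit, 10 bytes:
     b9 <cxt32>     movl   $CXT, %ecx
     e9 <disp32>    jmp    FNADDR        (disp relative to tramp + 10)
   64-bit, 19 or 23 bytes:
     49 bb <imm64>  movabs $FNADDR, %r11 (41 bb <imm32> when it fits)
     49 ba <imm64>  movabs $CXT, %r10
     49 ff e3       jmp    *%r11  */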
12348 #define def_builtin(MASK, NAME, TYPE, CODE) \
12349 do { \
12350 if ((MASK) & target_flags) \
12351 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12352 NULL, NULL_TREE); \
12353 } while (0)
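/* Expansion example (illustrative): a call below such as
     def_builtin (MASK_SSE1, "__builtin_ia32_loadaps",
                  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
   registers the builtin only when -msse or -msse2 has set the
   corresponding bit in target_flags.  */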
12355 struct builtin_description
12357 const unsigned int mask;
12358 const enum insn_code icode;
12359 const char *const name;
12360 const enum ix86_builtins code;
12361 const enum rtx_code comparison;
12362 const unsigned int flag;
12365 /* Used for builtins that are enabled both by -msse and -msse2. */
12366 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12368 static const struct builtin_description bdesc_comi[] =
12370 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12371 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12372 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12373 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12374 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12375 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12376 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12377 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12378 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12379 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12380 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12381 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12382 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12383 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12384 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12385 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12386 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12387 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12388 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12389 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12390 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12391 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12392 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12393 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
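/* Note on the comparison codes above: comiss/comisd set ZF both for
   equal and for unordered operands, so the eq/neq rows use UNEQ and
   LTGT -- the codes whose NaN behavior matches the flag tests the
   expanders emit.  */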
12396 static const struct builtin_description bdesc_2arg[] =
12398 /* SSE */
12399 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12400 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12401 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12402 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12403 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12404 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12405 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12406 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12408 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12409 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12410 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12411 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12412 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12413 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12414 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12415 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12416 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12417 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12418 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12419 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12420 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12421 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12422 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12423 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12424 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12425 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12426 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12427 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12429 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12430 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12431 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12432 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12434 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12435 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12436 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12437 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12439 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12440 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12441 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12442 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12443 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12445 /* MMX */
12446 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12447 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12448 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12449 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12450 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12451 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12453 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12454 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12455 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12456 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12457 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12458 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12459 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12460 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12462 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12463 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12464 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12466 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12467 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12468 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12469 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12471 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12472 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12474 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12475 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12476 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12477 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12478 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12479 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12481 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12482 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12483 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12484 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12486 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12487 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12488 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12489 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12490 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12491 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12493 /* Special. */
12494 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12495 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12496 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12498 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12499 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12501 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12502 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12503 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12504 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12505 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12506 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12508 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12509 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12510 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12511 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12512 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12513 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12515 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12516 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12517 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12518 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12520 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12521 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12523 /* SSE2 */
12524 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12525 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12526 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12527 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12528 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12529 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12530 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12531 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12533 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12534 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12535 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12536 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12537 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12538 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12539 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12540 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12541 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12542 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12543 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12544 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12545 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12546 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12547 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12548 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12549 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12550 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12551 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12552 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12554 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12555 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12556 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12557 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12559 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12560 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12561 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12562 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12564 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12565 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12566 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12568 /* SSE2 MMX */
12569 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12570 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12571 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12572 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12573 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12574 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12575 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12576 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12578 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12579 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12580 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12581 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12582 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12583 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12584 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12585 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12587 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12588 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12589 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12590 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12592 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12593 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12594 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12595 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12597 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12598 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12600 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12601 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12602 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12603 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12604 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12605 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12607 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12608 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12609 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12610 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12612 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12613 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12614 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12615 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12616 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12617 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12618 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12619 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12621 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12622 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12623 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12625 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12626 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12628 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12629 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12630 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12631 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12632 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12633 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12635 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12636 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12637 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12638 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12639 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12640 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12642 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12643 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12644 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12645 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12647 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12649 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12650 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12651 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12654 static const struct builtin_description bdesc_1arg[] =
12656 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12657 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12659 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12660 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12661 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12663 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12664 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12665 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12666 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12668 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12670 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12673 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12675 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12676 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12678 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12679 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12680 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12681 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12682 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12684 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12686 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12687 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12689 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12690 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12691 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12696 void
12697 ix86_init_builtins ()
12699 if (TARGET_MMX)
12700 ix86_init_mmx_sse_builtins ();
12703 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12704 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12705 builtins. */
12706 static void
12707 ix86_init_mmx_sse_builtins ()
12709 const struct builtin_description * d;
12710 size_t i;
12712 tree pchar_type_node = build_pointer_type (char_type_node);
12713 tree pcchar_type_node = build_pointer_type (
12714 build_type_variant (char_type_node, 1, 0));
12715 tree pfloat_type_node = build_pointer_type (float_type_node);
12716 tree pcfloat_type_node = build_pointer_type (
12717 build_type_variant (float_type_node, 1, 0));
12718 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12719 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12720 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12722 /* Comparisons. */
12723 tree int_ftype_v4sf_v4sf
12724 = build_function_type_list (integer_type_node,
12725 V4SF_type_node, V4SF_type_node, NULL_TREE);
12726 tree v4si_ftype_v4sf_v4sf
12727 = build_function_type_list (V4SI_type_node,
12728 V4SF_type_node, V4SF_type_node, NULL_TREE);
12729 /* MMX/SSE/integer conversions. */
12730 tree int_ftype_v4sf
12731 = build_function_type_list (integer_type_node,
12732 V4SF_type_node, NULL_TREE);
12733 tree int_ftype_v8qi
12734 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12735 tree v4sf_ftype_v4sf_int
12736 = build_function_type_list (V4SF_type_node,
12737 V4SF_type_node, integer_type_node, NULL_TREE);
12738 tree v4sf_ftype_v4sf_v2si
12739 = build_function_type_list (V4SF_type_node,
12740 V4SF_type_node, V2SI_type_node, NULL_TREE);
12741 tree int_ftype_v4hi_int
12742 = build_function_type_list (integer_type_node,
12743 V4HI_type_node, integer_type_node, NULL_TREE);
12744 tree v4hi_ftype_v4hi_int_int
12745 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12746 integer_type_node, integer_type_node,
12747 NULL_TREE);
12748 /* Miscellaneous. */
12749 tree v8qi_ftype_v4hi_v4hi
12750 = build_function_type_list (V8QI_type_node,
12751 V4HI_type_node, V4HI_type_node, NULL_TREE);
12752 tree v4hi_ftype_v2si_v2si
12753 = build_function_type_list (V4HI_type_node,
12754 V2SI_type_node, V2SI_type_node, NULL_TREE);
12755 tree v4sf_ftype_v4sf_v4sf_int
12756 = build_function_type_list (V4SF_type_node,
12757 V4SF_type_node, V4SF_type_node,
12758 integer_type_node, NULL_TREE);
12759 tree v2si_ftype_v4hi_v4hi
12760 = build_function_type_list (V2SI_type_node,
12761 V4HI_type_node, V4HI_type_node, NULL_TREE);
12762 tree v4hi_ftype_v4hi_int
12763 = build_function_type_list (V4HI_type_node,
12764 V4HI_type_node, integer_type_node, NULL_TREE);
12765 tree v4hi_ftype_v4hi_di
12766 = build_function_type_list (V4HI_type_node,
12767 V4HI_type_node, long_long_unsigned_type_node,
12768 NULL_TREE);
12769 tree v2si_ftype_v2si_di
12770 = build_function_type_list (V2SI_type_node,
12771 V2SI_type_node, long_long_unsigned_type_node,
12772 NULL_TREE);
12773 tree void_ftype_void
12774 = build_function_type (void_type_node, void_list_node);
12775 tree void_ftype_unsigned
12776 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12777 tree unsigned_ftype_void
12778 = build_function_type (unsigned_type_node, void_list_node);
12779 tree di_ftype_void
12780 = build_function_type (long_long_unsigned_type_node, void_list_node);
12781 tree v4sf_ftype_void
12782 = build_function_type (V4SF_type_node, void_list_node);
12783 tree v2si_ftype_v4sf
12784 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12785 /* Loads/stores. */
12786 tree void_ftype_v8qi_v8qi_pchar
12787 = build_function_type_list (void_type_node,
12788 V8QI_type_node, V8QI_type_node,
12789 pchar_type_node, NULL_TREE);
12790 tree v4sf_ftype_pcfloat
12791 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12792 /* @@@ the type is bogus */
12793 tree v4sf_ftype_v4sf_pv2si
12794 = build_function_type_list (V4SF_type_node,
12795 V4SF_type_node, pv2si_type_node, NULL_TREE);
12796 tree void_ftype_pv2si_v4sf
12797 = build_function_type_list (void_type_node,
12798 pv2si_type_node, V4SF_type_node, NULL_TREE);
12799 tree void_ftype_pfloat_v4sf
12800 = build_function_type_list (void_type_node,
12801 pfloat_type_node, V4SF_type_node, NULL_TREE);
12802 tree void_ftype_pdi_di
12803 = build_function_type_list (void_type_node,
12804 pdi_type_node, long_long_unsigned_type_node,
12805 NULL_TREE);
12806 tree void_ftype_pv2di_v2di
12807 = build_function_type_list (void_type_node,
12808 pv2di_type_node, V2DI_type_node, NULL_TREE);
12809 /* Normal vector unops. */
12810 tree v4sf_ftype_v4sf
12811 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12813 /* Normal vector binops. */
12814 tree v4sf_ftype_v4sf_v4sf
12815 = build_function_type_list (V4SF_type_node,
12816 V4SF_type_node, V4SF_type_node, NULL_TREE);
12817 tree v8qi_ftype_v8qi_v8qi
12818 = build_function_type_list (V8QI_type_node,
12819 V8QI_type_node, V8QI_type_node, NULL_TREE);
12820 tree v4hi_ftype_v4hi_v4hi
12821 = build_function_type_list (V4HI_type_node,
12822 V4HI_type_node, V4HI_type_node, NULL_TREE);
12823 tree v2si_ftype_v2si_v2si
12824 = build_function_type_list (V2SI_type_node,
12825 V2SI_type_node, V2SI_type_node, NULL_TREE);
12826 tree di_ftype_di_di
12827 = build_function_type_list (long_long_unsigned_type_node,
12828 long_long_unsigned_type_node,
12829 long_long_unsigned_type_node, NULL_TREE);
12831 tree v2si_ftype_v2sf
12832 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12833 tree v2sf_ftype_v2si
12834 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12835 tree v2si_ftype_v2si
12836 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12837 tree v2sf_ftype_v2sf
12838 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12839 tree v2sf_ftype_v2sf_v2sf
12840 = build_function_type_list (V2SF_type_node,
12841 V2SF_type_node, V2SF_type_node, NULL_TREE);
12842 tree v2si_ftype_v2sf_v2sf
12843 = build_function_type_list (V2SI_type_node,
12844 V2SF_type_node, V2SF_type_node, NULL_TREE);
12845 tree pint_type_node = build_pointer_type (integer_type_node);
12846 tree pcint_type_node = build_pointer_type (
12847 build_type_variant (integer_type_node, 1, 0));
12848 tree pdouble_type_node = build_pointer_type (double_type_node);
12849 tree pcdouble_type_node = build_pointer_type (
12850 build_type_variant (double_type_node, 1, 0));
12851 tree int_ftype_v2df_v2df
12852 = build_function_type_list (integer_type_node,
12853 V2DF_type_node, V2DF_type_node, NULL_TREE);
12855 tree ti_ftype_void
12856 = build_function_type (intTI_type_node, void_list_node);
12857 tree v2di_ftype_void
12858 = build_function_type (V2DI_type_node, void_list_node);
12859 tree ti_ftype_ti_ti
12860 = build_function_type_list (intTI_type_node,
12861 intTI_type_node, intTI_type_node, NULL_TREE);
12862 tree void_ftype_pcvoid
12863 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12864 tree v2di_ftype_di
12865 = build_function_type_list (V2DI_type_node,
12866 long_long_unsigned_type_node, NULL_TREE);
12867 tree di_ftype_v2di
12868 = build_function_type_list (long_long_unsigned_type_node,
12869 V2DI_type_node, NULL_TREE);
12870 tree v4sf_ftype_v4si
12871 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12872 tree v4si_ftype_v4sf
12873 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12874 tree v2df_ftype_v4si
12875 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12876 tree v4si_ftype_v2df
12877 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12878 tree v2si_ftype_v2df
12879 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12880 tree v4sf_ftype_v2df
12881 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12882 tree v2df_ftype_v2si
12883 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12884 tree v2df_ftype_v4sf
12885 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12886 tree int_ftype_v2df
12887 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12888 tree v2df_ftype_v2df_int
12889 = build_function_type_list (V2DF_type_node,
12890 V2DF_type_node, integer_type_node, NULL_TREE);
12891 tree v4sf_ftype_v4sf_v2df
12892 = build_function_type_list (V4SF_type_node,
12893 V4SF_type_node, V2DF_type_node, NULL_TREE);
12894 tree v2df_ftype_v2df_v4sf
12895 = build_function_type_list (V2DF_type_node,
12896 V2DF_type_node, V4SF_type_node, NULL_TREE);
12897 tree v2df_ftype_v2df_v2df_int
12898 = build_function_type_list (V2DF_type_node,
12899 V2DF_type_node, V2DF_type_node,
12900 integer_type_node,
12901 NULL_TREE);
12902 tree v2df_ftype_v2df_pv2si
12903 = build_function_type_list (V2DF_type_node,
12904 V2DF_type_node, pv2si_type_node, NULL_TREE);
12905 tree void_ftype_pv2si_v2df
12906 = build_function_type_list (void_type_node,
12907 pv2si_type_node, V2DF_type_node, NULL_TREE);
12908 tree void_ftype_pdouble_v2df
12909 = build_function_type_list (void_type_node,
12910 pdouble_type_node, V2DF_type_node, NULL_TREE);
12911 tree void_ftype_pint_int
12912 = build_function_type_list (void_type_node,
12913 pint_type_node, integer_type_node, NULL_TREE);
12914 tree void_ftype_v16qi_v16qi_pchar
12915 = build_function_type_list (void_type_node,
12916 V16QI_type_node, V16QI_type_node,
12917 pchar_type_node, NULL_TREE);
12918 tree v2df_ftype_pcdouble
12919 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12920 tree v2df_ftype_v2df_v2df
12921 = build_function_type_list (V2DF_type_node,
12922 V2DF_type_node, V2DF_type_node, NULL_TREE);
12923 tree v16qi_ftype_v16qi_v16qi
12924 = build_function_type_list (V16QI_type_node,
12925 V16QI_type_node, V16QI_type_node, NULL_TREE);
12926 tree v8hi_ftype_v8hi_v8hi
12927 = build_function_type_list (V8HI_type_node,
12928 V8HI_type_node, V8HI_type_node, NULL_TREE);
12929 tree v4si_ftype_v4si_v4si
12930 = build_function_type_list (V4SI_type_node,
12931 V4SI_type_node, V4SI_type_node, NULL_TREE);
12932 tree v2di_ftype_v2di_v2di
12933 = build_function_type_list (V2DI_type_node,
12934 V2DI_type_node, V2DI_type_node, NULL_TREE);
12935 tree v2di_ftype_v2df_v2df
12936 = build_function_type_list (V2DI_type_node,
12937 V2DF_type_node, V2DF_type_node, NULL_TREE);
12938 tree v2df_ftype_v2df
12939 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12940 tree v2df_ftype_double
12941 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12942 tree v2df_ftype_double_double
12943 = build_function_type_list (V2DF_type_node,
12944 double_type_node, double_type_node, NULL_TREE);
12945 tree int_ftype_v8hi_int
12946 = build_function_type_list (integer_type_node,
12947 V8HI_type_node, integer_type_node, NULL_TREE);
12948 tree v8hi_ftype_v8hi_int_int
12949 = build_function_type_list (V8HI_type_node,
12950 V8HI_type_node, integer_type_node,
12951 integer_type_node, NULL_TREE);
12952 tree v2di_ftype_v2di_int
12953 = build_function_type_list (V2DI_type_node,
12954 V2DI_type_node, integer_type_node, NULL_TREE);
12955 tree v4si_ftype_v4si_int
12956 = build_function_type_list (V4SI_type_node,
12957 V4SI_type_node, integer_type_node, NULL_TREE);
12958 tree v8hi_ftype_v8hi_int
12959 = build_function_type_list (V8HI_type_node,
12960 V8HI_type_node, integer_type_node, NULL_TREE);
12961 tree v8hi_ftype_v8hi_v2di
12962 = build_function_type_list (V8HI_type_node,
12963 V8HI_type_node, V2DI_type_node, NULL_TREE);
12964 tree v4si_ftype_v4si_v2di
12965 = build_function_type_list (V4SI_type_node,
12966 V4SI_type_node, V2DI_type_node, NULL_TREE);
12967 tree v4si_ftype_v8hi_v8hi
12968 = build_function_type_list (V4SI_type_node,
12969 V8HI_type_node, V8HI_type_node, NULL_TREE);
12970 tree di_ftype_v8qi_v8qi
12971 = build_function_type_list (long_long_unsigned_type_node,
12972 V8QI_type_node, V8QI_type_node, NULL_TREE);
12973 tree v2di_ftype_v16qi_v16qi
12974 = build_function_type_list (V2DI_type_node,
12975 V16QI_type_node, V16QI_type_node, NULL_TREE);
12976 tree int_ftype_v16qi
12977 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12978 tree v16qi_ftype_pcchar
12979 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12980 tree void_ftype_pchar_v16qi
12981 = build_function_type_list (void_type_node,
12982 pchar_type_node, V16QI_type_node, NULL_TREE);
12983 tree v4si_ftype_pcint
12984 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12985 tree void_ftype_pcint_v4si
12986 = build_function_type_list (void_type_node,
12987 pcint_type_node, V4SI_type_node, NULL_TREE);
12988 tree v2di_ftype_v2di
12989 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12991 /* Add all builtins that are more or less simple operations on two
12992 operands. */
12993 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12995 /* Use one of the operands; the target can have a different mode for
12996 mask-generating compares. */
12997 enum machine_mode mode;
12998 tree type;
13000 if (d->name == 0)
13001 continue;
13002 mode = insn_data[d->icode].operand[1].mode;
13004 switch (mode)
13006 case V16QImode:
13007 type = v16qi_ftype_v16qi_v16qi;
13008 break;
13009 case V8HImode:
13010 type = v8hi_ftype_v8hi_v8hi;
13011 break;
13012 case V4SImode:
13013 type = v4si_ftype_v4si_v4si;
13014 break;
13015 case V2DImode:
13016 type = v2di_ftype_v2di_v2di;
13017 break;
13018 case V2DFmode:
13019 type = v2df_ftype_v2df_v2df;
13020 break;
13021 case TImode:
13022 type = ti_ftype_ti_ti;
13023 break;
13024 case V4SFmode:
13025 type = v4sf_ftype_v4sf_v4sf;
13026 break;
13027 case V8QImode:
13028 type = v8qi_ftype_v8qi_v8qi;
13029 break;
13030 case V4HImode:
13031 type = v4hi_ftype_v4hi_v4hi;
13032 break;
13033 case V2SImode:
13034 type = v2si_ftype_v2si_v2si;
13035 break;
13036 case DImode:
13037 type = di_ftype_di_di;
13038 break;
13040 default:
13041 abort ();
13044 /* Override for comparisons. */
13045 if (d->icode == CODE_FOR_maskcmpv4sf3
13046 || d->icode == CODE_FOR_maskncmpv4sf3
13047 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13048 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13049 type = v4si_ftype_v4sf_v4sf;
13051 if (d->icode == CODE_FOR_maskcmpv2df3
13052 || d->icode == CODE_FOR_maskncmpv2df3
13053 || d->icode == CODE_FOR_vmmaskcmpv2df3
13054 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13055 type = v2di_ftype_v2df_v2df;
13057 def_builtin (d->mask, d->name, type, d->code);
13060 /* Add the remaining MMX insns with somewhat more complicated types. */
13061 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13062 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13063 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13064 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13065 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13067 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13068 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13069 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13071 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13072 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13074 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13075 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13077 /* comi/ucomi insns. */
13078 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13079 if (d->mask == MASK_SSE2)
13080 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13081 else
13082 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13084 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13085 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13086 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13088 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13089 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13090 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13091 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13092 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13093 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13094 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13095 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13097 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13098 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13100 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13102 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13103 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13104 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13105 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13106 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13107 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13109 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13110 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13111 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13112 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13114 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13115 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13116 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13117 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13119 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13121 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13123 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13124 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13125 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13126 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13127 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13128 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13130 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13132 /* Original 3DNow! */
13133 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13134 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13135 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13136 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13137 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13138 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13139 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13140 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13141 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13142 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13143 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13144 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13145 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13146 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13147 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13148 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13149 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13150 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13151 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13152 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13154 /* 3DNow! extension as used in the Athlon CPU. */
13155 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13156 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13157 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13158 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13159 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13160 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13162 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13164 /* SSE2 */
13165 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13166 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13168 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13169 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13170 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13172 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13173 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13174 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13175 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13176 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13177 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13179 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13180 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13181 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13182 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13184 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13185 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13186 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13187 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13188 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13190 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13191 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13192 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13193 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13195 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13196 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13198 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13200 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13201 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13203 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13204 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13205 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13206 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13207 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13209 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13211 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13212 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13214 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13215 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13216 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13218 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13219 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13220 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13222 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13223 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13224 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13225 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13226 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13227 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13228 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13230 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13231 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13232 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13234 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13235 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13236 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13237 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13238 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13239 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13240 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13242 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13244 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13245 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13246 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13248 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13249 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13250 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13252 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13253 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13255 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13256 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13257 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13258 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13260 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13261 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13262 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13263 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13265 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13266 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13268 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13271 /* Errors in the source file can cause expand_expr to return const0_rtx
13272 where we expect a vector. To avoid crashing, use one of the vector
13273 clear instructions. */
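/* Illustrative sketch, not from the original sources: a source-level
   error such as

       __v4sf v = __builtin_ia32_addps (undeclared_name, w);

   can reach the expanders with const0_rtx standing in for the vector
   operand; substituting a freshly cleared vector register keeps the
   pattern generators below from choking on a scalar zero.  */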
13274 static rtx
13275 safe_vector_operand (x, mode)
13276 rtx x;
13277 enum machine_mode mode;
13279 if (x != const0_rtx)
13280 return x;
13281 x = gen_reg_rtx (mode);
13283 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13284 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13285 : gen_rtx_SUBREG (DImode, x, 0)));
13286 else
13287 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13288 : gen_rtx_SUBREG (V4SFmode, x, 0),
13289 CONST0_RTX (V4SFmode)));
13290 return x;
13293 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13295 static rtx
13296 ix86_expand_binop_builtin (icode, arglist, target)
13297 enum insn_code icode;
13298 tree arglist;
13299 rtx target;
13301 rtx pat;
13302 tree arg0 = TREE_VALUE (arglist);
13303 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13304 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13305 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13306 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13307 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13308 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13310 if (VECTOR_MODE_P (mode0))
13311 op0 = safe_vector_operand (op0, mode0);
13312 if (VECTOR_MODE_P (mode1))
13313 op1 = safe_vector_operand (op1, mode1);
13315 if (! target
13316 || GET_MODE (target) != tmode
13317 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13318 target = gen_reg_rtx (tmode);
13320 /* In case the insn wants input operands in modes different from
13321 the result, abort. */
13322 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13323 abort ();
13325 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13326 op0 = copy_to_mode_reg (mode0, op0);
13327 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13328 op1 = copy_to_mode_reg (mode1, op1);
13330 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13331 yet one of the two must not be a memory operand.  This is normally
13332 enforced by expanders, but we didn't bother to create one here. */
13333 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13334 op0 = copy_to_mode_reg (mode0, op0);
13336 pat = GEN_FCN (icode) (target, op0, op1);
13337 if (! pat)
13338 return 0;
13339 emit_insn (pat);
13340 return target;
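/* Usage sketch (hedged): two-operand builtins reach this helper via the
   bdesc_2arg table walked at the bottom of ix86_expand_builtin, e.g.
   __builtin_ia32_paddw would arrive here roughly as

       ix86_expand_binop_builtin (CODE_FOR_addv4hi3, arglist, target);

   where CODE_FOR_addv4hi3 is assumed to name the MMX paddw pattern.  */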
13343 /* Subroutine of ix86_expand_builtin to take care of stores. */
13345 static rtx
13346 ix86_expand_store_builtin (icode, arglist)
13347 enum insn_code icode;
13348 tree arglist;
13350 rtx pat;
13351 tree arg0 = TREE_VALUE (arglist);
13352 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13353 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13354 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13355 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13356 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13358 if (VECTOR_MODE_P (mode1))
13359 op1 = safe_vector_operand (op1, mode1);
13361 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13363 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13364 op1 = copy_to_mode_reg (mode1, op1);
13366 pat = GEN_FCN (icode) (op0, op1);
13367 if (pat)
13368 emit_insn (pat);
13369 return 0;
13372 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13374 static rtx
13375 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13376 enum insn_code icode;
13377 tree arglist;
13378 rtx target;
13379 int do_load;
13381 rtx pat;
13382 tree arg0 = TREE_VALUE (arglist);
13383 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13384 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13385 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13387 if (! target
13388 || GET_MODE (target) != tmode
13389 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13390 target = gen_reg_rtx (tmode);
13391 if (do_load)
13392 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13393 else
13395 if (VECTOR_MODE_P (mode0))
13396 op0 = safe_vector_operand (op0, mode0);
13398 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13399 op0 = copy_to_mode_reg (mode0, op0);
13402 pat = GEN_FCN (icode) (target, op0);
13403 if (! pat)
13404 return 0;
13405 emit_insn (pat);
13406 return target;
13409 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13410 sqrtss, rsqrtss, rcpss. */
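/* These vm* patterns are vec_merge operations: the scalar operation is
   applied to element 0 of operand 1, and the remaining elements are
   copied from operand 2.  Informally, for sqrtss:

       target[0]    = sqrt (op0[0]);
       target[1..3] = op1[1..3];

   which is why op1 is simply set to op0 below (sketch of the semantics).  */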
13412 static rtx
13413 ix86_expand_unop1_builtin (icode, arglist, target)
13414 enum insn_code icode;
13415 tree arglist;
13416 rtx target;
13418 rtx pat;
13419 tree arg0 = TREE_VALUE (arglist);
13420 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13421 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13422 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13424 if (! target
13425 || GET_MODE (target) != tmode
13426 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13427 target = gen_reg_rtx (tmode);
13429 if (VECTOR_MODE_P (mode0))
13430 op0 = safe_vector_operand (op0, mode0);
13432 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13433 op0 = copy_to_mode_reg (mode0, op0);
13435 op1 = op0;
13436 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13437 op1 = copy_to_mode_reg (mode0, op1);
13439 pat = GEN_FCN (icode) (target, op0, op1);
13440 if (! pat)
13441 return 0;
13442 emit_insn (pat);
13443 return target;
13446 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13448 static rtx
13449 ix86_expand_sse_compare (d, arglist, target)
13450 const struct builtin_description *d;
13451 tree arglist;
13452 rtx target;
13454 rtx pat;
13455 tree arg0 = TREE_VALUE (arglist);
13456 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13457 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13458 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13459 rtx op2;
13460 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13461 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13462 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13463 enum rtx_code comparison = d->comparison;
13465 if (VECTOR_MODE_P (mode0))
13466 op0 = safe_vector_operand (op0, mode0);
13467 if (VECTOR_MODE_P (mode1))
13468 op1 = safe_vector_operand (op1, mode1);
13470 /* Swap operands if we have a comparison that isn't available in
13471 hardware. */
13472 if (d->flag)
13474 rtx tmp = gen_reg_rtx (mode1);
13475 emit_move_insn (tmp, op1);
13476 op1 = op0;
13477 op0 = tmp;
13480 if (! target
13481 || GET_MODE (target) != tmode
13482 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13483 target = gen_reg_rtx (tmode);
13485 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13486 op0 = copy_to_mode_reg (mode0, op0);
13487 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13488 op1 = copy_to_mode_reg (mode1, op1);
13490 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13491 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13492 if (! pat)
13493 return 0;
13494 emit_insn (pat);
13495 return target;
13498 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13500 static rtx
13501 ix86_expand_sse_comi (d, arglist, target)
13502 const struct builtin_description *d;
13503 tree arglist;
13504 rtx target;
13506 rtx pat;
13507 tree arg0 = TREE_VALUE (arglist);
13508 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13509 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13510 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13511 rtx op2;
13512 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13513 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13514 enum rtx_code comparison = d->comparison;
13516 if (VECTOR_MODE_P (mode0))
13517 op0 = safe_vector_operand (op0, mode0);
13518 if (VECTOR_MODE_P (mode1))
13519 op1 = safe_vector_operand (op1, mode1);
13521 /* Swap operands if we have a comparison that isn't available in
13522 hardware. */
13523 if (d->flag)
13525 rtx tmp = op1;
13526 op1 = op0;
13527 op0 = tmp;
13530 target = gen_reg_rtx (SImode);
13531 emit_move_insn (target, const0_rtx);
13532 target = gen_rtx_SUBREG (QImode, target, 0);
13534 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13535 op0 = copy_to_mode_reg (mode0, op0);
13536 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13537 op1 = copy_to_mode_reg (mode1, op1);
13539 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13540 pat = GEN_FCN (d->icode) (op0, op1);
13541 if (! pat)
13542 return 0;
13543 emit_insn (pat);
13544 emit_insn (gen_rtx_SET (VOIDmode,
13545 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13546 gen_rtx_fmt_ee (comparison, QImode,
13547 SET_DEST (pat),
13548 const0_rtx)));
13550 return SUBREG_REG (target);
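/* Rough shape of the output for e.g. __builtin_ia32_comieq
   (illustrative, AT&T syntax):

       comiss %xmm1, %xmm0      # set EFLAGS
       sete   %al               # STRICT_LOW_PART write into the SImode
                                # register zeroed above  */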
13553 /* Expand an expression EXP that calls a built-in function,
13554 with result going to TARGET if that's convenient
13555 (and in mode MODE if that's convenient).
13556 SUBTARGET may be used as the target for computing one of EXP's operands.
13557 IGNORE is nonzero if the value is to be ignored. */
13559 rtx
13560 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13561 tree exp;
13562 rtx target;
13563 rtx subtarget ATTRIBUTE_UNUSED;
13564 enum machine_mode mode ATTRIBUTE_UNUSED;
13565 int ignore ATTRIBUTE_UNUSED;
13567 const struct builtin_description *d;
13568 size_t i;
13569 enum insn_code icode;
13570 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13571 tree arglist = TREE_OPERAND (exp, 1);
13572 tree arg0, arg1, arg2;
13573 rtx op0, op1, op2, pat;
13574 enum machine_mode tmode, mode0, mode1, mode2;
13575 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13577 switch (fcode)
13579 case IX86_BUILTIN_EMMS:
13580 emit_insn (gen_emms ());
13581 return 0;
13583 case IX86_BUILTIN_SFENCE:
13584 emit_insn (gen_sfence ());
13585 return 0;
13587 case IX86_BUILTIN_PEXTRW:
13588 case IX86_BUILTIN_PEXTRW128:
13589 icode = (fcode == IX86_BUILTIN_PEXTRW
13590 ? CODE_FOR_mmx_pextrw
13591 : CODE_FOR_sse2_pextrw);
13592 arg0 = TREE_VALUE (arglist);
13593 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13594 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13595 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13596 tmode = insn_data[icode].operand[0].mode;
13597 mode0 = insn_data[icode].operand[1].mode;
13598 mode1 = insn_data[icode].operand[2].mode;
13600 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13601 op0 = copy_to_mode_reg (mode0, op0);
13602 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13604 /* @@@ better error message */
13605 error ("selector must be an immediate");
13606 return gen_reg_rtx (tmode);
13608 if (target == 0
13609 || GET_MODE (target) != tmode
13610 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13611 target = gen_reg_rtx (tmode);
13612 pat = GEN_FCN (icode) (target, op0, op1);
13613 if (! pat)
13614 return 0;
13615 emit_insn (pat);
13616 return target;
13618 case IX86_BUILTIN_PINSRW:
13619 case IX86_BUILTIN_PINSRW128:
13620 icode = (fcode == IX86_BUILTIN_PINSRW
13621 ? CODE_FOR_mmx_pinsrw
13622 : CODE_FOR_sse2_pinsrw);
13623 arg0 = TREE_VALUE (arglist);
13624 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13625 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13626 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13627 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13628 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13629 tmode = insn_data[icode].operand[0].mode;
13630 mode0 = insn_data[icode].operand[1].mode;
13631 mode1 = insn_data[icode].operand[2].mode;
13632 mode2 = insn_data[icode].operand[3].mode;
13634 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13635 op0 = copy_to_mode_reg (mode0, op0);
13636 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13637 op1 = copy_to_mode_reg (mode1, op1);
13638 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13640 /* @@@ better error message */
13641 error ("selector must be an immediate");
13642 return const0_rtx;
13644 if (target == 0
13645 || GET_MODE (target) != tmode
13646 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13647 target = gen_reg_rtx (tmode);
13648 pat = GEN_FCN (icode) (target, op0, op1, op2);
13649 if (! pat)
13650 return 0;
13651 emit_insn (pat);
13652 return target;
13654 case IX86_BUILTIN_MASKMOVQ:
13655 case IX86_BUILTIN_MASKMOVDQU:
13656 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13657 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13658 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13659 : CODE_FOR_sse2_maskmovdqu));
13660 /* Note the arg order is different from the operand order. */
13661 arg1 = TREE_VALUE (arglist);
13662 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13663 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13664 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13665 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13666 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13667 mode0 = insn_data[icode].operand[0].mode;
13668 mode1 = insn_data[icode].operand[1].mode;
13669 mode2 = insn_data[icode].operand[2].mode;
13671 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13672 op0 = copy_to_mode_reg (mode0, op0);
13673 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13674 op1 = copy_to_mode_reg (mode1, op1);
13675 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13676 op2 = copy_to_mode_reg (mode2, op2);
13677 pat = GEN_FCN (icode) (op0, op1, op2);
13678 if (! pat)
13679 return 0;
13680 emit_insn (pat);
13681 return 0;
13683 case IX86_BUILTIN_SQRTSS:
13684 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13685 case IX86_BUILTIN_RSQRTSS:
13686 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13687 case IX86_BUILTIN_RCPSS:
13688 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13690 case IX86_BUILTIN_LOADAPS:
13691 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13693 case IX86_BUILTIN_LOADUPS:
13694 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13696 case IX86_BUILTIN_STOREAPS:
13697 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13699 case IX86_BUILTIN_STOREUPS:
13700 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13702 case IX86_BUILTIN_LOADSS:
13703 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13705 case IX86_BUILTIN_STORESS:
13706 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13708 case IX86_BUILTIN_LOADHPS:
13709 case IX86_BUILTIN_LOADLPS:
13710 case IX86_BUILTIN_LOADHPD:
13711 case IX86_BUILTIN_LOADLPD:
13712 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13713 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13714 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13715 : CODE_FOR_sse2_movlpd);
13716 arg0 = TREE_VALUE (arglist);
13717 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13718 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13719 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13720 tmode = insn_data[icode].operand[0].mode;
13721 mode0 = insn_data[icode].operand[1].mode;
13722 mode1 = insn_data[icode].operand[2].mode;
13724 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13725 op0 = copy_to_mode_reg (mode0, op0);
13726 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13727 if (target == 0
13728 || GET_MODE (target) != tmode
13729 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13730 target = gen_reg_rtx (tmode);
13731 pat = GEN_FCN (icode) (target, op0, op1);
13732 if (! pat)
13733 return 0;
13734 emit_insn (pat);
13735 return target;
13737 case IX86_BUILTIN_STOREHPS:
13738 case IX86_BUILTIN_STORELPS:
13739 case IX86_BUILTIN_STOREHPD:
13740 case IX86_BUILTIN_STORELPD:
13741 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13742 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13743 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13744 : CODE_FOR_sse2_movlpd);
13745 arg0 = TREE_VALUE (arglist);
13746 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13747 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13748 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13749 mode0 = insn_data[icode].operand[1].mode;
13750 mode1 = insn_data[icode].operand[2].mode;
13752 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13753 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13754 op1 = copy_to_mode_reg (mode1, op1);
13756 pat = GEN_FCN (icode) (op0, op0, op1);
13757 if (! pat)
13758 return 0;
13759 emit_insn (pat);
13760 return 0;
13762 case IX86_BUILTIN_MOVNTPS:
13763 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13764 case IX86_BUILTIN_MOVNTQ:
13765 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13767 case IX86_BUILTIN_LDMXCSR:
13768 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13769 target = assign_386_stack_local (SImode, 0);
13770 emit_move_insn (target, op0);
13771 emit_insn (gen_ldmxcsr (target));
13772 return 0;
13774 case IX86_BUILTIN_STMXCSR:
13775 target = assign_386_stack_local (SImode, 0);
13776 emit_insn (gen_stmxcsr (target));
13777 return copy_to_mode_reg (SImode, target);
13779 case IX86_BUILTIN_SHUFPS:
13780 case IX86_BUILTIN_SHUFPD:
13781 icode = (fcode == IX86_BUILTIN_SHUFPS
13782 ? CODE_FOR_sse_shufps
13783 : CODE_FOR_sse2_shufpd);
13784 arg0 = TREE_VALUE (arglist);
13785 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13786 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13787 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13788 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13789 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13790 tmode = insn_data[icode].operand[0].mode;
13791 mode0 = insn_data[icode].operand[1].mode;
13792 mode1 = insn_data[icode].operand[2].mode;
13793 mode2 = insn_data[icode].operand[3].mode;
13795 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13796 op0 = copy_to_mode_reg (mode0, op0);
13797 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13798 op1 = copy_to_mode_reg (mode1, op1);
13799 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13801 /* @@@ better error message */
13802 error ("mask must be an immediate");
13803 return gen_reg_rtx (tmode);
13805 if (target == 0
13806 || GET_MODE (target) != tmode
13807 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13808 target = gen_reg_rtx (tmode);
13809 pat = GEN_FCN (icode) (target, op0, op1, op2);
13810 if (! pat)
13811 return 0;
13812 emit_insn (pat);
13813 return target;
13815 case IX86_BUILTIN_PSHUFW:
13816 case IX86_BUILTIN_PSHUFD:
13817 case IX86_BUILTIN_PSHUFHW:
13818 case IX86_BUILTIN_PSHUFLW:
13819 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13820 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13821 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13822 : CODE_FOR_mmx_pshufw);
13823 arg0 = TREE_VALUE (arglist);
13824 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13825 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13826 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13827 tmode = insn_data[icode].operand[0].mode;
13828 mode1 = insn_data[icode].operand[1].mode;
13829 mode2 = insn_data[icode].operand[2].mode;
13831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13832 op0 = copy_to_mode_reg (mode1, op0);
13833 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13835 /* @@@ better error message */
13836 error ("mask must be an immediate");
13837 return const0_rtx;
13839 if (target == 0
13840 || GET_MODE (target) != tmode
13841 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13842 target = gen_reg_rtx (tmode);
13843 pat = GEN_FCN (icode) (target, op0, op1);
13844 if (! pat)
13845 return 0;
13846 emit_insn (pat);
13847 return target;
13849 case IX86_BUILTIN_PSLLDQI128:
13850 case IX86_BUILTIN_PSRLDQI128:
13851 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13852 : CODE_FOR_sse2_lshrti3);
13853 arg0 = TREE_VALUE (arglist);
13854 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13855 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13856 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13857 tmode = insn_data[icode].operand[0].mode;
13858 mode1 = insn_data[icode].operand[1].mode;
13859 mode2 = insn_data[icode].operand[2].mode;
13861 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13863 op0 = copy_to_reg (op0);
13864 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13866 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13868 error ("shift must be an immediate");
13869 return const0_rtx;
13871 target = gen_reg_rtx (V2DImode);
13872 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13873 if (! pat)
13874 return 0;
13875 emit_insn (pat);
13876 return target;
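/* Note (hedged, from reading the intrinsics headers): these TImode
   shifts count in bits, while the user-level _mm_slli_si128 and
   _mm_srli_si128 intrinsics shift by bytes; emmintrin.h is expected to
   multiply the byte count by 8 before calling these builtins.  */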
13878 case IX86_BUILTIN_FEMMS:
13879 emit_insn (gen_femms ());
13880 return NULL_RTX;
13882 case IX86_BUILTIN_PAVGUSB:
13883 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13885 case IX86_BUILTIN_PF2ID:
13886 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13888 case IX86_BUILTIN_PFACC:
13889 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13891 case IX86_BUILTIN_PFADD:
13892 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13894 case IX86_BUILTIN_PFCMPEQ:
13895 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13897 case IX86_BUILTIN_PFCMPGE:
13898 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13900 case IX86_BUILTIN_PFCMPGT:
13901 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13903 case IX86_BUILTIN_PFMAX:
13904 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13906 case IX86_BUILTIN_PFMIN:
13907 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13909 case IX86_BUILTIN_PFMUL:
13910 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13912 case IX86_BUILTIN_PFRCP:
13913 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13915 case IX86_BUILTIN_PFRCPIT1:
13916 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13918 case IX86_BUILTIN_PFRCPIT2:
13919 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13921 case IX86_BUILTIN_PFRSQIT1:
13922 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13924 case IX86_BUILTIN_PFRSQRT:
13925 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13927 case IX86_BUILTIN_PFSUB:
13928 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13930 case IX86_BUILTIN_PFSUBR:
13931 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13933 case IX86_BUILTIN_PI2FD:
13934 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13936 case IX86_BUILTIN_PMULHRW:
13937 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13939 case IX86_BUILTIN_PF2IW:
13940 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13942 case IX86_BUILTIN_PFNACC:
13943 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13945 case IX86_BUILTIN_PFPNACC:
13946 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13948 case IX86_BUILTIN_PI2FW:
13949 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13951 case IX86_BUILTIN_PSWAPDSI:
13952 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13954 case IX86_BUILTIN_PSWAPDSF:
13955 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13957 case IX86_BUILTIN_SSE_ZERO:
13958 target = gen_reg_rtx (V4SFmode);
13959 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13960 return target;
13962 case IX86_BUILTIN_MMX_ZERO:
13963 target = gen_reg_rtx (DImode);
13964 emit_insn (gen_mmx_clrdi (target));
13965 return target;
13967 case IX86_BUILTIN_CLRTI:
13968 target = gen_reg_rtx (V2DImode);
13969 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13970 return target;
13973 case IX86_BUILTIN_SQRTSD:
13974 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13975 case IX86_BUILTIN_LOADAPD:
13976 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13977 case IX86_BUILTIN_LOADUPD:
13978 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13980 case IX86_BUILTIN_STOREAPD:
13981 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13982 case IX86_BUILTIN_STOREUPD:
13983 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13985 case IX86_BUILTIN_LOADSD:
13986 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13988 case IX86_BUILTIN_STORESD:
13989 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13991 case IX86_BUILTIN_SETPD1:
13992 target = assign_386_stack_local (DFmode, 0);
13993 arg0 = TREE_VALUE (arglist);
13994 emit_move_insn (adjust_address (target, DFmode, 0),
13995 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13996 op0 = gen_reg_rtx (V2DFmode);
13997 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13998 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13999 return op0;
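/* I.e. (sketch): the scalar is spilled to a stack slot, loaded into
   element 0, and shufpd with selector 0 then duplicates it, so
   __builtin_ia32_setpd1 (x) yields the vector { x, x }.  */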
14001 case IX86_BUILTIN_SETPD:
14002 target = assign_386_stack_local (V2DFmode, 0);
14003 arg0 = TREE_VALUE (arglist);
14004 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14005 emit_move_insn (adjust_address (target, DFmode, 0),
14006 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14007 emit_move_insn (adjust_address (target, DFmode, 8),
14008 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14009 op0 = gen_reg_rtx (V2DFmode);
14010 emit_insn (gen_sse2_movapd (op0, target));
14011 return op0;
14013 case IX86_BUILTIN_LOADRPD:
14014 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14015 gen_reg_rtx (V2DFmode), 1);
14016 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14017 return target;
14019 case IX86_BUILTIN_LOADPD1:
14020 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14021 gen_reg_rtx (V2DFmode), 1);
14022 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14023 return target;
14025 case IX86_BUILTIN_STOREPD1:
14026 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14027 case IX86_BUILTIN_STORERPD:
14028 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14030 case IX86_BUILTIN_CLRPD:
14031 target = gen_reg_rtx (V2DFmode);
14032 emit_insn (gen_sse_clrv2df (target));
14033 return target;
14035 case IX86_BUILTIN_MFENCE:
14036 emit_insn (gen_sse2_mfence ());
14037 return 0;
14038 case IX86_BUILTIN_LFENCE:
14039 emit_insn (gen_sse2_lfence ());
14040 return 0;
14042 case IX86_BUILTIN_CLFLUSH:
14043 arg0 = TREE_VALUE (arglist);
14044 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14045 icode = CODE_FOR_sse2_clflush;
14046 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14047 op0 = copy_to_mode_reg (Pmode, op0);
14049 emit_insn (gen_sse2_clflush (op0));
14050 return 0;
14052 case IX86_BUILTIN_MOVNTPD:
14053 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14054 case IX86_BUILTIN_MOVNTDQ:
14055 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14056 case IX86_BUILTIN_MOVNTI:
14057 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14059 case IX86_BUILTIN_LOADDQA:
14060 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14061 case IX86_BUILTIN_LOADDQU:
14062 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14063 case IX86_BUILTIN_LOADD:
14064 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14066 case IX86_BUILTIN_STOREDQA:
14067 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14068 case IX86_BUILTIN_STOREDQU:
14069 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14070 case IX86_BUILTIN_STORED:
14071 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14073 default:
14074 break;
14077 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14078 if (d->code == fcode)
14080 /* Compares are treated specially. */
14081 if (d->icode == CODE_FOR_maskcmpv4sf3
14082 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14083 || d->icode == CODE_FOR_maskncmpv4sf3
14084 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14085 || d->icode == CODE_FOR_maskcmpv2df3
14086 || d->icode == CODE_FOR_vmmaskcmpv2df3
14087 || d->icode == CODE_FOR_maskncmpv2df3
14088 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14089 return ix86_expand_sse_compare (d, arglist, target);
14091 return ix86_expand_binop_builtin (d->icode, arglist, target);
14094 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14095 if (d->code == fcode)
14096 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14098 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14099 if (d->code == fcode)
14100 return ix86_expand_sse_comi (d, arglist, target);
14102 /* @@@ Should really do something sensible here. */
14103 return 0;
14106 /* Store OPERAND to memory after reload has completed.  This means
14107 that we can't easily use assign_stack_local. */
14108 rtx
14109 ix86_force_to_memory (mode, operand)
14110 enum machine_mode mode;
14111 rtx operand;
14113 rtx result;
14114 if (!reload_completed)
14115 abort ();
14116 if (TARGET_64BIT && TARGET_RED_ZONE)
14118 result = gen_rtx_MEM (mode,
14119 gen_rtx_PLUS (Pmode,
14120 stack_pointer_rtx,
14121 GEN_INT (-RED_ZONE_SIZE)));
14122 emit_move_insn (result, operand);
14124 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14126 switch (mode)
14128 case HImode:
14129 case SImode:
14130 operand = gen_lowpart (DImode, operand);
14131 /* FALLTHRU */
14132 case DImode:
14133 emit_insn (
14134 gen_rtx_SET (VOIDmode,
14135 gen_rtx_MEM (DImode,
14136 gen_rtx_PRE_DEC (DImode,
14137 stack_pointer_rtx)),
14138 operand));
14139 break;
14140 default:
14141 abort ();
14143 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14145 else
14147 switch (mode)
14149 case DImode:
14151 rtx operands[2];
14152 split_di (&operand, 1, operands, operands + 1);
14153 emit_insn (
14154 gen_rtx_SET (VOIDmode,
14155 gen_rtx_MEM (SImode,
14156 gen_rtx_PRE_DEC (Pmode,
14157 stack_pointer_rtx)),
14158 operands[1]));
14159 emit_insn (
14160 gen_rtx_SET (VOIDmode,
14161 gen_rtx_MEM (SImode,
14162 gen_rtx_PRE_DEC (Pmode,
14163 stack_pointer_rtx)),
14164 operands[0]));
14166 break;
14167 case HImode:
14168 /* It is better to store HImodes as SImodes. */
14169 if (!TARGET_PARTIAL_REG_STALL)
14170 operand = gen_lowpart (SImode, operand);
14171 /* FALLTHRU */
14172 case SImode:
14173 emit_insn (
14174 gen_rtx_SET (VOIDmode,
14175 gen_rtx_MEM (GET_MODE (operand),
14176 gen_rtx_PRE_DEC (SImode,
14177 stack_pointer_rtx)),
14178 operand));
14179 break;
14180 default:
14181 abort ();
14183 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14185 return result;
14188 /* Free the operand from memory. */
14189 void
14190 ix86_free_from_memory (mode)
14191 enum machine_mode mode;
14193 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14195 int size;
14197 if (mode == DImode || TARGET_64BIT)
14198 size = 8;
14199 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14200 size = 2;
14201 else
14202 size = 4;
14203 /* Use LEA to deallocate stack space, since LEA leaves the flags intact.
14204 In peephole2 it will be converted to a pop or add instruction if registers are available. */
14205 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14206 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14207 GEN_INT (size))));
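/* Example (illustrative): freeing the slot used for a DImode operand on
   ia32 emits the equivalent of

       lea 8(%esp), %esp

   which peephole2 may later rewrite as pops or an add.  */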
14211 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14212 QImode must go into class Q_REGS.
14213 Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
14214 movdf to do mem-to-mem moves through integer regs. */
14215 enum reg_class
14216 ix86_preferred_reload_class (x, class)
14217 rtx x;
14218 enum reg_class class;
14220 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14221 return NO_REGS;
14222 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14224 /* SSE can't load any constant directly yet. */
14225 if (SSE_CLASS_P (class))
14226 return NO_REGS;
14227 /* The x87 can load 0.0 and 1.0 directly (fldz/fld1). */
14228 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14230 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14231 if (MAYBE_SSE_CLASS_P (class))
14232 return (reg_class_subset_p (class, GENERAL_REGS)
14233 ? GENERAL_REGS : FLOAT_REGS);
14234 else
14235 return class;
14237 /* General regs can load everything. */
14238 if (reg_class_subset_p (class, GENERAL_REGS))
14239 return GENERAL_REGS;
14240 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14241 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14242 return NO_REGS;
14244 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14245 return NO_REGS;
14246 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14247 return Q_REGS;
14248 return class;
14251 /* If we are copying between general and FP registers, we need a memory
14252 location. The same is true for SSE and MMX registers.
14254 The macro can't work reliably when one of the CLASSES is a class containing
14255 registers from multiple units (SSE, MMX, integer).  We avoid this by never
14256 combining those units in a single alternative in the machine description.
14257 Ensure that this constraint holds to avoid unexpected surprises.
14259 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14260 enforce these sanity checks. */
14261 int
14262 ix86_secondary_memory_needed (class1, class2, mode, strict)
14263 enum reg_class class1, class2;
14264 enum machine_mode mode;
14265 int strict;
14267 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14268 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14269 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14270 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14271 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14272 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14274 if (strict)
14275 abort ();
14276 else
14277 return 1;
14279 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14280 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14281 && (mode) != SImode)
14282 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14283 && (mode) != SImode));
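/* The SImode exceptions above reflect that 32-bit values can be moved
   directly between the integer unit and SSE/MMX registers with movd, so
   no secondary memory is needed for them (sketch of the rationale).  */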
14285 /* Return the cost of moving data from a register in class CLASS1 to
14286 one in class CLASS2.
14288 It is not required that the cost always equal 2 when FROM is the same as TO;
14289 on some machines it is expensive to move between registers if they are not
14290 general registers. */
14291 int
14292 ix86_register_move_cost (mode, class1, class2)
14293 enum machine_mode mode;
14294 enum reg_class class1, class2;
14296 /* In case we require secondary memory, compute the cost of the store
14297 followed by the load.  In order to avoid bad register allocation choices,
14298 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14300 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14302 int cost = 1;
14304 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14305 MEMORY_MOVE_COST (mode, class1, 1));
14306 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14307 MEMORY_MOVE_COST (mode, class2, 1));
14309 /* In the case of copying from a general purpose register we may emit
14310 multiple stores followed by a single load, causing a memory size mismatch
14311 stall.  Count this with an arbitrarily high cost of 20. */
14312 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14313 cost += 20;
14315 /* In the case of FP/MMX moves, the registers actually overlap, and we
14316 have to switch modes in order to treat them differently. */
14317 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14318 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14319 cost += 20;
14321 return cost;
14324 /* Moves between SSE/MMX and integer unit are expensive. */
14325 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14326 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14327 return ix86_cost->mmxsse_to_integer;
14328 if (MAYBE_FLOAT_CLASS_P (class1))
14329 return ix86_cost->fp_move;
14330 if (MAYBE_SSE_CLASS_P (class1))
14331 return ix86_cost->sse_move;
14332 if (MAYBE_MMX_CLASS_P (class1))
14333 return ix86_cost->mmx_move;
14334 return 2;
14337 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14338 int
14339 ix86_hard_regno_mode_ok (regno, mode)
14340 int regno;
14341 enum machine_mode mode;
14343 /* The flags registers can hold only CCmode values, and CCmode values can live only in the flags registers. */
14344 if (CC_REGNO_P (regno))
14345 return GET_MODE_CLASS (mode) == MODE_CC;
14346 if (GET_MODE_CLASS (mode) == MODE_CC
14347 || GET_MODE_CLASS (mode) == MODE_RANDOM
14348 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14349 return 0;
14350 if (FP_REGNO_P (regno))
14351 return VALID_FP_MODE_P (mode);
14352 if (SSE_REGNO_P (regno))
14353 return VALID_SSE_REG_MODE (mode);
14354 if (MMX_REGNO_P (regno))
14355 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14356 /* We handle both integers and floats in the general purpose registers.
14357 In the future we should be able to handle vector modes as well. */
14358 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14359 return 0;
14360 /* Take care with QImode values: they can live in non-QI regs, but then
14361 they cause partial register stalls. */
14362 if (regno < 4 || mode != QImode || TARGET_64BIT)
14363 return 1;
14364 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14367 /* Return the cost of moving data of mode M between a
14368 register and memory. A value of 2 is the default; this cost is
14369 relative to those in `REGISTER_MOVE_COST'.
14371 If moving between registers and memory is more expensive than
14372 between two registers, you should define this macro to express the
14373 relative cost.
14375 Also model the increased cost of moving QImode registers in non
14376 Q_REGS classes.
14377 */
14378 int
14379 ix86_memory_move_cost (mode, class, in)
14380 enum machine_mode mode;
14381 enum reg_class class;
14382 int in;
14384 if (FLOAT_CLASS_P (class))
14386 int index;
14387 switch (mode)
14389 case SFmode:
14390 index = 0;
14391 break;
14392 case DFmode:
14393 index = 1;
14394 break;
14395 case XFmode:
14396 case TFmode:
14397 index = 2;
14398 break;
14399 default:
14400 return 100;
14402 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14404 if (SSE_CLASS_P (class))
14406 int index;
14407 switch (GET_MODE_SIZE (mode))
14409 case 4:
14410 index = 0;
14411 break;
14412 case 8:
14413 index = 1;
14414 break;
14415 case 16:
14416 index = 2;
14417 break;
14418 default:
14419 return 100;
14421 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14423 if (MMX_CLASS_P (class))
14425 int index;
14426 switch (GET_MODE_SIZE (mode))
14428 case 4:
14429 index = 0;
14430 break;
14431 case 8:
14432 index = 1;
14433 break;
14434 default:
14435 return 100;
14437 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14439 switch (GET_MODE_SIZE (mode))
14441 case 1:
14442 if (in)
14443 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14444 : ix86_cost->movzbl_load);
14445 else
14446 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14447 : ix86_cost->int_store[0] + 4);
14448 break;
14449 case 2:
14450 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14451 default:
14452 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode. */
14453 if (mode == TFmode)
14454 mode = XFmode;
14455 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14456 * ((int) GET_MODE_SIZE (mode)
14457 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
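/* Worked example (illustrative): a DImode load into GENERAL_REGS on
   ia32 costs int_load[2] * ((8 + 3) / 4), i.e. the price of two 32-bit
   moves, since UNITS_PER_WORD is 4 there.  */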
14461 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14462 static void
14463 ix86_svr3_asm_out_constructor (symbol, priority)
14464 rtx symbol;
14465 int priority ATTRIBUTE_UNUSED;
14467 init_section ();
14468 fputs ("\tpushl $", asm_out_file);
14469 assemble_name (asm_out_file, XSTR (symbol, 0));
14470 fputc ('\n', asm_out_file);
14472 #endif
14474 #if TARGET_MACHO
14476 static int current_machopic_label_num;
14478 /* Given a symbol name and its associated stub, write out the
14479 definition of the stub. */
14481 void
14482 machopic_output_stub (file, symb, stub)
14483 FILE *file;
14484 const char *symb, *stub;
14486 unsigned int length;
14487 char *binder_name, *symbol_name, lazy_ptr_name[32];
14488 int label = ++current_machopic_label_num;
14490 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14491 symb = (*targetm.strip_name_encoding) (symb);
14493 length = strlen (stub);
14494 binder_name = alloca (length + 32);
14495 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14497 length = strlen (symb);
14498 symbol_name = alloca (length + 32);
14499 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14501 sprintf (lazy_ptr_name, "L%d$lz", label);
14503 if (MACHOPIC_PURE)
14504 machopic_picsymbol_stub_section ();
14505 else
14506 machopic_symbol_stub_section ();
14508 fprintf (file, "%s:\n", stub);
14509 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14511 if (MACHOPIC_PURE)
14513 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14514 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14515 fprintf (file, "\tjmp %%edx\n");
14517 else
14518 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14520 fprintf (file, "%s:\n", binder_name);
14522 if (MACHOPIC_PURE)
14524 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14525 fprintf (file, "\tpushl %%eax\n");
14527 else
14528 fprintf (file, "\tpushl $%s\n", lazy_ptr_name);
14530 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14532 machopic_lazy_symbol_ptr_section ();
14533 fprintf (file, "%s:\n", lazy_ptr_name);
14534 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14535 fprintf (file, "\t.long %s\n", binder_name);
14537 #endif /* TARGET_MACHO */
14539 /* Order the registers for the register allocator. */
14541 void
14542 x86_order_regs_for_local_alloc ()
14544 int pos = 0;
14545 int i;
14547 /* First allocate the call-clobbered general purpose registers. */
14548 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14549 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14550 reg_alloc_order [pos++] = i;
14552 /* Then the call-saved general purpose registers. */
14553 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14554 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14555 reg_alloc_order [pos++] = i;
14557 /* x87 registers come first in case we are doing FP math
14558 using them. */
14559 if (!TARGET_SSE_MATH)
14560 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14561 reg_alloc_order [pos++] = i;
14563 /* SSE registers. */
14564 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14565 reg_alloc_order [pos++] = i;
14566 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14567 reg_alloc_order [pos++] = i;
14569 /* x87 registers. */
14570 if (TARGET_SSE_MATH)
14571 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14572 reg_alloc_order [pos++] = i;
14574 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14575 reg_alloc_order [pos++] = i;
14577 /* Initialize the rest of the array, as some registers are never
14578 allocated at all. */
14579 while (pos < FIRST_PSEUDO_REGISTER)
14580 reg_alloc_order [pos++] = 0;
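/* Resulting rough order on ia32 (hedged summary): call-clobbered
   integer registers first, then call-saved ones, then x87 or SSE
   depending on TARGET_SSE_MATH, with MMX last.  */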
14583 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14584 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14585 #endif
14587 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14588 struct attribute_spec.handler. */
14589 static tree
14590 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14591 tree *node;
14592 tree name;
14593 tree args ATTRIBUTE_UNUSED;
14594 int flags ATTRIBUTE_UNUSED;
14595 bool *no_add_attrs;
14597 tree *type = NULL;
14598 if (DECL_P (*node))
14600 if (TREE_CODE (*node) == TYPE_DECL)
14601 type = &TREE_TYPE (*node);
14603 else
14604 type = node;
14606 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14607 || TREE_CODE (*type) == UNION_TYPE)))
14609 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14610 *no_add_attrs = true;
14613 else if ((is_attribute_p ("ms_struct", name)
14614 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14615 || ((is_attribute_p ("gcc_struct", name)
14616 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14618 warning ("`%s' incompatible attribute ignored",
14619 IDENTIFIER_POINTER (name));
14620 *no_add_attrs = true;
14623 return NULL_TREE;
14626 static bool
14627 ix86_ms_bitfield_layout_p (record_type)
14628 tree record_type;
14630 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14631 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14632 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14635 /* Returns an expression indicating where the this parameter is
14636 located on entry to the FUNCTION. */
14638 static rtx
14639 x86_this_parameter (function)
14640 tree function;
14642 tree type = TREE_TYPE (function);
14644 if (TARGET_64BIT)
14646 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14647 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14650 if (ix86_fntype_regparm (type) > 0)
14652 tree parm;
14654 parm = TYPE_ARG_TYPES (type);
14655 /* Figure out whether or not the function has a variable number of
14656 arguments. */
14657 for (; parm; parm = TREE_CHAIN (parm))
14658 if (TREE_VALUE (parm) == void_type_node)
14659 break;
14660 /* If we found the terminating void, the function is not variadic, and the this parameter is in %eax. */
14661 if (parm)
14662 return gen_rtx_REG (SImode, 0);
14665 if (aggregate_value_p (TREE_TYPE (type)))
14666 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14667 else
14668 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
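/* Sketch of the ia32 stack layout assumed above: 0(%esp) holds the
   return address on entry; when the function returns an aggregate in
   memory the hidden return-slot pointer sits at 4(%esp), pushing the
   `this' argument out to 8(%esp).  */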
14671 /* Determine whether x86_output_mi_thunk can succeed. */
14673 static bool
14674 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14675 tree thunk ATTRIBUTE_UNUSED;
14676 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14677 HOST_WIDE_INT vcall_offset;
14678 tree function;
14680 /* 64-bit can handle anything. */
14681 if (TARGET_64BIT)
14682 return true;
14684 /* For 32-bit, everything's fine if we have one free register. */
14685 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14686 return true;
14688 /* Need a free register for vcall_offset. */
14689 if (vcall_offset)
14690 return false;
14692 /* Need a free register for GOT references. */
14693 if (flag_pic && !(*targetm.binds_local_p) (function))
14694 return false;
14696 /* Otherwise ok. */
14697 return true;
14700 /* Output the assembler code for a thunk function. THUNK_DECL is the
14701 declaration for the thunk function itself, FUNCTION is the decl for
14702 the target function. DELTA is an immediate constant offset to be
14703 added to THIS. If VCALL_OFFSET is nonzero, the word at
14704 *(*this + vcall_offset) should be added to THIS. */
14706 static void
14707 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14708 FILE *file ATTRIBUTE_UNUSED;
14709 tree thunk ATTRIBUTE_UNUSED;
14710 HOST_WIDE_INT delta;
14711 HOST_WIDE_INT vcall_offset;
14712 tree function;
14714 rtx xops[3];
14715 rtx this = x86_this_parameter (function);
14716 rtx this_reg, tmp;
14718 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14719 pull it in now and let DELTA benefit. */
14720 if (REG_P (this))
14721 this_reg = this;
14722 else if (vcall_offset)
14724 /* Put the this parameter into %eax. */
14725 xops[0] = this;
14726 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14727 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14729 else
14730 this_reg = NULL_RTX;
14732 /* Adjust the this parameter by a fixed constant. */
14733 if (delta)
14735 xops[0] = GEN_INT (delta);
14736 xops[1] = this_reg ? this_reg : this;
14737 if (TARGET_64BIT)
14739 if (!x86_64_general_operand (xops[0], DImode))
14741 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14742 xops[1] = tmp;
14743 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14744 xops[0] = tmp;
14745 xops[1] = this;
14747 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14749 else
14750 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
        tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
        tmp = gen_rtx_REG (SImode, 2 /* ECX */);

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
        output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
        {
          rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
          xops[0] = GEN_INT (vcall_offset);
          xops[1] = tmp2;
          output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
          xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
        }
      xops[1] = this_reg;
      if (TARGET_64BIT)
        output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
        output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = DECL_RTL (function);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
        {
          tmp = XEXP (xops[0], 0);
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          tmp = gen_rtx_MEM (QImode, tmp);
          xops[0] = tmp;
          output_asm_insn ("jmp\t%A0", xops);
        }
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
        output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
        if (TARGET_MACHO)
          {
            char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
            tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
            tmp = gen_rtx_MEM (QImode, tmp);
            xops[0] = tmp;
            output_asm_insn ("jmp\t%0", xops);
          }
        else
#endif /* TARGET_MACHO */
        {
          tmp = gen_rtx_REG (SImode, 2 /* ECX */);
          output_set_got (tmp);

          xops[1] = tmp;
          output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
          output_asm_insn ("jmp\t{*}%1", xops);
        }
    }
}
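
/* Worked example (hypothetical inputs): for a 32-bit non-PIC thunk
   with DELTA == -4, VCALL_OFFSET == -8 and THIS arriving at 4(%esp),
   the sequence emitted above is roughly:

	movl	4(%esp), %eax	# fetch THIS
	addl	$-4, %eax	# apply DELTA
	movl	(%eax), %ecx	# load the vtable pointer
	addl	-8(%ecx), %eax	# add *(vtable + VCALL_OFFSET)
	movl	%eax, 4(%esp)	# store the adjusted THIS back
	jmp	target		# tail call the real function  */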

/* Return the alignment, in bits, to use for FIELD, whose natural
   alignment is COMPUTED bits.  Without -malign-double, 32-bit x86
   caps the in-struct alignment of doubles and wide integers at one
   32-bit word.  */
int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
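
/* Example (32-bit, no -malign-double): in
   struct s { char c; double d; };
   the natural 64-bit alignment of "d" is capped at 32 bits, so "d"
   lands at offset 4 and sizeof (struct s) is 12 -- the traditional
   i386 struct layout.  */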

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (file, labelno)
     FILE *file;
     int labelno ATTRIBUTE_UNUSED;
{
  if (TARGET_64BIT)
    if (flag_pic)
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
      }
    else
      {
#ifndef NO_PROFILE_COUNTERS
	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
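
/* For instance, assuming the usual definitions (LPREFIX ".L",
   MCOUNT_NAME "mcount", PROFILE_COUNT_REGISTER "edx"), a non-PIC
   32-bit function entry is instrumented as:

	movl	$.LP0,%edx
	call	mcount  */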

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump and is
   not directly preceded by another jump instruction.  We avoid the
   penalty by inserting a NOP just before RET instructions in such
   cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e;
	  for (e = bb->pred; e; e = e->pred_next)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      insert = 1;
	}
      if (!insert)
	{
	  prev = prev_active_insn (ret);
	  if (prev && GET_CODE (prev) == JUMP_INSN
	      && any_condjump_p (prev))
	    insert = 1;
	  /* Empty functions get a branch mispredict even when the jump
	     destination is not visible to us.  */
	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    insert = 1;
	}
      if (insert)
	emit_insn_before (gen_nop (), ret);
    }
}
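
/* Sketch of the transformation (hypothetical assembly): a hot epilogue
   such as

	jne	.L2
	ret

   becomes

	jne	.L2
	nop
	ret

   so the RET neither directly follows a jump nor is itself a branch
   target, sidestepping the Athlon/K8 penalty described above.  */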

/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (insn)
     rtx insn;
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}
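
/* Background: the regno >= 4 test above catches %sil, %dil, %bpl and
   %spl; in 64-bit mode their encodings without a REX prefix denote
   %ah, %ch, %dh and %bh instead, so any instruction touching these
   byte registers must carry a REX prefix.  */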

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (p, data)
     rtx *p;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using
   a REX prefix.  */
bool
x86_extended_reg_mentioned_p (insn)
     rtx insn;
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
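
/* E.g. an insn referencing %r8 or %xmm12 satisfies the walk above;
   knowing this presumably lets the insn length attributes account for
   the extra REX prefix byte.  */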

#include "gt-i386.h"