gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
51 /* Processor costs (relative to an add) */
52 static const
 53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
 73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
 110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
 147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
 184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
 221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
 258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
 295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
 332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
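/* Illustrative note (not in the original source): each x86_* flag above is a
   bitmask over processor types and is tested against the processor being
   tuned for.  For example, a test of the form

     if (x86_use_leave & (1 << ix86_cpu))
       ...

   holds when tuning for the 386, K6 or Athlon, matching the
   m_386 | m_K6 | m_ATHLON initializer; override_options below uses the same
   idiom with (1 << ix86_arch) for x86_arch_always_fancy_math_387.  */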
 400 /* In case the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
 409 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
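/* Illustrative example (derived from the table below, not in the original
   source): REGNO_REG_CLASS (0) yields AREG for %eax, and REGNO_REG_CLASS (1)
   yields DREG, since hard register 1 is %edx in gcc's numbering.  */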
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
440 /* The "default" register map used in 32bit mode. */
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
453 static int const x86_64_int_parameter_registers[6] =
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
459 static int const x86_64_int_return_registers[4] =
 461 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
 537 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
 538 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
547 /* The encoding characters for the four TLS models present in ELF. */
549 static char const tls_model_chars[] = " GLil";
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
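/* Worked example (assuming the 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8 from i386.h):
   X86_64_VARARGS_SIZE = 6 * 8 + 8 * 16 = 176 bytes of register save area
   for varargs functions.  */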
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
567 /* Structure describing stack frame layout.
568 Stack grows downward:
570 [arguments]
571 <- ARG_POINTER
572 saved pc
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
578 [padding1] \
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
584 [padding2] /
586 struct ix86_frame
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
 619 /* Which CPU we are scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
 632 /* True if the SSE prefetch instruction is not a NOP. */
633 int x86_prefetch_sse;
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
638 /* Alignment to use for loops and jumps: */
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
705 struct ix86_address
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
 753 /* Register class used for passing a given 64-bit part of the argument.
 754 These represent classes as documented by the PS ABI, with the exception
 755 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
 756 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
 758 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 759 whenever possible (the upper half contains only padding).
761 enum x86_64_reg_class
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
851 struct gcc_target targetm = TARGET_INITIALIZER;
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
862 void
863 override_options ()
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
869 static struct ptt
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
881 const processor_target_table[PROCESSOR_max] =
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
907 const processor_alias_table[] =
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"i686", PROCESSOR_PENTIUMPRO, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
917 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
918 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
919 PTA_MMX | PTA_PREFETCH_SSE},
920 {"k6", PROCESSOR_K6, PTA_MMX},
921 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
922 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
924 | PTA_3DNOW_A},
925 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
926 | PTA_3DNOW | PTA_3DNOW_A},
927 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 | PTA_3DNOW_A | PTA_SSE},
929 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
935 int const pta_size = ARRAY_SIZE (processor_alias_table);
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS;
939 #endif
941 if (!ix86_cpu_string && ix86_arch_string)
942 ix86_cpu_string = ix86_arch_string;
943 if (!ix86_cpu_string)
944 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
945 if (!ix86_arch_string)
946 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
948 if (ix86_cmodel_string != 0)
950 if (!strcmp (ix86_cmodel_string, "small"))
951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
952 else if (flag_pic)
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
954 else if (!strcmp (ix86_cmodel_string, "32"))
955 ix86_cmodel = CM_32;
956 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
957 ix86_cmodel = CM_KERNEL;
958 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
959 ix86_cmodel = CM_MEDIUM;
960 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
961 ix86_cmodel = CM_LARGE;
962 else
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
965 else
967 ix86_cmodel = CM_32;
968 if (TARGET_64BIT)
969 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
971 if (ix86_asm_string != 0)
973 if (!strcmp (ix86_asm_string, "intel"))
974 ix86_asm_dialect = ASM_INTEL;
975 else if (!strcmp (ix86_asm_string, "att"))
976 ix86_asm_dialect = ASM_ATT;
977 else
978 error ("bad value (%s) for -masm= switch", ix86_asm_string);
980 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
983 if (ix86_cmodel == CM_LARGE)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags & MASK_64BIT) ? 64 : 32);
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
992 ix86_arch = processor_alias_table[i].processor;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu = ix86_arch;
995 if (processor_alias_table[i].flags & PTA_MMX
996 && !(target_flags & MASK_MMX_SET))
997 target_flags |= MASK_MMX;
998 if (processor_alias_table[i].flags & PTA_3DNOW
999 && !(target_flags & MASK_3DNOW_SET))
1000 target_flags |= MASK_3DNOW;
1001 if (processor_alias_table[i].flags & PTA_3DNOW_A
1002 && !(target_flags & MASK_3DNOW_A_SET))
1003 target_flags |= MASK_3DNOW_A;
1004 if (processor_alias_table[i].flags & PTA_SSE
1005 && !(target_flags & MASK_SSE_SET))
1006 target_flags |= MASK_SSE;
1007 if (processor_alias_table[i].flags & PTA_SSE2
1008 && !(target_flags & MASK_SSE2_SET))
1009 target_flags |= MASK_SSE2;
1010 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1011 x86_prefetch_sse = true;
1012 break;
1015 if (i == pta_size)
1016 error ("bad value (%s) for -march= switch", ix86_arch_string);
1018 for (i = 0; i < pta_size; i++)
1019 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1021 ix86_cpu = processor_alias_table[i].processor;
1022 break;
1024 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1025 x86_prefetch_sse = true;
1026 if (i == pta_size)
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1029 if (optimize_size)
1030 ix86_cost = &size_cost;
1031 else
1032 ix86_cost = processor_target_table[ix86_cpu].cost;
1033 target_flags |= processor_target_table[ix86_cpu].target_enable;
1034 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status = ix86_init_machine_status;
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string)
1042 i = atoi (ix86_regparm_string);
1043 if (i < 0 || i > REGPARM_MAX)
1044 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1045 else
1046 ix86_regparm = i;
1048 else
1049 if (TARGET_64BIT)
1050 ix86_regparm = REGPARM_MAX;
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string)
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops == 0)
1060 i = atoi (ix86_align_loops_string);
1061 if (i < 0 || i > MAX_CODE_ALIGN)
1062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1063 else
1064 align_loops = 1 << i;
1068 if (ix86_align_jumps_string)
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps == 0)
1073 i = atoi (ix86_align_jumps_string);
1074 if (i < 0 || i > MAX_CODE_ALIGN)
 1075 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1076 else
1077 align_jumps = 1 << i;
1081 if (ix86_align_funcs_string)
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions == 0)
1086 i = atoi (ix86_align_funcs_string);
1087 if (i < 0 || i > MAX_CODE_ALIGN)
 1088 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1089 else
1090 align_functions = 1 << i;
1094 /* Default align_* from the processor table. */
1095 if (align_loops == 0)
1097 align_loops = processor_target_table[ix86_cpu].align_loop;
1098 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1100 if (align_jumps == 0)
1102 align_jumps = processor_target_table[ix86_cpu].align_jump;
1103 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1105 if (align_functions == 0)
1107 align_functions = processor_target_table[ix86_cpu].align_func;
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary = (optimize_size
1115 ? TARGET_64BIT ? 64 : 32
1116 : 128);
1117 if (ix86_preferred_stack_boundary_string)
1119 i = atoi (ix86_preferred_stack_boundary_string);
1120 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1122 TARGET_64BIT ? 3 : 2);
1123 else
1124 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
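/* Worked example (illustrative, not in the original source): with
   -mpreferred-stack-boundary=4 the boundary becomes
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16 bytes, which
   matches the 128-bit default chosen above for SSE __m128.  */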
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1129 if (ix86_branch_cost_string)
1131 i = atoi (ix86_branch_cost_string);
1132 if (i < 0 || i > 5)
1133 error ("-mbranch-cost=%d is not between 0 and 5", i);
1134 else
1135 ix86_branch_cost = i;
1138 if (ix86_tls_dialect_string)
1140 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1141 ix86_tls_dialect = TLS_DIALECT_GNU;
1142 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1143 ix86_tls_dialect = TLS_DIALECT_SUN;
1144 else
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string);
1149 if (profile_flag)
1150 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1152 /* Keep nonleaf frame pointers. */
1153 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1154 flag_omit_frame_pointer = 1;
1156 /* If we're doing fast math, we don't care about comparison order
1157 wrt NaNs. This lets us use a shorter comparison sequence. */
1158 if (flag_unsafe_math_optimizations)
1159 target_flags &= ~MASK_IEEE_FP;
1161 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1162 since the insns won't need emulation. */
1163 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1164 target_flags &= ~MASK_NO_FANCY_MATH_387;
1166 if (TARGET_64BIT)
1168 if (TARGET_ALIGN_DOUBLE)
1169 error ("-malign-double makes no sense in the 64bit mode");
1170 if (TARGET_RTD)
1171 error ("-mrtd calling convention not supported in the 64bit mode");
1172 /* Enable by default the SSE and MMX builtins. */
1173 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1174 ix86_fpmath = FPMATH_SSE;
1176 else
1177 ix86_fpmath = FPMATH_387;
1179 if (ix86_fpmath_string != 0)
1181 if (! strcmp (ix86_fpmath_string, "387"))
1182 ix86_fpmath = FPMATH_387;
1183 else if (! strcmp (ix86_fpmath_string, "sse"))
1185 if (!TARGET_SSE)
1187 warning ("SSE instruction set disabled, using 387 arithmetics");
1188 ix86_fpmath = FPMATH_387;
1190 else
1191 ix86_fpmath = FPMATH_SSE;
1193 else if (! strcmp (ix86_fpmath_string, "387,sse")
1194 || ! strcmp (ix86_fpmath_string, "sse,387"))
1196 if (!TARGET_SSE)
1198 warning ("SSE instruction set disabled, using 387 arithmetics");
1199 ix86_fpmath = FPMATH_387;
1201 else if (!TARGET_80387)
1203 warning ("387 instruction set disabled, using SSE arithmetics");
1204 ix86_fpmath = FPMATH_SSE;
1206 else
1207 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1209 else
1210 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1213 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1214 on by -msse. */
1215 if (TARGET_SSE)
1217 target_flags |= MASK_MMX;
1218 x86_prefetch_sse = true;
 1221 /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow. */
1222 if (TARGET_3DNOW)
1224 target_flags |= MASK_MMX;
 1225 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
 1226 extensions it adds. */
1227 if (x86_3dnow_a & (1 << ix86_arch))
1228 target_flags |= MASK_3DNOW_A;
1230 if ((x86_accumulate_outgoing_args & CPUMASK)
1231 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1232 && !optimize_size)
1233 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1235 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1237 char *p;
1238 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1239 p = strchr (internal_label_prefix, 'X');
1240 internal_label_prefix_len = p - internal_label_prefix;
1241 *p = '\0';
1245 void
1246 optimization_options (level, size)
1247 int level;
1248 int size ATTRIBUTE_UNUSED;
1250 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1251 make the problem with not enough registers even worse. */
1252 #ifdef INSN_SCHEDULING
1253 if (level > 1)
1254 flag_schedule_insns = 0;
1255 #endif
1256 if (TARGET_64BIT && optimize >= 1)
1257 flag_omit_frame_pointer = 1;
1258 if (TARGET_64BIT)
1260 flag_pcc_struct_return = 0;
1261 flag_asynchronous_unwind_tables = 1;
1263 if (profile_flag)
1264 flag_omit_frame_pointer = 0;
1267 /* Table of valid machine attributes. */
1268 const struct attribute_spec ix86_attribute_table[] =
1270 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1271 /* Stdcall attribute says callee is responsible for popping arguments
1272 if they are not variable. */
1273 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1274 /* Cdecl attribute says the callee is a normal C declaration */
1275 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1276 /* Regparm attribute specifies how many integer arguments are to be
1277 passed in registers. */
1278 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1279 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1280 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1281 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1282 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1283 #endif
1284 { NULL, 0, 0, false, false, false, NULL }
1287 /* Handle a "cdecl" or "stdcall" attribute;
1288 arguments as in struct attribute_spec.handler. */
1289 static tree
1290 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1291 tree *node;
1292 tree name;
1293 tree args ATTRIBUTE_UNUSED;
1294 int flags ATTRIBUTE_UNUSED;
1295 bool *no_add_attrs;
1297 if (TREE_CODE (*node) != FUNCTION_TYPE
1298 && TREE_CODE (*node) != METHOD_TYPE
1299 && TREE_CODE (*node) != FIELD_DECL
1300 && TREE_CODE (*node) != TYPE_DECL)
1302 warning ("`%s' attribute only applies to functions",
1303 IDENTIFIER_POINTER (name));
1304 *no_add_attrs = true;
1307 if (TARGET_64BIT)
1309 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1310 *no_add_attrs = true;
1313 return NULL_TREE;
1316 /* Handle a "regparm" attribute;
1317 arguments as in struct attribute_spec.handler. */
1318 static tree
1319 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1320 tree *node;
1321 tree name;
1322 tree args;
1323 int flags ATTRIBUTE_UNUSED;
1324 bool *no_add_attrs;
1326 if (TREE_CODE (*node) != FUNCTION_TYPE
1327 && TREE_CODE (*node) != METHOD_TYPE
1328 && TREE_CODE (*node) != FIELD_DECL
1329 && TREE_CODE (*node) != TYPE_DECL)
1331 warning ("`%s' attribute only applies to functions",
1332 IDENTIFIER_POINTER (name));
1333 *no_add_attrs = true;
1335 else
1337 tree cst;
1339 cst = TREE_VALUE (args);
1340 if (TREE_CODE (cst) != INTEGER_CST)
1342 warning ("`%s' attribute requires an integer constant argument",
1343 IDENTIFIER_POINTER (name));
1344 *no_add_attrs = true;
1346 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1348 warning ("argument to `%s' attribute larger than %d",
1349 IDENTIFIER_POINTER (name), REGPARM_MAX);
1350 *no_add_attrs = true;
1354 return NULL_TREE;
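/* Usage example (illustrative, not part of this file): a declaration such as

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   asks for the first three integer arguments to be passed in registers
   rather than on the stack; the handler above warns and drops the attribute
   for non-constant values or values larger than REGPARM_MAX.  */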
1357 /* Return 0 if the attributes for two types are incompatible, 1 if they
1358 are compatible, and 2 if they are nearly compatible (which causes a
1359 warning to be generated). */
1361 static int
1362 ix86_comp_type_attributes (type1, type2)
1363 tree type1;
1364 tree type2;
1366 /* Check for mismatch of non-default calling convention. */
1367 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1369 if (TREE_CODE (type1) != FUNCTION_TYPE)
1370 return 1;
1372 /* Check for mismatched return types (cdecl vs stdcall). */
1373 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1374 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1375 return 0;
1376 return 1;
1379 /* Value is the number of bytes of arguments automatically
1380 popped when returning from a subroutine call.
1381 FUNDECL is the declaration node of the function (as a tree),
1382 FUNTYPE is the data type of the function (as a tree),
1383 or for a library call it is an identifier node for the subroutine name.
1384 SIZE is the number of bytes of arguments passed on the stack.
1386 On the 80386, the RTD insn may be used to pop them if the number
1387 of args is fixed, but if the number is variable then the caller
1388 must pop them all. RTD can't be used for library calls now
1389 because the library is compiled with the Unix compiler.
1390 Use of RTD is a selectable option, since it is incompatible with
1391 standard Unix calling sequences. If the option is not selected,
1392 the caller must always pop the args.
1394 The attribute stdcall is equivalent to RTD on a per module basis. */
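/* Example (illustrative, not part of this file): for

     int __attribute__ ((stdcall)) f (int a, int b);

   this function returns SIZE (8 bytes here), so the callee pops its own
   arguments; if f were declared with an ellipsis, the void_type_node test
   below would fail and 0 would be returned, leaving the caller to pop.  */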
1397 ix86_return_pops_args (fundecl, funtype, size)
1398 tree fundecl;
1399 tree funtype;
1400 int size;
1402 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1404 /* Cdecl functions override -mrtd, and never pop the stack. */
1405 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1407 /* Stdcall functions will pop the stack if not variable args. */
1408 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1409 rtd = 1;
1411 if (rtd
1412 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1413 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1414 == void_type_node)))
1415 return size;
1418 /* Lose any fake structure return argument if it is passed on the stack. */
1419 if (aggregate_value_p (TREE_TYPE (funtype))
1420 && !TARGET_64BIT)
1422 int nregs = ix86_regparm;
1424 if (funtype)
1426 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1428 if (attr)
1429 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1432 if (!nregs)
1433 return GET_MODE_SIZE (Pmode);
1436 return 0;
1439 /* Argument support functions. */
1441 /* Return true when register may be used to pass function parameters. */
1442 bool
1443 ix86_function_arg_regno_p (regno)
1444 int regno;
1446 int i;
1447 if (!TARGET_64BIT)
1448 return (regno < REGPARM_MAX
1449 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1450 if (SSE_REGNO_P (regno) && TARGET_SSE)
1451 return true;
1452 /* RAX is used as hidden argument to va_arg functions. */
1453 if (!regno)
1454 return true;
1455 for (i = 0; i < REGPARM_MAX; i++)
1456 if (regno == x86_64_int_parameter_registers[i])
1457 return true;
1458 return false;
1461 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1462 for a call to a function whose data type is FNTYPE.
1463 For a library call, FNTYPE is 0. */
1465 void
1466 init_cumulative_args (cum, fntype, libname)
1467 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1468 tree fntype; /* tree ptr for function decl */
1469 rtx libname; /* SYMBOL_REF of library name or 0 */
1471 static CUMULATIVE_ARGS zero_cum;
1472 tree param, next_param;
1474 if (TARGET_DEBUG_ARG)
1476 fprintf (stderr, "\ninit_cumulative_args (");
1477 if (fntype)
1478 fprintf (stderr, "fntype code = %s, ret code = %s",
1479 tree_code_name[(int) TREE_CODE (fntype)],
1480 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1481 else
1482 fprintf (stderr, "no fntype");
1484 if (libname)
1485 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1488 *cum = zero_cum;
1490 /* Set up the number of registers to use for passing arguments. */
1491 cum->nregs = ix86_regparm;
1492 cum->sse_nregs = SSE_REGPARM_MAX;
1493 if (fntype && !TARGET_64BIT)
1495 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1497 if (attr)
1498 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1500 cum->maybe_vaarg = false;
 1502 /* Determine if this function has variable arguments. This is
 1503 indicated by the last argument being 'void_type_node' if there
 1504 are no variable arguments. If there are variable arguments, then
 1505 we won't pass anything in registers. */
1507 if (cum->nregs)
1509 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1510 param != 0; param = next_param)
1512 next_param = TREE_CHAIN (param);
1513 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1515 if (!TARGET_64BIT)
1516 cum->nregs = 0;
1517 cum->maybe_vaarg = true;
1521 if ((!fntype && !libname)
1522 || (fntype && !TYPE_ARG_TYPES (fntype)))
1523 cum->maybe_vaarg = 1;
1525 if (TARGET_DEBUG_ARG)
1526 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1528 return;
 1531 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
 1532 of this code is to classify each 8-byte chunk of the incoming argument by register
 1533 class and assign registers accordingly. */
1535 /* Return the union class of CLASS1 and CLASS2.
1536 See the x86-64 PS ABI for details. */
1538 static enum x86_64_reg_class
1539 merge_classes (class1, class2)
1540 enum x86_64_reg_class class1, class2;
1542 /* Rule #1: If both classes are equal, this is the resulting class. */
1543 if (class1 == class2)
1544 return class1;
1546 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1547 the other class. */
1548 if (class1 == X86_64_NO_CLASS)
1549 return class2;
1550 if (class2 == X86_64_NO_CLASS)
1551 return class1;
1553 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1554 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1555 return X86_64_MEMORY_CLASS;
1557 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1558 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1559 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1560 return X86_64_INTEGERSI_CLASS;
1561 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1562 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1563 return X86_64_INTEGER_CLASS;
1565 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1566 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1567 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1568 return X86_64_MEMORY_CLASS;
1570 /* Rule #6: Otherwise class SSE is used. */
1571 return X86_64_SSE_CLASS;
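/* Illustrative examples of the rules above (not in the original source):
   merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS by rule #4, while merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS by rule #5.  */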
1574 /* Classify the argument of type TYPE and mode MODE.
1575 CLASSES will be filled by the register class used to pass each word
1576 of the operand. The number of words is returned. In case the parameter
1577 should be passed in memory, 0 is returned. As a special case for zero
1578 sized containers, classes[0] will be NO_CLASS and 1 is returned.
 1580 BIT_OFFSET is used internally for handling records and specifies the
 1581 offset of the argument in bits modulo 256 to avoid overflow cases.
 1583 See the x86-64 PS ABI for details.
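/* Worked example (an assumption about the classification below, following
   the 64-bit ABI): a 16-byte structure such as

     struct s { double d; long l; };

   is classified as two 8-byte words, the first X86_64_SSEDF_CLASS (passed
   in an SSE register) and the second X86_64_INTEGER_CLASS (passed in a
   general register); anything larger than 16 bytes yields 0 here and is
   therefore passed in memory.  */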
1586 static int
1587 classify_argument (mode, type, classes, bit_offset)
1588 enum machine_mode mode;
1589 tree type;
1590 enum x86_64_reg_class classes[MAX_CLASSES];
1591 int bit_offset;
1593 int bytes =
1594 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1595 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1597 /* Variable sized entities are always passed/returned in memory. */
1598 if (bytes < 0)
1599 return 0;
1601 if (type && AGGREGATE_TYPE_P (type))
1603 int i;
1604 tree field;
1605 enum x86_64_reg_class subclasses[MAX_CLASSES];
1607 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1608 if (bytes > 16)
1609 return 0;
1611 for (i = 0; i < words; i++)
1612 classes[i] = X86_64_NO_CLASS;
1614 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1615 signal the memory class, so handle this as a special case. */
1616 if (!words)
1618 classes[0] = X86_64_NO_CLASS;
1619 return 1;
1622 /* Classify each field of record and merge classes. */
1623 if (TREE_CODE (type) == RECORD_TYPE)
1625 /* For classes, first merge in the fields of the subclasses. */
1626 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1628 tree bases = TYPE_BINFO_BASETYPES (type);
1629 int n_bases = TREE_VEC_LENGTH (bases);
1630 int i;
1632 for (i = 0; i < n_bases; ++i)
1634 tree binfo = TREE_VEC_ELT (bases, i);
1635 int num;
1636 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1637 tree type = BINFO_TYPE (binfo);
1639 num = classify_argument (TYPE_MODE (type),
1640 type, subclasses,
1641 (offset + bit_offset) % 256);
1642 if (!num)
1643 return 0;
1644 for (i = 0; i < num; i++)
1646 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1647 classes[i + pos] =
1648 merge_classes (subclasses[i], classes[i + pos]);
1652 /* And now merge the fields of the structure. */
1653 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1655 if (TREE_CODE (field) == FIELD_DECL)
1657 int num;
1659 /* Bitfields are always classified as integer. Handle them
1660 early, since later code would consider them to be
1661 misaligned integers. */
1662 if (DECL_BIT_FIELD (field))
1664 for (i = int_bit_position (field) / 8 / 8;
1665 i < (int_bit_position (field)
1666 + tree_low_cst (DECL_SIZE (field), 0)
1667 + 63) / 8 / 8; i++)
1668 classes[i] =
1669 merge_classes (X86_64_INTEGER_CLASS,
1670 classes[i]);
1672 else
1674 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1675 TREE_TYPE (field), subclasses,
1676 (int_bit_position (field)
1677 + bit_offset) % 256);
1678 if (!num)
1679 return 0;
1680 for (i = 0; i < num; i++)
1682 int pos =
1683 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1684 classes[i + pos] =
1685 merge_classes (subclasses[i], classes[i + pos]);
1691 /* Arrays are handled as small records. */
1692 else if (TREE_CODE (type) == ARRAY_TYPE)
1694 int num;
1695 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1696 TREE_TYPE (type), subclasses, bit_offset);
1697 if (!num)
1698 return 0;
1700 /* The partial classes are now full classes. */
1701 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1702 subclasses[0] = X86_64_SSE_CLASS;
1703 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1704 subclasses[0] = X86_64_INTEGER_CLASS;
1706 for (i = 0; i < words; i++)
1707 classes[i] = subclasses[i % num];
1709 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1710 else if (TREE_CODE (type) == UNION_TYPE
1711 || TREE_CODE (type) == QUAL_UNION_TYPE)
1713 /* For classes, first merge in the fields of the subclasses. */
1714 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1716 tree bases = TYPE_BINFO_BASETYPES (type);
1717 int n_bases = TREE_VEC_LENGTH (bases);
1718 int i;
1720 for (i = 0; i < n_bases; ++i)
1722 tree binfo = TREE_VEC_ELT (bases, i);
1723 int num;
1724 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1725 tree type = BINFO_TYPE (binfo);
1727 num = classify_argument (TYPE_MODE (type),
1728 type, subclasses,
1729 (offset + (bit_offset % 64)) % 256);
1730 if (!num)
1731 return 0;
1732 for (i = 0; i < num; i++)
1734 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1735 classes[i + pos] =
1736 merge_classes (subclasses[i], classes[i + pos]);
1740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1742 if (TREE_CODE (field) == FIELD_DECL)
1744 int num;
1745 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1746 TREE_TYPE (field), subclasses,
1747 bit_offset);
1748 if (!num)
1749 return 0;
1750 for (i = 0; i < num; i++)
1751 classes[i] = merge_classes (subclasses[i], classes[i]);
1755 else
1756 abort ();
1758 /* Final merger cleanup. */
1759 for (i = 0; i < words; i++)
1761 /* If one class is MEMORY, everything should be passed in
1762 memory. */
1763 if (classes[i] == X86_64_MEMORY_CLASS)
1764 return 0;
1766 /* The X86_64_SSEUP_CLASS should be always preceded by
1767 X86_64_SSE_CLASS. */
1768 if (classes[i] == X86_64_SSEUP_CLASS
1769 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1770 classes[i] = X86_64_SSE_CLASS;
1772 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1773 if (classes[i] == X86_64_X87UP_CLASS
1774 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1775 classes[i] = X86_64_SSE_CLASS;
1777 return words;
1780 /* Compute the alignment needed. We align all types to natural boundaries,
1781 with the exception of XFmode, which is aligned to 64 bits. */
1782 if (mode != VOIDmode && mode != BLKmode)
1784 int mode_alignment = GET_MODE_BITSIZE (mode);
1786 if (mode == XFmode)
1787 mode_alignment = 128;
1788 else if (mode == XCmode)
1789 mode_alignment = 256;
1790 /* Misaligned fields are always returned in memory. */
1791 if (bit_offset % mode_alignment)
1792 return 0;
1795 /* Classification of atomic types. */
1796 switch (mode)
1798 case DImode:
1799 case SImode:
1800 case HImode:
1801 case QImode:
1802 case CSImode:
1803 case CHImode:
1804 case CQImode:
1805 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1806 classes[0] = X86_64_INTEGERSI_CLASS;
1807 else
1808 classes[0] = X86_64_INTEGER_CLASS;
1809 return 1;
1810 case CDImode:
1811 case TImode:
1812 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1813 return 2;
1814 case CTImode:
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1817 return 4;
1818 case SFmode:
1819 if (!(bit_offset % 64))
1820 classes[0] = X86_64_SSESF_CLASS;
1821 else
1822 classes[0] = X86_64_SSE_CLASS;
1823 return 1;
1824 case DFmode:
1825 classes[0] = X86_64_SSEDF_CLASS;
1826 return 1;
1827 case TFmode:
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1830 return 2;
1831 case TCmode:
1832 classes[0] = X86_64_X87_CLASS;
1833 classes[1] = X86_64_X87UP_CLASS;
1834 classes[2] = X86_64_X87_CLASS;
1835 classes[3] = X86_64_X87UP_CLASS;
1836 return 4;
1837 case DCmode:
1838 classes[0] = X86_64_SSEDF_CLASS;
1839 classes[1] = X86_64_SSEDF_CLASS;
1840 return 2;
1841 case SCmode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 return 1;
1844 case V4SFmode:
1845 case V4SImode:
1846 case V16QImode:
1847 case V8HImode:
1848 case V2DFmode:
1849 case V2DImode:
1850 classes[0] = X86_64_SSE_CLASS;
1851 classes[1] = X86_64_SSEUP_CLASS;
1852 return 2;
1853 case V2SFmode:
1854 case V2SImode:
1855 case V4HImode:
1856 case V8QImode:
1857 classes[0] = X86_64_SSE_CLASS;
1858 return 1;
1859 case BLKmode:
1860 case VOIDmode:
1861 return 0;
1862 default:
1863 abort ();
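/* Some hand-worked examples of what classify_argument is expected to
   produce for common C types (illustrative only; the authoritative rules
   are in the x86-64 PS ABI):

	double			-> 1 word:  SSEDF
	__int128 (TImode)	-> 2 words: INTEGER, INTEGER
	struct { double x, y; }	-> 2 words: SSEDF, SSEDF
	struct { char c[32]; }	-> 0 (larger than 16 bytes, passed in memory)  */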
1867 /* Examine the argument and set the number of registers required in each
1868 class. Return 0 iff the parameter should be passed in memory. */
1869 static int
1870 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1871 enum machine_mode mode;
1872 tree type;
1873 int *int_nregs, *sse_nregs;
1874 int in_return;
1876 enum x86_64_reg_class class[MAX_CLASSES];
1877 int n = classify_argument (mode, type, class, 0);
1879 *int_nregs = 0;
1880 *sse_nregs = 0;
1881 if (!n)
1882 return 0;
1883 for (n--; n >= 0; n--)
1884 switch (class[n])
1886 case X86_64_INTEGER_CLASS:
1887 case X86_64_INTEGERSI_CLASS:
1888 (*int_nregs)++;
1889 break;
1890 case X86_64_SSE_CLASS:
1891 case X86_64_SSESF_CLASS:
1892 case X86_64_SSEDF_CLASS:
1893 (*sse_nregs)++;
1894 break;
1895 case X86_64_NO_CLASS:
1896 case X86_64_SSEUP_CLASS:
1897 break;
1898 case X86_64_X87_CLASS:
1899 case X86_64_X87UP_CLASS:
1900 if (!in_return)
1901 return 0;
1902 break;
1903 case X86_64_MEMORY_CLASS:
1904 abort ();
1906 return 1;
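/* For instance (hand-worked, assuming the classification sketched above):
   a struct { long l; double d; } argument classifies as INTEGER, SSEDF and
   yields *int_nregs = 1, *sse_nregs = 1 with a return value of 1, while a
   long double (TFmode) argument classifies as X87, X87UP and makes
   examine_argument return 0, i.e. it goes to memory unless it is a
   return value.  */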
1908 /* Construct container for the argument used by GCC interface. See
1909 FUNCTION_ARG for the detailed description. */
1910 static rtx
1911 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1912 enum machine_mode mode;
1913 tree type;
1914 int in_return;
1915 int nintregs, nsseregs;
1916 const int * intreg;
1917 int sse_regno;
1919 enum machine_mode tmpmode;
1920 int bytes =
1921 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1922 enum x86_64_reg_class class[MAX_CLASSES];
1923 int n;
1924 int i;
1925 int nexps = 0;
1926 int needed_sseregs, needed_intregs;
1927 rtx exp[MAX_CLASSES];
1928 rtx ret;
1930 n = classify_argument (mode, type, class, 0);
1931 if (TARGET_DEBUG_ARG)
1933 if (!n)
1934 fprintf (stderr, "Memory class\n");
1935 else
1937 fprintf (stderr, "Classes:");
1938 for (i = 0; i < n; i++)
1940 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1942 fprintf (stderr, "\n");
1945 if (!n)
1946 return NULL;
1947 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1948 return NULL;
1949 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1950 return NULL;
1952 /* First construct simple cases. Avoid SCmode, since we want to use
1953 single register to pass this type. */
1954 if (n == 1 && mode != SCmode)
1955 switch (class[0])
1957 case X86_64_INTEGER_CLASS:
1958 case X86_64_INTEGERSI_CLASS:
1959 return gen_rtx_REG (mode, intreg[0]);
1960 case X86_64_SSE_CLASS:
1961 case X86_64_SSESF_CLASS:
1962 case X86_64_SSEDF_CLASS:
1963 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1964 case X86_64_X87_CLASS:
1965 return gen_rtx_REG (mode, FIRST_STACK_REG);
1966 case X86_64_NO_CLASS:
1967 /* Zero sized array, struct or class. */
1968 return NULL;
1969 default:
1970 abort ();
1972 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1973 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1974 if (n == 2
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1976 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1977 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1978 && class[1] == X86_64_INTEGER_CLASS
1979 && (mode == CDImode || mode == TImode)
1980 && intreg[0] + 1 == intreg[1])
1981 return gen_rtx_REG (mode, intreg[0]);
1982 if (n == 4
1983 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1984 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1985 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1987 /* Otherwise figure out the entries of the PARALLEL. */
1988 for (i = 0; i < n; i++)
1990 switch (class[i])
1992 case X86_64_NO_CLASS:
1993 break;
1994 case X86_64_INTEGER_CLASS:
1995 case X86_64_INTEGERSI_CLASS:
1996 /* Merge TImodes on aligned occasions here too. */
1997 if (i * 8 + 8 > bytes)
1998 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1999 else if (class[i] == X86_64_INTEGERSI_CLASS)
2000 tmpmode = SImode;
2001 else
2002 tmpmode = DImode;
2003 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2004 if (tmpmode == BLKmode)
2005 tmpmode = DImode;
2006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2007 gen_rtx_REG (tmpmode, *intreg),
2008 GEN_INT (i*8));
2009 intreg++;
2010 break;
2011 case X86_64_SSESF_CLASS:
2012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2013 gen_rtx_REG (SFmode,
2014 SSE_REGNO (sse_regno)),
2015 GEN_INT (i*8));
2016 sse_regno++;
2017 break;
2018 case X86_64_SSEDF_CLASS:
2019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2020 gen_rtx_REG (DFmode,
2021 SSE_REGNO (sse_regno)),
2022 GEN_INT (i*8));
2023 sse_regno++;
2024 break;
2025 case X86_64_SSE_CLASS:
2026 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2027 tmpmode = TImode, i++;
2028 else
2029 tmpmode = DImode;
2030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2031 gen_rtx_REG (tmpmode,
2032 SSE_REGNO (sse_regno)),
2033 GEN_INT (i*8));
2034 sse_regno++;
2035 break;
2036 default:
2037 abort ();
2040 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2041 for (i = 0; i < nexps; i++)
2042 XVECEXP (ret, 0, i) = exp [i];
2043 return ret;
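/* As a sketch of the result (hand-built, register names abbreviated): for
   struct { long l; double d; } at the very start of the argument list,
   the container is roughly

	(parallel [(expr_list (reg:DI di)   (const_int 0))
		   (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte travels in %rdi and the second in %xmm0.  */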
2046 /* Update the data in CUM to advance over an argument
2047 of mode MODE and data type TYPE.
2048 (TYPE is null for libcalls where that information may not be available.) */
2050 void
2051 function_arg_advance (cum, mode, type, named)
2052 CUMULATIVE_ARGS *cum; /* current arg information */
2053 enum machine_mode mode; /* current arg mode */
2054 tree type; /* type of the argument or 0 if lib support */
2055 int named; /* whether or not the argument was named */
2057 int bytes =
2058 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2059 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2061 if (TARGET_DEBUG_ARG)
2062 fprintf (stderr,
2063 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2064 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2065 if (TARGET_64BIT)
2067 int int_nregs, sse_nregs;
2068 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2069 cum->words += words;
2070 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2072 cum->nregs -= int_nregs;
2073 cum->sse_nregs -= sse_nregs;
2074 cum->regno += int_nregs;
2075 cum->sse_regno += sse_nregs;
2077 else
2078 cum->words += words;
2080 else
2082 if (TARGET_SSE && mode == TImode)
2084 cum->sse_words += words;
2085 cum->sse_nregs -= 1;
2086 cum->sse_regno += 1;
2087 if (cum->sse_nregs <= 0)
2089 cum->sse_nregs = 0;
2090 cum->sse_regno = 0;
2093 else
2095 cum->words += words;
2096 cum->nregs -= words;
2097 cum->regno += words;
2099 if (cum->nregs <= 0)
2101 cum->nregs = 0;
2102 cum->regno = 0;
2106 return;
2109 /* Define where to put the arguments to a function.
2110 Value is zero to push the argument on the stack,
2111 or a hard register in which to store the argument.
2113 MODE is the argument's machine mode.
2114 TYPE is the data type of the argument (as a tree).
2115 This is null for libcalls where that information may
2116 not be available.
2117 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2118 the preceding args and about the function being called.
2119 NAMED is nonzero if this argument is a named parameter
2120 (otherwise it is an extra parameter matching an ellipsis). */
2123 function_arg (cum, mode, type, named)
2124 CUMULATIVE_ARGS *cum; /* current arg information */
2125 enum machine_mode mode; /* current arg mode */
2126 tree type; /* type of the argument or 0 if lib support */
2127 int named; /* != 0 for normal args, == 0 for ... args */
2129 rtx ret = NULL_RTX;
2130 int bytes =
2131 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2132 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2134 /* Handle a hidden AL argument containing the number of registers for varargs
2135 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2136 any AL settings. */
2137 if (mode == VOIDmode)
2139 if (TARGET_64BIT)
2140 return GEN_INT (cum->maybe_vaarg
2141 ? (cum->sse_nregs < 0
2142 ? SSE_REGPARM_MAX
2143 : cum->sse_regno)
2144 : -1);
2145 else
2146 return constm1_rtx;
2148 if (TARGET_64BIT)
2149 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2150 &x86_64_int_parameter_registers [cum->regno],
2151 cum->sse_regno);
2152 else
2153 switch (mode)
2155 /* For now, pass fp/complex values on the stack. */
2156 default:
2157 break;
2159 case BLKmode:
2160 case DImode:
2161 case SImode:
2162 case HImode:
2163 case QImode:
2164 if (words <= cum->nregs)
2165 ret = gen_rtx_REG (mode, cum->regno);
2166 break;
2167 case TImode:
2168 if (cum->sse_nregs)
2169 ret = gen_rtx_REG (mode, cum->sse_regno);
2170 break;
2173 if (TARGET_DEBUG_ARG)
2175 fprintf (stderr,
2176 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2177 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2179 if (ret)
2180 print_simple_rtl (stderr, ret);
2181 else
2182 fprintf (stderr, ", stack");
2184 fprintf (stderr, " )\n");
2187 return ret;
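/* Putting function_arg and function_arg_advance together, a call such as

	extern void f (int a, double b, long c);

   on x86-64 hands out %edi (SImode) for `a', %xmm0 for `b' and %rsi for
   `c'; once the six integer or eight SSE registers are exhausted, the
   remaining arguments get a NULL return here and go to the stack.  This
   is a hand-worked example, not a normative statement of the ABI.  */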
2190 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2191 and type. */
2194 ix86_function_arg_boundary (mode, type)
2195 enum machine_mode mode;
2196 tree type;
2198 int align;
2199 if (!TARGET_64BIT)
2200 return PARM_BOUNDARY;
2201 if (type)
2202 align = TYPE_ALIGN (type);
2203 else
2204 align = GET_MODE_ALIGNMENT (mode);
2205 if (align < PARM_BOUNDARY)
2206 align = PARM_BOUNDARY;
2207 if (align > 128)
2208 align = 128;
2209 return align;
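/* For example (illustrative): a double has TYPE_ALIGN 64 and gets the
   minimum PARM_BOUNDARY of 64 bits; a __m128 vector gets 128 bits; and a
   type declared with __attribute__ ((aligned (32))), i.e. 256 bits of
   alignment, is clamped back down to the 128-bit maximum.  */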
2212 /* Return true if N is a possible register number of function value. */
2213 bool
2214 ix86_function_value_regno_p (regno)
2215 int regno;
2217 if (!TARGET_64BIT)
2219 return ((regno) == 0
2220 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2221 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2223 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2224 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2225 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2228 /* Define how to find the value returned by a function.
2229 VALTYPE is the data type of the value (as a tree).
2230 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2231 otherwise, FUNC is 0. */
2233 ix86_function_value (valtype)
2234 tree valtype;
2236 if (TARGET_64BIT)
2238 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2239 REGPARM_MAX, SSE_REGPARM_MAX,
2240 x86_64_int_return_registers, 0);
2241 /* For zero sized structures, construct_container returns NULL, but we need
2242 to keep the rest of the compiler happy by returning a meaningful value. */
2243 if (!ret)
2244 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2245 return ret;
2247 else
2248 return gen_rtx_REG (TYPE_MODE (valtype),
2249 ix86_value_regno (TYPE_MODE (valtype)));
2252 /* Return nonzero iff type is returned in memory. */
2254 ix86_return_in_memory (type)
2255 tree type;
2257 int needed_intregs, needed_sseregs;
2258 if (TARGET_64BIT)
2260 return !examine_argument (TYPE_MODE (type), type, 1,
2261 &needed_intregs, &needed_sseregs);
2263 else
2265 if (TYPE_MODE (type) == BLKmode
2266 || (VECTOR_MODE_P (TYPE_MODE (type))
2267 && int_size_in_bytes (type) == 8)
2268 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2269 && TYPE_MODE (type) != TFmode
2270 && !VECTOR_MODE_P (TYPE_MODE (type))))
2271 return 1;
2272 return 0;
2276 /* Define how to find the value returned by a library function
2277 assuming the value has mode MODE. */
2279 ix86_libcall_value (mode)
2280 enum machine_mode mode;
2282 if (TARGET_64BIT)
2284 switch (mode)
2286 case SFmode:
2287 case SCmode:
2288 case DFmode:
2289 case DCmode:
2290 return gen_rtx_REG (mode, FIRST_SSE_REG);
2291 case TFmode:
2292 case TCmode:
2293 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2294 default:
2295 return gen_rtx_REG (mode, 0);
2298 else
2299 return gen_rtx_REG (mode, ix86_value_regno (mode));
2302 /* Given a mode, return the register to use for a return value. */
2304 static int
2305 ix86_value_regno (mode)
2306 enum machine_mode mode;
2308 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2309 return FIRST_FLOAT_REG;
2310 if (mode == TImode || VECTOR_MODE_P (mode))
2311 return FIRST_SSE_REG;
2312 return 0;
2315 /* Create the va_list data type. */
2317 tree
2318 ix86_build_va_list ()
2320 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2322 /* For i386 we use plain pointer to argument area. */
2323 if (!TARGET_64BIT)
2324 return build_pointer_type (char_type_node);
2326 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2327 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2329 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2330 unsigned_type_node);
2331 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2332 unsigned_type_node);
2333 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2334 ptr_type_node);
2335 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2336 ptr_type_node);
2338 DECL_FIELD_CONTEXT (f_gpr) = record;
2339 DECL_FIELD_CONTEXT (f_fpr) = record;
2340 DECL_FIELD_CONTEXT (f_ovf) = record;
2341 DECL_FIELD_CONTEXT (f_sav) = record;
2343 TREE_CHAIN (record) = type_decl;
2344 TYPE_NAME (record) = type_decl;
2345 TYPE_FIELDS (record) = f_gpr;
2346 TREE_CHAIN (f_gpr) = f_fpr;
2347 TREE_CHAIN (f_fpr) = f_ovf;
2348 TREE_CHAIN (f_ovf) = f_sav;
2350 layout_type (record);
2352 /* The correct type is an array type of one element. */
2353 return build_array_type (record, build_index_type (size_zero_node));
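/* The record built above corresponds, roughly, to the following C
   declaration from the x86-64 PS ABI:

	typedef struct {
	  unsigned int gp_offset;	  (offset into the integer register save area)
	  unsigned int fp_offset;	  (offset into the SSE register save area)
	  void *overflow_arg_area;	  (next stack-passed argument)
	  void *reg_save_area;		  (base of the register save area)
	} __va_list_tag;
	typedef __va_list_tag va_list[1];  */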
2356 /* Perform any actions needed for a function that is receiving a
2357 variable number of arguments.
2359 CUM is as above.
2361 MODE and TYPE are the mode and type of the current parameter.
2363 PRETEND_SIZE is a variable that should be set to the amount of stack
2364 that must be pushed by the prologue to pretend that our caller pushed it.
2367 Normally, this macro will push all remaining incoming registers on the
2368 stack and set PRETEND_SIZE to the length of the registers pushed. */
2370 void
2371 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2372 CUMULATIVE_ARGS *cum;
2373 enum machine_mode mode;
2374 tree type;
2375 int *pretend_size ATTRIBUTE_UNUSED;
2376 int no_rtl;
2379 CUMULATIVE_ARGS next_cum;
2380 rtx save_area = NULL_RTX, mem;
2381 rtx label;
2382 rtx label_ref;
2383 rtx tmp_reg;
2384 rtx nsse_reg;
2385 int set;
2386 tree fntype;
2387 int stdarg_p;
2388 int i;
2390 if (!TARGET_64BIT)
2391 return;
2393 /* Indicate that space for the varargs save area must be allocated on the stack. */
2394 ix86_save_varrargs_registers = 1;
2396 fntype = TREE_TYPE (current_function_decl);
2397 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2398 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2399 != void_type_node));
2401 /* For varargs, we do not want to skip the dummy va_dcl argument.
2402 For stdargs, we do want to skip the last named argument. */
2403 next_cum = *cum;
2404 if (stdarg_p)
2405 function_arg_advance (&next_cum, mode, type, 1);
2407 if (!no_rtl)
2408 save_area = frame_pointer_rtx;
2410 set = get_varargs_alias_set ();
2412 for (i = next_cum.regno; i < ix86_regparm; i++)
2414 mem = gen_rtx_MEM (Pmode,
2415 plus_constant (save_area, i * UNITS_PER_WORD));
2416 set_mem_alias_set (mem, set);
2417 emit_move_insn (mem, gen_rtx_REG (Pmode,
2418 x86_64_int_parameter_registers[i]));
2421 if (next_cum.sse_nregs)
2423 /* Now emit code to save SSE registers. The AX parameter contains the number
2424 of SSE parameter registers used to call this function. We use the
2425 sse_prologue_save insn template that produces a computed jump across
2426 the SSE saves. We need some preparation work to get this working. */
2428 label = gen_label_rtx ();
2429 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2431 /* Compute the address to jump to :
2432 label - eax*4 + nnamed_sse_arguments*4 */
2433 tmp_reg = gen_reg_rtx (Pmode);
2434 nsse_reg = gen_reg_rtx (Pmode);
2435 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2436 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2437 gen_rtx_MULT (Pmode, nsse_reg,
2438 GEN_INT (4))));
2439 if (next_cum.sse_regno)
2440 emit_move_insn
2441 (nsse_reg,
2442 gen_rtx_CONST (DImode,
2443 gen_rtx_PLUS (DImode,
2444 label_ref,
2445 GEN_INT (next_cum.sse_regno * 4))));
2446 else
2447 emit_move_insn (nsse_reg, label_ref);
2448 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2450 /* Compute the address of the memory block we save into. We always use a
2451 pointer pointing 127 bytes after the first byte to store - this is needed
2452 to keep the instruction size limited to 4 bytes. */
2453 tmp_reg = gen_reg_rtx (Pmode);
2454 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2455 plus_constant (save_area,
2456 8 * REGPARM_MAX + 127)));
2457 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2458 set_mem_alias_set (mem, set);
2459 set_mem_align (mem, BITS_PER_WORD);
2461 /* And finally do the dirty job! */
2462 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2463 GEN_INT (next_cum.sse_regno), label));
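/* The resulting register save area, which reg_save_area in the va_list
   later points at, is laid out roughly as follows (offsets in bytes,
   assuming REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

	  0 ..  47   %rdi, %rsi, %rdx, %rcx, %r8, %r9   (8 bytes each)
	 48 .. 175   %xmm0 .. %xmm7			 (16 bytes each)

   gp_offset and fp_offset as initialized by ix86_va_start below index
   into this block.  */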
2468 /* Implement va_start. */
2470 void
2471 ix86_va_start (valist, nextarg)
2472 tree valist;
2473 rtx nextarg;
2475 HOST_WIDE_INT words, n_gpr, n_fpr;
2476 tree f_gpr, f_fpr, f_ovf, f_sav;
2477 tree gpr, fpr, ovf, sav, t;
2479 /* Only 64bit target needs something special. */
2480 if (!TARGET_64BIT)
2482 std_expand_builtin_va_start (valist, nextarg);
2483 return;
2486 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2487 f_fpr = TREE_CHAIN (f_gpr);
2488 f_ovf = TREE_CHAIN (f_fpr);
2489 f_sav = TREE_CHAIN (f_ovf);
2491 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2492 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2493 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2494 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2495 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2497 /* Count number of gp and fp argument registers used. */
2498 words = current_function_args_info.words;
2499 n_gpr = current_function_args_info.regno;
2500 n_fpr = current_function_args_info.sse_regno;
2502 if (TARGET_DEBUG_ARG)
2503 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2504 (int) words, (int) n_gpr, (int) n_fpr);
2506 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2507 build_int_2 (n_gpr * 8, 0));
2508 TREE_SIDE_EFFECTS (t) = 1;
2509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2511 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2512 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2513 TREE_SIDE_EFFECTS (t) = 1;
2514 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2516 /* Find the overflow area. */
2517 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2518 if (words != 0)
2519 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2520 build_int_2 (words * UNITS_PER_WORD, 0));
2521 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2522 TREE_SIDE_EFFECTS (t) = 1;
2523 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2525 /* Find the register save area.
2526 The function prologue saves it right above the stack frame. */
2527 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2528 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2529 TREE_SIDE_EFFECTS (t) = 1;
2530 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
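/* As a hand-worked example: for

	int f (const char *fmt, ...)

   where the single named argument consumes one integer register and no
   SSE registers, the code above initializes the va_list roughly to
   gp_offset = 8, fp_offset = 48 (8 * REGPARM_MAX), overflow_arg_area =
   the address of the first stack-passed argument, and reg_save_area =
   the base of the save area set up in ix86_setup_incoming_varargs.  */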
2533 /* Implement va_arg. */
2535 ix86_va_arg (valist, type)
2536 tree valist, type;
2538 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2539 tree f_gpr, f_fpr, f_ovf, f_sav;
2540 tree gpr, fpr, ovf, sav, t;
2541 int size, rsize;
2542 rtx lab_false, lab_over = NULL_RTX;
2543 rtx addr_rtx, r;
2544 rtx container;
2546 /* Only 64bit target needs something special. */
2547 if (!TARGET_64BIT)
2549 return std_expand_builtin_va_arg (valist, type);
2552 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2553 f_fpr = TREE_CHAIN (f_gpr);
2554 f_ovf = TREE_CHAIN (f_fpr);
2555 f_sav = TREE_CHAIN (f_ovf);
2557 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2558 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2559 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2560 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2561 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2563 size = int_size_in_bytes (type);
2564 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2566 container = construct_container (TYPE_MODE (type), type, 0,
2567 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2569 * Pull the value out of the saved registers ...
2572 addr_rtx = gen_reg_rtx (Pmode);
2574 if (container)
2576 rtx int_addr_rtx, sse_addr_rtx;
2577 int needed_intregs, needed_sseregs;
2578 int need_temp;
2580 lab_over = gen_label_rtx ();
2581 lab_false = gen_label_rtx ();
2583 examine_argument (TYPE_MODE (type), type, 0,
2584 &needed_intregs, &needed_sseregs);
2587 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2588 || TYPE_ALIGN (type) > 128);
2590 /* In case we are passing a structure, verify that it is a consecutive block
2591 in the register save area. If not, we need to do moves. */
2592 if (!need_temp && !REG_P (container))
2594 /* Verify that all registers are strictly consecutive */
2595 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2597 int i;
2599 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2601 rtx slot = XVECEXP (container, 0, i);
2602 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2603 || INTVAL (XEXP (slot, 1)) != i * 16)
2604 need_temp = 1;
2607 else
2609 int i;
2611 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2613 rtx slot = XVECEXP (container, 0, i);
2614 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2615 || INTVAL (XEXP (slot, 1)) != i * 8)
2616 need_temp = 1;
2620 if (!need_temp)
2622 int_addr_rtx = addr_rtx;
2623 sse_addr_rtx = addr_rtx;
2625 else
2627 int_addr_rtx = gen_reg_rtx (Pmode);
2628 sse_addr_rtx = gen_reg_rtx (Pmode);
2630 /* First ensure that we fit completely in registers. */
2631 if (needed_intregs)
2633 emit_cmp_and_jump_insns (expand_expr
2634 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2635 GEN_INT ((REGPARM_MAX - needed_intregs +
2636 1) * 8), GE, const1_rtx, SImode,
2637 1, lab_false);
2639 if (needed_sseregs)
2641 emit_cmp_and_jump_insns (expand_expr
2642 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2643 GEN_INT ((SSE_REGPARM_MAX -
2644 needed_sseregs + 1) * 16 +
2645 REGPARM_MAX * 8), GE, const1_rtx,
2646 SImode, 1, lab_false);
2649 /* Compute index to start of area used for integer regs. */
2650 if (needed_intregs)
2652 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2653 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2654 if (r != int_addr_rtx)
2655 emit_move_insn (int_addr_rtx, r);
2657 if (needed_sseregs)
2659 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2660 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2661 if (r != sse_addr_rtx)
2662 emit_move_insn (sse_addr_rtx, r);
2664 if (need_temp)
2666 int i;
2667 rtx mem;
2669 /* Never use the memory itself, as it has the alias set. */
2670 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2671 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2672 set_mem_alias_set (mem, get_varargs_alias_set ());
2673 set_mem_align (mem, BITS_PER_UNIT);
2675 for (i = 0; i < XVECLEN (container, 0); i++)
2677 rtx slot = XVECEXP (container, 0, i);
2678 rtx reg = XEXP (slot, 0);
2679 enum machine_mode mode = GET_MODE (reg);
2680 rtx src_addr;
2681 rtx src_mem;
2682 int src_offset;
2683 rtx dest_mem;
2685 if (SSE_REGNO_P (REGNO (reg)))
2687 src_addr = sse_addr_rtx;
2688 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2690 else
2692 src_addr = int_addr_rtx;
2693 src_offset = REGNO (reg) * 8;
2695 src_mem = gen_rtx_MEM (mode, src_addr);
2696 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2697 src_mem = adjust_address (src_mem, mode, src_offset);
2698 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2699 emit_move_insn (dest_mem, src_mem);
2703 if (needed_intregs)
2706 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2707 build_int_2 (needed_intregs * 8, 0));
2708 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2709 TREE_SIDE_EFFECTS (t) = 1;
2710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2712 if (needed_sseregs)
2715 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2716 build_int_2 (needed_sseregs * 16, 0));
2717 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2718 TREE_SIDE_EFFECTS (t) = 1;
2719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2722 emit_jump_insn (gen_jump (lab_over));
2723 emit_barrier ();
2724 emit_label (lab_false);
2727 /* ... otherwise out of the overflow area. */
2729 /* Care for on-stack alignment if needed. */
2730 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2731 t = ovf;
2732 else
2734 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2735 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2736 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2738 t = save_expr (t);
2740 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2741 if (r != addr_rtx)
2742 emit_move_insn (addr_rtx, r);
2745 build (PLUS_EXPR, TREE_TYPE (t), t,
2746 build_int_2 (rsize * UNITS_PER_WORD, 0));
2747 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2748 TREE_SIDE_EFFECTS (t) = 1;
2749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2751 if (container)
2752 emit_label (lab_over);
2754 return addr_rtx;
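/* For a plain integer argument the RTL emitted above behaves roughly like
   the following C sketch (a simplification; the real expansion works on
   rtx and also handles multi-register, SSE and misaligned cases):

	if (ap->gp_offset >= 6 * 8)		 -- no integer slot left
	  {
	    addr = ap->overflow_arg_area;	 -- fall back to the stack
	    ap->overflow_arg_area += 8;
	  }
	else
	  {
	    addr = ap->reg_save_area + ap->gp_offset;
	    ap->gp_offset += 8;
	  }
	value = *(int *) addr;  */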
2757 /* Return nonzero if OP is general operand representable on x86_64. */
2760 x86_64_general_operand (op, mode)
2761 rtx op;
2762 enum machine_mode mode;
2764 if (!TARGET_64BIT)
2765 return general_operand (op, mode);
2766 if (nonimmediate_operand (op, mode))
2767 return 1;
2768 return x86_64_sign_extended_value (op);
2771 /* Return nonzero if OP is general operand representable on x86_64
2772 as either sign extended or zero extended constant. */
2775 x86_64_szext_general_operand (op, mode)
2776 rtx op;
2777 enum machine_mode mode;
2779 if (!TARGET_64BIT)
2780 return general_operand (op, mode);
2781 if (nonimmediate_operand (op, mode))
2782 return 1;
2783 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2786 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2789 x86_64_nonmemory_operand (op, mode)
2790 rtx op;
2791 enum machine_mode mode;
2793 if (!TARGET_64BIT)
2794 return nonmemory_operand (op, mode);
2795 if (register_operand (op, mode))
2796 return 1;
2797 return x86_64_sign_extended_value (op);
2800 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2803 x86_64_movabs_operand (op, mode)
2804 rtx op;
2805 enum machine_mode mode;
2807 if (!TARGET_64BIT || !flag_pic)
2808 return nonmemory_operand (op, mode);
2809 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2810 return 1;
2811 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2812 return 1;
2813 return 0;
2816 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2819 x86_64_szext_nonmemory_operand (op, mode)
2820 rtx op;
2821 enum machine_mode mode;
2823 if (!TARGET_64BIT)
2824 return nonmemory_operand (op, mode);
2825 if (register_operand (op, mode))
2826 return 1;
2827 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2830 /* Return nonzero if OP is immediate operand representable on x86_64. */
2833 x86_64_immediate_operand (op, mode)
2834 rtx op;
2835 enum machine_mode mode;
2837 if (!TARGET_64BIT)
2838 return immediate_operand (op, mode);
2839 return x86_64_sign_extended_value (op);
2842 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended value. */
2845 x86_64_zext_immediate_operand (op, mode)
2846 rtx op;
2847 enum machine_mode mode ATTRIBUTE_UNUSED;
2849 return x86_64_zero_extended_value (op);
2852 /* Return nonzero if OP is (const_int 1), else return zero. */
2855 const_int_1_operand (op, mode)
2856 rtx op;
2857 enum machine_mode mode ATTRIBUTE_UNUSED;
2859 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2862 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2863 for shift & compare patterns, as shifting by 0 does not change flags),
2864 else return zero. */
2867 const_int_1_31_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode ATTRIBUTE_UNUSED;
2871 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2874 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2875 reference and a constant. */
2878 symbolic_operand (op, mode)
2879 register rtx op;
2880 enum machine_mode mode ATTRIBUTE_UNUSED;
2882 switch (GET_CODE (op))
2884 case SYMBOL_REF:
2885 case LABEL_REF:
2886 return 1;
2888 case CONST:
2889 op = XEXP (op, 0);
2890 if (GET_CODE (op) == SYMBOL_REF
2891 || GET_CODE (op) == LABEL_REF
2892 || (GET_CODE (op) == UNSPEC
2893 && (XINT (op, 1) == UNSPEC_GOT
2894 || XINT (op, 1) == UNSPEC_GOTOFF
2895 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2896 return 1;
2897 if (GET_CODE (op) != PLUS
2898 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2899 return 0;
2901 op = XEXP (op, 0);
2902 if (GET_CODE (op) == SYMBOL_REF
2903 || GET_CODE (op) == LABEL_REF)
2904 return 1;
2905 /* Only @GOTOFF gets offsets. */
2906 if (GET_CODE (op) != UNSPEC
2907 || XINT (op, 1) != UNSPEC_GOTOFF)
2908 return 0;
2910 op = XVECEXP (op, 0, 0);
2911 if (GET_CODE (op) == SYMBOL_REF
2912 || GET_CODE (op) == LABEL_REF)
2913 return 1;
2914 return 0;
2916 default:
2917 return 0;
2921 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2924 pic_symbolic_operand (op, mode)
2925 register rtx op;
2926 enum machine_mode mode ATTRIBUTE_UNUSED;
2928 if (GET_CODE (op) != CONST)
2929 return 0;
2930 op = XEXP (op, 0);
2931 if (TARGET_64BIT)
2933 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2934 return 1;
2936 else
2938 if (GET_CODE (op) == UNSPEC)
2939 return 1;
2940 if (GET_CODE (op) != PLUS
2941 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2942 return 0;
2943 op = XEXP (op, 0);
2944 if (GET_CODE (op) == UNSPEC)
2945 return 1;
2947 return 0;
2950 /* Return true if OP is a symbolic operand that resolves locally. */
2952 static int
2953 local_symbolic_operand (op, mode)
2954 rtx op;
2955 enum machine_mode mode ATTRIBUTE_UNUSED;
2957 if (GET_CODE (op) == LABEL_REF)
2958 return 1;
2960 if (GET_CODE (op) == CONST
2961 && GET_CODE (XEXP (op, 0)) == PLUS
2962 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2963 op = XEXP (XEXP (op, 0), 0);
2965 if (GET_CODE (op) != SYMBOL_REF)
2966 return 0;
2968 /* These we've been told are local by varasm and encode_section_info
2969 respectively. */
2970 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2971 return 1;
2973 /* There is, however, a not insubstantial body of code in the rest of
2974 the compiler that assumes it can just stick the results of
2975 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2976 /* ??? This is a hack. Should update the body of the compiler to
2977 always create a DECL and invoke targetm.encode_section_info. */
2978 if (strncmp (XSTR (op, 0), internal_label_prefix,
2979 internal_label_prefix_len) == 0)
2980 return 1;
2982 return 0;
2985 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2988 tls_symbolic_operand (op, mode)
2989 register rtx op;
2990 enum machine_mode mode ATTRIBUTE_UNUSED;
2992 const char *symbol_str;
2994 if (GET_CODE (op) != SYMBOL_REF)
2995 return 0;
2996 symbol_str = XSTR (op, 0);
2998 if (symbol_str[0] != '%')
2999 return 0;
3000 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3003 static int
3004 tls_symbolic_operand_1 (op, kind)
3005 rtx op;
3006 enum tls_model kind;
3008 const char *symbol_str;
3010 if (GET_CODE (op) != SYMBOL_REF)
3011 return 0;
3012 symbol_str = XSTR (op, 0);
3014 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3018 global_dynamic_symbolic_operand (op, mode)
3019 register rtx op;
3020 enum machine_mode mode ATTRIBUTE_UNUSED;
3022 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3026 local_dynamic_symbolic_operand (op, mode)
3027 register rtx op;
3028 enum machine_mode mode ATTRIBUTE_UNUSED;
3030 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3034 initial_exec_symbolic_operand (op, mode)
3035 register rtx op;
3036 enum machine_mode mode ATTRIBUTE_UNUSED;
3038 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3042 local_exec_symbolic_operand (op, mode)
3043 register rtx op;
3044 enum machine_mode mode ATTRIBUTE_UNUSED;
3046 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3049 /* Test for a valid operand for a call instruction. Don't allow the
3050 arg pointer register or virtual regs since they may decay into
3051 reg + const, which the patterns can't handle. */
3054 call_insn_operand (op, mode)
3055 rtx op;
3056 enum machine_mode mode ATTRIBUTE_UNUSED;
3058 /* Disallow indirect through a virtual register. This leads to
3059 compiler aborts when trying to eliminate them. */
3060 if (GET_CODE (op) == REG
3061 && (op == arg_pointer_rtx
3062 || op == frame_pointer_rtx
3063 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3064 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3065 return 0;
3067 /* Disallow `call 1234'. Due to varying assembler lameness this
3068 gets either rejected or translated to `call .+1234'. */
3069 if (GET_CODE (op) == CONST_INT)
3070 return 0;
3072 /* Explicitly allow SYMBOL_REF even if pic. */
3073 if (GET_CODE (op) == SYMBOL_REF)
3074 return 1;
3076 /* Otherwise we can allow any general_operand in the address. */
3077 return general_operand (op, Pmode);
3081 constant_call_address_operand (op, mode)
3082 rtx op;
3083 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 if (GET_CODE (op) == CONST
3086 && GET_CODE (XEXP (op, 0)) == PLUS
3087 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3088 op = XEXP (XEXP (op, 0), 0);
3089 return GET_CODE (op) == SYMBOL_REF;
3092 /* Match exactly zero and one. */
3095 const0_operand (op, mode)
3096 register rtx op;
3097 enum machine_mode mode;
3099 return op == CONST0_RTX (mode);
3103 const1_operand (op, mode)
3104 register rtx op;
3105 enum machine_mode mode ATTRIBUTE_UNUSED;
3107 return op == const1_rtx;
3110 /* Match 2, 4, or 8. Used for leal multiplicands. */
3113 const248_operand (op, mode)
3114 register rtx op;
3115 enum machine_mode mode ATTRIBUTE_UNUSED;
3117 return (GET_CODE (op) == CONST_INT
3118 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3121 /* True if this is a constant appropriate for an increment or decrement. */
3124 incdec_operand (op, mode)
3125 register rtx op;
3126 enum machine_mode mode ATTRIBUTE_UNUSED;
3128 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3129 flags register, since the carry flag is not set. */
3130 if (TARGET_PENTIUM4 && !optimize_size)
3131 return 0;
3132 return op == const1_rtx || op == constm1_rtx;
3135 /* Return nonzero if OP is acceptable as operand of DImode shift
3136 expander. */
3139 shiftdi_operand (op, mode)
3140 rtx op;
3141 enum machine_mode mode ATTRIBUTE_UNUSED;
3143 if (TARGET_64BIT)
3144 return nonimmediate_operand (op, mode);
3145 else
3146 return register_operand (op, mode);
3149 /* Return false if this is the stack pointer, or any other fake
3150 register eliminable to the stack pointer. Otherwise, this is
3151 a register operand.
3153 This is used to prevent esp from being used as an index reg,
3154 which would only happen in pathological cases. */
3157 reg_no_sp_operand (op, mode)
3158 register rtx op;
3159 enum machine_mode mode;
3161 rtx t = op;
3162 if (GET_CODE (t) == SUBREG)
3163 t = SUBREG_REG (t);
3164 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3165 return 0;
3167 return register_operand (op, mode);
3171 mmx_reg_operand (op, mode)
3172 register rtx op;
3173 enum machine_mode mode ATTRIBUTE_UNUSED;
3175 return MMX_REG_P (op);
3178 /* Return false if this is any eliminable register. Otherwise
3179 general_operand. */
3182 general_no_elim_operand (op, mode)
3183 register rtx op;
3184 enum machine_mode mode;
3186 rtx t = op;
3187 if (GET_CODE (t) == SUBREG)
3188 t = SUBREG_REG (t);
3189 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3190 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3191 || t == virtual_stack_dynamic_rtx)
3192 return 0;
3193 if (REG_P (t)
3194 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3195 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3196 return 0;
3198 return general_operand (op, mode);
3201 /* Return false if this is any eliminable register. Otherwise
3202 register_operand or const_int. */
3205 nonmemory_no_elim_operand (op, mode)
3206 register rtx op;
3207 enum machine_mode mode;
3209 rtx t = op;
3210 if (GET_CODE (t) == SUBREG)
3211 t = SUBREG_REG (t);
3212 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3213 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3214 || t == virtual_stack_dynamic_rtx)
3215 return 0;
3217 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3220 /* Return false if this is any eliminable register or stack register,
3221 otherwise work like register_operand. */
3224 index_register_operand (op, mode)
3225 register rtx op;
3226 enum machine_mode mode;
3228 rtx t = op;
3229 if (GET_CODE (t) == SUBREG)
3230 t = SUBREG_REG (t);
3231 if (!REG_P (t))
3232 return 0;
3233 if (t == arg_pointer_rtx
3234 || t == frame_pointer_rtx
3235 || t == virtual_incoming_args_rtx
3236 || t == virtual_stack_vars_rtx
3237 || t == virtual_stack_dynamic_rtx
3238 || REGNO (t) == STACK_POINTER_REGNUM)
3239 return 0;
3241 return general_operand (op, mode);
3244 /* Return true if op is a Q_REGS class register. */
3247 q_regs_operand (op, mode)
3248 register rtx op;
3249 enum machine_mode mode;
3251 if (mode != VOIDmode && GET_MODE (op) != mode)
3252 return 0;
3253 if (GET_CODE (op) == SUBREG)
3254 op = SUBREG_REG (op);
3255 return ANY_QI_REG_P (op);
3258 /* Return true if op is a NON_Q_REGS class register. */
3261 non_q_regs_operand (op, mode)
3262 register rtx op;
3263 enum machine_mode mode;
3265 if (mode != VOIDmode && GET_MODE (op) != mode)
3266 return 0;
3267 if (GET_CODE (op) == SUBREG)
3268 op = SUBREG_REG (op);
3269 return NON_QI_REG_P (op);
3272 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3273 insns. */
3275 sse_comparison_operator (op, mode)
3276 rtx op;
3277 enum machine_mode mode ATTRIBUTE_UNUSED;
3279 enum rtx_code code = GET_CODE (op);
3280 switch (code)
3282 /* Operations supported directly. */
3283 case EQ:
3284 case LT:
3285 case LE:
3286 case UNORDERED:
3287 case NE:
3288 case UNGE:
3289 case UNGT:
3290 case ORDERED:
3291 return 1;
3292 /* These are equivalent to ones above in non-IEEE comparisons. */
3293 case UNEQ:
3294 case UNLT:
3295 case UNLE:
3296 case LTGT:
3297 case GE:
3298 case GT:
3299 return !TARGET_IEEE_FP;
3300 default:
3301 return 0;
3304 /* Return 1 if OP is a valid comparison operator in valid mode. */
3306 ix86_comparison_operator (op, mode)
3307 register rtx op;
3308 enum machine_mode mode;
3310 enum machine_mode inmode;
3311 enum rtx_code code = GET_CODE (op);
3312 if (mode != VOIDmode && GET_MODE (op) != mode)
3313 return 0;
3314 if (GET_RTX_CLASS (code) != '<')
3315 return 0;
3316 inmode = GET_MODE (XEXP (op, 0));
3318 if (inmode == CCFPmode || inmode == CCFPUmode)
3320 enum rtx_code second_code, bypass_code;
3321 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3322 return (bypass_code == NIL && second_code == NIL);
3324 switch (code)
3326 case EQ: case NE:
3327 return 1;
3328 case LT: case GE:
3329 if (inmode == CCmode || inmode == CCGCmode
3330 || inmode == CCGOCmode || inmode == CCNOmode)
3331 return 1;
3332 return 0;
3333 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3334 if (inmode == CCmode)
3335 return 1;
3336 return 0;
3337 case GT: case LE:
3338 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3339 return 1;
3340 return 0;
3341 default:
3342 return 0;
3346 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3349 fcmov_comparison_operator (op, mode)
3350 register rtx op;
3351 enum machine_mode mode;
3353 enum machine_mode inmode;
3354 enum rtx_code code = GET_CODE (op);
3355 if (mode != VOIDmode && GET_MODE (op) != mode)
3356 return 0;
3357 if (GET_RTX_CLASS (code) != '<')
3358 return 0;
3359 inmode = GET_MODE (XEXP (op, 0));
3360 if (inmode == CCFPmode || inmode == CCFPUmode)
3362 enum rtx_code second_code, bypass_code;
3363 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3364 if (bypass_code != NIL || second_code != NIL)
3365 return 0;
3366 code = ix86_fp_compare_code_to_integer (code);
3368 /* The i387 supports just a limited set of condition codes. */
3369 switch (code)
3371 case LTU: case GTU: case LEU: case GEU:
3372 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3373 return 1;
3374 return 0;
3375 case ORDERED: case UNORDERED:
3376 case EQ: case NE:
3377 return 1;
3378 default:
3379 return 0;
3383 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3386 promotable_binary_operator (op, mode)
3387 register rtx op;
3388 enum machine_mode mode ATTRIBUTE_UNUSED;
3390 switch (GET_CODE (op))
3392 case MULT:
3393 /* Modern CPUs have the same latency for HImode and SImode multiply,
3394 but the 386 and 486 do HImode multiply faster. */
3395 return ix86_cpu > PROCESSOR_I486;
3396 case PLUS:
3397 case AND:
3398 case IOR:
3399 case XOR:
3400 case ASHIFT:
3401 return 1;
3402 default:
3403 return 0;
3407 /* Nearly general operand, but accept any const_double, since we wish
3408 to be able to drop them into memory rather than have them get pulled
3409 into registers. */
3412 cmp_fp_expander_operand (op, mode)
3413 register rtx op;
3414 enum machine_mode mode;
3416 if (mode != VOIDmode && mode != GET_MODE (op))
3417 return 0;
3418 if (GET_CODE (op) == CONST_DOUBLE)
3419 return 1;
3420 return general_operand (op, mode);
3423 /* Match an SI or HImode register for a zero_extract. */
3426 ext_register_operand (op, mode)
3427 register rtx op;
3428 enum machine_mode mode ATTRIBUTE_UNUSED;
3430 int regno;
3431 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3432 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3433 return 0;
3435 if (!register_operand (op, VOIDmode))
3436 return 0;
3438 /* Be careful to accept only registers having upper parts. */
3439 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3440 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3443 /* Return 1 if this is a valid binary floating-point operation.
3444 OP is the expression matched, and MODE is its mode. */
3447 binary_fp_operator (op, mode)
3448 register rtx op;
3449 enum machine_mode mode;
3451 if (mode != VOIDmode && mode != GET_MODE (op))
3452 return 0;
3454 switch (GET_CODE (op))
3456 case PLUS:
3457 case MINUS:
3458 case MULT:
3459 case DIV:
3460 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3462 default:
3463 return 0;
3468 mult_operator (op, mode)
3469 register rtx op;
3470 enum machine_mode mode ATTRIBUTE_UNUSED;
3472 return GET_CODE (op) == MULT;
3476 div_operator (op, mode)
3477 register rtx op;
3478 enum machine_mode mode ATTRIBUTE_UNUSED;
3480 return GET_CODE (op) == DIV;
3484 arith_or_logical_operator (op, mode)
3485 rtx op;
3486 enum machine_mode mode;
3488 return ((mode == VOIDmode || GET_MODE (op) == mode)
3489 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3490 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3493 /* Returns 1 if OP is memory operand with a displacement. */
3496 memory_displacement_operand (op, mode)
3497 register rtx op;
3498 enum machine_mode mode;
3500 struct ix86_address parts;
3502 if (! memory_operand (op, mode))
3503 return 0;
3505 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3506 abort ();
3508 return parts.disp != NULL_RTX;
3511 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3512 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3514 ??? It seems likely that this will only work because cmpsi is an
3515 expander, and no actual insns use this. */
3518 cmpsi_operand (op, mode)
3519 rtx op;
3520 enum machine_mode mode;
3522 if (nonimmediate_operand (op, mode))
3523 return 1;
3525 if (GET_CODE (op) == AND
3526 && GET_MODE (op) == SImode
3527 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3528 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3529 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3530 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3531 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3532 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3533 return 1;
3535 return 0;
3538 /* Returns 1 if OP is memory operand that can not be represented by the
3539 modRM array. */
3542 long_memory_operand (op, mode)
3543 register rtx op;
3544 enum machine_mode mode;
3546 if (! memory_operand (op, mode))
3547 return 0;
3549 return memory_address_length (op) != 0;
3552 /* Return nonzero if the rtx is known aligned. */
3555 aligned_operand (op, mode)
3556 rtx op;
3557 enum machine_mode mode;
3559 struct ix86_address parts;
3561 if (!general_operand (op, mode))
3562 return 0;
3564 /* Registers and immediate operands are always "aligned". */
3565 if (GET_CODE (op) != MEM)
3566 return 1;
3568 /* Don't even try to do any aligned optimizations with volatiles. */
3569 if (MEM_VOLATILE_P (op))
3570 return 0;
3572 op = XEXP (op, 0);
3574 /* Pushes and pops are only valid on the stack pointer. */
3575 if (GET_CODE (op) == PRE_DEC
3576 || GET_CODE (op) == POST_INC)
3577 return 1;
3579 /* Decode the address. */
3580 if (! ix86_decompose_address (op, &parts))
3581 abort ();
3583 if (parts.base && GET_CODE (parts.base) == SUBREG)
3584 parts.base = SUBREG_REG (parts.base);
3585 if (parts.index && GET_CODE (parts.index) == SUBREG)
3586 parts.index = SUBREG_REG (parts.index);
3588 /* Look for some component that isn't known to be aligned. */
3589 if (parts.index)
3591 if (parts.scale < 4
3592 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3593 return 0;
3595 if (parts.base)
3597 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3598 return 0;
3600 if (parts.disp)
3602 if (GET_CODE (parts.disp) != CONST_INT
3603 || (INTVAL (parts.disp) & 3) != 0)
3604 return 0;
3607 /* Didn't find one -- this must be an aligned address. */
3608 return 1;
3611 /* Return true if the constant is something that can be loaded with
3612 a special instruction. Only handle 0.0 and 1.0; others are less
3613 worthwhile. */
3616 standard_80387_constant_p (x)
3617 rtx x;
3619 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3620 return -1;
3621 /* Note that on the 80387 there are other constants, such as pi, that we should
3622 support too. On some machines, these are much slower to load as a standard
3623 constant than to load from doubles in memory. */
3624 if (x == CONST0_RTX (GET_MODE (x)))
3625 return 1;
3626 if (x == CONST1_RTX (GET_MODE (x)))
3627 return 2;
3628 return 0;
3631 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3634 standard_sse_constant_p (x)
3635 rtx x;
3637 if (GET_CODE (x) != CONST_DOUBLE)
3638 return -1;
3639 return (x == CONST0_RTX (GET_MODE (x)));
3642 /* Returns 1 if OP contains a symbol reference */
3645 symbolic_reference_mentioned_p (op)
3646 rtx op;
3648 register const char *fmt;
3649 register int i;
3651 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3652 return 1;
3654 fmt = GET_RTX_FORMAT (GET_CODE (op));
3655 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3657 if (fmt[i] == 'E')
3659 register int j;
3661 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3662 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3663 return 1;
3666 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3667 return 1;
3670 return 0;
3673 /* Return 1 if it is appropriate to emit `ret' instructions in the
3674 body of a function. Do this only if the epilogue is simple, needing a
3675 couple of insns. Prior to reloading, we can't tell how many registers
3676 must be saved, so return 0 then. Return 0 if there is no frame
3677 marker to de-allocate.
3679 If NON_SAVING_SETJMP is defined and true, then it is not possible
3680 for the epilogue to be simple, so return 0. This is a special case
3681 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3682 until final, but jump_optimize may need to know sooner if a
3683 `return' is OK. */
3686 ix86_can_use_return_insn_p ()
3688 struct ix86_frame frame;
3690 #ifdef NON_SAVING_SETJMP
3691 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3692 return 0;
3693 #endif
3695 if (! reload_completed || frame_pointer_needed)
3696 return 0;
3698 /* Don't allow more than 32k bytes to pop, since that's all we can do
3699 with one instruction. */
3700 if (current_function_pops_args
3701 && current_function_args_size >= 32768)
3702 return 0;
3704 ix86_compute_frame_layout (&frame);
3705 return frame.to_allocate == 0 && frame.nregs == 0;
3708 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3710 x86_64_sign_extended_value (value)
3711 rtx value;
3713 switch (GET_CODE (value))
3715 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3716 to be at least 32 and thus all acceptable constants are
3717 represented as CONST_INT. */
3718 case CONST_INT:
3719 if (HOST_BITS_PER_WIDE_INT == 32)
3720 return 1;
3721 else
3723 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3724 return trunc_int_for_mode (val, SImode) == val;
3726 break;
3728 /* For certain code models, the symbolic references are known to fit. */
3729 case SYMBOL_REF:
3730 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3732 /* For certain code models, the code is near as well. */
3733 case LABEL_REF:
3734 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3736 /* We also may accept the offsetted memory references in certain special
3737 cases. */
3738 case CONST:
3739 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3740 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3741 return 1;
3742 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3744 rtx op1 = XEXP (XEXP (value, 0), 0);
3745 rtx op2 = XEXP (XEXP (value, 0), 1);
3746 HOST_WIDE_INT offset;
3748 if (ix86_cmodel == CM_LARGE)
3749 return 0;
3750 if (GET_CODE (op2) != CONST_INT)
3751 return 0;
3752 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3753 switch (GET_CODE (op1))
3755 case SYMBOL_REF:
3756 /* For CM_SMALL assume that the latest object is 1MB before
3757 the end of the 31-bit boundary. We may also accept pretty
3758 large negative constants knowing that all objects are
3759 in the positive half of the address space. */
3760 if (ix86_cmodel == CM_SMALL
3761 && offset < 1024*1024*1024
3762 && trunc_int_for_mode (offset, SImode) == offset)
3763 return 1;
3764 /* For CM_KERNEL we know that all objects reside in the
3765 negative half of the 32-bit address space. We must not
3766 accept negative offsets, since they may be just out of
3767 range, but we may accept pretty large positive ones. */
3768 if (ix86_cmodel == CM_KERNEL
3769 && offset > 0
3770 && trunc_int_for_mode (offset, SImode) == offset)
3771 return 1;
3772 break;
3773 case LABEL_REF:
3774 /* These conditions are similar to SYMBOL_REF ones, just the
3775 constraints for code models differ. */
3776 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3777 && offset < 1024*1024*1024
3778 && trunc_int_for_mode (offset, SImode) == offset)
3779 return 1;
3780 if (ix86_cmodel == CM_KERNEL
3781 && offset > 0
3782 && trunc_int_for_mode (offset, SImode) == offset)
3783 return 1;
3784 break;
3785 default:
3786 return 0;
3789 return 0;
3790 default:
3791 return 0;
3795 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
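/* For example, (const_int 0xffffffff) fits, since it zero-extends to
   itself, while (const_int -1) does not, because as a 64-bit value all of
   its upper bits are set; symbols are accepted only for the small code
   model.  */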
3797 x86_64_zero_extended_value (value)
3798 rtx value;
3800 switch (GET_CODE (value))
3802 case CONST_DOUBLE:
3803 if (HOST_BITS_PER_WIDE_INT == 32)
3804 return (GET_MODE (value) == VOIDmode
3805 && !CONST_DOUBLE_HIGH (value));
3806 else
3807 return 0;
3808 case CONST_INT:
3809 if (HOST_BITS_PER_WIDE_INT == 32)
3810 return INTVAL (value) >= 0;
3811 else
3812 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3813 break;
3815 /* For certain code models, the symbolic references are known to fit. */
3816 case SYMBOL_REF:
3817 return ix86_cmodel == CM_SMALL;
3819 /* For certain code models, the code is near as well. */
3820 case LABEL_REF:
3821 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3823 /* We also may accept the offsetted memory references in certain special
3824 cases. */
3825 case CONST:
3826 if (GET_CODE (XEXP (value, 0)) == PLUS)
3828 rtx op1 = XEXP (XEXP (value, 0), 0);
3829 rtx op2 = XEXP (XEXP (value, 0), 1);
3831 if (ix86_cmodel == CM_LARGE)
3832 return 0;
3833 switch (GET_CODE (op1))
3835 case SYMBOL_REF:
3836 return 0;
3837 /* For small code model we may accept pretty large positive
3838 offsets, since one bit is available for free. Negative
3839 offsets are limited by the size of the NULL pointer area
3840 specified by the ABI. */
3841 if (ix86_cmodel == CM_SMALL
3842 && GET_CODE (op2) == CONST_INT
3843 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3844 && (trunc_int_for_mode (INTVAL (op2), SImode)
3845 == INTVAL (op2)))
3846 return 1;
3847 /* ??? For the kernel, we may accept adjustment of
3848 -0x10000000, since we know that it will just convert
3849 negative address space to positive, but perhaps this
3850 is not worthwhile. */
3851 break;
3852 case LABEL_REF:
3853 /* These conditions are similar to SYMBOL_REF ones, just the
3854 constraints for code models differ. */
3855 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3856 && GET_CODE (op2) == CONST_INT
3857 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3858 && (trunc_int_for_mode (INTVAL (op2), SImode)
3859 == INTVAL (op2)))
3860 return 1;
3861 break;
3862 default:
3863 return 0;
3866 return 0;
3867 default:
3868 return 0;
3872 /* Value should be nonzero if functions must have frame pointers.
3873 Zero means the frame pointer need not be set up (and parms may
3874 be accessed via the stack pointer) in functions that seem suitable. */
3877 ix86_frame_pointer_required ()
3879 /* If we accessed previous frames, then the generated code expects
3880 to be able to access the saved ebp value in our frame. */
3881 if (cfun->machine->accesses_prev_frame)
3882 return 1;
3884 /* Several x86 OSes need a frame pointer for other reasons,
3885 usually pertaining to setjmp. */
3886 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3887 return 1;
3889 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3890 the frame pointer by default. Turn it back on now if we've not
3891 got a leaf function. */
3892 if (TARGET_OMIT_LEAF_FRAME_POINTER
3893 && (!current_function_is_leaf || current_function_profile))
3894 return 1;
3896 return 0;
3899 /* Record that the current function accesses previous call frames. */
3901 void
3902 ix86_setup_frame_addresses ()
3904 cfun->machine->accesses_prev_frame = 1;
3907 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3908 # define USE_HIDDEN_LINKONCE 1
3909 #else
3910 # define USE_HIDDEN_LINKONCE 0
3911 #endif
3913 static int pic_labels_used;
3915 /* Fills in the label name that should be used for a pc thunk for
3916 the given register. */
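/* For instance, for the %ebx PIC register this typically yields
   "__i686.get_pc_thunk.bx" when hidden linkonce sections are available,
   and an internal "LPR"-prefixed label otherwise.  */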
3918 static void
3919 get_pc_thunk_name (name, regno)
3920 char name[32];
3921 unsigned int regno;
3923 if (USE_HIDDEN_LINKONCE)
3924 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3925 else
3926 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3930 /* This function generates code for -fpic that loads %ebx with
3931 the return address of the caller and then returns. */
3933 void
3934 ix86_asm_file_end (file)
3935 FILE *file;
3937 rtx xops[2];
3938 int regno;
3940 for (regno = 0; regno < 8; ++regno)
3942 char name[32];
3944 if (! ((pic_labels_used >> regno) & 1))
3945 continue;
3947 get_pc_thunk_name (name, regno);
3949 if (USE_HIDDEN_LINKONCE)
3951 tree decl;
3953 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3954 error_mark_node);
3955 TREE_PUBLIC (decl) = 1;
3956 TREE_STATIC (decl) = 1;
3957 DECL_ONE_ONLY (decl) = 1;
3959 (*targetm.asm_out.unique_section) (decl, 0);
3960 named_section (decl, NULL, 0);
3962 (*targetm.asm_out.globalize_label) (file, name);
3963 fputs ("\t.hidden\t", file);
3964 assemble_name (file, name);
3965 fputc ('\n', file);
3966 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3968 else
3970 text_section ();
3971 ASM_OUTPUT_LABEL (file, name);
3974 xops[0] = gen_rtx_REG (SImode, regno);
3975 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3976 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3977 output_asm_insn ("ret", xops);
3981 /* Emit code for the SET_GOT patterns. */
3983 const char *
3984 output_set_got (dest)
3985 rtx dest;
3987 rtx xops[3];
3989 xops[0] = dest;
3990 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
3992 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3994 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3996 if (!flag_pic)
3997 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3998 else
3999 output_asm_insn ("call\t%a2", xops);
4001 #if TARGET_MACHO
4002 /* Output the "canonical" label name ("Lxx$pb") here too. This
4003 is what will be referred to by the Mach-O PIC subsystem. */
4004 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4005 #endif
4006 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4007 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4009 if (flag_pic)
4010 output_asm_insn ("pop{l}\t%0", xops);
4012 else
4014 char name[32];
4015 get_pc_thunk_name (name, REGNO (dest));
4016 pic_labels_used |= 1 << REGNO (dest);
4018 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4019 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4020 output_asm_insn ("call\t%X2", xops);
4023 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4024 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4025 else if (!TARGET_MACHO)
4026 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4028 return "";
4031 /* Generate a "push" pattern for input ARG. */
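/* The generated RTL has the shape
     (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)
   (DImode rather than SImode on x86-64), which the push patterns in
   i386.md are expected to match.  */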
4033 static rtx
4034 gen_push (arg)
4035 rtx arg;
4037 return gen_rtx_SET (VOIDmode,
4038 gen_rtx_MEM (Pmode,
4039 gen_rtx_PRE_DEC (Pmode,
4040 stack_pointer_rtx)),
4041 arg);
4044 /* Return >= 0 if there is an unused call-clobbered register available
4045 for the entire function. */
4047 static unsigned int
4048 ix86_select_alt_pic_regnum ()
4050 if (current_function_is_leaf && !current_function_profile)
4052 int i;
4053 for (i = 2; i >= 0; --i)
4054 if (!regs_ever_live[i])
4055 return i;
4058 return INVALID_REGNUM;
4061 /* Return 1 if we need to save REGNO. */
4062 static int
4063 ix86_save_reg (regno, maybe_eh_return)
4064 unsigned int regno;
4065 int maybe_eh_return;
4067 if (pic_offset_table_rtx
4068 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4069 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4070 || current_function_profile
4071 || current_function_calls_eh_return))
4073 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4074 return 0;
4075 return 1;
4078 if (current_function_calls_eh_return && maybe_eh_return)
4080 unsigned i;
4081 for (i = 0; ; i++)
4083 unsigned test = EH_RETURN_DATA_REGNO (i);
4084 if (test == INVALID_REGNUM)
4085 break;
4086 if (test == regno)
4087 return 1;
4091 return (regs_ever_live[regno]
4092 && !call_used_regs[regno]
4093 && !fixed_regs[regno]
4094 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4097 /* Return number of registers to be saved on the stack. */
4099 static int
4100 ix86_nsaved_regs ()
4102 int nregs = 0;
4103 int regno;
4105 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4106 if (ix86_save_reg (regno, true))
4107 nregs++;
4108 return nregs;
4111 /* Return the offset between two registers, one to be eliminated, and the other
4112 its replacement, at the start of a routine. */
4114 HOST_WIDE_INT
4115 ix86_initial_elimination_offset (from, to)
4116 int from;
4117 int to;
4119 struct ix86_frame frame;
4120 ix86_compute_frame_layout (&frame);
4122 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4123 return frame.hard_frame_pointer_offset;
4124 else if (from == FRAME_POINTER_REGNUM
4125 && to == HARD_FRAME_POINTER_REGNUM)
4126 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4127 else
4129 if (to != STACK_POINTER_REGNUM)
4130 abort ();
4131 else if (from == ARG_POINTER_REGNUM)
4132 return frame.stack_pointer_offset;
4133 else if (from != FRAME_POINTER_REGNUM)
4134 abort ();
4135 else
4136 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4140 /* Fill the ix86_frame structure with information about the frame of the current function. */
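/* Rough sketch of the layout computed below, from higher to lower
   addresses:

     return address
     saved frame pointer        (if frame_pointer_needed)
     saved registers            (frame->nregs words)
     va-arg register save area  (x86-64 varargs only)
     padding1                   (aligns the local frame)
     local variables            (get_frame_size () bytes)
     outgoing argument area     (if ACCUMULATE_OUTGOING_ARGS)
     padding2                   (aligns to the preferred stack boundary)

   to_allocate is the part the prologue must subtract from the stack
   pointer; on x86-64 part of it may instead live in the red zone.  */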
4142 static void
4143 ix86_compute_frame_layout (frame)
4144 struct ix86_frame *frame;
4146 HOST_WIDE_INT total_size;
4147 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4148 int offset;
4149 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4150 HOST_WIDE_INT size = get_frame_size ();
4152 frame->nregs = ix86_nsaved_regs ();
4153 total_size = size;
4155 /* Skip return address and saved base pointer. */
4156 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4158 frame->hard_frame_pointer_offset = offset;
4160 /* Do some sanity checking of stack_alignment_needed and
4161 preferred_alignment, since the i386 port is the only one using those
4162 features, and they may break easily. */
4164 if (size && !stack_alignment_needed)
4165 abort ();
4166 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4167 abort ();
4168 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4169 abort ();
4170 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4171 abort ();
4173 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4174 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4176 /* Register save area */
4177 offset += frame->nregs * UNITS_PER_WORD;
4179 /* Va-arg area */
4180 if (ix86_save_varrargs_registers)
4182 offset += X86_64_VARARGS_SIZE;
4183 frame->va_arg_size = X86_64_VARARGS_SIZE;
4185 else
4186 frame->va_arg_size = 0;
4188 /* Align start of frame for local function. */
4189 frame->padding1 = ((offset + stack_alignment_needed - 1)
4190 & -stack_alignment_needed) - offset;
4192 offset += frame->padding1;
4194 /* Frame pointer points here. */
4195 frame->frame_pointer_offset = offset;
4197 offset += size;
4199 /* Add outgoing arguments area. Can be skipped if we eliminated
4200 all the function calls as dead code. */
4201 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4203 offset += current_function_outgoing_args_size;
4204 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4206 else
4207 frame->outgoing_arguments_size = 0;
4209 /* Align stack boundary. Only needed if we're calling another function
4210 or using alloca. */
4211 if (!current_function_is_leaf || current_function_calls_alloca)
4212 frame->padding2 = ((offset + preferred_alignment - 1)
4213 & -preferred_alignment) - offset;
4214 else
4215 frame->padding2 = 0;
4217 offset += frame->padding2;
4219 /* We've reached end of stack frame. */
4220 frame->stack_pointer_offset = offset;
4222 /* Size prologue needs to allocate. */
4223 frame->to_allocate =
4224 (size + frame->padding1 + frame->padding2
4225 + frame->outgoing_arguments_size + frame->va_arg_size);
4227 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4228 && current_function_is_leaf)
4230 frame->red_zone_size = frame->to_allocate;
4231 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4232 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4234 else
4235 frame->red_zone_size = 0;
4236 frame->to_allocate -= frame->red_zone_size;
4237 frame->stack_pointer_offset -= frame->red_zone_size;
4238 #if 0
4239 fprintf (stderr, "nregs: %i\n", frame->nregs);
4240 fprintf (stderr, "size: %i\n", size);
4241 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4242 fprintf (stderr, "padding1: %i\n", frame->padding1);
4243 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4244 fprintf (stderr, "padding2: %i\n", frame->padding2);
4245 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4246 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4247 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4248 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4249 frame->hard_frame_pointer_offset);
4250 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4251 #endif
4254 /* Emit code to save registers in the prologue. */
4256 static void
4257 ix86_emit_save_regs ()
4259 register int regno;
4260 rtx insn;
4262 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4263 if (ix86_save_reg (regno, true))
4265 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4266 RTX_FRAME_RELATED_P (insn) = 1;
4270 /* Emit code to save registers using MOV insns. The first register
4271 is saved at POINTER + OFFSET. */
4272 static void
4273 ix86_emit_save_regs_using_mov (pointer, offset)
4274 rtx pointer;
4275 HOST_WIDE_INT offset;
4277 int regno;
4278 rtx insn;
4280 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4281 if (ix86_save_reg (regno, true))
4283 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4284 Pmode, offset),
4285 gen_rtx_REG (Pmode, regno));
4286 RTX_FRAME_RELATED_P (insn) = 1;
4287 offset += UNITS_PER_WORD;
4291 /* Expand the prologue into a bunch of separate insns. */
4293 void
4294 ix86_expand_prologue ()
4296 rtx insn;
4297 bool pic_reg_used;
4298 struct ix86_frame frame;
4299 int use_mov = 0;
4300 HOST_WIDE_INT allocate;
4302 if (!optimize_size)
4304 use_fast_prologue_epilogue
4305 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4306 if (TARGET_PROLOGUE_USING_MOVE)
4307 use_mov = use_fast_prologue_epilogue;
4309 ix86_compute_frame_layout (&frame);
4311 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4312 slower on all targets. Also sdb doesn't like it. */
4314 if (frame_pointer_needed)
4316 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4317 RTX_FRAME_RELATED_P (insn) = 1;
4319 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4320 RTX_FRAME_RELATED_P (insn) = 1;
4323 allocate = frame.to_allocate;
4324 /* In the case where we are dealing with only a single register and an
4325 empty frame, push is equivalent to the mov+add sequence. */
4326 if (allocate == 0 && frame.nregs <= 1)
4327 use_mov = 0;
4329 if (!use_mov)
4330 ix86_emit_save_regs ();
4331 else
4332 allocate += frame.nregs * UNITS_PER_WORD;
4334 if (allocate == 0)
4336 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4338 insn = emit_insn (gen_pro_epilogue_adjust_stack
4339 (stack_pointer_rtx, stack_pointer_rtx,
4340 GEN_INT (-allocate)));
4341 RTX_FRAME_RELATED_P (insn) = 1;
4343 else
4345 /* ??? Is this only valid for Win32? */
4347 rtx arg0, sym;
4349 if (TARGET_64BIT)
4350 abort ();
4352 arg0 = gen_rtx_REG (SImode, 0);
4353 emit_move_insn (arg0, GEN_INT (allocate));
4355 sym = gen_rtx_MEM (FUNCTION_MODE,
4356 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4357 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4359 CALL_INSN_FUNCTION_USAGE (insn)
4360 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4361 CALL_INSN_FUNCTION_USAGE (insn));
4363 if (use_mov)
4365 if (!frame_pointer_needed || !frame.to_allocate)
4366 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4367 else
4368 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4369 -frame.nregs * UNITS_PER_WORD);
4372 #ifdef SUBTARGET_PROLOGUE
4373 SUBTARGET_PROLOGUE;
4374 #endif
4376 pic_reg_used = false;
4377 if (pic_offset_table_rtx
4378 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4379 || current_function_profile))
4381 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4383 if (alt_pic_reg_used != INVALID_REGNUM)
4384 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4386 pic_reg_used = true;
4389 if (pic_reg_used)
4391 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4393 /* Even with accurate pre-reload life analysis, we can wind up
4394 deleting all references to the pic register after reload.
4395 Consider what happens if cross-jumping unifies two sides of a branch
4396 controlled by a comparison against the only read from a global.
4397 In that case, allow the set_got to be deleted, though we're
4398 too late to do anything about the ebx save in the prologue. */
4399 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4402 /* Prevent function calls from being scheduled before the call to mcount.
4403 In the pic_reg_used case, make sure that the got load isn't deleted. */
4404 if (current_function_profile)
4405 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4408 /* Emit code to restore saved registers using MOV insns. The first register
4409 is restored from POINTER + OFFSET. */
4410 static void
4411 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4412 rtx pointer;
4413 int offset;
4414 int maybe_eh_return;
4416 int regno;
4418 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4419 if (ix86_save_reg (regno, maybe_eh_return))
4421 emit_move_insn (gen_rtx_REG (Pmode, regno),
4422 adjust_address (gen_rtx_MEM (Pmode, pointer),
4423 Pmode, offset));
4424 offset += UNITS_PER_WORD;
4428 /* Restore function stack, frame, and registers. */
4430 void
4431 ix86_expand_epilogue (style)
4432 int style;
4434 int regno;
4435 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4436 struct ix86_frame frame;
4437 HOST_WIDE_INT offset;
4439 ix86_compute_frame_layout (&frame);
4441 /* Calculate start of saved registers relative to ebp. Special care
4442 must be taken for the normal return case of a function using
4443 eh_return: the eax and edx registers are marked as saved, but not
4444 restored along this path. */
4445 offset = frame.nregs;
4446 if (current_function_calls_eh_return && style != 2)
4447 offset -= 2;
4448 offset *= -UNITS_PER_WORD;
4450 /* If we're only restoring one register and sp is not valid, then
4451 use a move instruction to restore the register, since it's
4452 less work than reloading sp and popping the register.
4454 The default code results in a stack adjustment using an add/lea instruction,
4455 while this code results in a LEAVE instruction (or discrete equivalent),
4456 so it is profitable in some other cases as well, especially when there
4457 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4458 and there is exactly one register to pop. This heuristic may need some
4459 tuning in the future. */
4460 if ((!sp_valid && frame.nregs <= 1)
4461 || (TARGET_EPILOGUE_USING_MOVE
4462 && use_fast_prologue_epilogue
4463 && (frame.nregs > 1 || frame.to_allocate))
4464 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4465 || (frame_pointer_needed && TARGET_USE_LEAVE
4466 && use_fast_prologue_epilogue && frame.nregs == 1)
4467 || current_function_calls_eh_return)
4469 /* Restore registers. We can use ebp or esp to address the memory
4470 locations. If both are available, default to ebp, since offsets
4471 are known to be small. The only exception is esp pointing directly
4472 to the end of the block of saved registers, where we may simplify
4473 the addressing mode. */
4475 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4476 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4477 frame.to_allocate, style == 2);
4478 else
4479 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4480 offset, style == 2);
4482 /* eh_return epilogues need %ecx added to the stack pointer. */
4483 if (style == 2)
4485 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4487 if (frame_pointer_needed)
4489 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4490 tmp = plus_constant (tmp, UNITS_PER_WORD);
4491 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4493 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4494 emit_move_insn (hard_frame_pointer_rtx, tmp);
4496 emit_insn (gen_pro_epilogue_adjust_stack
4497 (stack_pointer_rtx, sa, const0_rtx));
4499 else
4501 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4502 tmp = plus_constant (tmp, (frame.to_allocate
4503 + frame.nregs * UNITS_PER_WORD));
4504 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4507 else if (!frame_pointer_needed)
4508 emit_insn (gen_pro_epilogue_adjust_stack
4509 (stack_pointer_rtx, stack_pointer_rtx,
4510 GEN_INT (frame.to_allocate
4511 + frame.nregs * UNITS_PER_WORD)));
4512 /* If not an i386, mov & pop is faster than "leave". */
4513 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4514 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4515 else
4517 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4518 hard_frame_pointer_rtx,
4519 const0_rtx));
4520 if (TARGET_64BIT)
4521 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4522 else
4523 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4526 else
4528 /* First step is to deallocate the stack frame so that we can
4529 pop the registers. */
4530 if (!sp_valid)
4532 if (!frame_pointer_needed)
4533 abort ();
4534 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4535 hard_frame_pointer_rtx,
4536 GEN_INT (offset)));
4538 else if (frame.to_allocate)
4539 emit_insn (gen_pro_epilogue_adjust_stack
4540 (stack_pointer_rtx, stack_pointer_rtx,
4541 GEN_INT (frame.to_allocate)));
4543 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4544 if (ix86_save_reg (regno, false))
4546 if (TARGET_64BIT)
4547 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4548 else
4549 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4551 if (frame_pointer_needed)
4553 /* Leave results in shorter dependency chains on CPUs that are
4554 able to grok it fast. */
4555 if (TARGET_USE_LEAVE)
4556 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4557 else if (TARGET_64BIT)
4558 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4559 else
4560 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4564 /* Sibcall epilogues don't want a return instruction. */
4565 if (style == 0)
4566 return;
4568 if (current_function_pops_args && current_function_args_size)
4570 rtx popc = GEN_INT (current_function_pops_args);
4572 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4573 return address, do an explicit add, and jump indirectly to the
4574 caller. */
4576 if (current_function_pops_args >= 65536)
4578 rtx ecx = gen_rtx_REG (SImode, 2);
4580 /* There is no "pascal" calling convention in the 64-bit ABI. */
4581 if (TARGET_64BIT)
4582 abort ();
4584 emit_insn (gen_popsi1 (ecx));
4585 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4586 emit_jump_insn (gen_return_indirect_internal (ecx));
4588 else
4589 emit_jump_insn (gen_return_pop_internal (popc));
4591 else
4592 emit_jump_insn (gen_return_internal ());
4595 /* Reset state that compiling the function may have modified; in particular, restore the PIC register's number. */
4597 static void
4598 ix86_output_function_epilogue (file, size)
4599 FILE *file ATTRIBUTE_UNUSED;
4600 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4602 if (pic_offset_table_rtx)
4603 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4606 /* Extract the parts of an RTL expression that is a valid memory address
4607 for an instruction. Return 0 if the structure of the address is
4608 grossly off. Return -1 if the address contains ASHIFT, so it is not
4609 strictly valid, but is still used for computing the length of a lea instruction. */
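/* For example, the address of "12(%eax,%ebx,4)", i.e.
     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4)) (reg:SI ax))
              (const_int 12)),
   decomposes into base = %eax, index = %ebx, scale = 4, disp = 12.  */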
4612 static int
4613 ix86_decompose_address (addr, out)
4614 register rtx addr;
4615 struct ix86_address *out;
4617 rtx base = NULL_RTX;
4618 rtx index = NULL_RTX;
4619 rtx disp = NULL_RTX;
4620 HOST_WIDE_INT scale = 1;
4621 rtx scale_rtx = NULL_RTX;
4622 int retval = 1;
4624 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4625 base = addr;
4626 else if (GET_CODE (addr) == PLUS)
4628 rtx op0 = XEXP (addr, 0);
4629 rtx op1 = XEXP (addr, 1);
4630 enum rtx_code code0 = GET_CODE (op0);
4631 enum rtx_code code1 = GET_CODE (op1);
4633 if (code0 == REG || code0 == SUBREG)
4635 if (code1 == REG || code1 == SUBREG)
4636 index = op0, base = op1; /* index + base */
4637 else
4638 base = op0, disp = op1; /* base + displacement */
4640 else if (code0 == MULT)
4642 index = XEXP (op0, 0);
4643 scale_rtx = XEXP (op0, 1);
4644 if (code1 == REG || code1 == SUBREG)
4645 base = op1; /* index*scale + base */
4646 else
4647 disp = op1; /* index*scale + disp */
4649 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4651 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4652 scale_rtx = XEXP (XEXP (op0, 0), 1);
4653 base = XEXP (op0, 1);
4654 disp = op1;
4656 else if (code0 == PLUS)
4658 index = XEXP (op0, 0); /* index + base + disp */
4659 base = XEXP (op0, 1);
4660 disp = op1;
4662 else
4663 return 0;
4665 else if (GET_CODE (addr) == MULT)
4667 index = XEXP (addr, 0); /* index*scale */
4668 scale_rtx = XEXP (addr, 1);
4670 else if (GET_CODE (addr) == ASHIFT)
4672 rtx tmp;
4674 /* We're called for lea too, which implements ashift on occasion. */
4675 index = XEXP (addr, 0);
4676 tmp = XEXP (addr, 1);
4677 if (GET_CODE (tmp) != CONST_INT)
4678 return 0;
4679 scale = INTVAL (tmp);
4680 if ((unsigned HOST_WIDE_INT) scale > 3)
4681 return 0;
4682 scale = 1 << scale;
4683 retval = -1;
4685 else
4686 disp = addr; /* displacement */
4688 /* Extract the integral value of scale. */
4689 if (scale_rtx)
4691 if (GET_CODE (scale_rtx) != CONST_INT)
4692 return 0;
4693 scale = INTVAL (scale_rtx);
4696 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4697 if (base && index && scale == 1
4698 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4699 || index == stack_pointer_rtx))
4701 rtx tmp = base;
4702 base = index;
4703 index = tmp;
4706 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4707 if ((base == hard_frame_pointer_rtx
4708 || base == frame_pointer_rtx
4709 || base == arg_pointer_rtx) && !disp)
4710 disp = const0_rtx;
4712 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4713 Avoid this by transforming it to [%esi+0]. */
4714 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4715 && base && !index && !disp
4716 && REG_P (base)
4717 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4718 disp = const0_rtx;
4720 /* Special case: encode reg+reg instead of reg*2. */
4721 if (!base && index && scale && scale == 2)
4722 base = index, scale = 1;
4724 /* Special case: scaling cannot be encoded without base or displacement. */
4725 if (!base && !disp && index && scale != 1)
4726 disp = const0_rtx;
4728 out->base = base;
4729 out->index = index;
4730 out->disp = disp;
4731 out->scale = scale;
4733 return retval;
4736 /* Return the cost of the memory address X.
4737 For i386, it is better to use a complex address than let gcc copy
4738 the address into a reg and make a new pseudo. But not if the address
4739 requires two regs - that would mean more pseudos with longer
4740 lifetimes. */
4742 ix86_address_cost (x)
4743 rtx x;
4745 struct ix86_address parts;
4746 int cost = 1;
4748 if (!ix86_decompose_address (x, &parts))
4749 abort ();
4751 if (parts.base && GET_CODE (parts.base) == SUBREG)
4752 parts.base = SUBREG_REG (parts.base);
4753 if (parts.index && GET_CODE (parts.index) == SUBREG)
4754 parts.index = SUBREG_REG (parts.index);
4756 /* More complex memory references are better. */
4757 if (parts.disp && parts.disp != const0_rtx)
4758 cost--;
4760 /* Attempt to minimize number of registers in the address. */
4761 if ((parts.base
4762 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4763 || (parts.index
4764 && (!REG_P (parts.index)
4765 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4766 cost++;
4768 if (parts.base
4769 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4770 && parts.index
4771 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4772 && parts.base != parts.index)
4773 cost++;
4775 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4776 since its predecode logic can't detect the length of instructions
4777 and decoding degenerates to vector decoding. Increase the cost of such
4778 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4779 to split such addresses or even reject them entirely.
4781 The following addressing modes are affected:
4782 [base+scale*index]
4783 [scale*index+disp]
4784 [base+index]
4786 The first and last cases may be avoidable by explicitly coding the zero
4787 displacement in the memory address, but I don't have an AMD K6 machine
4788 handy to check this theory. */
4790 if (TARGET_K6
4791 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4792 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4793 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4794 cost += 10;
4796 return cost;
4799 /* If X is a machine specific address (i.e. a symbol or label being
4800 referenced as a displacement from the GOT implemented using an
4801 UNSPEC), then return the base term. Otherwise return X. */
4804 ix86_find_base_term (x)
4805 rtx x;
4807 rtx term;
4809 if (TARGET_64BIT)
4811 if (GET_CODE (x) != CONST)
4812 return x;
4813 term = XEXP (x, 0);
4814 if (GET_CODE (term) == PLUS
4815 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4816 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4817 term = XEXP (term, 0);
4818 if (GET_CODE (term) != UNSPEC
4819 || XINT (term, 1) != UNSPEC_GOTPCREL)
4820 return x;
4822 term = XVECEXP (term, 0, 0);
4824 if (GET_CODE (term) != SYMBOL_REF
4825 && GET_CODE (term) != LABEL_REF)
4826 return x;
4828 return term;
4831 if (GET_CODE (x) != PLUS
4832 || XEXP (x, 0) != pic_offset_table_rtx
4833 || GET_CODE (XEXP (x, 1)) != CONST)
4834 return x;
4836 term = XEXP (XEXP (x, 1), 0);
4838 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4839 term = XEXP (term, 0);
4841 if (GET_CODE (term) != UNSPEC
4842 || XINT (term, 1) != UNSPEC_GOTOFF)
4843 return x;
4845 term = XVECEXP (term, 0, 0);
4847 if (GET_CODE (term) != SYMBOL_REF
4848 && GET_CODE (term) != LABEL_REF)
4849 return x;
4851 return term;
4854 /* Determine if a given RTX is a valid constant. We already know this
4855 satisfies CONSTANT_P. */
4857 bool
4858 legitimate_constant_p (x)
4859 rtx x;
4861 rtx inner;
4863 switch (GET_CODE (x))
4865 case SYMBOL_REF:
4866 /* TLS symbols are not constant. */
4867 if (tls_symbolic_operand (x, Pmode))
4868 return false;
4869 break;
4871 case CONST:
4872 inner = XEXP (x, 0);
4874 /* Offsets of TLS symbols are never valid.
4875 Discourage CSE from creating them. */
4876 if (GET_CODE (inner) == PLUS
4877 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4878 return false;
4880 /* Only some unspecs are valid as "constants". */
4881 if (GET_CODE (inner) == UNSPEC)
4882 switch (XINT (inner, 1))
4884 case UNSPEC_TPOFF:
4885 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4886 default:
4887 return false;
4889 break;
4891 default:
4892 break;
4895 /* Otherwise we handle everything else in the move patterns. */
4896 return true;
4899 /* Determine if a given RTX is a valid constant address. */
4901 bool
4902 constant_address_p (x)
4903 rtx x;
4905 switch (GET_CODE (x))
4907 case LABEL_REF:
4908 case CONST_INT:
4909 return true;
4911 case CONST_DOUBLE:
4912 return TARGET_64BIT;
4914 case CONST:
4915 /* For Mach-O, really believe the CONST. */
4916 if (TARGET_MACHO)
4917 return true;
4918 /* Otherwise fall through. */
4919 case SYMBOL_REF:
4920 return !flag_pic && legitimate_constant_p (x);
4922 default:
4923 return false;
4927 /* Nonzero if the constant value X is a legitimate general operand
4928 when generating PIC code. It is given that flag_pic is on and
4929 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4931 bool
4932 legitimate_pic_operand_p (x)
4933 rtx x;
4935 rtx inner;
4937 switch (GET_CODE (x))
4939 case CONST:
4940 inner = XEXP (x, 0);
4942 /* Only some unspecs are valid as "constants". */
4943 if (GET_CODE (inner) == UNSPEC)
4944 switch (XINT (inner, 1))
4946 case UNSPEC_TPOFF:
4947 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4948 default:
4949 return false;
4951 /* FALLTHRU */
4953 case SYMBOL_REF:
4954 case LABEL_REF:
4955 return legitimate_pic_address_disp_p (x);
4957 default:
4958 return true;
4962 /* Determine if a given CONST RTX is a valid memory displacement
4963 in PIC mode. */
4966 legitimate_pic_address_disp_p (disp)
4967 register rtx disp;
4969 bool saw_plus;
4971 /* In 64bit mode we can allow direct addresses of symbols and labels
4972 when they are not dynamic symbols. */
4973 if (TARGET_64BIT)
4975 rtx x = disp;
4976 if (GET_CODE (disp) == CONST)
4977 x = XEXP (disp, 0);
4978 /* ??? Handle PIC code models */
4979 if (GET_CODE (x) == PLUS
4980 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4981 && ix86_cmodel == CM_SMALL_PIC
4982 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4983 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4984 x = XEXP (x, 0);
4985 if (local_symbolic_operand (x, Pmode))
4986 return 1;
4988 if (GET_CODE (disp) != CONST)
4989 return 0;
4990 disp = XEXP (disp, 0);
4992 if (TARGET_64BIT)
4994 /* It is unsafe to allow PLUS expressions; this would limit the allowed
4995 distance of GOT tables. We should not need these anyway. */
4996 if (GET_CODE (disp) != UNSPEC
4997 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4998 return 0;
5000 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5001 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5002 return 0;
5003 return 1;
5006 saw_plus = false;
5007 if (GET_CODE (disp) == PLUS)
5009 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5010 return 0;
5011 disp = XEXP (disp, 0);
5012 saw_plus = true;
5015 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5016 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5018 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5019 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5020 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5022 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5023 if (strstr (sym_name, "$pb") != 0)
5024 return 1;
5028 if (GET_CODE (disp) != UNSPEC)
5029 return 0;
5031 switch (XINT (disp, 1))
5033 case UNSPEC_GOT:
5034 if (saw_plus)
5035 return false;
5036 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5037 case UNSPEC_GOTOFF:
5038 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5039 case UNSPEC_GOTTPOFF:
5040 if (saw_plus)
5041 return false;
5042 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5043 case UNSPEC_NTPOFF:
5044 /* ??? Could support offset here. */
5045 if (saw_plus)
5046 return false;
5047 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5048 case UNSPEC_DTPOFF:
5049 /* ??? Could support offset here. */
5050 if (saw_plus)
5051 return false;
5052 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5055 return 0;
5058 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5059 memory address for an instruction. The MODE argument is the machine mode
5060 for the MEM expression that wants to use this address.
5062 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5063 convert common non-canonical forms to canonical form so that they will
5064 be recognized. */
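/* As a concrete example, "movl 4(%ebx,%esi,8), %eax" uses an address with
   base = %ebx, index = %esi, scale = 8 and disp = 4; scale factors other
   than 1, 2, 4 and 8 are rejected below, and %esp cannot be used as the
   index register.  */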
5067 legitimate_address_p (mode, addr, strict)
5068 enum machine_mode mode;
5069 register rtx addr;
5070 int strict;
5072 struct ix86_address parts;
5073 rtx base, index, disp;
5074 HOST_WIDE_INT scale;
5075 const char *reason = NULL;
5076 rtx reason_rtx = NULL_RTX;
5078 if (TARGET_DEBUG_ADDR)
5080 fprintf (stderr,
5081 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5082 GET_MODE_NAME (mode), strict);
5083 debug_rtx (addr);
5086 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5088 if (TARGET_DEBUG_ADDR)
5089 fprintf (stderr, "Success.\n");
5090 return TRUE;
5093 if (ix86_decompose_address (addr, &parts) <= 0)
5095 reason = "decomposition failed";
5096 goto report_error;
5099 base = parts.base;
5100 index = parts.index;
5101 disp = parts.disp;
5102 scale = parts.scale;
5104 /* Validate base register.
5106 Don't allow SUBREG's here, it can lead to spill failures when the base
5107 is one word out of a two word structure, which is represented internally
5108 as a DImode int. */
5110 if (base)
5112 rtx reg;
5113 reason_rtx = base;
5115 if (GET_CODE (base) == SUBREG)
5116 reg = SUBREG_REG (base);
5117 else
5118 reg = base;
5120 if (GET_CODE (reg) != REG)
5122 reason = "base is not a register";
5123 goto report_error;
5126 if (GET_MODE (base) != Pmode)
5128 reason = "base is not in Pmode";
5129 goto report_error;
5132 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5133 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5135 reason = "base is not valid";
5136 goto report_error;
5140 /* Validate index register.
5142 Don't allow SUBREG's here, it can lead to spill failures when the index
5143 is one word out of a two word structure, which is represented internally
5144 as a DImode int. */
5146 if (index)
5148 rtx reg;
5149 reason_rtx = index;
5151 if (GET_CODE (index) == SUBREG)
5152 reg = SUBREG_REG (index);
5153 else
5154 reg = index;
5156 if (GET_CODE (reg) != REG)
5158 reason = "index is not a register";
5159 goto report_error;
5162 if (GET_MODE (index) != Pmode)
5164 reason = "index is not in Pmode";
5165 goto report_error;
5168 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5169 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5171 reason = "index is not valid";
5172 goto report_error;
5176 /* Validate scale factor. */
5177 if (scale != 1)
5179 reason_rtx = GEN_INT (scale);
5180 if (!index)
5182 reason = "scale without index";
5183 goto report_error;
5186 if (scale != 2 && scale != 4 && scale != 8)
5188 reason = "scale is not a valid multiplier";
5189 goto report_error;
5193 /* Validate displacement. */
5194 if (disp)
5196 reason_rtx = disp;
5198 if (TARGET_64BIT)
5200 if (!x86_64_sign_extended_value (disp))
5202 reason = "displacement is out of range";
5203 goto report_error;
5206 else
5208 if (GET_CODE (disp) == CONST_DOUBLE)
5210 reason = "displacement is a const_double";
5211 goto report_error;
5215 if (GET_CODE (disp) == CONST
5216 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5217 switch (XINT (XEXP (disp, 0), 1))
5219 case UNSPEC_GOT:
5220 case UNSPEC_GOTOFF:
5221 case UNSPEC_GOTPCREL:
5222 if (!flag_pic)
5223 abort ();
5224 goto is_legitimate_pic;
5226 case UNSPEC_GOTTPOFF:
5227 case UNSPEC_NTPOFF:
5228 case UNSPEC_DTPOFF:
5229 break;
5231 default:
5232 reason = "invalid address unspec";
5233 goto report_error;
5236 else if (flag_pic && (SYMBOLIC_CONST (disp)
5237 #if TARGET_MACHO
5238 && !machopic_operand_p (disp)
5239 #endif
5242 is_legitimate_pic:
5243 if (TARGET_64BIT && (index || base))
5245 reason = "non-constant pic memory reference";
5246 goto report_error;
5248 if (! legitimate_pic_address_disp_p (disp))
5250 reason = "displacement is an invalid pic construct";
5251 goto report_error;
5254 /* This code used to verify that a symbolic pic displacement
5255 includes the pic_offset_table_rtx register.
5257 While this is a good idea, unfortunately these constructs may
5258 be created by the "adds using lea" optimization for incorrect
5259 code like:
5261 int a;
5262 int foo(int i)
5264 return *(&a+i);
5267 This code is nonsensical, but results in addressing the
5268 GOT table with a pic_offset_table_rtx base. We can't
5269 just reject it easily, since it gets matched by the
5270 "addsi3" pattern, which later gets split to lea when the
5271 output register differs from the input. While this
5272 could be handled by a separate addsi pattern for this case
5273 that never results in lea, disabling this test seems to be
5274 the easier and correct fix for the crash. */
5276 else if (!CONSTANT_ADDRESS_P (disp))
5278 reason = "displacement is not constant";
5279 goto report_error;
5283 /* Everything looks valid. */
5284 if (TARGET_DEBUG_ADDR)
5285 fprintf (stderr, "Success.\n");
5286 return TRUE;
5288 report_error:
5289 if (TARGET_DEBUG_ADDR)
5291 fprintf (stderr, "Error: %s\n", reason);
5292 debug_rtx (reason_rtx);
5294 return FALSE;
5297 /* Return a unique alias set for the GOT. */
5299 static HOST_WIDE_INT
5300 ix86_GOT_alias_set ()
5302 static HOST_WIDE_INT set = -1;
5303 if (set == -1)
5304 set = new_alias_set ();
5305 return set;
5308 /* Return a legitimate reference for ORIG (an address) using the
5309 register REG. If REG is 0, a new pseudo is generated.
5311 There are two types of references that must be handled:
5313 1. Global data references must load the address from the GOT, via
5314 the PIC reg. An insn is emitted to do this load, and the reg is
5315 returned.
5317 2. Static data references, constant pool addresses, and code labels
5318 compute the address as an offset from the GOT, whose base is in
5319 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5320 differentiate them from global data objects. The returned
5321 address is the PIC reg + an unspec constant.
5323 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5324 reg also appears in the address. */
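/* For illustration (32-bit case): a global "x" becomes a load through the
   GOT, roughly "movl x@GOT(%ebx), %reg", while a local or static "y"
   becomes an address computed from the PIC base, roughly
   "leal y@GOTOFF(%ebx), %reg". The 64-bit case uses %rip-relative
   x@GOTPCREL references instead.  */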
5327 legitimize_pic_address (orig, reg)
5328 rtx orig;
5329 rtx reg;
5331 rtx addr = orig;
5332 rtx new = orig;
5333 rtx base;
5335 #if TARGET_MACHO
5336 if (reg == 0)
5337 reg = gen_reg_rtx (Pmode);
5338 /* Use the generic Mach-O PIC machinery. */
5339 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5340 #endif
5342 if (local_symbolic_operand (addr, Pmode))
5344 /* In 64bit mode we can address such objects directly. */
5345 if (TARGET_64BIT)
5346 new = addr;
5347 else
5349 /* This symbol may be referenced via a displacement from the PIC
5350 base address (@GOTOFF). */
5352 if (reload_in_progress)
5353 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5354 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5355 new = gen_rtx_CONST (Pmode, new);
5356 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5358 if (reg != 0)
5360 emit_move_insn (reg, new);
5361 new = reg;
5365 else if (GET_CODE (addr) == SYMBOL_REF)
5367 if (TARGET_64BIT)
5369 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5370 new = gen_rtx_CONST (Pmode, new);
5371 new = gen_rtx_MEM (Pmode, new);
5372 RTX_UNCHANGING_P (new) = 1;
5373 set_mem_alias_set (new, ix86_GOT_alias_set ());
5375 if (reg == 0)
5376 reg = gen_reg_rtx (Pmode);
5377 /* Use gen_movsi directly; otherwise the address is loaded
5378 into a register for CSE. We don't want to CSE these addresses;
5379 instead we CSE the addresses loaded from the GOT table, so skip this. */
5380 emit_insn (gen_movsi (reg, new));
5381 new = reg;
5383 else
5385 /* This symbol must be referenced via a load from the
5386 Global Offset Table (@GOT). */
5388 if (reload_in_progress)
5389 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5390 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5391 new = gen_rtx_CONST (Pmode, new);
5392 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5393 new = gen_rtx_MEM (Pmode, new);
5394 RTX_UNCHANGING_P (new) = 1;
5395 set_mem_alias_set (new, ix86_GOT_alias_set ());
5397 if (reg == 0)
5398 reg = gen_reg_rtx (Pmode);
5399 emit_move_insn (reg, new);
5400 new = reg;
5403 else
5405 if (GET_CODE (addr) == CONST)
5407 addr = XEXP (addr, 0);
5409 /* We must match stuff we generated earlier. Assume the only
5410 unspecs that can get here are ours. Not that we could do
5411 anything with them anyway... */
5412 if (GET_CODE (addr) == UNSPEC
5413 || (GET_CODE (addr) == PLUS
5414 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5415 return orig;
5416 if (GET_CODE (addr) != PLUS)
5417 abort ();
5419 if (GET_CODE (addr) == PLUS)
5421 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5423 /* Check first to see if this is a constant offset from a @GOTOFF
5424 symbol reference. */
5425 if (local_symbolic_operand (op0, Pmode)
5426 && GET_CODE (op1) == CONST_INT)
5428 if (!TARGET_64BIT)
5430 if (reload_in_progress)
5431 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5432 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5433 UNSPEC_GOTOFF);
5434 new = gen_rtx_PLUS (Pmode, new, op1);
5435 new = gen_rtx_CONST (Pmode, new);
5436 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5438 if (reg != 0)
5440 emit_move_insn (reg, new);
5441 new = reg;
5444 else
5446 /* ??? We need to limit offsets here. */
5449 else
5451 base = legitimize_pic_address (XEXP (addr, 0), reg);
5452 new = legitimize_pic_address (XEXP (addr, 1),
5453 base == reg ? NULL_RTX : reg);
5455 if (GET_CODE (new) == CONST_INT)
5456 new = plus_constant (base, INTVAL (new));
5457 else
5459 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5461 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5462 new = XEXP (new, 1);
5464 new = gen_rtx_PLUS (Pmode, base, new);
5469 return new;
5472 static void
5473 ix86_encode_section_info (decl, first)
5474 tree decl;
5475 int first ATTRIBUTE_UNUSED;
5477 bool local_p = (*targetm.binds_local_p) (decl);
5478 rtx rtl, symbol;
5480 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5481 if (GET_CODE (rtl) != MEM)
5482 return;
5483 symbol = XEXP (rtl, 0);
5484 if (GET_CODE (symbol) != SYMBOL_REF)
5485 return;
5487 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5488 symbol so that we may access it directly in the GOT. */
5490 if (flag_pic)
5491 SYMBOL_REF_FLAG (symbol) = local_p;
5493 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5494 "local dynamic", "initial exec" or "local exec" TLS models
5495 respectively. */
5497 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5499 const char *symbol_str;
5500 char *newstr;
5501 size_t len;
5502 enum tls_model kind;
5504 if (!flag_pic)
5506 if (local_p)
5507 kind = TLS_MODEL_LOCAL_EXEC;
5508 else
5509 kind = TLS_MODEL_INITIAL_EXEC;
5511 /* Local dynamic is inefficient when we're not combining the
5512 parts of the address. */
5513 else if (optimize && local_p)
5514 kind = TLS_MODEL_LOCAL_DYNAMIC;
5515 else
5516 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5517 if (kind < flag_tls_default)
5518 kind = flag_tls_default;
5520 symbol_str = XSTR (symbol, 0);
5522 if (symbol_str[0] == '%')
5524 if (symbol_str[1] == tls_model_chars[kind])
5525 return;
5526 symbol_str += 2;
5528 len = strlen (symbol_str) + 1;
5529 newstr = alloca (len + 2);
5531 newstr[0] = '%';
5532 newstr[1] = tls_model_chars[kind];
5533 memcpy (newstr + 2, symbol_str, len);
5535 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5539 /* Undo the above when printing symbol names. */
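/* For example, a name encoded as "%ifoo" for the initial-exec TLS model
   is printed as "foo"; a leading '*' (marking a name that should bypass
   the user label prefix) is dropped as well.  */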
5541 static const char *
5542 ix86_strip_name_encoding (str)
5543 const char *str;
5545 if (str[0] == '%')
5546 str += 2;
5547 if (str [0] == '*')
5548 str += 1;
5549 return str;
5552 /* Load the thread pointer into a register. */
5554 static rtx
5555 get_thread_pointer ()
5557 rtx tp;
5559 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5560 tp = gen_rtx_MEM (Pmode, tp);
5561 RTX_UNCHANGING_P (tp) = 1;
5562 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5563 tp = force_reg (Pmode, tp);
5565 return tp;
5568 /* Try machine-dependent ways of modifying an illegitimate address
5569 to be legitimate. If we find one, return the new, valid address.
5570 This macro is used in only one place: `memory_address' in explow.c.
5572 OLDX is the address as it was before break_out_memory_refs was called.
5573 In some cases it is useful to look at this to decide what needs to be done.
5575 MODE and WIN are passed so that this macro can use
5576 GO_IF_LEGITIMATE_ADDRESS.
5578 It is always safe for this macro to do nothing. It exists to recognize
5579 opportunities to optimize the output.
5581 For the 80386, we handle X+REG by loading X into a register R and
5582 using R+REG. R will go in a general reg and indexing will be used.
5583 However, if REG is a broken-out memory address or multiplication,
5584 nothing needs to be done because REG can certainly go in a general reg.
5586 When -fpic is used, special handling is needed for symbolic references.
5587 See comments by legitimize_pic_address in i386.c for details. */
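/* For example, (plus:SI (ashift:SI (reg:SI 58) (const_int 2)) (reg:SI 59))
   (with 58 and 59 standing in for arbitrary pseudo registers) is rewritten
   below into (plus:SI (mult:SI (reg:SI 58) (const_int 4)) (reg:SI 59)),
   which GO_IF_LEGITIMATE_ADDRESS accepts as index*4 + base.  */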
5590 legitimize_address (x, oldx, mode)
5591 register rtx x;
5592 register rtx oldx ATTRIBUTE_UNUSED;
5593 enum machine_mode mode;
5595 int changed = 0;
5596 unsigned log;
5598 if (TARGET_DEBUG_ADDR)
5600 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5601 GET_MODE_NAME (mode));
5602 debug_rtx (x);
5605 log = tls_symbolic_operand (x, mode);
5606 if (log)
5608 rtx dest, base, off, pic;
5610 switch (log)
5612 case TLS_MODEL_GLOBAL_DYNAMIC:
5613 dest = gen_reg_rtx (Pmode);
5614 emit_insn (gen_tls_global_dynamic (dest, x));
5615 break;
5617 case TLS_MODEL_LOCAL_DYNAMIC:
5618 base = gen_reg_rtx (Pmode);
5619 emit_insn (gen_tls_local_dynamic_base (base));
5621 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5622 off = gen_rtx_CONST (Pmode, off);
5624 return gen_rtx_PLUS (Pmode, base, off);
5626 case TLS_MODEL_INITIAL_EXEC:
5627 if (flag_pic)
5629 if (reload_in_progress)
5630 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5631 pic = pic_offset_table_rtx;
5633 else
5635 pic = gen_reg_rtx (Pmode);
5636 emit_insn (gen_set_got (pic));
5639 base = get_thread_pointer ();
5641 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5642 off = gen_rtx_CONST (Pmode, off);
5643 off = gen_rtx_PLUS (Pmode, pic, off);
5644 off = gen_rtx_MEM (Pmode, off);
5645 RTX_UNCHANGING_P (off) = 1;
5646 set_mem_alias_set (off, ix86_GOT_alias_set ());
5648 /* Damn Sun for specifying a set of dynamic relocations without
5649 considering the two-operand nature of the architecture!
5650 We'd be much better off with a "GOTNTPOFF" relocation that
5651 already contained the negated constant. */
5652 /* ??? Using negl and reg+reg addressing appears to be a loss
5653 size-wise. The negl is two bytes, just like the extra movl
5654 incurred by the two-operand subl, but reg+reg addressing
5655 uses the two-byte modrm form, unlike plain reg. */
5657 dest = gen_reg_rtx (Pmode);
5658 emit_insn (gen_subsi3 (dest, base, off));
5659 break;
5661 case TLS_MODEL_LOCAL_EXEC:
5662 base = get_thread_pointer ();
5664 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5665 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5666 off = gen_rtx_CONST (Pmode, off);
5668 if (TARGET_GNU_TLS)
5669 return gen_rtx_PLUS (Pmode, base, off);
5670 else
5672 dest = gen_reg_rtx (Pmode);
5673 emit_insn (gen_subsi3 (dest, base, off));
5675 break;
5677 default:
5678 abort ();
5681 return dest;
5684 if (flag_pic && SYMBOLIC_CONST (x))
5685 return legitimize_pic_address (x, 0);
5687 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5688 if (GET_CODE (x) == ASHIFT
5689 && GET_CODE (XEXP (x, 1)) == CONST_INT
5690 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5692 changed = 1;
5693 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5694 GEN_INT (1 << log));
5697 if (GET_CODE (x) == PLUS)
5699 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5701 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5702 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5703 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5705 changed = 1;
5706 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5707 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5708 GEN_INT (1 << log));
5711 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5712 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5713 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5715 changed = 1;
5716 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5717 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5718 GEN_INT (1 << log));
5721 /* Put multiply first if it isn't already. */
5722 if (GET_CODE (XEXP (x, 1)) == MULT)
5724 rtx tmp = XEXP (x, 0);
5725 XEXP (x, 0) = XEXP (x, 1);
5726 XEXP (x, 1) = tmp;
5727 changed = 1;
5730 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5731 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5732 created by virtual register instantiation, register elimination, and
5733 similar optimizations. */
5734 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5736 changed = 1;
5737 x = gen_rtx_PLUS (Pmode,
5738 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5739 XEXP (XEXP (x, 1), 0)),
5740 XEXP (XEXP (x, 1), 1));
5743 /* Canonicalize
5744 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5745 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5746 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5748 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5749 && CONSTANT_P (XEXP (x, 1)))
5751 rtx constant;
5752 rtx other = NULL_RTX;
5754 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5756 constant = XEXP (x, 1);
5757 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5759 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5761 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5762 other = XEXP (x, 1);
5764 else
5765 constant = 0;
5767 if (constant)
5769 changed = 1;
5770 x = gen_rtx_PLUS (Pmode,
5771 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5772 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5773 plus_constant (other, INTVAL (constant)));
5777 if (changed && legitimate_address_p (mode, x, FALSE))
5778 return x;
5780 if (GET_CODE (XEXP (x, 0)) == MULT)
5782 changed = 1;
5783 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5786 if (GET_CODE (XEXP (x, 1)) == MULT)
5788 changed = 1;
5789 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5792 if (changed
5793 && GET_CODE (XEXP (x, 1)) == REG
5794 && GET_CODE (XEXP (x, 0)) == REG)
5795 return x;
5797 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5799 changed = 1;
5800 x = legitimize_pic_address (x, 0);
5803 if (changed && legitimate_address_p (mode, x, FALSE))
5804 return x;
5806 if (GET_CODE (XEXP (x, 0)) == REG)
5808 register rtx temp = gen_reg_rtx (Pmode);
5809 register rtx val = force_operand (XEXP (x, 1), temp);
5810 if (val != temp)
5811 emit_move_insn (temp, val);
5813 XEXP (x, 1) = temp;
5814 return x;
5817 else if (GET_CODE (XEXP (x, 1)) == REG)
5819 register rtx temp = gen_reg_rtx (Pmode);
5820 register rtx val = force_operand (XEXP (x, 0), temp);
5821 if (val != temp)
5822 emit_move_insn (temp, val);
5824 XEXP (x, 0) = temp;
5825 return x;
5829 return x;
5832 /* Print an integer constant expression in assembler syntax. Addition
5833 and subtraction are the only arithmetic that may appear in these
5834 expressions. FILE is the stdio stream to write to, X is the rtx, and
5835 CODE is the operand print code from the output string. */
5837 static void
5838 output_pic_addr_const (file, x, code)
5839 FILE *file;
5840 rtx x;
5841 int code;
5843 char buf[256];
5845 switch (GET_CODE (x))
5847 case PC:
5848 if (flag_pic)
5849 putc ('.', file);
5850 else
5851 abort ();
5852 break;
5854 case SYMBOL_REF:
5855 assemble_name (file, XSTR (x, 0));
5856 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5857 fputs ("@PLT", file);
5858 break;
5860 case LABEL_REF:
5861 x = XEXP (x, 0);
5862 /* FALLTHRU */
5863 case CODE_LABEL:
5864 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5865 assemble_name (asm_out_file, buf);
5866 break;
5868 case CONST_INT:
5869 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5870 break;
5872 case CONST:
5873 /* This used to output parentheses around the expression,
5874 but that does not work on the 386 (either ATT or BSD assembler). */
5875 output_pic_addr_const (file, XEXP (x, 0), code);
5876 break;
5878 case CONST_DOUBLE:
5879 if (GET_MODE (x) == VOIDmode)
5881 /* We can use %d if the number is <32 bits and positive. */
5882 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5883 fprintf (file, "0x%lx%08lx",
5884 (unsigned long) CONST_DOUBLE_HIGH (x),
5885 (unsigned long) CONST_DOUBLE_LOW (x));
5886 else
5887 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5889 else
5890 /* We can't handle floating point constants;
5891 PRINT_OPERAND must handle them. */
5892 output_operand_lossage ("floating constant misused");
5893 break;
5895 case PLUS:
5896 /* Some assemblers need integer constants to appear first. */
5897 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5899 output_pic_addr_const (file, XEXP (x, 0), code);
5900 putc ('+', file);
5901 output_pic_addr_const (file, XEXP (x, 1), code);
5903 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5905 output_pic_addr_const (file, XEXP (x, 1), code);
5906 putc ('+', file);
5907 output_pic_addr_const (file, XEXP (x, 0), code);
5909 else
5910 abort ();
5911 break;
5913 case MINUS:
5914 if (!TARGET_MACHO)
5915 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5916 output_pic_addr_const (file, XEXP (x, 0), code);
5917 putc ('-', file);
5918 output_pic_addr_const (file, XEXP (x, 1), code);
5919 if (!TARGET_MACHO)
5920 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5921 break;
5923 case UNSPEC:
5924 if (XVECLEN (x, 0) != 1)
5925 abort ();
5926 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5927 switch (XINT (x, 1))
5929 case UNSPEC_GOT:
5930 fputs ("@GOT", file);
5931 break;
5932 case UNSPEC_GOTOFF:
5933 fputs ("@GOTOFF", file);
5934 break;
5935 case UNSPEC_GOTPCREL:
5936 fputs ("@GOTPCREL(%rip)", file);
5937 break;
5938 case UNSPEC_GOTTPOFF:
5939 fputs ("@GOTTPOFF", file);
5940 break;
5941 case UNSPEC_TPOFF:
5942 fputs ("@TPOFF", file);
5943 break;
5944 case UNSPEC_NTPOFF:
5945 fputs ("@NTPOFF", file);
5946 break;
5947 case UNSPEC_DTPOFF:
5948 fputs ("@DTPOFF", file);
5949 break;
5950 default:
5951 output_operand_lossage ("invalid UNSPEC as operand");
5952 break;
5954 break;
5956 default:
5957 output_operand_lossage ("invalid expression as operand");
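/* Illustrative example based on the cases above: an operand of the form
       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   is printed as "foo@GOTOFF", and with UNSPEC_GOTPCREL it becomes
   "foo@GOTPCREL(%rip)".  */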
5961 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5962 We need to handle our special PIC relocations. */
5964 void
5965 i386_dwarf_output_addr_const (file, x)
5966 FILE *file;
5967 rtx x;
5969 #ifdef ASM_QUAD
5970 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5971 #else
5972 if (TARGET_64BIT)
5973 abort ();
5974 fprintf (file, "%s", ASM_LONG);
5975 #endif
5976 if (flag_pic)
5977 output_pic_addr_const (file, x, '\0');
5978 else
5979 output_addr_const (file, x);
5980 fputc ('\n', file);
5983 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
5984 We need to emit DTP-relative relocations. */
5986 void
5987 i386_output_dwarf_dtprel (file, size, x)
5988 FILE *file;
5989 int size;
5990 rtx x;
5992 switch (size)
5994 case 4:
5995 fputs (ASM_LONG, file);
5996 break;
5997 case 8:
5998 #ifdef ASM_QUAD
5999 fputs (ASM_QUAD, file);
6000 break;
6001 #endif
6002 default:
6003 abort ();
6006 output_addr_const (file, x);
6007 fputs ("@DTPOFF", file);
6010 /* In the name of slightly smaller debug output, and to cater to
6011 general assembler lossage, recognize PIC+GOTOFF and turn it back
6012 into a direct symbol reference. */
6015 i386_simplify_dwarf_addr (orig_x)
6016 rtx orig_x;
6018 rtx x = orig_x, y;
6020 if (GET_CODE (x) == MEM)
6021 x = XEXP (x, 0);
6023 if (TARGET_64BIT)
6025 if (GET_CODE (x) != CONST
6026 || GET_CODE (XEXP (x, 0)) != UNSPEC
6027 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6028 || GET_CODE (orig_x) != MEM)
6029 return orig_x;
6030 return XVECEXP (XEXP (x, 0), 0, 0);
6033 if (GET_CODE (x) != PLUS
6034 || GET_CODE (XEXP (x, 1)) != CONST)
6035 return orig_x;
6037 if (GET_CODE (XEXP (x, 0)) == REG
6038 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6039 /* %ebx + GOT/GOTOFF */
6040 y = NULL;
6041 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6043 /* %ebx + %reg * scale + GOT/GOTOFF */
6044 y = XEXP (x, 0);
6045 if (GET_CODE (XEXP (y, 0)) == REG
6046 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6047 y = XEXP (y, 1);
6048 else if (GET_CODE (XEXP (y, 1)) == REG
6049 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6050 y = XEXP (y, 0);
6051 else
6052 return orig_x;
6053 if (GET_CODE (y) != REG
6054 && GET_CODE (y) != MULT
6055 && GET_CODE (y) != ASHIFT)
6056 return orig_x;
6058 else
6059 return orig_x;
6061 x = XEXP (XEXP (x, 1), 0);
6062 if (GET_CODE (x) == UNSPEC
6063 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6064 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6066 if (y)
6067 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6068 return XVECEXP (x, 0, 0);
6071 if (GET_CODE (x) == PLUS
6072 && GET_CODE (XEXP (x, 0)) == UNSPEC
6073 && GET_CODE (XEXP (x, 1)) == CONST_INT
6074 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6075 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6076 && GET_CODE (orig_x) != MEM)))
6078 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6079 if (y)
6080 return gen_rtx_PLUS (Pmode, y, x);
6081 return x;
6084 return orig_x;
6087 static void
6088 put_condition_code (code, mode, reverse, fp, file)
6089 enum rtx_code code;
6090 enum machine_mode mode;
6091 int reverse, fp;
6092 FILE *file;
6094 const char *suffix;
6096 if (mode == CCFPmode || mode == CCFPUmode)
6098 enum rtx_code second_code, bypass_code;
6099 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6100 if (bypass_code != NIL || second_code != NIL)
6101 abort ();
6102 code = ix86_fp_compare_code_to_integer (code);
6103 mode = CCmode;
6105 if (reverse)
6106 code = reverse_condition (code);
6108 switch (code)
6110 case EQ:
6111 suffix = "e";
6112 break;
6113 case NE:
6114 suffix = "ne";
6115 break;
6116 case GT:
6117 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6118 abort ();
6119 suffix = "g";
6120 break;
6121 case GTU:
6122 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6123 Those same assemblers have the same but opposite lossage on cmov. */
6124 if (mode != CCmode)
6125 abort ();
6126 suffix = fp ? "nbe" : "a";
6127 break;
6128 case LT:
6129 if (mode == CCNOmode || mode == CCGOCmode)
6130 suffix = "s";
6131 else if (mode == CCmode || mode == CCGCmode)
6132 suffix = "l";
6133 else
6134 abort ();
6135 break;
6136 case LTU:
6137 if (mode != CCmode)
6138 abort ();
6139 suffix = "b";
6140 break;
6141 case GE:
6142 if (mode == CCNOmode || mode == CCGOCmode)
6143 suffix = "ns";
6144 else if (mode == CCmode || mode == CCGCmode)
6145 suffix = "ge";
6146 else
6147 abort ();
6148 break;
6149 case GEU:
6150 /* ??? As above. */
6151 if (mode != CCmode)
6152 abort ();
6153 suffix = fp ? "nb" : "ae";
6154 break;
6155 case LE:
6156 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6157 abort ();
6158 suffix = "le";
6159 break;
6160 case LEU:
6161 if (mode != CCmode)
6162 abort ();
6163 suffix = "be";
6164 break;
6165 case UNORDERED:
6166 suffix = fp ? "u" : "p";
6167 break;
6168 case ORDERED:
6169 suffix = fp ? "nu" : "np";
6170 break;
6171 default:
6172 abort ();
6174 fputs (suffix, file);
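/* Illustrative examples from the cases above: GE in CCNOmode or CCGOCmode
   prints "ns" (only the sign flag is meaningful), while GE in CCmode or
   CCGCmode prints "ge"; unsigned GTU, which is only valid in CCmode,
   prints "nbe" for the floating-point (fcmov) forms and "a" otherwise.  */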
6177 void
6178 print_reg (x, code, file)
6179 rtx x;
6180 int code;
6181 FILE *file;
6183 if (REGNO (x) == ARG_POINTER_REGNUM
6184 || REGNO (x) == FRAME_POINTER_REGNUM
6185 || REGNO (x) == FLAGS_REG
6186 || REGNO (x) == FPSR_REG)
6187 abort ();
6189 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6190 putc ('%', file);
6192 if (code == 'w' || MMX_REG_P (x))
6193 code = 2;
6194 else if (code == 'b')
6195 code = 1;
6196 else if (code == 'k')
6197 code = 4;
6198 else if (code == 'q')
6199 code = 8;
6200 else if (code == 'y')
6201 code = 3;
6202 else if (code == 'h')
6203 code = 0;
6204 else
6205 code = GET_MODE_SIZE (GET_MODE (x));
6207 /* Irritatingly, AMD extended registers use a different naming convention
6208 from the normal registers. */
6209 if (REX_INT_REG_P (x))
6211 if (!TARGET_64BIT)
6212 abort ();
6213 switch (code)
6215 case 0:
6216 error ("extended registers have no high halves");
6217 break;
6218 case 1:
6219 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6220 break;
6221 case 2:
6222 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6223 break;
6224 case 4:
6225 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6226 break;
6227 case 8:
6228 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6229 break;
6230 default:
6231 error ("unsupported operand size for extended register");
6232 break;
6234 return;
6236 switch (code)
6238 case 3:
6239 if (STACK_TOP_P (x))
6241 fputs ("st(0)", file);
6242 break;
6244 /* FALLTHRU */
6245 case 8:
6246 case 4:
6247 case 12:
6248 if (! ANY_FP_REG_P (x))
6249 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6250 /* FALLTHRU */
6251 case 16:
6252 case 2:
6253 fputs (hi_reg_name[REGNO (x)], file);
6254 break;
6255 case 1:
6256 fputs (qi_reg_name[REGNO (x)], file);
6257 break;
6258 case 0:
6259 fputs (qi_high_reg_name[REGNO (x)], file);
6260 break;
6261 default:
6262 abort ();
6266 /* Locate some local-dynamic symbol still in use by this function
6267 so that we can print its name in some tls_local_dynamic_base
6268 pattern. */
6270 static const char *
6271 get_some_local_dynamic_name ()
6273 rtx insn;
6275 if (cfun->machine->some_ld_name)
6276 return cfun->machine->some_ld_name;
6278 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6279 if (INSN_P (insn)
6280 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6281 return cfun->machine->some_ld_name;
6283 abort ();
6286 static int
6287 get_some_local_dynamic_name_1 (px, data)
6288 rtx *px;
6289 void *data ATTRIBUTE_UNUSED;
6291 rtx x = *px;
6293 if (GET_CODE (x) == SYMBOL_REF
6294 && local_dynamic_symbolic_operand (x, Pmode))
6296 cfun->machine->some_ld_name = XSTR (x, 0);
6297 return 1;
6300 return 0;
6303 /* Meaning of CODE:
6304 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6305 C -- print opcode suffix for set/cmov insn.
6306 c -- like C, but print reversed condition
6307 F,f -- likewise, but for floating-point.
6308 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6309 nothing
6310 R -- print the prefix for register names.
6311 z -- print the opcode suffix for the size of the current operand.
6312 * -- print a star (in certain assembler syntax)
6313 A -- print an absolute memory reference.
6314 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6315 s -- print a shift double count, followed by the assemblers argument
6316 delimiter.
6317 b -- print the QImode name of the register for the indicated operand.
6318 %b0 would print %al if operands[0] is reg 0.
6319 w -- likewise, print the HImode name of the register.
6320 k -- likewise, print the SImode name of the register.
6321 q -- likewise, print the DImode name of the register.
6322 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6323 y -- print "st(0)" instead of "st" as a register.
6324 D -- print condition for SSE cmp instruction.
6325 P -- if PIC, print an @PLT suffix.
6326 X -- don't print any sort of PIC '@' suffix for a symbol.
6327 & -- print some in-use local-dynamic symbol name.
6330 void
6331 print_operand (file, x, code)
6332 FILE *file;
6333 rtx x;
6334 int code;
6336 if (code)
6338 switch (code)
6340 case '*':
6341 if (ASSEMBLER_DIALECT == ASM_ATT)
6342 putc ('*', file);
6343 return;
6345 case '&':
6346 assemble_name (file, get_some_local_dynamic_name ());
6347 return;
6349 case 'A':
6350 if (ASSEMBLER_DIALECT == ASM_ATT)
6351 putc ('*', file);
6352 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6354 /* Intel syntax. For absolute addresses, registers should not
6355 be surrounded by brackets. */
6356 if (GET_CODE (x) != REG)
6358 putc ('[', file);
6359 PRINT_OPERAND (file, x, 0);
6360 putc (']', file);
6361 return;
6364 else
6365 abort ();
6367 PRINT_OPERAND (file, x, 0);
6368 return;
6371 case 'L':
6372 if (ASSEMBLER_DIALECT == ASM_ATT)
6373 putc ('l', file);
6374 return;
6376 case 'W':
6377 if (ASSEMBLER_DIALECT == ASM_ATT)
6378 putc ('w', file);
6379 return;
6381 case 'B':
6382 if (ASSEMBLER_DIALECT == ASM_ATT)
6383 putc ('b', file);
6384 return;
6386 case 'Q':
6387 if (ASSEMBLER_DIALECT == ASM_ATT)
6388 putc ('l', file);
6389 return;
6391 case 'S':
6392 if (ASSEMBLER_DIALECT == ASM_ATT)
6393 putc ('s', file);
6394 return;
6396 case 'T':
6397 if (ASSEMBLER_DIALECT == ASM_ATT)
6398 putc ('t', file);
6399 return;
6401 case 'z':
6402 /* 387 opcodes don't get size suffixes if the operands are
6403 registers. */
6404 if (STACK_REG_P (x))
6405 return;
6407 /* Likewise if using Intel opcodes. */
6408 if (ASSEMBLER_DIALECT == ASM_INTEL)
6409 return;
6411 /* The opcode suffix is derived from the size of the operand. */
6412 switch (GET_MODE_SIZE (GET_MODE (x)))
6414 case 2:
6415 #ifdef HAVE_GAS_FILDS_FISTS
6416 putc ('s', file);
6417 #endif
6418 return;
6420 case 4:
6421 if (GET_MODE (x) == SFmode)
6423 putc ('s', file);
6424 return;
6426 else
6427 putc ('l', file);
6428 return;
6430 case 12:
6431 case 16:
6432 putc ('t', file);
6433 return;
6435 case 8:
6436 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6438 #ifdef GAS_MNEMONICS
6439 putc ('q', file);
6440 #else
6441 putc ('l', file);
6442 putc ('l', file);
6443 #endif
6445 else
6446 putc ('l', file);
6447 return;
6449 default:
6450 abort ();
6453 case 'b':
6454 case 'w':
6455 case 'k':
6456 case 'q':
6457 case 'h':
6458 case 'y':
6459 case 'X':
6460 case 'P':
6461 break;
6463 case 's':
6464 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6466 PRINT_OPERAND (file, x, 0);
6467 putc (',', file);
6469 return;
6471 case 'D':
6472 /* A little bit of brain damage here. The SSE compare instructions
6473 use completely different names for the comparisons than the
6474 fp conditional moves do. */
6475 switch (GET_CODE (x))
6477 case EQ:
6478 case UNEQ:
6479 fputs ("eq", file);
6480 break;
6481 case LT:
6482 case UNLT:
6483 fputs ("lt", file);
6484 break;
6485 case LE:
6486 case UNLE:
6487 fputs ("le", file);
6488 break;
6489 case UNORDERED:
6490 fputs ("unord", file);
6491 break;
6492 case NE:
6493 case LTGT:
6494 fputs ("neq", file);
6495 break;
6496 case UNGE:
6497 case GE:
6498 fputs ("nlt", file);
6499 break;
6500 case UNGT:
6501 case GT:
6502 fputs ("nle", file);
6503 break;
6504 case ORDERED:
6505 fputs ("ord", file);
6506 break;
6507 default:
6508 abort ();
6509 break;
6511 return;
6512 case 'O':
6513 #ifdef CMOV_SUN_AS_SYNTAX
6514 if (ASSEMBLER_DIALECT == ASM_ATT)
6516 switch (GET_MODE (x))
6518 case HImode: putc ('w', file); break;
6519 case SImode:
6520 case SFmode: putc ('l', file); break;
6521 case DImode:
6522 case DFmode: putc ('q', file); break;
6523 default: abort ();
6525 putc ('.', file);
6527 #endif
6528 return;
6529 case 'C':
6530 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6531 return;
6532 case 'F':
6533 #ifdef CMOV_SUN_AS_SYNTAX
6534 if (ASSEMBLER_DIALECT == ASM_ATT)
6535 putc ('.', file);
6536 #endif
6537 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6538 return;
6540 /* Like above, but reverse condition */
6541 case 'c':
6542 /* Check to see if argument to %c is really a constant
6543 and not a condition code which needs to be reversed. */
6544 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6546 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6547 return;
6549 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6550 return;
6551 case 'f':
6552 #ifdef CMOV_SUN_AS_SYNTAX
6553 if (ASSEMBLER_DIALECT == ASM_ATT)
6554 putc ('.', file);
6555 #endif
6556 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6557 return;
6558 case '+':
6560 rtx x;
6562 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6563 return;
6565 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6566 if (x)
6568 int pred_val = INTVAL (XEXP (x, 0));
6570 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6571 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6573 int taken = pred_val > REG_BR_PROB_BASE / 2;
6574 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6576 /* Emit hints only in the case the default branch prediction
6577 heuristics would fail. */
6578 if (taken != cputaken)
6580 /* We use 3e (DS) prefix for taken branches and
6581 2e (CS) prefix for not taken branches. */
6582 if (taken)
6583 fputs ("ds ; ", file);
6584 else
6585 fputs ("cs ; ", file);
6589 return;
6591 default:
6592 output_operand_lossage ("invalid operand code `%c'", code);
6596 if (GET_CODE (x) == REG)
6598 PRINT_REG (x, code, file);
6601 else if (GET_CODE (x) == MEM)
6603 /* No `byte ptr' prefix for call instructions. */
6604 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6606 const char * size;
6607 switch (GET_MODE_SIZE (GET_MODE (x)))
6609 case 1: size = "BYTE"; break;
6610 case 2: size = "WORD"; break;
6611 case 4: size = "DWORD"; break;
6612 case 8: size = "QWORD"; break;
6613 case 12: size = "XWORD"; break;
6614 case 16: size = "XMMWORD"; break;
6615 default:
6616 abort ();
6619 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6620 if (code == 'b')
6621 size = "BYTE";
6622 else if (code == 'w')
6623 size = "WORD";
6624 else if (code == 'k')
6625 size = "DWORD";
6627 fputs (size, file);
6628 fputs (" PTR ", file);
6631 x = XEXP (x, 0);
6632 if (flag_pic && CONSTANT_ADDRESS_P (x))
6633 output_pic_addr_const (file, x, code);
6634 /* Avoid (%rip) for call operands. */
6635 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6636 && GET_CODE (x) != CONST_INT)
6637 output_addr_const (file, x);
6638 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6639 output_operand_lossage ("invalid constraints for operand");
6640 else
6641 output_address (x);
6644 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6646 REAL_VALUE_TYPE r;
6647 long l;
6649 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6650 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6652 if (ASSEMBLER_DIALECT == ASM_ATT)
6653 putc ('$', file);
6654 fprintf (file, "0x%lx", l);
6657 /* These float cases don't actually occur as immediate operands. */
6658 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6660 REAL_VALUE_TYPE r;
6661 char dstr[30];
6663 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6664 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6665 fprintf (file, "%s", dstr);
6668 else if (GET_CODE (x) == CONST_DOUBLE
6669 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6671 REAL_VALUE_TYPE r;
6672 char dstr[30];
6674 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6675 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6676 fprintf (file, "%s", dstr);
6679 else
6681 if (code != 'P')
6683 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6685 if (ASSEMBLER_DIALECT == ASM_ATT)
6686 putc ('$', file);
6688 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6689 || GET_CODE (x) == LABEL_REF)
6691 if (ASSEMBLER_DIALECT == ASM_ATT)
6692 putc ('$', file);
6693 else
6694 fputs ("OFFSET FLAT:", file);
6697 if (GET_CODE (x) == CONST_INT)
6698 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6699 else if (flag_pic)
6700 output_pic_addr_const (file, x, code);
6701 else
6702 output_addr_const (file, x);
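/* Usage sketch (hypothetical operands): if operands[0] is hard register 0,
   "%b0" prints %al, "%w0" prints %ax and "%k0" prints %eax under AT&T
   output; with an SFmode memory operand, "%z1" supplies the 's' suffix,
   so a template "fld%z1\t%1" comes out as "flds" applied to the operand.  */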
6706 /* Print a memory operand whose address is ADDR. */
6708 void
6709 print_operand_address (file, addr)
6710 FILE *file;
6711 register rtx addr;
6713 struct ix86_address parts;
6714 rtx base, index, disp;
6715 int scale;
6717 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6719 if (ASSEMBLER_DIALECT == ASM_INTEL)
6720 fputs ("DWORD PTR ", file);
6721 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6722 putc ('%', file);
6723 fputs ("gs:0", file);
6724 return;
6727 if (! ix86_decompose_address (addr, &parts))
6728 abort ();
6730 base = parts.base;
6731 index = parts.index;
6732 disp = parts.disp;
6733 scale = parts.scale;
6735 if (!base && !index)
6737 /* A displacement-only address requires special attention. */
6739 if (GET_CODE (disp) == CONST_INT)
6741 if (ASSEMBLER_DIALECT == ASM_INTEL)
6743 if (USER_LABEL_PREFIX[0] == 0)
6744 putc ('%', file);
6745 fputs ("ds:", file);
6747 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6749 else if (flag_pic)
6750 output_pic_addr_const (file, addr, 0);
6751 else
6752 output_addr_const (file, addr);
6754 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6755 if (TARGET_64BIT
6756 && (GET_CODE (addr) == SYMBOL_REF
6757 || GET_CODE (addr) == LABEL_REF
6758 || (GET_CODE (addr) == CONST
6759 && GET_CODE (XEXP (addr, 0)) == PLUS
6760 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6761 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6762 fputs ("(%rip)", file);
6764 else
6766 if (ASSEMBLER_DIALECT == ASM_ATT)
6768 if (disp)
6770 if (flag_pic)
6771 output_pic_addr_const (file, disp, 0);
6772 else if (GET_CODE (disp) == LABEL_REF)
6773 output_asm_label (disp);
6774 else
6775 output_addr_const (file, disp);
6778 putc ('(', file);
6779 if (base)
6780 PRINT_REG (base, 0, file);
6781 if (index)
6783 putc (',', file);
6784 PRINT_REG (index, 0, file);
6785 if (scale != 1)
6786 fprintf (file, ",%d", scale);
6788 putc (')', file);
6790 else
6792 rtx offset = NULL_RTX;
6794 if (disp)
6796 /* Pull out the offset of a symbol; print any symbol itself. */
6797 if (GET_CODE (disp) == CONST
6798 && GET_CODE (XEXP (disp, 0)) == PLUS
6799 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6801 offset = XEXP (XEXP (disp, 0), 1);
6802 disp = gen_rtx_CONST (VOIDmode,
6803 XEXP (XEXP (disp, 0), 0));
6806 if (flag_pic)
6807 output_pic_addr_const (file, disp, 0);
6808 else if (GET_CODE (disp) == LABEL_REF)
6809 output_asm_label (disp);
6810 else if (GET_CODE (disp) == CONST_INT)
6811 offset = disp;
6812 else
6813 output_addr_const (file, disp);
6816 putc ('[', file);
6817 if (base)
6819 PRINT_REG (base, 0, file);
6820 if (offset)
6822 if (INTVAL (offset) >= 0)
6823 putc ('+', file);
6824 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6827 else if (offset)
6828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6829 else
6830 putc ('0', file);
6832 if (index)
6834 putc ('+', file);
6835 PRINT_REG (index, 0, file);
6836 if (scale != 1)
6837 fprintf (file, "*%d", scale);
6839 putc (']', file);
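/* Illustrative output (hypothetical address with base %ebp, index %eax,
   scale 4 and displacement -4): the AT&T branch prints "-4(%ebp,%eax,4)",
   while the Intel branch prints "[ebp-4+eax*4]" (modulo the register
   prefix, which depends on USER_LABEL_PREFIX), folding the constant
   offset next to the base as coded above.  */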
6844 bool
6845 output_addr_const_extra (file, x)
6846 FILE *file;
6847 rtx x;
6849 rtx op;
6851 if (GET_CODE (x) != UNSPEC)
6852 return false;
6854 op = XVECEXP (x, 0, 0);
6855 switch (XINT (x, 1))
6857 case UNSPEC_GOTTPOFF:
6858 output_addr_const (file, op);
6859 fputs ("@GOTTPOFF", file);
6860 break;
6861 case UNSPEC_TPOFF:
6862 output_addr_const (file, op);
6863 fputs ("@TPOFF", file);
6864 break;
6865 case UNSPEC_NTPOFF:
6866 output_addr_const (file, op);
6867 fputs ("@NTPOFF", file);
6868 break;
6869 case UNSPEC_DTPOFF:
6870 output_addr_const (file, op);
6871 fputs ("@DTPOFF", file);
6872 break;
6874 default:
6875 return false;
6878 return true;
6881 /* Split one or more DImode RTL references into pairs of SImode
6882 references. The RTL can be REG, offsettable MEM, integer constant, or
6883 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6884 split and "num" is its length. lo_half and hi_half are output arrays
6885 that parallel "operands". */
6887 void
6888 split_di (operands, num, lo_half, hi_half)
6889 rtx operands[];
6890 int num;
6891 rtx lo_half[], hi_half[];
6893 while (num--)
6895 rtx op = operands[num];
6897 /* simplify_subreg refuses to split volatile memory addresses,
6898 but we still have to handle them. */
6899 if (GET_CODE (op) == MEM)
6901 lo_half[num] = adjust_address (op, SImode, 0);
6902 hi_half[num] = adjust_address (op, SImode, 4);
6904 else
6906 lo_half[num] = simplify_gen_subreg (SImode, op,
6907 GET_MODE (op) == VOIDmode
6908 ? DImode : GET_MODE (op), 0);
6909 hi_half[num] = simplify_gen_subreg (SImode, op,
6910 GET_MODE (op) == VOIDmode
6911 ? DImode : GET_MODE (op), 4);
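/* Sketch (hypothetical operands): a DImode register is split into SImode
   subregs at byte offsets 0 and 4 (low and high halves on this
   little-endian target), while an offsettable MEM such as a stack slot at
   8(%esp) is split with adjust_address into 8(%esp) and 12(%esp).  */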
6915 /* Split one or more TImode RTL references into pairs of DImode
6916 references. The RTL can be REG, offsettable MEM, integer constant, or
6917 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6918 split and "num" is its length. lo_half and hi_half are output arrays
6919 that parallel "operands". */
6921 void
6922 split_ti (operands, num, lo_half, hi_half)
6923 rtx operands[];
6924 int num;
6925 rtx lo_half[], hi_half[];
6927 while (num--)
6929 rtx op = operands[num];
6931 /* simplify_subreg refuses to split volatile memory addresses, but we
6932 still have to handle them. */
6933 if (GET_CODE (op) == MEM)
6935 lo_half[num] = adjust_address (op, DImode, 0);
6936 hi_half[num] = adjust_address (op, DImode, 8);
6938 else
6940 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6941 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6946 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6947 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6948 is the expression of the binary operation. The output may either be
6949 emitted here, or returned to the caller, like all output_* functions.
6951 There is no guarantee that the operands are the same mode, as they
6952 might be within FLOAT or FLOAT_EXTEND expressions. */
6954 #ifndef SYSV386_COMPAT
6955 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6956 wants to fix the assemblers because that causes incompatibility
6957 with gcc. No-one wants to fix gcc because that causes
6958 incompatibility with assemblers... You can use the option of
6959 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6960 #define SYSV386_COMPAT 1
6961 #endif
6963 const char *
6964 output_387_binary_op (insn, operands)
6965 rtx insn;
6966 rtx *operands;
6968 static char buf[30];
6969 const char *p;
6970 const char *ssep;
6971 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6973 #ifdef ENABLE_CHECKING
6974 /* Even if we do not want to check the inputs, this documents the input
6975 constraints, which helps in understanding the following code. */
6976 if (STACK_REG_P (operands[0])
6977 && ((REG_P (operands[1])
6978 && REGNO (operands[0]) == REGNO (operands[1])
6979 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6980 || (REG_P (operands[2])
6981 && REGNO (operands[0]) == REGNO (operands[2])
6982 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6983 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6984 ; /* ok */
6985 else if (!is_sse)
6986 abort ();
6987 #endif
6989 switch (GET_CODE (operands[3]))
6991 case PLUS:
6992 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6993 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6994 p = "fiadd";
6995 else
6996 p = "fadd";
6997 ssep = "add";
6998 break;
7000 case MINUS:
7001 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7002 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7003 p = "fisub";
7004 else
7005 p = "fsub";
7006 ssep = "sub";
7007 break;
7009 case MULT:
7010 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7011 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7012 p = "fimul";
7013 else
7014 p = "fmul";
7015 ssep = "mul";
7016 break;
7018 case DIV:
7019 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7020 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7021 p = "fidiv";
7022 else
7023 p = "fdiv";
7024 ssep = "div";
7025 break;
7027 default:
7028 abort ();
7031 if (is_sse)
7033 strcpy (buf, ssep);
7034 if (GET_MODE (operands[0]) == SFmode)
7035 strcat (buf, "ss\t{%2, %0|%0, %2}");
7036 else
7037 strcat (buf, "sd\t{%2, %0|%0, %2}");
7038 return buf;
7040 strcpy (buf, p);
7042 switch (GET_CODE (operands[3]))
7044 case MULT:
7045 case PLUS:
7046 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7048 rtx temp = operands[2];
7049 operands[2] = operands[1];
7050 operands[1] = temp;
7053 /* Now we know operands[0] == operands[1]. */
7055 if (GET_CODE (operands[2]) == MEM)
7057 p = "%z2\t%2";
7058 break;
7061 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7063 if (STACK_TOP_P (operands[0]))
7064 /* How is it that we are storing to a dead operand[2]?
7065 Well, presumably operands[1] is dead too. We can't
7066 store the result to st(0) as st(0) gets popped on this
7067 instruction. Instead store to operands[2] (which I
7068 think has to be st(1)). st(1) will be popped later.
7069 gcc <= 2.8.1 didn't have this check and generated
7070 assembly code that the Unixware assembler rejected. */
7071 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7072 else
7073 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7074 break;
7077 if (STACK_TOP_P (operands[0]))
7078 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7079 else
7080 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7081 break;
7083 case MINUS:
7084 case DIV:
7085 if (GET_CODE (operands[1]) == MEM)
7087 p = "r%z1\t%1";
7088 break;
7091 if (GET_CODE (operands[2]) == MEM)
7093 p = "%z2\t%2";
7094 break;
7097 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7099 #if SYSV386_COMPAT
7100 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7101 derived assemblers, confusingly reverse the direction of
7102 the operation for fsub{r} and fdiv{r} when the
7103 destination register is not st(0). The Intel assembler
7104 doesn't have this brain damage. Read !SYSV386_COMPAT to
7105 figure out what the hardware really does. */
7106 if (STACK_TOP_P (operands[0]))
7107 p = "{p\t%0, %2|rp\t%2, %0}";
7108 else
7109 p = "{rp\t%2, %0|p\t%0, %2}";
7110 #else
7111 if (STACK_TOP_P (operands[0]))
7112 /* As above for fmul/fadd, we can't store to st(0). */
7113 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7114 else
7115 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7116 #endif
7117 break;
7120 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7122 #if SYSV386_COMPAT
7123 if (STACK_TOP_P (operands[0]))
7124 p = "{rp\t%0, %1|p\t%1, %0}";
7125 else
7126 p = "{p\t%1, %0|rp\t%0, %1}";
7127 #else
7128 if (STACK_TOP_P (operands[0]))
7129 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7130 else
7131 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7132 #endif
7133 break;
7136 if (STACK_TOP_P (operands[0]))
7138 if (STACK_TOP_P (operands[1]))
7139 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7140 else
7141 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7142 break;
7144 else if (STACK_TOP_P (operands[1]))
7146 #if SYSV386_COMPAT
7147 p = "{\t%1, %0|r\t%0, %1}";
7148 #else
7149 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7150 #endif
7152 else
7154 #if SYSV386_COMPAT
7155 p = "{r\t%2, %0|\t%0, %2}";
7156 #else
7157 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7158 #endif
7160 break;
7162 default:
7163 abort ();
7166 strcat (buf, p);
7167 return buf;
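/* Worked example (hypothetical operands): for operands[3] = (plus:SF ...),
   operands[0] = operands[1] = st(0) and operands[2] an SFmode MEM, the code
   above builds "fadd%z2\t%2"; print_operand's %z2 then supplies the 's'
   suffix, giving "fadds" applied to the memory operand.  */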
7170 /* Output code to initialize control word copies used by
7171 trunc?f?i patterns. NORMAL is set to the current control word, while ROUND_DOWN
7172 is set to a control word that rounds downwards. */
7173 void
7174 emit_i387_cw_initialization (normal, round_down)
7175 rtx normal, round_down;
7177 rtx reg = gen_reg_rtx (HImode);
7179 emit_insn (gen_x86_fnstcw_1 (normal));
7180 emit_move_insn (reg, normal);
7181 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7182 && !TARGET_64BIT)
7183 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7184 else
7185 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7186 emit_move_insn (round_down, reg);
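/* Illustrative values: OR-ing in 0xc00 sets both rounding-control bits
   (bits 10 and 11) of the saved x87 control word, selecting truncation
   toward zero; e.g. a typical default word of 0x037f becomes 0x0f7f,
   which the trunc?f?i patterns load while converting.  */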
7189 /* Output code for INSN to convert a float to a signed int. OPERANDS
7190 are the insn operands. The output may be [HSD]Imode and the input
7191 operand may be [SDX]Fmode. */
7193 const char *
7194 output_fix_trunc (insn, operands)
7195 rtx insn;
7196 rtx *operands;
7198 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7199 int dimode_p = GET_MODE (operands[0]) == DImode;
7201 /* Jump through a hoop or two for DImode, since the hardware has no
7202 non-popping instruction. We used to do this a different way, but
7203 that was somewhat fragile and broke with post-reload splitters. */
7204 if (dimode_p && !stack_top_dies)
7205 output_asm_insn ("fld\t%y1", operands);
7207 if (!STACK_TOP_P (operands[1]))
7208 abort ();
7210 if (GET_CODE (operands[0]) != MEM)
7211 abort ();
7213 output_asm_insn ("fldcw\t%3", operands);
7214 if (stack_top_dies || dimode_p)
7215 output_asm_insn ("fistp%z0\t%0", operands);
7216 else
7217 output_asm_insn ("fist%z0\t%0", operands);
7218 output_asm_insn ("fldcw\t%2", operands);
7220 return "";
7223 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7224 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7225 when fucom should be used. */
7227 const char *
7228 output_fp_compare (insn, operands, eflags_p, unordered_p)
7229 rtx insn;
7230 rtx *operands;
7231 int eflags_p, unordered_p;
7233 int stack_top_dies;
7234 rtx cmp_op0 = operands[0];
7235 rtx cmp_op1 = operands[1];
7236 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7238 if (eflags_p == 2)
7240 cmp_op0 = cmp_op1;
7241 cmp_op1 = operands[2];
7243 if (is_sse)
7245 if (GET_MODE (operands[0]) == SFmode)
7246 if (unordered_p)
7247 return "ucomiss\t{%1, %0|%0, %1}";
7248 else
7249 return "comiss\t{%1, %0|%0, %y}";
7250 else
7251 if (unordered_p)
7252 return "ucomisd\t{%1, %0|%0, %1}";
7253 else
7254 return "comisd\t{%1, %0|%0, %y}";
7257 if (! STACK_TOP_P (cmp_op0))
7258 abort ();
7260 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7262 if (STACK_REG_P (cmp_op1)
7263 && stack_top_dies
7264 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7265 && REGNO (cmp_op1) != FIRST_STACK_REG)
7267 /* If the top of the 387 stack dies, and the other operand
7268 is also a stack register that dies, then this must be an
7269 `fcompp' float compare. */
7271 if (eflags_p == 1)
7273 /* There is no double popping fcomi variant. Fortunately,
7274 eflags is immune from the fstp's cc clobbering. */
7275 if (unordered_p)
7276 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7277 else
7278 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7279 return "fstp\t%y0";
7281 else
7283 if (eflags_p == 2)
7285 if (unordered_p)
7286 return "fucompp\n\tfnstsw\t%0";
7287 else
7288 return "fcompp\n\tfnstsw\t%0";
7290 else
7292 if (unordered_p)
7293 return "fucompp";
7294 else
7295 return "fcompp";
7299 else
7301 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7303 static const char * const alt[24] =
7305 "fcom%z1\t%y1",
7306 "fcomp%z1\t%y1",
7307 "fucom%z1\t%y1",
7308 "fucomp%z1\t%y1",
7310 "ficom%z1\t%y1",
7311 "ficomp%z1\t%y1",
7312 NULL,
7313 NULL,
7315 "fcomi\t{%y1, %0|%0, %y1}",
7316 "fcomip\t{%y1, %0|%0, %y1}",
7317 "fucomi\t{%y1, %0|%0, %y1}",
7318 "fucomip\t{%y1, %0|%0, %y1}",
7320 NULL,
7321 NULL,
7322 NULL,
7323 NULL,
7325 "fcom%z2\t%y2\n\tfnstsw\t%0",
7326 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7327 "fucom%z2\t%y2\n\tfnstsw\t%0",
7328 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7330 "ficom%z2\t%y2\n\tfnstsw\t%0",
7331 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7332 NULL,
7333 NULL
7336 int mask;
7337 const char *ret;
7339 mask = eflags_p << 3;
7340 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7341 mask |= unordered_p << 1;
7342 mask |= stack_top_dies;
7344 if (mask >= 24)
7345 abort ();
7346 ret = alt[mask];
7347 if (ret == NULL)
7348 abort ();
7350 return ret;
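/* Worked example from the encoding above: with eflags_p == 1 (fcomi), a
   floating-point operand, unordered_p set and the stack top dying, mask is
   (1 << 3) | (0 << 2) | (1 << 1) | 1 == 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */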
7354 void
7355 ix86_output_addr_vec_elt (file, value)
7356 FILE *file;
7357 int value;
7359 const char *directive = ASM_LONG;
7361 if (TARGET_64BIT)
7363 #ifdef ASM_QUAD
7364 directive = ASM_QUAD;
7365 #else
7366 abort ();
7367 #endif
7370 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7373 void
7374 ix86_output_addr_diff_elt (file, value, rel)
7375 FILE *file;
7376 int value, rel;
7378 if (TARGET_64BIT)
7379 fprintf (file, "%s%s%d-%s%d\n",
7380 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7381 else if (HAVE_AS_GOTOFF_IN_DATA)
7382 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7383 #if TARGET_MACHO
7384 else if (TARGET_MACHO)
7385 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7386 machopic_function_base_name () + 1);
7387 #endif
7388 else
7389 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7390 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7393 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7394 for the target. */
7396 void
7397 ix86_expand_clear (dest)
7398 rtx dest;
7400 rtx tmp;
7402 /* We play register width games, which are only valid after reload. */
7403 if (!reload_completed)
7404 abort ();
7406 /* Avoid HImode and its attendant prefix byte. */
7407 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7408 dest = gen_rtx_REG (SImode, REGNO (dest));
7410 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7412 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7413 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7415 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7416 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7419 emit_insn (tmp);
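/* Sketch (hypothetical operand): clearing (reg:QI 0) after reload first
   widens the destination to (reg:SI 0); unless TARGET_USE_MOV0 is set and
   we are not optimizing for size, the SET is wrapped with a flags clobber
   so the xor pattern matches, yielding "xorl %eax, %eax" in AT&T syntax
   rather than "movl $0, %eax".  */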
7422 /* X is an unchanging MEM. If it is a constant pool reference, return
7423 the constant pool rtx, else NULL. */
7425 static rtx
7426 maybe_get_pool_constant (x)
7427 rtx x;
7429 x = XEXP (x, 0);
7431 if (flag_pic)
7433 if (GET_CODE (x) != PLUS)
7434 return NULL_RTX;
7435 if (XEXP (x, 0) != pic_offset_table_rtx)
7436 return NULL_RTX;
7437 x = XEXP (x, 1);
7438 if (GET_CODE (x) != CONST)
7439 return NULL_RTX;
7440 x = XEXP (x, 0);
7441 if (GET_CODE (x) != UNSPEC)
7442 return NULL_RTX;
7443 if (XINT (x, 1) != UNSPEC_GOTOFF)
7444 return NULL_RTX;
7445 x = XVECEXP (x, 0, 0);
7448 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7449 return get_pool_constant (x);
7451 return NULL_RTX;
7454 void
7455 ix86_expand_move (mode, operands)
7456 enum machine_mode mode;
7457 rtx operands[];
7459 int strict = (reload_in_progress || reload_completed);
7460 rtx insn, op0, op1, tmp;
7462 op0 = operands[0];
7463 op1 = operands[1];
7465 /* ??? We have a slight problem. We need to say that tls symbols are
7466 not legitimate constants so that reload does not helpfully reload
7467 these constants from a REG_EQUIV, which we cannot handle. (Recall
7468 that general- and local-dynamic address resolution requires a
7469 function call.)
7471 However, if we say that tls symbols are not legitimate constants,
7472 then emit_move_insn will helpfully drop them into the constant pool.
7474 It is far easier to work around emit_move_insn than reload. Recognize
7475 the MEM that we would have created and extract the symbol_ref. */
7477 if (mode == Pmode
7478 && GET_CODE (op1) == MEM
7479 && RTX_UNCHANGING_P (op1))
7481 tmp = maybe_get_pool_constant (op1);
7482 /* Note that we only care about symbolic constants here, which
7483 unlike CONST_INT will always have a proper mode. */
7484 if (tmp && GET_MODE (tmp) == Pmode)
7485 op1 = tmp;
7488 if (tls_symbolic_operand (op1, Pmode))
7490 op1 = legitimize_address (op1, op1, VOIDmode);
7491 if (GET_CODE (op0) == MEM)
7493 tmp = gen_reg_rtx (mode);
7494 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7495 op1 = tmp;
7498 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7500 #if TARGET_MACHO
7501 if (MACHOPIC_PURE)
7503 rtx temp = ((reload_in_progress
7504 || ((op0 && GET_CODE (op0) == REG)
7505 && mode == Pmode))
7506 ? op0 : gen_reg_rtx (Pmode));
7507 op1 = machopic_indirect_data_reference (op1, temp);
7508 op1 = machopic_legitimize_pic_address (op1, mode,
7509 temp == op1 ? 0 : temp);
7511 else
7513 if (MACHOPIC_INDIRECT)
7514 op1 = machopic_indirect_data_reference (op1, 0);
7516 if (op0 != op1)
7518 insn = gen_rtx_SET (VOIDmode, op0, op1);
7519 emit_insn (insn);
7521 return;
7522 #endif /* TARGET_MACHO */
7523 if (GET_CODE (op0) == MEM)
7524 op1 = force_reg (Pmode, op1);
7525 else
7527 rtx temp = op0;
7528 if (GET_CODE (temp) != REG)
7529 temp = gen_reg_rtx (Pmode);
7530 temp = legitimize_pic_address (op1, temp);
7531 if (temp == op0)
7532 return;
7533 op1 = temp;
7536 else
7538 if (GET_CODE (op0) == MEM
7539 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7540 || !push_operand (op0, mode))
7541 && GET_CODE (op1) == MEM)
7542 op1 = force_reg (mode, op1);
7544 if (push_operand (op0, mode)
7545 && ! general_no_elim_operand (op1, mode))
7546 op1 = copy_to_mode_reg (mode, op1);
7548 /* Force large constants in 64-bit compilation into a register
7549 so that they get CSEd. */
7550 if (TARGET_64BIT && mode == DImode
7551 && immediate_operand (op1, mode)
7552 && !x86_64_zero_extended_value (op1)
7553 && !register_operand (op0, mode)
7554 && optimize && !reload_completed && !reload_in_progress)
7555 op1 = copy_to_mode_reg (mode, op1);
7557 if (FLOAT_MODE_P (mode))
7559 /* If we are loading a floating point constant to a register,
7560 force the value to memory now, since we'll get better code
7561 out of the back end. */
7563 if (strict)
7565 else if (GET_CODE (op1) == CONST_DOUBLE
7566 && register_operand (op0, mode))
7567 op1 = validize_mem (force_const_mem (mode, op1));
7571 insn = gen_rtx_SET (VOIDmode, op0, op1);
7573 emit_insn (insn);
7576 void
7577 ix86_expand_vector_move (mode, operands)
7578 enum machine_mode mode;
7579 rtx operands[];
7581 /* Force constants other than zero into memory. We do not know how
7582 the instructions used to build constants modify the upper 64 bits
7583 of the register; once we have that information we may be able
7584 to handle some of them more efficiently. */
7585 if ((reload_in_progress | reload_completed) == 0
7586 && register_operand (operands[0], mode)
7587 && CONSTANT_P (operands[1]))
7589 rtx addr = gen_reg_rtx (Pmode);
7590 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7591 operands[1] = gen_rtx_MEM (mode, addr);
7594 /* Make operand1 a register if it isn't already. */
7595 if ((reload_in_progress | reload_completed) == 0
7596 && !register_operand (operands[0], mode)
7597 && !register_operand (operands[1], mode)
7598 && operands[1] != CONST0_RTX (mode))
7600 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7601 emit_move_insn (operands[0], temp);
7602 return;
7605 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7608 /* Attempt to expand a binary operator. Make the expansion closer to the
7609 actual machine, than just general_operand, which will allow 3 separate
7610 memory references (one output, two input) in a single insn. */
7612 void
7613 ix86_expand_binary_operator (code, mode, operands)
7614 enum rtx_code code;
7615 enum machine_mode mode;
7616 rtx operands[];
7618 int matching_memory;
7619 rtx src1, src2, dst, op, clob;
7621 dst = operands[0];
7622 src1 = operands[1];
7623 src2 = operands[2];
7625 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7626 if (GET_RTX_CLASS (code) == 'c'
7627 && (rtx_equal_p (dst, src2)
7628 || immediate_operand (src1, mode)))
7630 rtx temp = src1;
7631 src1 = src2;
7632 src2 = temp;
7635 /* If the destination is memory, and we do not have matching source
7636 operands, do things in registers. */
7637 matching_memory = 0;
7638 if (GET_CODE (dst) == MEM)
7640 if (rtx_equal_p (dst, src1))
7641 matching_memory = 1;
7642 else if (GET_RTX_CLASS (code) == 'c'
7643 && rtx_equal_p (dst, src2))
7644 matching_memory = 2;
7645 else
7646 dst = gen_reg_rtx (mode);
7649 /* Both source operands cannot be in memory. */
7650 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7652 if (matching_memory != 2)
7653 src2 = force_reg (mode, src2);
7654 else
7655 src1 = force_reg (mode, src1);
7658 /* If the operation is not commutative, source 1 cannot be a constant
7659 or non-matching memory. */
7660 if ((CONSTANT_P (src1)
7661 || (!matching_memory && GET_CODE (src1) == MEM))
7662 && GET_RTX_CLASS (code) != 'c')
7663 src1 = force_reg (mode, src1);
7665 /* If optimizing, copy to regs to improve CSE */
7666 if (optimize && ! no_new_pseudos)
7668 if (GET_CODE (dst) == MEM)
7669 dst = gen_reg_rtx (mode);
7670 if (GET_CODE (src1) == MEM)
7671 src1 = force_reg (mode, src1);
7672 if (GET_CODE (src2) == MEM)
7673 src2 = force_reg (mode, src2);
7676 /* Emit the instruction. */
7678 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7679 if (reload_in_progress)
7681 /* Reload doesn't know about the flags register, and doesn't know that
7682 it doesn't want to clobber it. We can only do this with PLUS. */
7683 if (code != PLUS)
7684 abort ();
7685 emit_insn (op);
7687 else
7689 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7690 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7693 /* Fix up the destination if needed. */
7694 if (dst != operands[0])
7695 emit_move_insn (operands[0], dst);
7698 /* Return TRUE or FALSE depending on whether the binary operator meets the
7699 appropriate constraints. */
7702 ix86_binary_operator_ok (code, mode, operands)
7703 enum rtx_code code;
7704 enum machine_mode mode ATTRIBUTE_UNUSED;
7705 rtx operands[3];
7707 /* Both source operands cannot be in memory. */
7708 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7709 return 0;
7710 /* If the operation is not commutative, source 1 cannot be a constant. */
7711 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7712 return 0;
7713 /* If the destination is memory, we must have a matching source operand. */
7714 if (GET_CODE (operands[0]) == MEM
7715 && ! (rtx_equal_p (operands[0], operands[1])
7716 || (GET_RTX_CLASS (code) == 'c'
7717 && rtx_equal_p (operands[0], operands[2]))))
7718 return 0;
7719 /* If the operation is not commutative and source 1 is memory, we must
7720 have a matching destination. */
7721 if (GET_CODE (operands[1]) == MEM
7722 && GET_RTX_CLASS (code) != 'c'
7723 && ! rtx_equal_p (operands[0], operands[1]))
7724 return 0;
7725 return 1;
7728 /* Attempt to expand a unary operator. Make the expansion closer to the
7729 actual machine, than just general_operand, which will allow 2 separate
7730 memory references (one output, one input) in a single insn. */
7732 void
7733 ix86_expand_unary_operator (code, mode, operands)
7734 enum rtx_code code;
7735 enum machine_mode mode;
7736 rtx operands[];
7738 int matching_memory;
7739 rtx src, dst, op, clob;
7741 dst = operands[0];
7742 src = operands[1];
7744 /* If the destination is memory, and we do not have matching source
7745 operands, do things in registers. */
7746 matching_memory = 0;
7747 if (GET_CODE (dst) == MEM)
7749 if (rtx_equal_p (dst, src))
7750 matching_memory = 1;
7751 else
7752 dst = gen_reg_rtx (mode);
7755 /* When source operand is memory, destination must match. */
7756 if (!matching_memory && GET_CODE (src) == MEM)
7757 src = force_reg (mode, src);
7759 /* If optimizing, copy to regs to improve CSE */
7760 if (optimize && ! no_new_pseudos)
7762 if (GET_CODE (dst) == MEM)
7763 dst = gen_reg_rtx (mode);
7764 if (GET_CODE (src) == MEM)
7765 src = force_reg (mode, src);
7768 /* Emit the instruction. */
7770 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7771 if (reload_in_progress || code == NOT)
7773 /* Reload doesn't know about the flags register, and doesn't know that
7774 it doesn't want to clobber it. */
7775 if (code != NOT)
7776 abort ();
7777 emit_insn (op);
7779 else
7781 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7782 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7785 /* Fix up the destination if needed. */
7786 if (dst != operands[0])
7787 emit_move_insn (operands[0], dst);
7790 /* Return TRUE or FALSE depending on whether the unary operator meets the
7791 appropriate constraints. */
7794 ix86_unary_operator_ok (code, mode, operands)
7795 enum rtx_code code ATTRIBUTE_UNUSED;
7796 enum machine_mode mode ATTRIBUTE_UNUSED;
7797 rtx operands[2] ATTRIBUTE_UNUSED;
7799 /* If one of operands is memory, source and destination must match. */
7800 if ((GET_CODE (operands[0]) == MEM
7801 || GET_CODE (operands[1]) == MEM)
7802 && ! rtx_equal_p (operands[0], operands[1]))
7803 return FALSE;
7804 return TRUE;
7807 /* Return TRUE or FALSE depending on whether the first SET in INSN
7808 has source and destination with matching CC modes, and whether the
7809 CC mode is at least as constrained as REQ_MODE. */
7812 ix86_match_ccmode (insn, req_mode)
7813 rtx insn;
7814 enum machine_mode req_mode;
7816 rtx set;
7817 enum machine_mode set_mode;
7819 set = PATTERN (insn);
7820 if (GET_CODE (set) == PARALLEL)
7821 set = XVECEXP (set, 0, 0);
7822 if (GET_CODE (set) != SET)
7823 abort ();
7824 if (GET_CODE (SET_SRC (set)) != COMPARE)
7825 abort ();
7827 set_mode = GET_MODE (SET_DEST (set));
7828 switch (set_mode)
7830 case CCNOmode:
7831 if (req_mode != CCNOmode
7832 && (req_mode != CCmode
7833 || XEXP (SET_SRC (set), 1) != const0_rtx))
7834 return 0;
7835 break;
7836 case CCmode:
7837 if (req_mode == CCGCmode)
7838 return 0;
7839 /* FALLTHRU */
7840 case CCGCmode:
7841 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7842 return 0;
7843 /* FALLTHRU */
7844 case CCGOCmode:
7845 if (req_mode == CCZmode)
7846 return 0;
7847 /* FALLTHRU */
7848 case CCZmode:
7849 break;
7851 default:
7852 abort ();
7855 return (GET_MODE (SET_SRC (set)) == set_mode);
7858 /* Generate insn patterns to do an integer compare of OPERANDS. */
7860 static rtx
7861 ix86_expand_int_compare (code, op0, op1)
7862 enum rtx_code code;
7863 rtx op0, op1;
7865 enum machine_mode cmpmode;
7866 rtx tmp, flags;
7868 cmpmode = SELECT_CC_MODE (code, op0, op1);
7869 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7871 /* This is very simple, but making the interface the same as in the
7872 FP case makes the rest of the code easier. */
7873 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7874 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7876 /* Return the test that should be put into the flags user, i.e.
7877 the bcc, scc, or cmov instruction. */
7878 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7881 /* Figure out whether to use ordered or unordered fp comparisons.
7882 Return the appropriate mode to use. */
7884 enum machine_mode
7885 ix86_fp_compare_mode (code)
7886 enum rtx_code code ATTRIBUTE_UNUSED;
7888 /* ??? In order to make all comparisons reversible, we do all comparisons
7889 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7890 all forms of trapping and nontrapping comparisons, we can make inequality
7891 comparisons trapping again, since it results in better code when using
7892 FCOM based compares. */
7893 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7896 enum machine_mode
7897 ix86_cc_mode (code, op0, op1)
7898 enum rtx_code code;
7899 rtx op0, op1;
7901 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7902 return ix86_fp_compare_mode (code);
7903 switch (code)
7905 /* Only zero flag is needed. */
7906 case EQ: /* ZF=0 */
7907 case NE: /* ZF!=0 */
7908 return CCZmode;
7909 /* Codes needing carry flag. */
7910 case GEU: /* CF=0 */
7911 case GTU: /* CF=0 & ZF=0 */
7912 case LTU: /* CF=1 */
7913 case LEU: /* CF=1 | ZF=1 */
7914 return CCmode;
7915 /* Codes possibly doable only with sign flag when
7916 comparing against zero. */
7917 case GE: /* SF=OF or SF=0 */
7918 case LT: /* SF<>OF or SF=1 */
7919 if (op1 == const0_rtx)
7920 return CCGOCmode;
7921 else
7922 /* For other cases Carry flag is not required. */
7923 return CCGCmode;
7924 /* Codes doable only with the sign flag when comparing
7925 against zero, but we miss the jump instruction for it,
7926 so we need to use relational tests against overflow,
7927 which thus needs to be zero. */
7928 case GT: /* ZF=0 & SF=OF */
7929 case LE: /* ZF=1 | SF<>OF */
7930 if (op1 == const0_rtx)
7931 return CCNOmode;
7932 else
7933 return CCGCmode;
7934 /* The strcmp pattern does (use flags), and combine may ask us for the
7935 proper mode. */
7936 case USE:
7937 return CCmode;
7938 default:
7939 abort ();
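/* Examples from the cases above: EQ and NE need only the zero flag, so
   they get CCZmode; unsigned LTU needs the carry flag, so CCmode; a signed
   GE compared against const0_rtx can use the sign flag alone and gets
   CCGOCmode, otherwise CCGCmode.  */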
7943 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7946 ix86_use_fcomi_compare (code)
7947 enum rtx_code code ATTRIBUTE_UNUSED;
7949 enum rtx_code swapped_code = swap_condition (code);
7950 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7951 || (ix86_fp_comparison_cost (swapped_code)
7952 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7955 /* Swap, force into registers, or otherwise massage the two operands
7956 to an fp comparison. The operands are updated in place; the new
7957 comparison code is returned. */
7959 static enum rtx_code
7960 ix86_prepare_fp_compare_args (code, pop0, pop1)
7961 enum rtx_code code;
7962 rtx *pop0, *pop1;
7964 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7965 rtx op0 = *pop0, op1 = *pop1;
7966 enum machine_mode op_mode = GET_MODE (op0);
7967 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7969 /* All of the unordered compare instructions only work on registers.
7970 The same is true of the XFmode compare instructions. The same is
7971 true of the fcomi compare instructions. */
7973 if (!is_sse
7974 && (fpcmp_mode == CCFPUmode
7975 || op_mode == XFmode
7976 || op_mode == TFmode
7977 || ix86_use_fcomi_compare (code)))
7979 op0 = force_reg (op_mode, op0);
7980 op1 = force_reg (op_mode, op1);
7982 else
7984 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7985 things around if they appear profitable, otherwise force op0
7986 into a register. */
7988 if (standard_80387_constant_p (op0) == 0
7989 || (GET_CODE (op0) == MEM
7990 && ! (standard_80387_constant_p (op1) == 0
7991 || GET_CODE (op1) == MEM)))
7993 rtx tmp;
7994 tmp = op0, op0 = op1, op1 = tmp;
7995 code = swap_condition (code);
7998 if (GET_CODE (op0) != REG)
7999 op0 = force_reg (op_mode, op0);
8001 if (CONSTANT_P (op1))
8003 if (standard_80387_constant_p (op1))
8004 op1 = force_reg (op_mode, op1);
8005 else
8006 op1 = validize_mem (force_const_mem (op_mode, op1));
8010 /* Try to rearrange the comparison to make it cheaper. */
8011 if (ix86_fp_comparison_cost (code)
8012 > ix86_fp_comparison_cost (swap_condition (code))
8013 && (GET_CODE (op1) == REG || !no_new_pseudos))
8015 rtx tmp;
8016 tmp = op0, op0 = op1, op1 = tmp;
8017 code = swap_condition (code);
8018 if (GET_CODE (op0) != REG)
8019 op0 = force_reg (op_mode, op0);
8022 *pop0 = op0;
8023 *pop1 = op1;
8024 return code;
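/* Sketch (hypothetical operands): an XFmode comparison forces both
   operands into registers, since those compares only work on registers;
   and when the swapped ordering is cheaper (second rearrangement above),
   the operands are exchanged and e.g. LT becomes GT via swap_condition.  */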
8027 /* Convert the comparison codes we use to represent FP comparisons to the
8028 integer code that will result in a proper branch. Return UNKNOWN if no
8029 such code is available. */
8030 static enum rtx_code
8031 ix86_fp_compare_code_to_integer (code)
8032 enum rtx_code code;
8034 switch (code)
8036 case GT:
8037 return GTU;
8038 case GE:
8039 return GEU;
8040 case ORDERED:
8041 case UNORDERED:
8042 return code;
8043 break;
8044 case UNEQ:
8045 return EQ;
8046 break;
8047 case UNLT:
8048 return LTU;
8049 break;
8050 case UNLE:
8051 return LEU;
8052 break;
8053 case LTGT:
8054 return NE;
8055 break;
8056 default:
8057 return UNKNOWN;
8061 /* Split comparison code CODE into comparisons we can do using branch
8062 instructions. BYPASS_CODE is the comparison code for a branch that will
8063 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8064 is not required, its value is set to NIL.
8065 We never require more than two branches. */
8066 static void
8067 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8068 enum rtx_code code, *bypass_code, *first_code, *second_code;
8070 *first_code = code;
8071 *bypass_code = NIL;
8072 *second_code = NIL;
8074 /* The fcomi comparison sets flags as follows:
8076 cmp ZF PF CF
8077 > 0 0 0
8078 < 0 0 1
8079 = 1 0 0
8080 un 1 1 1 */
8082 switch (code)
8084 case GT: /* GTU - CF=0 & ZF=0 */
8085 case GE: /* GEU - CF=0 */
8086 case ORDERED: /* PF=0 */
8087 case UNORDERED: /* PF=1 */
8088 case UNEQ: /* EQ - ZF=1 */
8089 case UNLT: /* LTU - CF=1 */
8090 case UNLE: /* LEU - CF=1 | ZF=1 */
8091 case LTGT: /* EQ - ZF=0 */
8092 break;
8093 case LT: /* LTU - CF=1 - fails on unordered */
8094 *first_code = UNLT;
8095 *bypass_code = UNORDERED;
8096 break;
8097 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8098 *first_code = UNLE;
8099 *bypass_code = UNORDERED;
8100 break;
8101 case EQ: /* EQ - ZF=1 - fails on unordered */
8102 *first_code = UNEQ;
8103 *bypass_code = UNORDERED;
8104 break;
8105 case NE: /* NE - ZF=0 - fails on unordered */
8106 *first_code = LTGT;
8107 *second_code = UNORDERED;
8108 break;
8109 case UNGE: /* GEU - CF=0 - fails on unordered */
8110 *first_code = GE;
8111 *second_code = UNORDERED;
8112 break;
8113 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8114 *first_code = GT;
8115 *second_code = UNORDERED;
8116 break;
8117 default:
8118 abort ();
8120 if (!TARGET_IEEE_FP)
8122 *second_code = NIL;
8123 *bypass_code = NIL;
8127 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8128 All of the following functions use the number of instructions as a cost metric.
8129 In the future this should be tweaked to compute bytes for optimize_size and
8130 take into account the performance of various instructions on various CPUs. */
8131 static int
8132 ix86_fp_comparison_arithmetics_cost (code)
8133 enum rtx_code code;
8135 if (!TARGET_IEEE_FP)
8136 return 4;
8137 /* The cost of code output by ix86_expand_fp_compare. */
8138 switch (code)
8140 case UNLE:
8141 case UNLT:
8142 case LTGT:
8143 case GT:
8144 case GE:
8145 case UNORDERED:
8146 case ORDERED:
8147 case UNEQ:
8148 return 4;
8149 break;
8150 case LT:
8151 case NE:
8152 case EQ:
8153 case UNGE:
8154 return 5;
8155 break;
8156 case LE:
8157 case UNGT:
8158 return 6;
8159 break;
8160 default:
8161 abort ();
8165 /* Return cost of comparison done using fcomi operation.
8166 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8167 static int
8168 ix86_fp_comparison_fcomi_cost (code)
8169 enum rtx_code code;
8171 enum rtx_code bypass_code, first_code, second_code;
8172 /* Return an arbitrarily high cost when the instruction is not supported - this
8173 prevents gcc from using it. */
8174 if (!TARGET_CMOVE)
8175 return 1024;
8176 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8177 return (bypass_code != NIL || second_code != NIL) + 2;
8180 /* Return cost of comparison done using sahf operation.
8181 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8182 static int
8183 ix86_fp_comparison_sahf_cost (code)
8184 enum rtx_code code;
8186 enum rtx_code bypass_code, first_code, second_code;
8187 /* Return an arbitrarily high cost when the instruction is not preferred - this
8188 prevents gcc from using it. */
8189 if (!TARGET_USE_SAHF && !optimize_size)
8190 return 1024;
8191 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8192 return (bypass_code != NIL || second_code != NIL) + 3;
8195 /* Compute cost of the comparison done using any method.
8196 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8197 static int
8198 ix86_fp_comparison_cost (code)
8199 enum rtx_code code;
8201 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8202 int min;
8204 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8205 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8207 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8208 if (min > sahf_cost)
8209 min = sahf_cost;
8210 if (min > fcomi_cost)
8211 min = fcomi_cost;
8212 return min;
8215 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8217 static rtx
8218 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8219 enum rtx_code code;
8220 rtx op0, op1, scratch;
8221 rtx *second_test;
8222 rtx *bypass_test;
8224 enum machine_mode fpcmp_mode, intcmp_mode;
8225 rtx tmp, tmp2;
8226 int cost = ix86_fp_comparison_cost (code);
8227 enum rtx_code bypass_code, first_code, second_code;
8229 fpcmp_mode = ix86_fp_compare_mode (code);
8230 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8232 if (second_test)
8233 *second_test = NULL_RTX;
8234 if (bypass_test)
8235 *bypass_test = NULL_RTX;
8237 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8239 /* Do fcomi/sahf based test when profitable. */
8240 if ((bypass_code == NIL || bypass_test)
8241 && (second_code == NIL || second_test)
8242 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8244 if (TARGET_CMOVE)
8246 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8247 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8248 tmp);
8249 emit_insn (tmp);
8251 else
8253 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8254 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8255 if (!scratch)
8256 scratch = gen_reg_rtx (HImode);
8257 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8258 emit_insn (gen_x86_sahf_1 (scratch));
8261 /* The FP codes work out to act like unsigned. */
8262 intcmp_mode = fpcmp_mode;
8263 code = first_code;
8264 if (bypass_code != NIL)
8265 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8266 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8267 const0_rtx);
8268 if (second_code != NIL)
8269 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8270 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8271 const0_rtx);
8273 else
8275 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8276 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8277 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8278 if (!scratch)
8279 scratch = gen_reg_rtx (HImode);
8280 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8282 /* In the unordered case, we have to check C2 for NaNs, which
8283 doesn't happen to work out to anything nice combination-wise.
8284 So do some bit twiddling on the value we've got in AH to come
8285 up with an appropriate set of condition codes. */
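/* A sketch of the layout assumed by the masks below: after fnstsw the
   x87 status word is in AX, so AH holds its upper byte, where C0 shows
   up as 0x01, C2 as 0x04 and C3 as 0x40.  Hence 0x45 selects C3|C2|C0
   and 0x05 selects C2|C0; the and/add/cmp/xor games on these masks turn
   the flag patterns into ordinary integer condition codes.  */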
8287 intcmp_mode = CCNOmode;
8288 switch (code)
8290 case GT:
8291 case UNGT:
8292 if (code == GT || !TARGET_IEEE_FP)
8294 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8295 code = EQ;
8297 else
8299 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8300 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8301 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8302 intcmp_mode = CCmode;
8303 code = GEU;
8305 break;
8306 case LT:
8307 case UNLT:
8308 if (code == LT && TARGET_IEEE_FP)
8310 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8311 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8312 intcmp_mode = CCmode;
8313 code = EQ;
8315 else
8317 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8318 code = NE;
8320 break;
8321 case GE:
8322 case UNGE:
8323 if (code == GE || !TARGET_IEEE_FP)
8325 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8326 code = EQ;
8328 else
8330 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8331 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8332 GEN_INT (0x01)));
8333 code = NE;
8335 break;
8336 case LE:
8337 case UNLE:
8338 if (code == LE && TARGET_IEEE_FP)
8340 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8341 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8342 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8343 intcmp_mode = CCmode;
8344 code = LTU;
8346 else
8348 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8349 code = NE;
8351 break;
8352 case EQ:
8353 case UNEQ:
8354 if (code == EQ && TARGET_IEEE_FP)
8356 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8357 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8358 intcmp_mode = CCmode;
8359 code = EQ;
8361 else
8363 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8364 code = NE;
8365 break;
8367 break;
8368 case NE:
8369 case LTGT:
8370 if (code == NE && TARGET_IEEE_FP)
8372 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8373 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8374 GEN_INT (0x40)));
8375 code = NE;
8377 else
8379 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8380 code = EQ;
8382 break;
8384 case UNORDERED:
8385 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8386 code = NE;
8387 break;
8388 case ORDERED:
8389 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8390 code = EQ;
8391 break;
8393 default:
8394 abort ();
8398 /* Return the test that should be put into the flags user, i.e.
8399 the bcc, scc, or cmov instruction. */
8400 return gen_rtx_fmt_ee (code, VOIDmode,
8401 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8402 const0_rtx);
8406 ix86_expand_compare (code, second_test, bypass_test)
8407 enum rtx_code code;
8408 rtx *second_test, *bypass_test;
8410 rtx op0, op1, ret;
8411 op0 = ix86_compare_op0;
8412 op1 = ix86_compare_op1;
8414 if (second_test)
8415 *second_test = NULL_RTX;
8416 if (bypass_test)
8417 *bypass_test = NULL_RTX;
8419 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8420 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8421 second_test, bypass_test);
8422 else
8423 ret = ix86_expand_int_compare (code, op0, op1);
8425 return ret;
8428 /* Return true if CODE will result in a nontrivial jump sequence. */
8429 bool
8430 ix86_fp_jump_nontrivial_p (code)
8431 enum rtx_code code;
8433 enum rtx_code bypass_code, first_code, second_code;
8434 if (!TARGET_CMOVE)
8435 return true;
8436 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8437 return bypass_code != NIL || second_code != NIL;
8440 void
8441 ix86_expand_branch (code, label)
8442 enum rtx_code code;
8443 rtx label;
8445 rtx tmp;
8447 switch (GET_MODE (ix86_compare_op0))
8449 case QImode:
8450 case HImode:
8451 case SImode:
8452 simple:
8453 tmp = ix86_expand_compare (code, NULL, NULL);
8454 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8455 gen_rtx_LABEL_REF (VOIDmode, label),
8456 pc_rtx);
8457 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8458 return;
8460 case SFmode:
8461 case DFmode:
8462 case XFmode:
8463 case TFmode:
8465 rtvec vec;
8466 int use_fcomi;
8467 enum rtx_code bypass_code, first_code, second_code;
8469 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8470 &ix86_compare_op1);
8472 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8474 /* Check whether we will use the natural sequence with one jump. If
8475 so, we can expand the jump early. Otherwise delay expansion by
8476 creating a compound insn so as not to confuse the optimizers. */
8477 if (bypass_code == NIL && second_code == NIL
8478 && TARGET_CMOVE)
8480 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8481 gen_rtx_LABEL_REF (VOIDmode, label),
8482 pc_rtx, NULL_RTX);
8484 else
8486 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8487 ix86_compare_op0, ix86_compare_op1);
8488 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8489 gen_rtx_LABEL_REF (VOIDmode, label),
8490 pc_rtx);
8491 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8493 use_fcomi = ix86_use_fcomi_compare (code);
8494 vec = rtvec_alloc (3 + !use_fcomi);
8495 RTVEC_ELT (vec, 0) = tmp;
8496 RTVEC_ELT (vec, 1)
8497 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8498 RTVEC_ELT (vec, 2)
8499 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8500 if (! use_fcomi)
8501 RTVEC_ELT (vec, 3)
8502 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8504 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8506 return;
8509 case DImode:
8510 if (TARGET_64BIT)
8511 goto simple;
8512 /* Expand DImode branch into multiple compare+branch. */
8514 rtx lo[2], hi[2], label2;
8515 enum rtx_code code1, code2, code3;
8517 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8519 tmp = ix86_compare_op0;
8520 ix86_compare_op0 = ix86_compare_op1;
8521 ix86_compare_op1 = tmp;
8522 code = swap_condition (code);
8524 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8525 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8527 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8528 avoid two branches. This costs one extra insn, so disable when
8529 optimizing for size. */
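/* For instance, comparing two DImode values for equality on a 32-bit
   target ends up as something like this (a sketch; the actual operands
   come from the expand_binop calls below):

     xorl  hi1, hi0
     xorl  lo1, lo0
     orl   lo0, hi0
     je    label

   i.e. one conditional branch instead of two.  */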
8531 if ((code == EQ || code == NE)
8532 && (!optimize_size
8533 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8535 rtx xor0, xor1;
8537 xor1 = hi[0];
8538 if (hi[1] != const0_rtx)
8539 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8540 NULL_RTX, 0, OPTAB_WIDEN);
8542 xor0 = lo[0];
8543 if (lo[1] != const0_rtx)
8544 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8545 NULL_RTX, 0, OPTAB_WIDEN);
8547 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8548 NULL_RTX, 0, OPTAB_WIDEN);
8550 ix86_compare_op0 = tmp;
8551 ix86_compare_op1 = const0_rtx;
8552 ix86_expand_branch (code, label);
8553 return;
8556 /* Otherwise, if we are doing a less-than or greater-or-equal-than
8557 comparison, op1 is a constant and the low word is zero, then we can
8558 just examine the high word. */
8560 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8561 switch (code)
8563 case LT: case LTU: case GE: case GEU:
8564 ix86_compare_op0 = hi[0];
8565 ix86_compare_op1 = hi[1];
8566 ix86_expand_branch (code, label);
8567 return;
8568 default:
8569 break;
8572 /* Otherwise, we need two or three jumps. */
8574 label2 = gen_label_rtx ();
8576 code1 = code;
8577 code2 = swap_condition (code);
8578 code3 = unsigned_condition (code);
8580 switch (code)
8582 case LT: case GT: case LTU: case GTU:
8583 break;
8585 case LE: code1 = LT; code2 = GT; break;
8586 case GE: code1 = GT; code2 = LT; break;
8587 case LEU: code1 = LTU; code2 = GTU; break;
8588 case GEU: code1 = GTU; code2 = LTU; break;
8590 case EQ: code1 = NIL; code2 = NE; break;
8591 case NE: code2 = NIL; break;
8593 default:
8594 abort ();
8598 * a < b =>
8599 * if (hi(a) < hi(b)) goto true;
8600 * if (hi(a) > hi(b)) goto false;
8601 * if (lo(a) < lo(b)) goto true;
8602 * false:
8605 ix86_compare_op0 = hi[0];
8606 ix86_compare_op1 = hi[1];
8608 if (code1 != NIL)
8609 ix86_expand_branch (code1, label);
8610 if (code2 != NIL)
8611 ix86_expand_branch (code2, label2);
8613 ix86_compare_op0 = lo[0];
8614 ix86_compare_op1 = lo[1];
8615 ix86_expand_branch (code3, label);
8617 if (code2 != NIL)
8618 emit_label (label2);
8619 return;
8622 default:
8623 abort ();
8627 /* Split branch based on floating point condition. */
8628 void
8629 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8630 enum rtx_code code;
8631 rtx op1, op2, target1, target2, tmp;
8633 rtx second, bypass;
8634 rtx label = NULL_RTX;
8635 rtx condition;
8636 int bypass_probability = -1, second_probability = -1, probability = -1;
8637 rtx i;
8639 if (target2 != pc_rtx)
8641 rtx tmp = target2;
8642 code = reverse_condition_maybe_unordered (code);
8643 target2 = target1;
8644 target1 = tmp;
8647 condition = ix86_expand_fp_compare (code, op1, op2,
8648 tmp, &second, &bypass);
8650 if (split_branch_probability >= 0)
8652 /* Distribute the probabilities across the jumps.
8653 Assume that the BYPASS and SECOND branches always
8654 test for UNORDERED. */
8655 probability = split_branch_probability;
8657 /* A value of 1 is low enough that the probability does not
8658 need to be updated. Later we may run some experiments and see
8659 whether unordered values are more frequent in practice. */
8660 if (bypass)
8661 bypass_probability = 1;
8662 if (second)
8663 second_probability = 1;
8665 if (bypass != NULL_RTX)
8667 label = gen_label_rtx ();
8668 i = emit_jump_insn (gen_rtx_SET
8669 (VOIDmode, pc_rtx,
8670 gen_rtx_IF_THEN_ELSE (VOIDmode,
8671 bypass,
8672 gen_rtx_LABEL_REF (VOIDmode,
8673 label),
8674 pc_rtx)));
8675 if (bypass_probability >= 0)
8676 REG_NOTES (i)
8677 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8678 GEN_INT (bypass_probability),
8679 REG_NOTES (i));
8681 i = emit_jump_insn (gen_rtx_SET
8682 (VOIDmode, pc_rtx,
8683 gen_rtx_IF_THEN_ELSE (VOIDmode,
8684 condition, target1, target2)));
8685 if (probability >= 0)
8686 REG_NOTES (i)
8687 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8688 GEN_INT (probability),
8689 REG_NOTES (i));
8690 if (second != NULL_RTX)
8692 i = emit_jump_insn (gen_rtx_SET
8693 (VOIDmode, pc_rtx,
8694 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8695 target2)));
8696 if (second_probability >= 0)
8697 REG_NOTES (i)
8698 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8699 GEN_INT (second_probability),
8700 REG_NOTES (i));
8702 if (label != NULL_RTX)
8703 emit_label (label);
8707 ix86_expand_setcc (code, dest)
8708 enum rtx_code code;
8709 rtx dest;
8711 rtx ret, tmp, tmpreg;
8712 rtx second_test, bypass_test;
8714 if (GET_MODE (ix86_compare_op0) == DImode
8715 && !TARGET_64BIT)
8716 return 0; /* FAIL */
8718 if (GET_MODE (dest) != QImode)
8719 abort ();
8721 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8722 PUT_MODE (ret, QImode);
8724 tmp = dest;
8725 tmpreg = dest;
8727 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8728 if (bypass_test || second_test)
8730 rtx test = second_test;
8731 int bypass = 0;
8732 rtx tmp2 = gen_reg_rtx (QImode);
8733 if (bypass_test)
8735 if (second_test)
8736 abort ();
8737 test = bypass_test;
8738 bypass = 1;
8739 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8741 PUT_MODE (test, QImode);
8742 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8744 if (bypass)
8745 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8746 else
8747 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8750 return 1; /* DONE */
8754 ix86_expand_int_movcc (operands)
8755 rtx operands[];
8757 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8758 rtx compare_seq, compare_op;
8759 rtx second_test, bypass_test;
8760 enum machine_mode mode = GET_MODE (operands[0]);
8762 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8763 In case the comparison is done with an immediate, we can convert it to LTU
8764 or GEU by altering the integer. */
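/* For example, with unsigned operands "x <= 4" (LEU) becomes "x < 5"
   (LTU) and "x > 4" (GTU) becomes "x >= 5" (GEU), which the sbb-based
   sequences below handle directly.  */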
8766 if ((code == LEU || code == GTU)
8767 && GET_CODE (ix86_compare_op1) == CONST_INT
8768 && mode != HImode
8769 && INTVAL (ix86_compare_op1) != -1
8770 /* For x86-64, the immediate field in the instruction is 32-bit
8771 signed, so we can't increment a DImode value above 0x7fffffff. */
8772 && (!TARGET_64BIT
8773 || GET_MODE (ix86_compare_op0) != DImode
8774 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8775 && GET_CODE (operands[2]) == CONST_INT
8776 && GET_CODE (operands[3]) == CONST_INT)
8778 if (code == LEU)
8779 code = LTU;
8780 else
8781 code = GEU;
8782 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8783 GET_MODE (ix86_compare_op0));
8786 start_sequence ();
8787 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8788 compare_seq = get_insns ();
8789 end_sequence ();
8791 compare_code = GET_CODE (compare_op);
8793 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8794 HImode insns, we'd be swallowed in word prefix ops. */
8796 if (mode != HImode
8797 && (mode != DImode || TARGET_64BIT)
8798 && GET_CODE (operands[2]) == CONST_INT
8799 && GET_CODE (operands[3]) == CONST_INT)
8801 rtx out = operands[0];
8802 HOST_WIDE_INT ct = INTVAL (operands[2]);
8803 HOST_WIDE_INT cf = INTVAL (operands[3]);
8804 HOST_WIDE_INT diff;
8806 if ((compare_code == LTU || compare_code == GEU)
8807 && !second_test && !bypass_test)
8809 /* Detect overlap between destination and compare sources. */
8810 rtx tmp = out;
8812 /* To simplify rest of code, restrict to the GEU case. */
8813 if (compare_code == LTU)
8815 int tmp = ct;
8816 ct = cf;
8817 cf = tmp;
8818 compare_code = reverse_condition (compare_code);
8819 code = reverse_condition (code);
8821 diff = ct - cf;
8823 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8824 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8825 tmp = gen_reg_rtx (mode);
8827 emit_insn (compare_seq);
8828 if (mode == DImode)
8829 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8830 else
8831 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8833 if (diff == 1)
8836 * cmpl op0,op1
8837 * sbbl dest,dest
8838 * [addl dest, ct]
8840 * Size 5 - 8.
8842 if (ct)
8843 tmp = expand_simple_binop (mode, PLUS,
8844 tmp, GEN_INT (ct),
8845 tmp, 1, OPTAB_DIRECT);
8847 else if (cf == -1)
8850 * cmpl op0,op1
8851 * sbbl dest,dest
8852 * orl $ct, dest
8854 * Size 8.
8856 tmp = expand_simple_binop (mode, IOR,
8857 tmp, GEN_INT (ct),
8858 tmp, 1, OPTAB_DIRECT);
8860 else if (diff == -1 && ct)
8863 * cmpl op0,op1
8864 * sbbl dest,dest
8865 * notl dest
8866 * [addl dest, cf]
8868 * Size 8 - 11.
8870 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8871 if (cf)
8872 tmp = expand_simple_binop (mode, PLUS,
8873 tmp, GEN_INT (cf),
8874 tmp, 1, OPTAB_DIRECT);
8876 else
8879 * cmpl op0,op1
8880 * sbbl dest,dest
8881 * [notl dest]
8882 * andl cf - ct, dest
8883 * [addl dest, ct]
8885 * Size 8 - 11.
8888 if (cf == 0)
8890 cf = ct;
8891 ct = 0;
8892 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8895 tmp = expand_simple_binop (mode, AND,
8896 tmp,
8897 gen_int_mode (cf - ct, mode),
8898 tmp, 1, OPTAB_DIRECT);
8899 if (ct)
8900 tmp = expand_simple_binop (mode, PLUS,
8901 tmp, GEN_INT (ct),
8902 tmp, 1, OPTAB_DIRECT);
8905 if (tmp != out)
8906 emit_move_insn (out, tmp);
8908 return 1; /* DONE */
8911 diff = ct - cf;
8912 if (diff < 0)
8914 HOST_WIDE_INT tmp;
8915 tmp = ct, ct = cf, cf = tmp;
8916 diff = -diff;
8917 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8919 /* We may be reversing an unordered compare to a normal compare, which
8920 is not valid in general (we may convert a non-trapping condition
8921 into a trapping one); however, on i386 we currently emit all
8922 comparisons unordered. */
8923 compare_code = reverse_condition_maybe_unordered (compare_code);
8924 code = reverse_condition_maybe_unordered (code);
8926 else
8928 compare_code = reverse_condition (compare_code);
8929 code = reverse_condition (code);
8933 compare_code = NIL;
8934 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8935 && GET_CODE (ix86_compare_op1) == CONST_INT)
8937 if (ix86_compare_op1 == const0_rtx
8938 && (code == LT || code == GE))
8939 compare_code = code;
8940 else if (ix86_compare_op1 == constm1_rtx)
8942 if (code == LE)
8943 compare_code = LT;
8944 else if (code == GT)
8945 compare_code = GE;
8949 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8950 if (compare_code != NIL
8951 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8952 && (cf == -1 || ct == -1))
8954 /* If the lea code below could be used, only optimize
8955 if it results in a 2-insn sequence. */
8957 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8958 || diff == 3 || diff == 5 || diff == 9)
8959 || (compare_code == LT && ct == -1)
8960 || (compare_code == GE && cf == -1))
8963 * notl op1 (if necessary)
8964 * sarl $31, op1
8965 * orl cf, op1
8967 if (ct != -1)
8969 cf = ct;
8970 ct = -1;
8971 code = reverse_condition (code);
8974 out = emit_store_flag (out, code, ix86_compare_op0,
8975 ix86_compare_op1, VOIDmode, 0, -1);
8977 out = expand_simple_binop (mode, IOR,
8978 out, GEN_INT (cf),
8979 out, 1, OPTAB_DIRECT);
8980 if (out != operands[0])
8981 emit_move_insn (operands[0], out);
8983 return 1; /* DONE */
8987 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8988 || diff == 3 || diff == 5 || diff == 9)
8989 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8992 * xorl dest,dest
8993 * cmpl op1,op2
8994 * setcc dest
8995 * lea cf(dest*(ct-cf)),dest
8997 * Size 14.
8999 * This also catches the degenerate setcc-only case.
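/*
 * A concrete instance (a sketch; exact operands depend on register
 * allocation): for dest = (a != b) ? 5 : 2 we have ct = 5, cf = 2 and
 * diff = 3, giving roughly
 *
 *   xorl dest,dest
 *   cmpl op1,op2
 *   setne dest
 *   leal 2(dest,dest,2),dest
 *
 * so dest becomes 3*dest + 2, i.e. 2 or 5.
 */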
9002 rtx tmp;
9003 int nops;
9005 out = emit_store_flag (out, code, ix86_compare_op0,
9006 ix86_compare_op1, VOIDmode, 0, 1);
9008 nops = 0;
9009 /* On x86_64 the lea instruction operates on Pmode, so we need
9010 to get the arithmetic done in the proper mode to match. */
9011 if (diff == 1)
9012 tmp = out;
9013 else
9015 rtx out1;
9016 out1 = out;
9017 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9018 nops++;
9019 if (diff & 1)
9021 tmp = gen_rtx_PLUS (mode, tmp, out1);
9022 nops++;
9025 if (cf != 0)
9027 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9028 nops++;
9030 if (tmp != out
9031 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9033 if (nops == 1)
9035 rtx clob;
9037 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9038 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9040 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9041 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9042 emit_insn (tmp);
9044 else
9045 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9047 if (out != operands[0])
9048 emit_move_insn (operands[0], copy_rtx (out));
9050 return 1; /* DONE */
9054 * General case: Jumpful:
9055 * xorl dest,dest cmpl op1, op2
9056 * cmpl op1, op2 movl ct, dest
9057 * setcc dest jcc 1f
9058 * decl dest movl cf, dest
9059 * andl (cf-ct),dest 1:
9060 * addl ct,dest
9062 * Size 20. Size 14.
9064 * This is reasonably steep, but branch mispredict costs are
9065 * high on modern cpus, so consider failing only if optimizing
9066 * for space.
9068 * %%% Parameterize branch_cost on the tuning architecture, then
9069 * use that. The 80386 couldn't care less about mispredicts.
9072 if (!optimize_size && !TARGET_CMOVE)
9074 if (cf == 0)
9076 cf = ct;
9077 ct = 0;
9078 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9079 /* We may be reversing an unordered compare to a normal compare,
9080 which is not valid in general (we may convert a non-trapping
9081 condition into a trapping one); however, on i386 we currently
9082 emit all comparisons unordered. */
9083 code = reverse_condition_maybe_unordered (code);
9084 else
9086 code = reverse_condition (code);
9087 if (compare_code != NIL)
9088 compare_code = reverse_condition (compare_code);
9092 if (compare_code != NIL)
9094 /* notl op1 (if needed)
9095 sarl $31, op1
9096 andl (cf-ct), op1
9097 addl ct, op1
9099 For x < 0 (resp. x <= -1) there will be no notl,
9100 so if possible swap the constants to get rid of the
9101 complement.
9102 True/false will be -1/0 while code below (store flag
9103 followed by decrement) is 0/-1, so the constants need
9104 to be exchanged once more. */
9106 if (compare_code == GE || !cf)
9108 code = reverse_condition (code);
9109 compare_code = LT;
9111 else
9113 HOST_WIDE_INT tmp = cf;
9114 cf = ct;
9115 ct = tmp;
9118 out = emit_store_flag (out, code, ix86_compare_op0,
9119 ix86_compare_op1, VOIDmode, 0, -1);
9121 else
9123 out = emit_store_flag (out, code, ix86_compare_op0,
9124 ix86_compare_op1, VOIDmode, 0, 1);
9126 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9127 out, 1, OPTAB_DIRECT);
9130 out = expand_simple_binop (mode, AND, out,
9131 gen_int_mode (cf - ct, mode),
9132 out, 1, OPTAB_DIRECT);
9133 if (ct)
9134 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9135 out, 1, OPTAB_DIRECT);
9136 if (out != operands[0])
9137 emit_move_insn (operands[0], out);
9139 return 1; /* DONE */
9143 if (!TARGET_CMOVE)
9145 /* Try a few more things with specific constants and a variable. */
9147 optab op;
9148 rtx var, orig_out, out, tmp;
9150 if (optimize_size)
9151 return 0; /* FAIL */
9153 /* If one of the two operands is an interesting constant, load a
9154 constant with the above and mask it in with a logical operation. */
9156 if (GET_CODE (operands[2]) == CONST_INT)
9158 var = operands[3];
9159 if (INTVAL (operands[2]) == 0)
9160 operands[3] = constm1_rtx, op = and_optab;
9161 else if (INTVAL (operands[2]) == -1)
9162 operands[3] = const0_rtx, op = ior_optab;
9163 else
9164 return 0; /* FAIL */
9166 else if (GET_CODE (operands[3]) == CONST_INT)
9168 var = operands[2];
9169 if (INTVAL (operands[3]) == 0)
9170 operands[2] = constm1_rtx, op = and_optab;
9171 else if (INTVAL (operands[3]) == -1)
9172 operands[2] = const0_rtx, op = ior_optab;
9173 else
9174 return 0; /* FAIL */
9176 else
9177 return 0; /* FAIL */
9179 orig_out = operands[0];
9180 tmp = gen_reg_rtx (mode);
9181 operands[0] = tmp;
9183 /* Recurse to get the constant loaded. */
9184 if (ix86_expand_int_movcc (operands) == 0)
9185 return 0; /* FAIL */
9187 /* Mask in the interesting variable. */
9188 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9189 OPTAB_WIDEN);
9190 if (out != orig_out)
9191 emit_move_insn (orig_out, out);
9193 return 1; /* DONE */
9197 * For comparison with above,
9199 * movl cf,dest
9200 * movl ct,tmp
9201 * cmpl op1,op2
9202 * cmovcc tmp,dest
9204 * Size 15.
9207 if (! nonimmediate_operand (operands[2], mode))
9208 operands[2] = force_reg (mode, operands[2]);
9209 if (! nonimmediate_operand (operands[3], mode))
9210 operands[3] = force_reg (mode, operands[3]);
9212 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9214 rtx tmp = gen_reg_rtx (mode);
9215 emit_move_insn (tmp, operands[3]);
9216 operands[3] = tmp;
9218 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9220 rtx tmp = gen_reg_rtx (mode);
9221 emit_move_insn (tmp, operands[2]);
9222 operands[2] = tmp;
9224 if (! register_operand (operands[2], VOIDmode)
9225 && ! register_operand (operands[3], VOIDmode))
9226 operands[2] = force_reg (mode, operands[2]);
9228 emit_insn (compare_seq);
9229 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9230 gen_rtx_IF_THEN_ELSE (mode,
9231 compare_op, operands[2],
9232 operands[3])));
9233 if (bypass_test)
9234 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9235 gen_rtx_IF_THEN_ELSE (mode,
9236 bypass_test,
9237 operands[3],
9238 operands[0])));
9239 if (second_test)
9240 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9241 gen_rtx_IF_THEN_ELSE (mode,
9242 second_test,
9243 operands[2],
9244 operands[0])));
9246 return 1; /* DONE */
9250 ix86_expand_fp_movcc (operands)
9251 rtx operands[];
9253 enum rtx_code code;
9254 rtx tmp;
9255 rtx compare_op, second_test, bypass_test;
9257 /* For SF/DFmode conditional moves based on comparisons
9258 in the same mode, we may want to use SSE min/max instructions. */
9259 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9260 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9261 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9262 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9263 && (!TARGET_IEEE_FP
9264 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9265 /* We may be called from the post-reload splitter. */
9266 && (!REG_P (operands[0])
9267 || SSE_REG_P (operands[0])
9268 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9270 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9271 code = GET_CODE (operands[1]);
9273 /* See if we have a (cross) match between the comparison operands and
9274 the conditional move operands. */
9275 if (rtx_equal_p (operands[2], op1))
9277 rtx tmp = op0;
9278 op0 = op1;
9279 op1 = tmp;
9280 code = reverse_condition_maybe_unordered (code);
9282 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9284 /* Check for min operation. */
9285 if (code == LT)
9287 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9288 if (memory_operand (op0, VOIDmode))
9289 op0 = force_reg (GET_MODE (operands[0]), op0);
9290 if (GET_MODE (operands[0]) == SFmode)
9291 emit_insn (gen_minsf3 (operands[0], op0, op1));
9292 else
9293 emit_insn (gen_mindf3 (operands[0], op0, op1));
9294 return 1;
9296 /* Check for max operation. */
9297 if (code == GT)
9299 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9300 if (memory_operand (op0, VOIDmode))
9301 op0 = force_reg (GET_MODE (operands[0]), op0);
9302 if (GET_MODE (operands[0]) == SFmode)
9303 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9304 else
9305 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9306 return 1;
9309 /* Force the condition to be an sse_comparison_operator. In case we are
9310 in non-IEEE mode, try to canonicalize the destination operand
9311 to be first in the comparison - this helps reload avoid extra
9312 moves. */
9313 if (!sse_comparison_operator (operands[1], VOIDmode)
9314 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9316 rtx tmp = ix86_compare_op0;
9317 ix86_compare_op0 = ix86_compare_op1;
9318 ix86_compare_op1 = tmp;
9319 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9320 VOIDmode, ix86_compare_op0,
9321 ix86_compare_op1);
9323 /* Similarly, try to make the result the first operand of the conditional
9324 move. We also don't support the NE comparison on SSE, so try to
9325 avoid it. */
9326 if ((rtx_equal_p (operands[0], operands[3])
9327 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9328 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9330 rtx tmp = operands[2];
9331 operands[2] = operands[3];
9332 operands[3] = tmp;
9333 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9334 (GET_CODE (operands[1])),
9335 VOIDmode, ix86_compare_op0,
9336 ix86_compare_op1);
9338 if (GET_MODE (operands[0]) == SFmode)
9339 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9340 operands[2], operands[3],
9341 ix86_compare_op0, ix86_compare_op1));
9342 else
9343 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9344 operands[2], operands[3],
9345 ix86_compare_op0, ix86_compare_op1));
9346 return 1;
9349 /* The floating point conditional move instructions don't directly
9350 support conditions resulting from a signed integer comparison. */
9352 code = GET_CODE (operands[1]);
9353 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9355 /* The floating point conditional move instructions don't directly
9356 support signed integer comparisons. */
9358 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9360 if (second_test != NULL || bypass_test != NULL)
9361 abort ();
9362 tmp = gen_reg_rtx (QImode);
9363 ix86_expand_setcc (code, tmp);
9364 code = NE;
9365 ix86_compare_op0 = tmp;
9366 ix86_compare_op1 = const0_rtx;
9367 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9369 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9371 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9372 emit_move_insn (tmp, operands[3]);
9373 operands[3] = tmp;
9375 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9377 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9378 emit_move_insn (tmp, operands[2]);
9379 operands[2] = tmp;
9382 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9383 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9384 compare_op,
9385 operands[2],
9386 operands[3])));
9387 if (bypass_test)
9388 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9389 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9390 bypass_test,
9391 operands[3],
9392 operands[0])));
9393 if (second_test)
9394 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9395 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9396 second_test,
9397 operands[2],
9398 operands[0])));
9400 return 1;
9403 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9404 works for floating point parameters and non-offsettable memories.
9405 For pushes, it returns just stack offsets; the values will be saved
9406 in the right order. At most three parts are generated. */
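/* For instance (a sketch of what the code below produces): on a 32-bit
   target a DFmode value splits into two SImode parts and an XFmode or
   TFmode value into three, while on a 64-bit target XFmode/TFmode
   splits into a DImode part plus an SImode part.  */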
9408 static int
9409 ix86_split_to_parts (operand, parts, mode)
9410 rtx operand;
9411 rtx *parts;
9412 enum machine_mode mode;
9414 int size;
9416 if (!TARGET_64BIT)
9417 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9418 else
9419 size = (GET_MODE_SIZE (mode) + 4) / 8;
9421 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9422 abort ();
9423 if (size < 2 || size > 3)
9424 abort ();
9426 /* Optimize constant pool references into immediates. This is used by fp
9427 moves, which force all constants to memory to allow combining. */
9428 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9430 rtx tmp = maybe_get_pool_constant (operand);
9431 if (tmp)
9432 operand = tmp;
9435 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9437 /* The only non-offsettable memories we handle are pushes. */
9438 if (! push_operand (operand, VOIDmode))
9439 abort ();
9441 operand = copy_rtx (operand);
9442 PUT_MODE (operand, Pmode);
9443 parts[0] = parts[1] = parts[2] = operand;
9445 else if (!TARGET_64BIT)
9447 if (mode == DImode)
9448 split_di (&operand, 1, &parts[0], &parts[1]);
9449 else
9451 if (REG_P (operand))
9453 if (!reload_completed)
9454 abort ();
9455 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9456 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9457 if (size == 3)
9458 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9460 else if (offsettable_memref_p (operand))
9462 operand = adjust_address (operand, SImode, 0);
9463 parts[0] = operand;
9464 parts[1] = adjust_address (operand, SImode, 4);
9465 if (size == 3)
9466 parts[2] = adjust_address (operand, SImode, 8);
9468 else if (GET_CODE (operand) == CONST_DOUBLE)
9470 REAL_VALUE_TYPE r;
9471 long l[4];
9473 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9474 switch (mode)
9476 case XFmode:
9477 case TFmode:
9478 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9479 parts[2] = gen_int_mode (l[2], SImode);
9480 break;
9481 case DFmode:
9482 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9483 break;
9484 default:
9485 abort ();
9487 parts[1] = gen_int_mode (l[1], SImode);
9488 parts[0] = gen_int_mode (l[0], SImode);
9490 else
9491 abort ();
9494 else
9496 if (mode == TImode)
9497 split_ti (&operand, 1, &parts[0], &parts[1]);
9498 if (mode == XFmode || mode == TFmode)
9500 if (REG_P (operand))
9502 if (!reload_completed)
9503 abort ();
9504 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9505 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9507 else if (offsettable_memref_p (operand))
9509 operand = adjust_address (operand, DImode, 0);
9510 parts[0] = operand;
9511 parts[1] = adjust_address (operand, SImode, 8);
9513 else if (GET_CODE (operand) == CONST_DOUBLE)
9515 REAL_VALUE_TYPE r;
9516 long l[3];
9518 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9519 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9520 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9521 if (HOST_BITS_PER_WIDE_INT >= 64)
9522 parts[0]
9523 = gen_int_mode
9524 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9525 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9526 DImode);
9527 else
9528 parts[0] = immed_double_const (l[0], l[1], DImode);
9529 parts[1] = gen_int_mode (l[2], SImode);
9531 else
9532 abort ();
9536 return size;
9539 /* Emit insns to perform a move or push of DI, DF, and XF values.
9540 Return false when normal moves are needed; true when all required
9541 insns have been emitted. Operands 2-4 contain the input values
9542 in the correct order; operands 5-7 contain the output values. */
9544 void
9545 ix86_split_long_move (operands)
9546 rtx operands[];
9548 rtx part[2][3];
9549 int nparts;
9550 int push = 0;
9551 int collisions = 0;
9552 enum machine_mode mode = GET_MODE (operands[0]);
9554 /* The DFmode expanders may ask us to move a double.
9555 For a 64bit target this is a single move. By hiding the fact
9556 here we simplify the i386.md splitters. */
9557 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9559 /* Optimize constant pool references into immediates. This is used by
9560 fp moves, which force all constants to memory to allow combining. */
9562 if (GET_CODE (operands[1]) == MEM
9563 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9564 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9565 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9566 if (push_operand (operands[0], VOIDmode))
9568 operands[0] = copy_rtx (operands[0]);
9569 PUT_MODE (operands[0], Pmode);
9571 else
9572 operands[0] = gen_lowpart (DImode, operands[0]);
9573 operands[1] = gen_lowpart (DImode, operands[1]);
9574 emit_move_insn (operands[0], operands[1]);
9575 return;
9578 /* The only non-offsettable memory we handle is push. */
9579 if (push_operand (operands[0], VOIDmode))
9580 push = 1;
9581 else if (GET_CODE (operands[0]) == MEM
9582 && ! offsettable_memref_p (operands[0]))
9583 abort ();
9585 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9586 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9588 /* When emitting a push, take care of source operands on the stack. */
9589 if (push && GET_CODE (operands[1]) == MEM
9590 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9592 if (nparts == 3)
9593 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9594 XEXP (part[1][2], 0));
9595 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9596 XEXP (part[1][1], 0));
9599 /* We need to do the copy in the right order in case an address register
9600 of the source overlaps the destination. */
9601 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9603 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9604 collisions++;
9605 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9606 collisions++;
9607 if (nparts == 3
9608 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9609 collisions++;
9611 /* Collision in the middle part can be handled by reordering. */
9612 if (collisions == 1 && nparts == 3
9613 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9615 rtx tmp;
9616 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9617 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9620 /* If there are more collisions, we can't handle them by reordering.
9621 Do an lea to the last part and use only one colliding move. */
9622 else if (collisions > 1)
9624 collisions = 1;
9625 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9626 XEXP (part[1][0], 0)));
9627 part[1][0] = change_address (part[1][0],
9628 TARGET_64BIT ? DImode : SImode,
9629 part[0][nparts - 1]);
9630 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9631 if (nparts == 3)
9632 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9636 if (push)
9638 if (!TARGET_64BIT)
9640 if (nparts == 3)
9642 /* We use only the first 12 bytes of the TFmode value, but for pushing
9643 we are required to adjust the stack as if we were pushing a real
9644 16-byte value. */
9645 if (mode == TFmode && !TARGET_64BIT)
9646 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9647 GEN_INT (-4)));
9648 emit_move_insn (part[0][2], part[1][2]);
9651 else
9653 /* In 64bit mode we don't have a 32bit push available. In case this is
9654 a register, it is OK - we will just use the larger counterpart. We also
9655 retype the memory - these come from an attempt to avoid the REX prefix
9656 on moving the second half of a TFmode value. */
9657 if (GET_MODE (part[1][1]) == SImode)
9659 if (GET_CODE (part[1][1]) == MEM)
9660 part[1][1] = adjust_address (part[1][1], DImode, 0);
9661 else if (REG_P (part[1][1]))
9662 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9663 else
9664 abort ();
9665 if (GET_MODE (part[1][0]) == SImode)
9666 part[1][0] = part[1][1];
9669 emit_move_insn (part[0][1], part[1][1]);
9670 emit_move_insn (part[0][0], part[1][0]);
9671 return;
9674 /* Choose the correct order so as not to overwrite the source before it is copied. */
9675 if ((REG_P (part[0][0])
9676 && REG_P (part[1][1])
9677 && (REGNO (part[0][0]) == REGNO (part[1][1])
9678 || (nparts == 3
9679 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9680 || (collisions > 0
9681 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9683 if (nparts == 3)
9685 operands[2] = part[0][2];
9686 operands[3] = part[0][1];
9687 operands[4] = part[0][0];
9688 operands[5] = part[1][2];
9689 operands[6] = part[1][1];
9690 operands[7] = part[1][0];
9692 else
9694 operands[2] = part[0][1];
9695 operands[3] = part[0][0];
9696 operands[5] = part[1][1];
9697 operands[6] = part[1][0];
9700 else
9702 if (nparts == 3)
9704 operands[2] = part[0][0];
9705 operands[3] = part[0][1];
9706 operands[4] = part[0][2];
9707 operands[5] = part[1][0];
9708 operands[6] = part[1][1];
9709 operands[7] = part[1][2];
9711 else
9713 operands[2] = part[0][0];
9714 operands[3] = part[0][1];
9715 operands[5] = part[1][0];
9716 operands[6] = part[1][1];
9719 emit_move_insn (operands[2], operands[5]);
9720 emit_move_insn (operands[3], operands[6]);
9721 if (nparts == 3)
9722 emit_move_insn (operands[4], operands[7]);
9724 return;
9727 void
9728 ix86_split_ashldi (operands, scratch)
9729 rtx *operands, scratch;
9731 rtx low[2], high[2];
9732 int count;
9734 if (GET_CODE (operands[2]) == CONST_INT)
9736 split_di (operands, 2, low, high);
9737 count = INTVAL (operands[2]) & 63;
9739 if (count >= 32)
9741 emit_move_insn (high[0], low[1]);
9742 emit_move_insn (low[0], const0_rtx);
9744 if (count > 32)
9745 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9747 else
9749 if (!rtx_equal_p (operands[0], operands[1]))
9750 emit_move_insn (operands[0], operands[1]);
9751 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9752 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9755 else
9757 if (!rtx_equal_p (operands[0], operands[1]))
9758 emit_move_insn (operands[0], operands[1]);
9760 split_di (operands, 1, low, high);
9762 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9763 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9765 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9767 if (! no_new_pseudos)
9768 scratch = force_reg (SImode, const0_rtx);
9769 else
9770 emit_move_insn (scratch, const0_rtx);
9772 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9773 scratch));
9775 else
9776 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9780 void
9781 ix86_split_ashrdi (operands, scratch)
9782 rtx *operands, scratch;
9784 rtx low[2], high[2];
9785 int count;
9787 if (GET_CODE (operands[2]) == CONST_INT)
9789 split_di (operands, 2, low, high);
9790 count = INTVAL (operands[2]) & 63;
9792 if (count >= 32)
9794 emit_move_insn (low[0], high[1]);
9796 if (! reload_completed)
9797 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9798 else
9800 emit_move_insn (high[0], low[0]);
9801 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9804 if (count > 32)
9805 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9807 else
9809 if (!rtx_equal_p (operands[0], operands[1]))
9810 emit_move_insn (operands[0], operands[1]);
9811 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9812 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9815 else
9817 if (!rtx_equal_p (operands[0], operands[1]))
9818 emit_move_insn (operands[0], operands[1]);
9820 split_di (operands, 1, low, high);
9822 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9823 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9825 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9827 if (! no_new_pseudos)
9828 scratch = gen_reg_rtx (SImode);
9829 emit_move_insn (scratch, high[0]);
9830 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9831 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9832 scratch));
9834 else
9835 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9839 void
9840 ix86_split_lshrdi (operands, scratch)
9841 rtx *operands, scratch;
9843 rtx low[2], high[2];
9844 int count;
9846 if (GET_CODE (operands[2]) == CONST_INT)
9848 split_di (operands, 2, low, high);
9849 count = INTVAL (operands[2]) & 63;
9851 if (count >= 32)
9853 emit_move_insn (low[0], high[1]);
9854 emit_move_insn (high[0], const0_rtx);
9856 if (count > 32)
9857 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9859 else
9861 if (!rtx_equal_p (operands[0], operands[1]))
9862 emit_move_insn (operands[0], operands[1]);
9863 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9864 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9867 else
9869 if (!rtx_equal_p (operands[0], operands[1]))
9870 emit_move_insn (operands[0], operands[1]);
9872 split_di (operands, 1, low, high);
9874 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9875 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9877 /* Heh. By reversing the arguments, we can reuse this pattern. */
9878 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9880 if (! no_new_pseudos)
9881 scratch = force_reg (SImode, const0_rtx);
9882 else
9883 emit_move_insn (scratch, const0_rtx);
9885 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9886 scratch));
9888 else
9889 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9893 /* Helper function for the string operations below. Test whether VARIABLE
9894 has the bits selected by VALUE clear; if so, jump to the returned label. */
9895 static rtx
9896 ix86_expand_aligntest (variable, value)
9897 rtx variable;
9898 int value;
9900 rtx label = gen_label_rtx ();
9901 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9902 if (GET_MODE (variable) == DImode)
9903 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9904 else
9905 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9906 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9907 1, label);
9908 return label;
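/* The typical use, as in the string expanders below, is (a sketch):

     label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);

   so the single-byte fix-up is skipped when the destination is already
   2-byte aligned.  */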
9911 /* Adjust COUNTREG by VALUE. */
9912 static void
9913 ix86_adjust_counter (countreg, value)
9914 rtx countreg;
9915 HOST_WIDE_INT value;
9917 if (GET_MODE (countreg) == DImode)
9918 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9919 else
9920 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9923 /* Zero extend the possibly SImode EXP to a Pmode register. */
9925 ix86_zero_extend_to_Pmode (exp)
9926 rtx exp;
9928 rtx r;
9929 if (GET_MODE (exp) == VOIDmode)
9930 return force_reg (Pmode, exp);
9931 if (GET_MODE (exp) == Pmode)
9932 return copy_to_mode_reg (Pmode, exp);
9933 r = gen_reg_rtx (Pmode);
9934 emit_insn (gen_zero_extendsidi2 (r, exp));
9935 return r;
9938 /* Expand string move (memcpy) operation. Use i386 string operations when
9939 profitable. expand_clrstr contains similar code. */
9941 ix86_expand_movstr (dst, src, count_exp, align_exp)
9942 rtx dst, src, count_exp, align_exp;
9944 rtx srcreg, destreg, countreg;
9945 enum machine_mode counter_mode;
9946 HOST_WIDE_INT align = 0;
9947 unsigned HOST_WIDE_INT count = 0;
9948 rtx insns;
9950 start_sequence ();
9952 if (GET_CODE (align_exp) == CONST_INT)
9953 align = INTVAL (align_exp);
9955 /* This simple hack avoids all inlining code and simplifies code below. */
9956 if (!TARGET_ALIGN_STRINGOPS)
9957 align = 64;
9959 if (GET_CODE (count_exp) == CONST_INT)
9960 count = INTVAL (count_exp);
9962 /* Figure out the proper mode for the counter. For 32 bits it is always
9963 SImode; for 64 bits use SImode when possible, otherwise DImode.
9964 Set count to the number of bytes copied when known at compile time. */
9965 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9966 || x86_64_zero_extended_value (count_exp))
9967 counter_mode = SImode;
9968 else
9969 counter_mode = DImode;
9971 if (counter_mode != SImode && counter_mode != DImode)
9972 abort ();
9974 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9975 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9977 emit_insn (gen_cld ());
9979 /* When optimizing for size, emit a simple rep ; movsb instruction for
9980 counts not divisible by 4. */
9982 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9984 countreg = ix86_zero_extend_to_Pmode (count_exp);
9985 if (TARGET_64BIT)
9986 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9987 destreg, srcreg, countreg));
9988 else
9989 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9990 destreg, srcreg, countreg));
9993 /* For constant aligned (or small unaligned) copies use rep movsl
9994 followed by code copying the rest. For PentiumPro ensure 8 byte
9995 alignment to allow rep movsl acceleration. */
9997 else if (count != 0
9998 && (align >= 8
9999 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10000 || optimize_size || count < (unsigned int) 64))
10002 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10003 if (count & ~(size - 1))
10005 countreg = copy_to_mode_reg (counter_mode,
10006 GEN_INT ((count >> (size == 4 ? 2 : 3))
10007 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10008 countreg = ix86_zero_extend_to_Pmode (countreg);
10009 if (size == 4)
10011 if (TARGET_64BIT)
10012 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10013 destreg, srcreg, countreg));
10014 else
10015 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10016 destreg, srcreg, countreg));
10018 else
10019 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10020 destreg, srcreg, countreg));
10022 if (size == 8 && (count & 0x04))
10023 emit_insn (gen_strmovsi (destreg, srcreg));
10024 if (count & 0x02)
10025 emit_insn (gen_strmovhi (destreg, srcreg));
10026 if (count & 0x01)
10027 emit_insn (gen_strmovqi (destreg, srcreg));
10029 /* The generic code based on the glibc implementation:
10030 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10031 allowing accelerated copying there)
10032 - copy the data using rep movsl
10033 - copy the rest. */
10034 else
10036 rtx countreg2;
10037 rtx label = NULL;
10038 int desired_alignment = (TARGET_PENTIUMPRO
10039 && (count == 0 || count >= (unsigned int) 260)
10040 ? 8 : UNITS_PER_WORD);
10042 /* In case we don't know anything about the alignment, default to the
10043 library version, since it is usually equally fast and results in
10044 shorter code. */
10045 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10047 end_sequence ();
10048 return 0;
10051 if (TARGET_SINGLE_STRINGOP)
10052 emit_insn (gen_cld ());
10054 countreg2 = gen_reg_rtx (Pmode);
10055 countreg = copy_to_mode_reg (counter_mode, count_exp);
10057 /* We don't use loops to align the destination and to copy parts smaller
10058 than 4 bytes, because gcc is able to optimize such code better (in
10059 the case where the destination or the count really is aligned, gcc is
10060 often able to predict the branches) and also it is friendlier to the
10061 hardware branch prediction.
10063 Using loops is beneficial for the generic case, because we can
10064 handle small counts using the loops. Many CPUs (such as the Athlon)
10065 have large REP prefix setup costs.
10067 This is quite costly. Maybe we can revisit this decision later or
10068 add some customizability to this code. */
10070 if (count == 0 && align < desired_alignment)
10072 label = gen_label_rtx ();
10073 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10074 LEU, 0, counter_mode, 1, label);
10076 if (align <= 1)
10078 rtx label = ix86_expand_aligntest (destreg, 1);
10079 emit_insn (gen_strmovqi (destreg, srcreg));
10080 ix86_adjust_counter (countreg, 1);
10081 emit_label (label);
10082 LABEL_NUSES (label) = 1;
10084 if (align <= 2)
10086 rtx label = ix86_expand_aligntest (destreg, 2);
10087 emit_insn (gen_strmovhi (destreg, srcreg));
10088 ix86_adjust_counter (countreg, 2);
10089 emit_label (label);
10090 LABEL_NUSES (label) = 1;
10092 if (align <= 4 && desired_alignment > 4)
10094 rtx label = ix86_expand_aligntest (destreg, 4);
10095 emit_insn (gen_strmovsi (destreg, srcreg));
10096 ix86_adjust_counter (countreg, 4);
10097 emit_label (label);
10098 LABEL_NUSES (label) = 1;
10101 if (label && desired_alignment > 4 && !TARGET_64BIT)
10103 emit_label (label);
10104 LABEL_NUSES (label) = 1;
10105 label = NULL_RTX;
10107 if (!TARGET_SINGLE_STRINGOP)
10108 emit_insn (gen_cld ());
10109 if (TARGET_64BIT)
10111 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10112 GEN_INT (3)));
10113 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10114 destreg, srcreg, countreg2));
10116 else
10118 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10119 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10120 destreg, srcreg, countreg2));
10123 if (label)
10125 emit_label (label);
10126 LABEL_NUSES (label) = 1;
10128 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10129 emit_insn (gen_strmovsi (destreg, srcreg));
10130 if ((align <= 4 || count == 0) && TARGET_64BIT)
10132 rtx label = ix86_expand_aligntest (countreg, 4);
10133 emit_insn (gen_strmovsi (destreg, srcreg));
10134 emit_label (label);
10135 LABEL_NUSES (label) = 1;
10137 if (align > 2 && count != 0 && (count & 2))
10138 emit_insn (gen_strmovhi (destreg, srcreg));
10139 if (align <= 2 || count == 0)
10141 rtx label = ix86_expand_aligntest (countreg, 2);
10142 emit_insn (gen_strmovhi (destreg, srcreg));
10143 emit_label (label);
10144 LABEL_NUSES (label) = 1;
10146 if (align > 1 && count != 0 && (count & 1))
10147 emit_insn (gen_strmovqi (destreg, srcreg));
10148 if (align <= 1 || count == 0)
10150 rtx label = ix86_expand_aligntest (countreg, 1);
10151 emit_insn (gen_strmovqi (destreg, srcreg));
10152 emit_label (label);
10153 LABEL_NUSES (label) = 1;
10157 insns = get_insns ();
10158 end_sequence ();
10160 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10161 emit_insn (insns);
10162 return 1;
10165 /* Expand string clear operation (bzero). Use i386 string operations when
10166 profitable. expand_movstr contains similar code. */
10168 ix86_expand_clrstr (src, count_exp, align_exp)
10169 rtx src, count_exp, align_exp;
10171 rtx destreg, zeroreg, countreg;
10172 enum machine_mode counter_mode;
10173 HOST_WIDE_INT align = 0;
10174 unsigned HOST_WIDE_INT count = 0;
10176 if (GET_CODE (align_exp) == CONST_INT)
10177 align = INTVAL (align_exp);
10179 /* This simple hack avoids all inlining code and simplifies code below. */
10180 if (!TARGET_ALIGN_STRINGOPS)
10181 align = 32;
10183 if (GET_CODE (count_exp) == CONST_INT)
10184 count = INTVAL (count_exp);
10185 /* Figure out the proper mode for the counter. For 32 bits it is always
10186 SImode; for 64 bits use SImode when possible, otherwise DImode.
10187 Set count to the number of bytes copied when known at compile time. */
10188 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10189 || x86_64_zero_extended_value (count_exp))
10190 counter_mode = SImode;
10191 else
10192 counter_mode = DImode;
10194 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10196 emit_insn (gen_cld ());
10198 /* When optimizing for size emit a simple rep ; stosb instruction for
10199 counts not divisible by 4. */
10201 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10203 countreg = ix86_zero_extend_to_Pmode (count_exp);
10204 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10205 if (TARGET_64BIT)
10206 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10207 destreg, countreg));
10208 else
10209 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10210 destreg, countreg));
10212 else if (count != 0
10213 && (align >= 8
10214 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10215 || optimize_size || count < (unsigned int) 64))
10217 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10218 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
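/* The bulk is cleared SIZE bytes at a time by the rep stos below; the
   trailing count & (SIZE - 1) bytes are stored individually afterwards.  */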
10219 if (count & ~(size - 1))
10221 countreg = copy_to_mode_reg (counter_mode,
10222 GEN_INT ((count >> (size == 4 ? 2 : 3))
10223 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10224 countreg = ix86_zero_extend_to_Pmode (countreg);
10225 if (size == 4)
10227 if (TARGET_64BIT)
10228 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10229 destreg, countreg));
10230 else
10231 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10232 destreg, countreg));
10234 else
10235 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10236 destreg, countreg));
10238 if (size == 8 && (count & 0x04))
10239 emit_insn (gen_strsetsi (destreg,
10240 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10241 if (count & 0x02)
10242 emit_insn (gen_strsethi (destreg,
10243 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10244 if (count & 0x01)
10245 emit_insn (gen_strsetqi (destreg,
10246 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10248 else
10250 rtx countreg2;
10251 rtx label = NULL;
10252 /* Compute desired alignment of the string operation. */
10253 int desired_alignment = (TARGET_PENTIUMPRO
10254 && (count == 0 || count >= (unsigned int) 260)
10255 ? 8 : UNITS_PER_WORD);
10257 /* In case we don't know anything about the alignment, default to
10258 the library version, since it is usually equally fast and results in
10259 shorter code. */
10260 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10261 return 0;
10263 if (TARGET_SINGLE_STRINGOP)
10264 emit_insn (gen_cld ());
10266 countreg2 = gen_reg_rtx (Pmode);
10267 countreg = copy_to_mode_reg (counter_mode, count_exp);
10268 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10270 if (count == 0 && align < desired_alignment)
10272 label = gen_label_rtx ();
10273 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10274 LEU, 0, counter_mode, 1, label);
10276 if (align <= 1)
10278 rtx label = ix86_expand_aligntest (destreg, 1);
10279 emit_insn (gen_strsetqi (destreg,
10280 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10281 ix86_adjust_counter (countreg, 1);
10282 emit_label (label);
10283 LABEL_NUSES (label) = 1;
10285 if (align <= 2)
10287 rtx label = ix86_expand_aligntest (destreg, 2);
10288 emit_insn (gen_strsethi (destreg,
10289 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10290 ix86_adjust_counter (countreg, 2);
10291 emit_label (label);
10292 LABEL_NUSES (label) = 1;
10294 if (align <= 4 && desired_alignment > 4)
10296 rtx label = ix86_expand_aligntest (destreg, 4);
10297 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10298 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10299 : zeroreg)));
10300 ix86_adjust_counter (countreg, 4);
10301 emit_label (label);
10302 LABEL_NUSES (label) = 1;
10305 if (label && desired_alignment > 4 && !TARGET_64BIT)
10307 emit_label (label);
10308 LABEL_NUSES (label) = 1;
10309 label = NULL_RTX;
10312 if (!TARGET_SINGLE_STRINGOP)
10313 emit_insn (gen_cld ());
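/* As in the copy case above, the count is shifted so that rep stos stores
   whole words; the trailing bytes are left to the epilogue below.  */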
10314 if (TARGET_64BIT)
10316 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10317 GEN_INT (3)));
10318 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10319 destreg, countreg2));
10321 else
10323 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10324 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10325 destreg, countreg2));
10327 if (label)
10329 emit_label (label);
10330 LABEL_NUSES (label) = 1;
10333 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10334 emit_insn (gen_strsetsi (destreg,
10335 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10336 if (TARGET_64BIT && (align <= 4 || count == 0))
10338 rtx label = ix86_expand_aligntest (countreg, 4);
10339 emit_insn (gen_strsetsi (destreg,
10340 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10341 emit_label (label);
10342 LABEL_NUSES (label) = 1;
10344 if (align > 2 && count != 0 && (count & 2))
10345 emit_insn (gen_strsethi (destreg,
10346 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10347 if (align <= 2 || count == 0)
10349 rtx label = ix86_expand_aligntest (countreg, 2);
10350 emit_insn (gen_strsethi (destreg,
10351 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10352 emit_label (label);
10353 LABEL_NUSES (label) = 1;
10355 if (align > 1 && count != 0 && (count & 1))
10356 emit_insn (gen_strsetqi (destreg,
10357 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10358 if (align <= 1 || count == 0)
10360 rtx label = ix86_expand_aligntest (countreg, 1);
10361 emit_insn (gen_strsetqi (destreg,
10362 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10363 emit_label (label);
10364 LABEL_NUSES (label) = 1;
10367 return 1;
10369 /* Expand strlen. */
10371 ix86_expand_strlen (out, src, eoschar, align)
10372 rtx out, src, eoschar, align;
10374 rtx addr, scratch1, scratch2, scratch3, scratch4;
10376 /* The generic case of the strlen expander is long.  Avoid expanding it
10377 unless TARGET_INLINE_ALL_STRINGOPS. */
10379 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10380 && !TARGET_INLINE_ALL_STRINGOPS
10381 && !optimize_size
10382 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10383 return 0;
10385 addr = force_reg (Pmode, XEXP (src, 0));
10386 scratch1 = gen_reg_rtx (Pmode);
10388 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10389 && !optimize_size)
10391 /* Well it seems that some optimizer does not combine a call like
10392 foo(strlen(bar), strlen(bar));
10393 when the move and the subtraction are done here.  It does calculate
10394 the length just once when these instructions are done inside of
10395 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10396 often used and I use one fewer register for the lifetime of
10397 output_strlen_unroll() this is better. */
10399 emit_move_insn (out, addr);
10401 ix86_expand_strlensi_unroll_1 (out, align);
10403 /* strlensi_unroll_1 returns the address of the zero at the end of
10404 the string, like memchr(), so compute the length by subtracting
10405 the start address. */
10406 if (TARGET_64BIT)
10407 emit_insn (gen_subdi3 (out, out, addr));
10408 else
10409 emit_insn (gen_subsi3 (out, out, addr));
10411 else
10413 scratch2 = gen_reg_rtx (Pmode);
10414 scratch3 = gen_reg_rtx (Pmode);
10415 scratch4 = force_reg (Pmode, constm1_rtx);
10417 emit_move_insn (scratch3, addr);
10418 eoschar = force_reg (QImode, eoschar);
10420 emit_insn (gen_cld ());
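/* A sketch of what the sequence below computes: with the count register
   preloaded with -1, repnz scasb leaves it holding -(len + 2), since it is
   decremented once per byte scanned, including the terminator.  Hence
   ~count - 1 == len, which is what the one's complement and the add of -1
   reconstruct in OUT.  */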
10421 if (TARGET_64BIT)
10423 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10424 align, scratch4, scratch3));
10425 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10426 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10428 else
10430 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10431 align, scratch4, scratch3));
10432 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10433 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10436 return 1;
10439 /* Expand the appropriate insns for doing strlen if not just doing
10440 repnz; scasb
10442 out = result, initialized with the start address
10443 align_rtx = alignment of the address.
10444 scratch = scratch register, initialized with the start address when
10445 not aligned, otherwise undefined
10447 This is just the body.  It needs the initializations mentioned above and
10448 some address computation at the end.  These things are done in i386.md. */
10450 static void
10451 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10452 rtx out, align_rtx;
10454 int align;
10455 rtx tmp;
10456 rtx align_2_label = NULL_RTX;
10457 rtx align_3_label = NULL_RTX;
10458 rtx align_4_label = gen_label_rtx ();
10459 rtx end_0_label = gen_label_rtx ();
10460 rtx mem;
10461 rtx tmpreg = gen_reg_rtx (SImode);
10462 rtx scratch = gen_reg_rtx (SImode);
10464 align = 0;
10465 if (GET_CODE (align_rtx) == CONST_INT)
10466 align = INTVAL (align_rtx);
10468 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10470 /* Is there a known alignment and is it less than 4? */
10471 if (align < 4)
10473 rtx scratch1 = gen_reg_rtx (Pmode);
10474 emit_move_insn (scratch1, out);
10475 /* Is there a known alignment and is it not 2? */
10476 if (align != 2)
10478 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10479 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10481 /* Leave just the 3 lower bits. */
10482 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10483 NULL_RTX, 0, OPTAB_WIDEN);
10485 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10486 Pmode, 1, align_4_label);
10487 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10488 Pmode, 1, align_2_label);
10489 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10490 Pmode, 1, align_3_label);
10492 else
10494 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10495 check whether it is aligned to a 4-byte boundary. */
10497 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10498 NULL_RTX, 0, OPTAB_WIDEN);
10500 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10501 Pmode, 1, align_4_label);
10504 mem = gen_rtx_MEM (QImode, out);
10506 /* Now compare the bytes. */
10508 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10509 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10510 QImode, 1, end_0_label);
10512 /* Increment the address. */
10513 if (TARGET_64BIT)
10514 emit_insn (gen_adddi3 (out, out, const1_rtx));
10515 else
10516 emit_insn (gen_addsi3 (out, out, const1_rtx));
10518 /* Not needed with an alignment of 2 */
10519 if (align != 2)
10521 emit_label (align_2_label);
10523 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10524 end_0_label);
10526 if (TARGET_64BIT)
10527 emit_insn (gen_adddi3 (out, out, const1_rtx));
10528 else
10529 emit_insn (gen_addsi3 (out, out, const1_rtx));
10531 emit_label (align_3_label);
10534 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10535 end_0_label);
10537 if (TARGET_64BIT)
10538 emit_insn (gen_adddi3 (out, out, const1_rtx));
10539 else
10540 emit_insn (gen_addsi3 (out, out, const1_rtx));
10543 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
10544 align this loop: doing so only makes the program larger and does not
10545 speed it up. */
10546 emit_label (align_4_label);
10548 mem = gen_rtx_MEM (SImode, out);
10549 emit_move_insn (scratch, mem);
10550 if (TARGET_64BIT)
10551 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10552 else
10553 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10555 /* This formula yields a nonzero result iff one of the bytes is zero.
10556 This saves three branches inside the loop and many cycles. */
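/* Illustration (values chosen arbitrarily): for scratch = 0x40410043 the
   zero byte sits in bits 8-15.  scratch - 0x01010101 = 0x3f3fff42,
   ~scratch = 0xbfbeffbc, and masking their AND with 0x80808080 leaves
   exactly 0x00008000, flagging that byte; a word containing no zero byte
   yields 0 here.  */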
10558 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10559 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10560 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10561 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10562 gen_int_mode (0x80808080, SImode)));
10563 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10564 align_4_label);
10566 if (TARGET_CMOVE)
10568 rtx reg = gen_reg_rtx (SImode);
10569 rtx reg2 = gen_reg_rtx (Pmode);
10570 emit_move_insn (reg, tmpreg);
10571 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10573 /* If zero is not in the first two bytes, move two bytes forward. */
10574 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10575 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10576 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10577 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10578 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10579 reg,
10580 tmpreg)));
10581 /* Emit lea manually to avoid clobbering of flags. */
10582 emit_insn (gen_rtx_SET (SImode, reg2,
10583 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10585 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10586 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10587 emit_insn (gen_rtx_SET (VOIDmode, out,
10588 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10589 reg2,
10590 out)));
10593 else
10595 rtx end_2_label = gen_label_rtx ();
10596 /* Is zero in the first two bytes? */
10598 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10599 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10600 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10601 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10602 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10603 pc_rtx);
10604 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10605 JUMP_LABEL (tmp) = end_2_label;
10607 /* Not in the first two. Move two bytes forward. */
10608 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10609 if (TARGET_64BIT)
10610 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10611 else
10612 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10614 emit_label (end_2_label);
10618 /* Avoid branch in fixing the byte. */
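/* At this point OUT is 4 bytes past the two-byte pair that contains the
   zero byte, and bit 7 of the low byte of TMPREG is set iff the zero byte
   is the first byte of that pair.  Doubling the low byte moves that bit
   into the carry flag, so the subtract-with-borrow of 3 below leaves OUT
   pointing exactly at the terminating zero.  */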
10619 tmpreg = gen_lowpart (QImode, tmpreg);
10620 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10621 if (TARGET_64BIT)
10622 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10623 else
10624 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10626 emit_label (end_0_label);
10629 void
10630 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10631 rtx retval, fnaddr, callarg1, callarg2, pop;
10633 rtx use = NULL, call;
10635 if (pop == const0_rtx)
10636 pop = NULL;
10637 if (TARGET_64BIT && pop)
10638 abort ();
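/* POP, when nonzero, is the number of argument bytes the callee pops
   (e.g. for stdcall-style callee-popped arguments); it is attached below
   as an extra SET of the stack pointer in a PARALLEL with the call.  The
   64-bit ABI has no callee-popped arguments, hence the abort above.  */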
10640 #if TARGET_MACHO
10641 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10642 fnaddr = machopic_indirect_call_target (fnaddr);
10643 #else
10644 /* Static functions and indirect calls don't need the pic register. */
10645 if (! TARGET_64BIT && flag_pic
10646 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10647 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10648 use_reg (&use, pic_offset_table_rtx);
10650 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10652 rtx al = gen_rtx_REG (QImode, 0);
10653 emit_move_insn (al, callarg2);
10654 use_reg (&use, al);
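/* CALLARG2 is non-negative only for 64-bit varargs calls: the x86-64 ABI
   expects %al to hold (an upper bound on) the number of SSE registers used
   for the arguments, so the value is moved into %al and recorded as used.  */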
10656 #endif /* TARGET_MACHO */
10658 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10660 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10661 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10664 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10665 if (retval)
10666 call = gen_rtx_SET (VOIDmode, retval, call);
10667 if (pop)
10669 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10670 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10671 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10674 call = emit_call_insn (call);
10675 if (use)
10676 CALL_INSN_FUNCTION_USAGE (call) = use;
10680 /* Clear stack slot assignments remembered from previous functions.
10681 This is called from INIT_EXPANDERS once before RTL is emitted for each
10682 function. */
10684 static struct machine_function *
10685 ix86_init_machine_status ()
10687 return ggc_alloc_cleared (sizeof (struct machine_function));
10690 /* Return a MEM corresponding to a stack slot with mode MODE.
10691 Allocate a new slot if necessary.
10693 The RTL for a function can have several slots available: N is
10694 which slot to use. */
10697 assign_386_stack_local (mode, n)
10698 enum machine_mode mode;
10699 int n;
10701 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10702 abort ();
10704 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10705 ix86_stack_locals[(int) mode][n]
10706 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10708 return ix86_stack_locals[(int) mode][n];
10711 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10713 static GTY(()) rtx ix86_tls_symbol;
10715 ix86_tls_get_addr ()
10718 if (!ix86_tls_symbol)
10720 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10721 ? "___tls_get_addr"
10722 : "__tls_get_addr"));
10725 return ix86_tls_symbol;
10728 /* Calculate the length of the memory address in the instruction
10729 encoding. Does not include the one-byte modrm, opcode, or prefix. */
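/* For example, as computed below: (%eax) -> 0, (%esp) -> 1 (needs a SIB
   byte), 8(%ebx) -> 1 (disp8), a bare symbol -> 4 (disp32), and
   8(%ebx,%esi,4) -> 2 (SIB byte plus disp8).  */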
10731 static int
10732 memory_address_length (addr)
10733 rtx addr;
10735 struct ix86_address parts;
10736 rtx base, index, disp;
10737 int len;
10739 if (GET_CODE (addr) == PRE_DEC
10740 || GET_CODE (addr) == POST_INC
10741 || GET_CODE (addr) == PRE_MODIFY
10742 || GET_CODE (addr) == POST_MODIFY)
10743 return 0;
10745 if (! ix86_decompose_address (addr, &parts))
10746 abort ();
10748 base = parts.base;
10749 index = parts.index;
10750 disp = parts.disp;
10751 len = 0;
10753 /* Register Indirect. */
10754 if (base && !index && !disp)
10756 /* Special cases: ebp and esp need the two-byte modrm form. */
10757 if (addr == stack_pointer_rtx
10758 || addr == arg_pointer_rtx
10759 || addr == frame_pointer_rtx
10760 || addr == hard_frame_pointer_rtx)
10761 len = 1;
10764 /* Direct Addressing. */
10765 else if (disp && !base && !index)
10766 len = 4;
10768 else
10770 /* Find the length of the displacement constant. */
10771 if (disp)
10773 if (GET_CODE (disp) == CONST_INT
10774 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10775 len = 1;
10776 else
10777 len = 4;
10780 /* An index requires the two-byte modrm form. */
10781 if (index)
10782 len += 1;
10785 return len;
10788 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10789 is set, expect that the insn has an 8-bit immediate alternative. */
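/* For instance, with SHORTFORM set, "addl $4, %eax" counts only one byte
   of immediate because 4 fits the signed 8-bit 'K' range, whereas
   "addl $1000, %eax" counts the full four bytes.  */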
10791 ix86_attr_length_immediate_default (insn, shortform)
10792 rtx insn;
10793 int shortform;
10795 int len = 0;
10796 int i;
10797 extract_insn_cached (insn);
10798 for (i = recog_data.n_operands - 1; i >= 0; --i)
10799 if (CONSTANT_P (recog_data.operand[i]))
10801 if (len)
10802 abort ();
10803 if (shortform
10804 && GET_CODE (recog_data.operand[i]) == CONST_INT
10805 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10806 len = 1;
10807 else
10809 switch (get_attr_mode (insn))
10811 case MODE_QI:
10812 len+=1;
10813 break;
10814 case MODE_HI:
10815 len+=2;
10816 break;
10817 case MODE_SI:
10818 len+=4;
10819 break;
10820 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10821 case MODE_DI:
10822 len+=4;
10823 break;
10824 default:
10825 fatal_insn ("unknown insn mode", insn);
10829 return len;
10831 /* Compute default value for "length_address" attribute. */
10833 ix86_attr_length_address_default (insn)
10834 rtx insn;
10836 int i;
10837 extract_insn_cached (insn);
10838 for (i = recog_data.n_operands - 1; i >= 0; --i)
10839 if (GET_CODE (recog_data.operand[i]) == MEM)
10841 return memory_address_length (XEXP (recog_data.operand[i], 0));
10842 break;
10844 return 0;
10847 /* Return the maximum number of instructions a cpu can issue. */
10849 static int
10850 ix86_issue_rate ()
10852 switch (ix86_cpu)
10854 case PROCESSOR_PENTIUM:
10855 case PROCESSOR_K6:
10856 return 2;
10858 case PROCESSOR_PENTIUMPRO:
10859 case PROCESSOR_PENTIUM4:
10860 case PROCESSOR_ATHLON:
10861 return 3;
10863 default:
10864 return 1;
10868 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10869 by DEP_INSN and nothing else set by DEP_INSN. */
10871 static int
10872 ix86_flags_dependant (insn, dep_insn, insn_type)
10873 rtx insn, dep_insn;
10874 enum attr_type insn_type;
10876 rtx set, set2;
10878 /* Simplify the test for uninteresting insns. */
10879 if (insn_type != TYPE_SETCC
10880 && insn_type != TYPE_ICMOV
10881 && insn_type != TYPE_FCMOV
10882 && insn_type != TYPE_IBR)
10883 return 0;
10885 if ((set = single_set (dep_insn)) != 0)
10887 set = SET_DEST (set);
10888 set2 = NULL_RTX;
10890 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10891 && XVECLEN (PATTERN (dep_insn), 0) == 2
10892 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10893 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10895 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10896 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10898 else
10899 return 0;
10901 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10902 return 0;
10904 /* This test is true if the dependent insn reads the flags but
10905 not any other potentially set register. */
10906 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10907 return 0;
10909 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10910 return 0;
10912 return 1;
10915 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10916 address with operands set by DEP_INSN. */
10918 static int
10919 ix86_agi_dependant (insn, dep_insn, insn_type)
10920 rtx insn, dep_insn;
10921 enum attr_type insn_type;
10923 rtx addr;
10925 if (insn_type == TYPE_LEA
10926 && TARGET_PENTIUM)
10928 addr = PATTERN (insn);
10929 if (GET_CODE (addr) == SET)
10931 else if (GET_CODE (addr) == PARALLEL
10932 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10933 addr = XVECEXP (addr, 0, 0);
10934 else
10935 abort ();
10936 addr = SET_SRC (addr);
10938 else
10940 int i;
10941 extract_insn_cached (insn);
10942 for (i = recog_data.n_operands - 1; i >= 0; --i)
10943 if (GET_CODE (recog_data.operand[i]) == MEM)
10945 addr = XEXP (recog_data.operand[i], 0);
10946 goto found;
10948 return 0;
10949 found:;
10952 return modified_in_p (addr, dep_insn);
10955 static int
10956 ix86_adjust_cost (insn, link, dep_insn, cost)
10957 rtx insn, link, dep_insn;
10958 int cost;
10960 enum attr_type insn_type, dep_insn_type;
10961 enum attr_memory memory, dep_memory;
10962 rtx set, set2;
10963 int dep_insn_code_number;
10965 /* Anti and output dependencies have zero cost on all CPUs. */
10966 if (REG_NOTE_KIND (link) != 0)
10967 return 0;
10969 dep_insn_code_number = recog_memoized (dep_insn);
10971 /* If we can't recognize the insns, we can't really do anything. */
10972 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10973 return cost;
10975 insn_type = get_attr_type (insn);
10976 dep_insn_type = get_attr_type (dep_insn);
10978 switch (ix86_cpu)
10980 case PROCESSOR_PENTIUM:
10981 /* Address Generation Interlock adds a cycle of latency. */
10982 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10983 cost += 1;
10985 /* ??? Compares pair with jump/setcc. */
10986 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10987 cost = 0;
10989 /* Floating point stores require the value to be ready one cycle earlier. */
10990 if (insn_type == TYPE_FMOV
10991 && get_attr_memory (insn) == MEMORY_STORE
10992 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10993 cost += 1;
10994 break;
10996 case PROCESSOR_PENTIUMPRO:
10997 memory = get_attr_memory (insn);
10998 dep_memory = get_attr_memory (dep_insn);
11000 /* Since we can't represent delayed latencies of load+operation,
11001 increase the cost here for non-imov insns. */
11002 if (dep_insn_type != TYPE_IMOV
11003 && dep_insn_type != TYPE_FMOV
11004 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11005 cost += 1;
11007 /* INT->FP conversion is expensive. */
11008 if (get_attr_fp_int_src (dep_insn))
11009 cost += 5;
11011 /* There is one cycle extra latency between an FP op and a store. */
11012 if (insn_type == TYPE_FMOV
11013 && (set = single_set (dep_insn)) != NULL_RTX
11014 && (set2 = single_set (insn)) != NULL_RTX
11015 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11016 && GET_CODE (SET_DEST (set2)) == MEM)
11017 cost += 1;
11019 /* Show ability of reorder buffer to hide latency of load by executing
11020 in parallel with previous instruction in case
11021 previous instruction is not needed to compute the address. */
11022 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11023 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11025 /* Claim moves to take one cycle, as the core can issue one load
11026 at a time and the next load can start a cycle later. */
11027 if (dep_insn_type == TYPE_IMOV
11028 || dep_insn_type == TYPE_FMOV)
11029 cost = 1;
11030 else if (cost > 1)
11031 cost--;
11033 break;
11035 case PROCESSOR_K6:
11036 memory = get_attr_memory (insn);
11037 dep_memory = get_attr_memory (dep_insn);
11038 /* The esp dependency is resolved before the instruction is really
11039 finished. */
11040 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11041 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11042 return 1;
11044 /* Since we can't represent delayed latencies of load+operation,
11045 increase the cost here for non-imov insns. */
11046 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11047 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11049 /* INT->FP conversion is expensive. */
11050 if (get_attr_fp_int_src (dep_insn))
11051 cost += 5;
11053 /* Show ability of reorder buffer to hide latency of load by executing
11054 in parallel with previous instruction in case
11055 previous instruction is not needed to compute the address. */
11056 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11057 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11059 /* Claim moves to take one cycle, as the core can issue one load
11060 at a time and the next load can start a cycle later. */
11061 if (dep_insn_type == TYPE_IMOV
11062 || dep_insn_type == TYPE_FMOV)
11063 cost = 1;
11064 else if (cost > 2)
11065 cost -= 2;
11066 else
11067 cost = 1;
11069 break;
11071 case PROCESSOR_ATHLON:
11072 memory = get_attr_memory (insn);
11073 dep_memory = get_attr_memory (dep_insn);
11075 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11077 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11078 cost += 2;
11079 else
11080 cost += 3;
11082 /* Show ability of reorder buffer to hide latency of load by executing
11083 in parallel with previous instruction in case
11084 previous instruction is not needed to compute the address. */
11085 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11086 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11088 /* Claim moves to take one cycle, as the core can issue one load
11089 at a time and the next load can start a cycle later. */
11090 if (dep_insn_type == TYPE_IMOV
11091 || dep_insn_type == TYPE_FMOV)
11092 cost = 0;
11093 else if (cost >= 3)
11094 cost -= 3;
11095 else
11096 cost = 0;
11099 default:
11100 break;
11103 return cost;
11106 static union
11108 struct ppro_sched_data
11110 rtx decode[3];
11111 int issued_this_cycle;
11112 } ppro;
11113 } ix86_sched_data;
11115 static enum attr_ppro_uops
11116 ix86_safe_ppro_uops (insn)
11117 rtx insn;
11119 if (recog_memoized (insn) >= 0)
11120 return get_attr_ppro_uops (insn);
11121 else
11122 return PPRO_UOPS_MANY;
11125 static void
11126 ix86_dump_ppro_packet (dump)
11127 FILE *dump;
11129 if (ix86_sched_data.ppro.decode[0])
11131 fprintf (dump, "PPRO packet: %d",
11132 INSN_UID (ix86_sched_data.ppro.decode[0]));
11133 if (ix86_sched_data.ppro.decode[1])
11134 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11135 if (ix86_sched_data.ppro.decode[2])
11136 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11137 fputc ('\n', dump);
11141 /* We're beginning a new block. Initialize data structures as necessary. */
11143 static void
11144 ix86_sched_init (dump, sched_verbose, veclen)
11145 FILE *dump ATTRIBUTE_UNUSED;
11146 int sched_verbose ATTRIBUTE_UNUSED;
11147 int veclen ATTRIBUTE_UNUSED;
11149 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11152 /* Shift INSN to SLOT, and shift everything else down. */
11154 static void
11155 ix86_reorder_insn (insnp, slot)
11156 rtx *insnp, *slot;
11158 if (insnp != slot)
11160 rtx insn = *insnp;
11162 insnp[0] = insnp[1];
11163 while (++insnp != slot);
11164 *insnp = insn;
11168 static void
11169 ix86_sched_reorder_ppro (ready, e_ready)
11170 rtx *ready;
11171 rtx *e_ready;
11173 rtx decode[3];
11174 enum attr_ppro_uops cur_uops;
11175 int issued_this_cycle;
11176 rtx *insnp;
11177 int i;
11179 /* At this point .ppro.decode contains the state of the three
11180 decoders from the last "cycle".  That is, those insns that were
11181 actually independent. But here we're scheduling for the
11182 decoder, and we may find things that are decodable in the
11183 same cycle. */
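/* The model here follows the Pentium Pro's 4-1-1 decode template: only
   decoder 0 can handle an insn of more than one uop (up to four), while
   decoders 1 and 2 take single-uop insns.  That is why the code below
   looks for one "few"-uop insn for slot 0 and then fills the remaining
   two slots with single-uop insns.  */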
11185 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11186 issued_this_cycle = 0;
11188 insnp = e_ready;
11189 cur_uops = ix86_safe_ppro_uops (*insnp);
11191 /* If the decoders are empty, and we've a complex insn at the
11192 head of the priority queue, let it issue without complaint. */
11193 if (decode[0] == NULL)
11195 if (cur_uops == PPRO_UOPS_MANY)
11197 decode[0] = *insnp;
11198 goto ppro_done;
11201 /* Otherwise, search for a 2-4 uop insn to issue. */
11202 while (cur_uops != PPRO_UOPS_FEW)
11204 if (insnp == ready)
11205 break;
11206 cur_uops = ix86_safe_ppro_uops (*--insnp);
11209 /* If so, move it to the head of the line. */
11210 if (cur_uops == PPRO_UOPS_FEW)
11211 ix86_reorder_insn (insnp, e_ready);
11213 /* Issue the head of the queue. */
11214 issued_this_cycle = 1;
11215 decode[0] = *e_ready--;
11218 /* Look for simple insns to fill in the other two slots. */
11219 for (i = 1; i < 3; ++i)
11220 if (decode[i] == NULL)
11222 if (ready > e_ready)
11223 goto ppro_done;
11225 insnp = e_ready;
11226 cur_uops = ix86_safe_ppro_uops (*insnp);
11227 while (cur_uops != PPRO_UOPS_ONE)
11229 if (insnp == ready)
11230 break;
11231 cur_uops = ix86_safe_ppro_uops (*--insnp);
11234 /* Found one. Move it to the head of the queue and issue it. */
11235 if (cur_uops == PPRO_UOPS_ONE)
11237 ix86_reorder_insn (insnp, e_ready);
11238 decode[i] = *e_ready--;
11239 issued_this_cycle++;
11240 continue;
11243 /* ??? Didn't find one. Ideally, here we would do a lazy split
11244 of 2-uop insns, issue one and queue the other. */
11247 ppro_done:
11248 if (issued_this_cycle == 0)
11249 issued_this_cycle = 1;
11250 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11253 /* We are about to begin issuing insns for this clock cycle.
11254 Override the default sort algorithm to better slot instructions. */
11255 static int
11256 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11257 FILE *dump ATTRIBUTE_UNUSED;
11258 int sched_verbose ATTRIBUTE_UNUSED;
11259 rtx *ready;
11260 int *n_readyp;
11261 int clock_var ATTRIBUTE_UNUSED;
11263 int n_ready = *n_readyp;
11264 rtx *e_ready = ready + n_ready - 1;
11266 /* Make sure to go ahead and initialize key items in
11267 ix86_sched_data if we are not going to bother trying to
11268 reorder the ready queue. */
11269 if (n_ready < 2)
11271 ix86_sched_data.ppro.issued_this_cycle = 1;
11272 goto out;
11275 switch (ix86_cpu)
11277 default:
11278 break;
11280 case PROCESSOR_PENTIUMPRO:
11281 ix86_sched_reorder_ppro (ready, e_ready);
11282 break;
11285 out:
11286 return ix86_issue_rate ();
11289 /* We are about to issue INSN. Return the number of insns left on the
11290 ready queue that can be issued this cycle. */
11292 static int
11293 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11294 FILE *dump;
11295 int sched_verbose;
11296 rtx insn;
11297 int can_issue_more;
11299 int i;
11300 switch (ix86_cpu)
11302 default:
11303 return can_issue_more - 1;
11305 case PROCESSOR_PENTIUMPRO:
11307 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11309 if (uops == PPRO_UOPS_MANY)
11311 if (sched_verbose)
11312 ix86_dump_ppro_packet (dump);
11313 ix86_sched_data.ppro.decode[0] = insn;
11314 ix86_sched_data.ppro.decode[1] = NULL;
11315 ix86_sched_data.ppro.decode[2] = NULL;
11316 if (sched_verbose)
11317 ix86_dump_ppro_packet (dump);
11318 ix86_sched_data.ppro.decode[0] = NULL;
11320 else if (uops == PPRO_UOPS_FEW)
11322 if (sched_verbose)
11323 ix86_dump_ppro_packet (dump);
11324 ix86_sched_data.ppro.decode[0] = insn;
11325 ix86_sched_data.ppro.decode[1] = NULL;
11326 ix86_sched_data.ppro.decode[2] = NULL;
11328 else
11330 for (i = 0; i < 3; ++i)
11331 if (ix86_sched_data.ppro.decode[i] == NULL)
11333 ix86_sched_data.ppro.decode[i] = insn;
11334 break;
11336 if (i == 3)
11337 abort ();
11338 if (i == 2)
11340 if (sched_verbose)
11341 ix86_dump_ppro_packet (dump);
11342 ix86_sched_data.ppro.decode[0] = NULL;
11343 ix86_sched_data.ppro.decode[1] = NULL;
11344 ix86_sched_data.ppro.decode[2] = NULL;
11348 return --ix86_sched_data.ppro.issued_this_cycle;
11352 static int
11353 ia32_use_dfa_pipeline_interface ()
11355 if (ix86_cpu == PROCESSOR_PENTIUM)
11356 return 1;
11357 return 0;
11360 /* How many alternative schedules to try. This should be as wide as the
11361 scheduling freedom in the DFA, but no wider. Making this value too
11362 large results in extra work for the scheduler. */
11364 static int
11365 ia32_multipass_dfa_lookahead ()
11367 if (ix86_cpu == PROCESSOR_PENTIUM)
11368 return 2;
11369 else
11370 return 0;
11374 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11375 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11376 appropriate. */
11378 void
11379 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11380 rtx insns;
11381 rtx dstref, srcref, dstreg, srcreg;
11383 rtx insn;
11385 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11386 if (INSN_P (insn))
11387 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11388 dstreg, srcreg);
11391 /* Subroutine of above to actually do the updating by recursively walking
11392 the rtx. */
11394 static void
11395 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11396 rtx x;
11397 rtx dstref, srcref, dstreg, srcreg;
11399 enum rtx_code code = GET_CODE (x);
11400 const char *format_ptr = GET_RTX_FORMAT (code);
11401 int i, j;
11403 if (code == MEM && XEXP (x, 0) == dstreg)
11404 MEM_COPY_ATTRIBUTES (x, dstref);
11405 else if (code == MEM && XEXP (x, 0) == srcreg)
11406 MEM_COPY_ATTRIBUTES (x, srcref);
11408 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11410 if (*format_ptr == 'e')
11411 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11412 dstreg, srcreg);
11413 else if (*format_ptr == 'E')
11414 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11415 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11416 dstreg, srcreg);
11420 /* Compute the alignment given to a constant that is being placed in memory.
11421 EXP is the constant and ALIGN is the alignment that the object would
11422 ordinarily have.
11423 The value of this function is used instead of that alignment to align
11424 the object. */
11427 ix86_constant_alignment (exp, align)
11428 tree exp;
11429 int align;
11431 if (TREE_CODE (exp) == REAL_CST)
11433 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11434 return 64;
11435 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11436 return 128;
11438 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11439 && align < 256)
11440 return 256;
11442 return align;
11445 /* Compute the alignment for a static variable.
11446 TYPE is the data type, and ALIGN is the alignment that
11447 the object would ordinarily have. The value of this function is used
11448 instead of that alignment to align the object. */
11451 ix86_data_alignment (type, align)
11452 tree type;
11453 int align;
11455 if (AGGREGATE_TYPE_P (type)
11456 && TYPE_SIZE (type)
11457 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11458 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11459 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11460 return 256;
11462 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11463 to a 16-byte boundary. */
11464 if (TARGET_64BIT)
11466 if (AGGREGATE_TYPE_P (type)
11467 && TYPE_SIZE (type)
11468 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11469 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11470 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11471 return 128;
11474 if (TREE_CODE (type) == ARRAY_TYPE)
11476 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11477 return 64;
11478 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11479 return 128;
11481 else if (TREE_CODE (type) == COMPLEX_TYPE)
11484 if (TYPE_MODE (type) == DCmode && align < 64)
11485 return 64;
11486 if (TYPE_MODE (type) == XCmode && align < 128)
11487 return 128;
11489 else if ((TREE_CODE (type) == RECORD_TYPE
11490 || TREE_CODE (type) == UNION_TYPE
11491 || TREE_CODE (type) == QUAL_UNION_TYPE)
11492 && TYPE_FIELDS (type))
11494 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11495 return 64;
11496 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11497 return 128;
11499 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11500 || TREE_CODE (type) == INTEGER_TYPE)
11502 if (TYPE_MODE (type) == DFmode && align < 64)
11503 return 64;
11504 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11505 return 128;
11508 return align;
11511 /* Compute the alignment for a local variable.
11512 TYPE is the data type, and ALIGN is the alignment that
11513 the object would ordinarily have. The value of this macro is used
11514 instead of that alignment to align the object. */
11517 ix86_local_alignment (type, align)
11518 tree type;
11519 int align;
11521 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11522 to a 16-byte boundary. */
11523 if (TARGET_64BIT)
11525 if (AGGREGATE_TYPE_P (type)
11526 && TYPE_SIZE (type)
11527 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11528 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11529 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11530 return 128;
11532 if (TREE_CODE (type) == ARRAY_TYPE)
11534 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11535 return 64;
11536 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11537 return 128;
11539 else if (TREE_CODE (type) == COMPLEX_TYPE)
11541 if (TYPE_MODE (type) == DCmode && align < 64)
11542 return 64;
11543 if (TYPE_MODE (type) == XCmode && align < 128)
11544 return 128;
11546 else if ((TREE_CODE (type) == RECORD_TYPE
11547 || TREE_CODE (type) == UNION_TYPE
11548 || TREE_CODE (type) == QUAL_UNION_TYPE)
11549 && TYPE_FIELDS (type))
11551 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11552 return 64;
11553 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11554 return 128;
11556 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11557 || TREE_CODE (type) == INTEGER_TYPE)
11560 if (TYPE_MODE (type) == DFmode && align < 64)
11561 return 64;
11562 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11563 return 128;
11565 return align;
11568 /* Emit RTL insns to initialize the variable parts of a trampoline.
11569 FNADDR is an RTX for the address of the function's pure code.
11570 CXT is an RTX for the static chain value for the function. */
11571 void
11572 x86_initialize_trampoline (tramp, fnaddr, cxt)
11573 rtx tramp, fnaddr, cxt;
11575 if (!TARGET_64BIT)
11577 /* Compute offset from the end of the jmp to the target function. */
11578 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11579 plus_constant (tramp, 10),
11580 NULL_RTX, 1, OPTAB_DIRECT);
11581 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11582 gen_int_mode (0xb9, QImode));
11583 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11584 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11585 gen_int_mode (0xe9, QImode));
11586 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
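/* The ten bytes emitted above form
      b9 <cxt>       movl  $cxt, %ecx
      e9 <disp>      jmp   fnaddr
   where %ecx is the static chain register and <disp> is fnaddr relative
   to the end of the jmp (tramp + 10).  */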
11588 else
11590 int offset = 0;
11591 /* Try to load address using shorter movl instead of movabs.
11592 We may want to support movq for kernel mode, but the kernel does not use
11593 trampolines at the moment. */
11594 if (x86_64_zero_extended_value (fnaddr))
11596 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11597 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11598 gen_int_mode (0xbb41, HImode));
11599 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11600 gen_lowpart (SImode, fnaddr));
11601 offset += 6;
11603 else
11605 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11606 gen_int_mode (0xbb49, HImode));
11607 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11608 fnaddr);
11609 offset += 10;
11611 /* Load static chain using movabs to r10. */
11612 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11613 gen_int_mode (0xba49, HImode));
11614 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11615 cxt);
11616 offset += 10;
11617 /* Jump to r11.  */
11618 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11619 gen_int_mode (0xff49, HImode));
11620 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11621 gen_int_mode (0xe3, QImode));
11622 offset += 3;
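/* Altogether the 64-bit trampoline emitted above is
      49 bb <fnaddr64>   movabs $fnaddr, %r11   (or 41 bb <fnaddr32>, movl)
      49 ba <cxt64>      movabs $cxt, %r10
      49 ff e3           jmpq  *%r11
   with %r10 serving as the static chain register.  */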
11623 if (offset > TRAMPOLINE_SIZE)
11624 abort ();
11628 #define def_builtin(MASK, NAME, TYPE, CODE) \
11629 do { \
11630 if ((MASK) & target_flags) \
11631 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11632 NULL, NULL_TREE); \
11633 } while (0)
11635 struct builtin_description
11637 const unsigned int mask;
11638 const enum insn_code icode;
11639 const char *const name;
11640 const enum ix86_builtins code;
11641 const enum rtx_code comparison;
11642 const unsigned int flag;
11645 /* Used for builtins that are enabled both by -msse and -msse2. */
11646 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
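/* Each entry below ties one __builtin_ia32_* function to the insn pattern
   that implements it; COMPARISON supplies the rtx code for compare-style
   patterns, and the final FLAG field, when set, appears to mark variants
   (the GT/GE forms) that are expanded with their operands swapped.  */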
11648 static const struct builtin_description bdesc_comi[] =
11650 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11651 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11652 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11653 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11654 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11655 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11656 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11657 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11658 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11659 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11660 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11661 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11662 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11663 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11664 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11665 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11666 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11667 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11668 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11669 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11670 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11671 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11672 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11673 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11676 static const struct builtin_description bdesc_2arg[] =
11678 /* SSE */
11679 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11680 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11681 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11682 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11683 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11684 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11685 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11686 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11688 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11689 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11690 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11691 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11692 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11693 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11694 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11695 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11696 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11697 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11698 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11699 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11700 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11701 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11702 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11703 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11704 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11705 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11706 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11707 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11708 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11709 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11710 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11711 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11713 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11714 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11715 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11716 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11718 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11719 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11720 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11721 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11722 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11724 /* MMX */
11725 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11726 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11727 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11728 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11729 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11730 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11732 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11733 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11734 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11735 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11736 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11737 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11738 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11739 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11741 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11742 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11743 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11745 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11746 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11747 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11748 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11750 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11751 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11753 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11754 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11755 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11756 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11757 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11758 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11760 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11761 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11762 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11763 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11765 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11766 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11767 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11768 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11769 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11770 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11772 /* Special. */
11773 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11774 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11775 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11780 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11781 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11782 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11783 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11784 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11785 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11787 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11788 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11789 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11790 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11791 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11792 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11794 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11795 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11796 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11797 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11799 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11800 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11802 /* SSE2 */
11803 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11804 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11805 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11807 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11813 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11814 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11815 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11816 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11817 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11818 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11819 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11820 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11821 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11822 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11823 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11824 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11825 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11826 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11827 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11828 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11829 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11830 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11831 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11832 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11833 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11834 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11835 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11837 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11838 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11839 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11843 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11844 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11845 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11847 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11848 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11849 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11851 /* SSE2 MMX */
11852 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11854 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11855 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11857 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11858 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11859 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11861 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11862 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11863 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11864 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11865 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11871 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11872 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11873 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11875 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11876 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11877 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11878 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11880 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11881 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11883 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11884 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11885 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11886 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11887 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11888 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11890 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11891 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11892 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11893 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11897 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11898 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11912 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11913 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11914 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11917 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11918 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11921 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11923 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11924 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11925 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11926 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11928 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11930 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11931 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11932 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
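/* Each entry in these bdesc_* tables gives, in order: the -m option
   mask that must be active for the builtin to exist, the insn code to
   expand it with, the user-visible name (0 when the builtin is instead
   registered by hand in ix86_init_mmx_sse_builtins), the IX86_BUILTIN_*
   code, and the comparison/flag pair that only the SSE compare
   expanders look at.  The field names are inferred here from the
   d->mask, d->icode, d->name, d->code, d->comparison and d->flag uses
   further down; see struct builtin_description earlier in this file.  */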
11935 static const struct builtin_description bdesc_1arg[] =
11937 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11938 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11940 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11941 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11942 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11944 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11945 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11946 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11947 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11949 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11956 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11958 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11966 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11967 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11969 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11971 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11974 void
11975 ix86_init_builtins ()
11977 if (TARGET_MMX)
11978 ix86_init_mmx_sse_builtins ();
11981 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11982 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11983 builtins. */
11984 static void
11985 ix86_init_mmx_sse_builtins ()
11987 const struct builtin_description * d;
11988 size_t i;
11990 tree pchar_type_node = build_pointer_type (char_type_node);
11991 tree pfloat_type_node = build_pointer_type (float_type_node);
11992 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11993 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11994 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11996 /* Comparisons. */
11997 tree int_ftype_v4sf_v4sf
11998 = build_function_type_list (integer_type_node,
11999 V4SF_type_node, V4SF_type_node, NULL_TREE);
12000 tree v4si_ftype_v4sf_v4sf
12001 = build_function_type_list (V4SI_type_node,
12002 V4SF_type_node, V4SF_type_node, NULL_TREE);
12003 /* MMX/SSE/integer conversions. */
12004 tree int_ftype_v4sf
12005 = build_function_type_list (integer_type_node,
12006 V4SF_type_node, NULL_TREE);
12007 tree int_ftype_v8qi
12008 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12009 tree v4sf_ftype_v4sf_int
12010 = build_function_type_list (V4SF_type_node,
12011 V4SF_type_node, integer_type_node, NULL_TREE);
12012 tree v4sf_ftype_v4sf_v2si
12013 = build_function_type_list (V4SF_type_node,
12014 V4SF_type_node, V2SI_type_node, NULL_TREE);
12015 tree int_ftype_v4hi_int
12016 = build_function_type_list (integer_type_node,
12017 V4HI_type_node, integer_type_node, NULL_TREE);
12018 tree v4hi_ftype_v4hi_int_int
12019 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12020 integer_type_node, integer_type_node,
12021 NULL_TREE);
12022 /* Miscellaneous. */
12023 tree v8qi_ftype_v4hi_v4hi
12024 = build_function_type_list (V8QI_type_node,
12025 V4HI_type_node, V4HI_type_node, NULL_TREE);
12026 tree v4hi_ftype_v2si_v2si
12027 = build_function_type_list (V4HI_type_node,
12028 V2SI_type_node, V2SI_type_node, NULL_TREE);
12029 tree v4sf_ftype_v4sf_v4sf_int
12030 = build_function_type_list (V4SF_type_node,
12031 V4SF_type_node, V4SF_type_node,
12032 integer_type_node, NULL_TREE);
12033 tree v2si_ftype_v4hi_v4hi
12034 = build_function_type_list (V2SI_type_node,
12035 V4HI_type_node, V4HI_type_node, NULL_TREE);
12036 tree v4hi_ftype_v4hi_int
12037 = build_function_type_list (V4HI_type_node,
12038 V4HI_type_node, integer_type_node, NULL_TREE);
12039 tree v4hi_ftype_v4hi_di
12040 = build_function_type_list (V4HI_type_node,
12041 V4HI_type_node, long_long_unsigned_type_node,
12042 NULL_TREE);
12043 tree v2si_ftype_v2si_di
12044 = build_function_type_list (V2SI_type_node,
12045 V2SI_type_node, long_long_unsigned_type_node,
12046 NULL_TREE);
12047 tree void_ftype_void
12048 = build_function_type (void_type_node, void_list_node);
12049 tree void_ftype_unsigned
12050 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12051 tree unsigned_ftype_void
12052 = build_function_type (unsigned_type_node, void_list_node);
12053 tree di_ftype_void
12054 = build_function_type (long_long_unsigned_type_node, void_list_node);
12055 tree v4sf_ftype_void
12056 = build_function_type (V4SF_type_node, void_list_node);
12057 tree v2si_ftype_v4sf
12058 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12059 /* Loads/stores. */
12060 tree void_ftype_v8qi_v8qi_pchar
12061 = build_function_type_list (void_type_node,
12062 V8QI_type_node, V8QI_type_node,
12063 pchar_type_node, NULL_TREE);
12064 tree v4sf_ftype_pfloat
12065 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12066 /* @@@ the type is bogus */
12067 tree v4sf_ftype_v4sf_pv2si
12068 = build_function_type_list (V4SF_type_node,
12069 V4SF_type_node, pv2si_type_node, NULL_TREE);
12070 tree void_ftype_pv2si_v4sf
12071 = build_function_type_list (void_type_node,
12072 pv2si_type_node, V4SF_type_node, NULL_TREE);
12073 tree void_ftype_pfloat_v4sf
12074 = build_function_type_list (void_type_node,
12075 pfloat_type_node, V4SF_type_node, NULL_TREE);
12076 tree void_ftype_pdi_di
12077 = build_function_type_list (void_type_node,
12078 pdi_type_node, long_long_unsigned_type_node,
12079 NULL_TREE);
12080 tree void_ftype_pv2di_v2di
12081 = build_function_type_list (void_type_node,
12082 pv2di_type_node, V2DI_type_node, NULL_TREE);
12083 /* Normal vector unops. */
12084 tree v4sf_ftype_v4sf
12085 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12087 /* Normal vector binops. */
12088 tree v4sf_ftype_v4sf_v4sf
12089 = build_function_type_list (V4SF_type_node,
12090 V4SF_type_node, V4SF_type_node, NULL_TREE);
12091 tree v8qi_ftype_v8qi_v8qi
12092 = build_function_type_list (V8QI_type_node,
12093 V8QI_type_node, V8QI_type_node, NULL_TREE);
12094 tree v4hi_ftype_v4hi_v4hi
12095 = build_function_type_list (V4HI_type_node,
12096 V4HI_type_node, V4HI_type_node, NULL_TREE);
12097 tree v2si_ftype_v2si_v2si
12098 = build_function_type_list (V2SI_type_node,
12099 V2SI_type_node, V2SI_type_node, NULL_TREE);
12100 tree di_ftype_di_di
12101 = build_function_type_list (long_long_unsigned_type_node,
12102 long_long_unsigned_type_node,
12103 long_long_unsigned_type_node, NULL_TREE);
12105 tree v2si_ftype_v2sf
12106 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12107 tree v2sf_ftype_v2si
12108 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12109 tree v2si_ftype_v2si
12110 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12111 tree v2sf_ftype_v2sf
12112 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12113 tree v2sf_ftype_v2sf_v2sf
12114 = build_function_type_list (V2SF_type_node,
12115 V2SF_type_node, V2SF_type_node, NULL_TREE);
12116 tree v2si_ftype_v2sf_v2sf
12117 = build_function_type_list (V2SI_type_node,
12118 V2SF_type_node, V2SF_type_node, NULL_TREE);
12119 tree pint_type_node = build_pointer_type (integer_type_node);
12120 tree pdouble_type_node = build_pointer_type (double_type_node);
12121 tree int_ftype_v2df_v2df
12122 = build_function_type_list (integer_type_node,
12123 V2DF_type_node, V2DF_type_node, NULL_TREE);
12125 tree ti_ftype_void
12126 = build_function_type (intTI_type_node, void_list_node);
12127 tree ti_ftype_ti_ti
12128 = build_function_type_list (intTI_type_node,
12129 intTI_type_node, intTI_type_node, NULL_TREE);
12130 tree void_ftype_pvoid
12131 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12132 tree v2di_ftype_di
12133 = build_function_type_list (V2DI_type_node,
12134 long_long_unsigned_type_node, NULL_TREE);
12135 tree v4sf_ftype_v4si
12136 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12137 tree v4si_ftype_v4sf
12138 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12139 tree v2df_ftype_v4si
12140 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12141 tree v4si_ftype_v2df
12142 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12143 tree v2si_ftype_v2df
12144 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12145 tree v4sf_ftype_v2df
12146 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12147 tree v2df_ftype_v2si
12148 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12149 tree v2df_ftype_v4sf
12150 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12151 tree int_ftype_v2df
12152 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12153 tree v2df_ftype_v2df_int
12154 = build_function_type_list (V2DF_type_node,
12155 V2DF_type_node, integer_type_node, NULL_TREE);
12156 tree v4sf_ftype_v4sf_v2df
12157 = build_function_type_list (V4SF_type_node,
12158 V4SF_type_node, V2DF_type_node, NULL_TREE);
12159 tree v2df_ftype_v2df_v4sf
12160 = build_function_type_list (V2DF_type_node,
12161 V2DF_type_node, V4SF_type_node, NULL_TREE);
12162 tree v2df_ftype_v2df_v2df_int
12163 = build_function_type_list (V2DF_type_node,
12164 V2DF_type_node, V2DF_type_node,
12165 integer_type_node,
12166 NULL_TREE);
12167 tree v2df_ftype_v2df_pv2si
12168 = build_function_type_list (V2DF_type_node,
12169 V2DF_type_node, pv2si_type_node, NULL_TREE);
12170 tree void_ftype_pv2si_v2df
12171 = build_function_type_list (void_type_node,
12172 pv2si_type_node, V2DF_type_node, NULL_TREE);
12173 tree void_ftype_pdouble_v2df
12174 = build_function_type_list (void_type_node,
12175 pdouble_type_node, V2DF_type_node, NULL_TREE);
12176 tree void_ftype_pint_int
12177 = build_function_type_list (void_type_node,
12178 pint_type_node, integer_type_node, NULL_TREE);
12179 tree void_ftype_v16qi_v16qi_pchar
12180 = build_function_type_list (void_type_node,
12181 V16QI_type_node, V16QI_type_node,
12182 pchar_type_node, NULL_TREE);
12183 tree v2df_ftype_pdouble
12184 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12185 tree v2df_ftype_v2df_v2df
12186 = build_function_type_list (V2DF_type_node,
12187 V2DF_type_node, V2DF_type_node, NULL_TREE);
12188 tree v16qi_ftype_v16qi_v16qi
12189 = build_function_type_list (V16QI_type_node,
12190 V16QI_type_node, V16QI_type_node, NULL_TREE);
12191 tree v8hi_ftype_v8hi_v8hi
12192 = build_function_type_list (V8HI_type_node,
12193 V8HI_type_node, V8HI_type_node, NULL_TREE);
12194 tree v4si_ftype_v4si_v4si
12195 = build_function_type_list (V4SI_type_node,
12196 V4SI_type_node, V4SI_type_node, NULL_TREE);
12197 tree v2di_ftype_v2di_v2di
12198 = build_function_type_list (V2DI_type_node,
12199 V2DI_type_node, V2DI_type_node, NULL_TREE);
12200 tree v2di_ftype_v2df_v2df
12201 = build_function_type_list (V2DI_type_node,
12202 V2DF_type_node, V2DF_type_node, NULL_TREE);
12203 tree v2df_ftype_v2df
12204 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12205 tree v2df_ftype_double
12206 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12207 tree v2df_ftype_double_double
12208 = build_function_type_list (V2DF_type_node,
12209 double_type_node, double_type_node, NULL_TREE);
12210 tree int_ftype_v8hi_int
12211 = build_function_type_list (integer_type_node,
12212 V8HI_type_node, integer_type_node, NULL_TREE);
12213 tree v8hi_ftype_v8hi_int_int
12214 = build_function_type_list (V8HI_type_node,
12215 V8HI_type_node, integer_type_node,
12216 integer_type_node, NULL_TREE);
12217 tree v2di_ftype_v2di_int
12218 = build_function_type_list (V2DI_type_node,
12219 V2DI_type_node, integer_type_node, NULL_TREE);
12220 tree v4si_ftype_v4si_int
12221 = build_function_type_list (V4SI_type_node,
12222 V4SI_type_node, integer_type_node, NULL_TREE);
12223 tree v8hi_ftype_v8hi_int
12224 = build_function_type_list (V8HI_type_node,
12225 V8HI_type_node, integer_type_node, NULL_TREE);
12226 tree v8hi_ftype_v8hi_v2di
12227 = build_function_type_list (V8HI_type_node,
12228 V8HI_type_node, V2DI_type_node, NULL_TREE);
12229 tree v4si_ftype_v4si_v2di
12230 = build_function_type_list (V4SI_type_node,
12231 V4SI_type_node, V2DI_type_node, NULL_TREE);
12232 tree v4si_ftype_v8hi_v8hi
12233 = build_function_type_list (V4SI_type_node,
12234 V8HI_type_node, V8HI_type_node, NULL_TREE);
12235 tree di_ftype_v8qi_v8qi
12236 = build_function_type_list (long_long_unsigned_type_node,
12237 V8QI_type_node, V8QI_type_node, NULL_TREE);
12238 tree v2di_ftype_v16qi_v16qi
12239 = build_function_type_list (V2DI_type_node,
12240 V16QI_type_node, V16QI_type_node, NULL_TREE);
12241 tree int_ftype_v16qi
12242 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12244 /* Add all builtins that are more or less simple operations on two
12245 operands. */
12246 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12248 /* Use one of the operands; the target can have a different mode for
12249 mask-generating compares. */
12250 enum machine_mode mode;
12251 tree type;
12253 if (d->name == 0)
12254 continue;
12255 mode = insn_data[d->icode].operand[1].mode;
12257 switch (mode)
12259 case V16QImode:
12260 type = v16qi_ftype_v16qi_v16qi;
12261 break;
12262 case V8HImode:
12263 type = v8hi_ftype_v8hi_v8hi;
12264 break;
12265 case V4SImode:
12266 type = v4si_ftype_v4si_v4si;
12267 break;
12268 case V2DImode:
12269 type = v2di_ftype_v2di_v2di;
12270 break;
12271 case V2DFmode:
12272 type = v2df_ftype_v2df_v2df;
12273 break;
12274 case TImode:
12275 type = ti_ftype_ti_ti;
12276 break;
12277 case V4SFmode:
12278 type = v4sf_ftype_v4sf_v4sf;
12279 break;
12280 case V8QImode:
12281 type = v8qi_ftype_v8qi_v8qi;
12282 break;
12283 case V4HImode:
12284 type = v4hi_ftype_v4hi_v4hi;
12285 break;
12286 case V2SImode:
12287 type = v2si_ftype_v2si_v2si;
12288 break;
12289 case DImode:
12290 type = di_ftype_di_di;
12291 break;
12293 default:
12294 abort ();
12297 /* Override for comparisons. */
12298 if (d->icode == CODE_FOR_maskcmpv4sf3
12299 || d->icode == CODE_FOR_maskncmpv4sf3
12300 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12301 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12302 type = v4si_ftype_v4sf_v4sf;
12304 if (d->icode == CODE_FOR_maskcmpv2df3
12305 || d->icode == CODE_FOR_maskncmpv2df3
12306 || d->icode == CODE_FOR_vmmaskcmpv2df3
12307 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12308 type = v2di_ftype_v2df_v2df;
12310 def_builtin (d->mask, d->name, type, d->code);
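/* For instance, the bdesc_2arg entry
     { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 }
   goes through this loop as follows: operand 1 of the addv2df3 pattern
   is V2DFmode, so the switch picks v2df_ftype_v2df_v2df and the entry
   is registered as
     def_builtin (MASK_SSE2, "__builtin_ia32_addpd", v2df_ftype_v2df_v2df, IX86_BUILTIN_ADDPD);
   Entries whose name field is 0 (the shift builtins, for example) are
   skipped here and registered by hand below with more precise types.  */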
12313 /* Add the remaining MMX insns with somewhat more complicated types. */
12314 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12315 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12316 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12317 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12318 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12319 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12320 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12322 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12323 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12324 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12326 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12327 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12329 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12330 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12332 /* comi/ucomi insns. */
12333 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12334 if (d->mask == MASK_SSE2)
12335 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12336 else
12337 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12339 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12340 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12341 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12343 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12344 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12345 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12346 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12347 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12348 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12350 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12351 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12352 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12353 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12355 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12356 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12358 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12360 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12361 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12362 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12363 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12364 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12365 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12367 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12368 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12369 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12370 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12372 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12373 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12374 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12375 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12377 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12379 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12381 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12382 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12383 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12384 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12385 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12386 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12388 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12390 /* Original 3DNow! */
12391 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12392 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12393 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12394 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12395 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12396 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12397 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12398 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12399 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12400 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12401 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12402 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12403 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12404 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12405 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12406 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12407 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12408 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12409 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12410 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12412 /* 3DNow! extension as used in the Athlon CPU. */
12413 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12414 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12415 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12416 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12417 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12418 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12420 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12422 /* SSE2 */
12423 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12424 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12426 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12427 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12430 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12431 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12433 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12434 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12436 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12438 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12439 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12441 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12443 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12444 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12445 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12447 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12448 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12450 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12452 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12455 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12457 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12458 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12460 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12461 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12462 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12463 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12464 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12466 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12468 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12469 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12471 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12472 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12473 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12475 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12476 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12477 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12479 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12480 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12481 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12482 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12483 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12484 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12485 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12487 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12488 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12489 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12491 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12492 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12493 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12495 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12496 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12497 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12499 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12500 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12502 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12503 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12504 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12506 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12507 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12508 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12510 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12511 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12513 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12516 /* Errors in the source file can cause expand_expr to return const0_rtx
12517 where we expect a vector. To avoid crashing, use one of the vector
12518 clear instructions. */
12519 static rtx
12520 safe_vector_operand (x, mode)
12521 rtx x;
12522 enum machine_mode mode;
12524 if (x != const0_rtx)
12525 return x;
12526 x = gen_reg_rtx (mode);
12528 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12529 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12530 : gen_rtx_SUBREG (DImode, x, 0)));
12531 else
12532 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12533 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12534 return x;
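/* A typical way to get here: the user hands an erroneous expression to
   one of the vector builtins.  After the front end has issued its
   error, expand_expr may return const0_rtx for that argument, and
   substituting a cleared vector register keeps the expanders below
   from crashing on a scalar zero.  */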
12537 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12539 static rtx
12540 ix86_expand_binop_builtin (icode, arglist, target)
12541 enum insn_code icode;
12542 tree arglist;
12543 rtx target;
12545 rtx pat;
12546 tree arg0 = TREE_VALUE (arglist);
12547 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12548 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12549 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12550 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12551 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12552 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12554 if (VECTOR_MODE_P (mode0))
12555 op0 = safe_vector_operand (op0, mode0);
12556 if (VECTOR_MODE_P (mode1))
12557 op1 = safe_vector_operand (op1, mode1);
12559 if (! target
12560 || GET_MODE (target) != tmode
12561 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12562 target = gen_reg_rtx (tmode);
12564 /* In case the insn wants input operands in modes different from
12565 the result, abort. */
12566 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12567 abort ();
12569 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12570 op0 = copy_to_mode_reg (mode0, op0);
12571 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12572 op1 = copy_to_mode_reg (mode1, op1);
12574 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12575 yet one of the two must not be a memory. This is normally enforced
12576 by expanders, but we didn't bother to create one here. */
12577 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12578 op0 = copy_to_mode_reg (mode0, op0);
12580 pat = GEN_FCN (icode) (target, op0, op1);
12581 if (! pat)
12582 return 0;
12583 emit_insn (pat);
12584 return target;
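/* A minimal usage sketch of a two-operand builtin that reaches this
   expander.  The __v2df typedef is an assumption, written in the
   mode-attribute style of this era's intrinsic headers; it is not part
   of i386.c itself.  */
#if 0
typedef double __v2df __attribute__ ((mode (V2DF)));

__v2df
example_addpd (__v2df a, __v2df b)
{
  /* Expands via the "__builtin_ia32_addpd" bdesc_2arg entry, i.e.
     ix86_expand_binop_builtin (CODE_FOR_addv2df3, ...).  */
  return __builtin_ia32_addpd (a, b);
}
#endif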
12587 /* In type_for_mode we restrict the ability to create TImode types
12588 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12589 to have a V4SFmode signature. Convert them in-place to TImode. */
12591 static rtx
12592 ix86_expand_timode_binop_builtin (icode, arglist, target)
12593 enum insn_code icode;
12594 tree arglist;
12595 rtx target;
12597 rtx pat;
12598 tree arg0 = TREE_VALUE (arglist);
12599 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12600 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12601 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12603 op0 = gen_lowpart (TImode, op0);
12604 op1 = gen_lowpart (TImode, op1);
12605 target = gen_reg_rtx (TImode);
12607 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12608 op0 = copy_to_mode_reg (TImode, op0);
12609 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12610 op1 = copy_to_mode_reg (TImode, op1);
12612 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12613 yet one of the two must not be a memory. This is normally enforced
12614 by expanders, but we didn't bother to create one here. */
12615 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12616 op0 = copy_to_mode_reg (TImode, op0);
12618 pat = GEN_FCN (icode) (target, op0, op1);
12619 if (! pat)
12620 return 0;
12621 emit_insn (pat);
12623 return gen_lowpart (V4SFmode, target);
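/* For example, "__builtin_ia32_andps" is declared with the
   v4sf_ftype_v4sf_v4sf signature above but is expanded here with
   CODE_FOR_sse_andti3 (see ix86_expand_builtin below): both inputs are
   viewed as TImode via gen_lowpart, the logical operation is done in
   TImode, and the result is handed back as V4SFmode again.  */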
12626 /* Subroutine of ix86_expand_builtin to take care of stores. */
12628 static rtx
12629 ix86_expand_store_builtin (icode, arglist)
12630 enum insn_code icode;
12631 tree arglist;
12633 rtx pat;
12634 tree arg0 = TREE_VALUE (arglist);
12635 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12636 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12637 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12638 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12639 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12641 if (VECTOR_MODE_P (mode1))
12642 op1 = safe_vector_operand (op1, mode1);
12644 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12646 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12647 op1 = copy_to_mode_reg (mode1, op1);
12649 pat = GEN_FCN (icode) (op0, op1);
12650 if (pat)
12651 emit_insn (pat);
12652 return 0;
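/* A minimal usage sketch of a store builtin handled here.  The __v4sf
   typedef is an assumption in the style of xmmintrin.h, not part of
   this file.  */
#if 0
typedef float __v4sf __attribute__ ((mode (V4SF)));

void
example_storeaps (float *p, __v4sf v)
{
  /* The pointer argument is wrapped in a MEM by
     ix86_expand_store_builtin; the builtin produces no value.  */
  __builtin_ia32_storeaps (p, v);
}
#endif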
12655 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12657 static rtx
12658 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12659 enum insn_code icode;
12660 tree arglist;
12661 rtx target;
12662 int do_load;
12664 rtx pat;
12665 tree arg0 = TREE_VALUE (arglist);
12666 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12667 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12668 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12670 if (! target
12671 || GET_MODE (target) != tmode
12672 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12673 target = gen_reg_rtx (tmode);
12674 if (do_load)
12675 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12676 else
12678 if (VECTOR_MODE_P (mode0))
12679 op0 = safe_vector_operand (op0, mode0);
12681 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12682 op0 = copy_to_mode_reg (mode0, op0);
12685 pat = GEN_FCN (icode) (target, op0);
12686 if (! pat)
12687 return 0;
12688 emit_insn (pat);
12689 return target;
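/* A minimal usage sketch of a unop builtin expanded with do_load set.
   As above, the __v4sf typedef is an assumption and not part of this
   file.  */
#if 0
typedef float __v4sf __attribute__ ((mode (V4SF)));

__v4sf
example_loadaps (float *p)
{
  /* LOADAPS is expanded with do_load == 1, so the pointer is turned
     into a MEM in the insn's input mode; PF2ID below is expanded with
     do_load == 0 and takes the vector operand directly.  */
  return __builtin_ia32_loadaps (p);
}
#endif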
12692 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12693 sqrtss, rsqrtss, rcpss. */
12695 static rtx
12696 ix86_expand_unop1_builtin (icode, arglist, target)
12697 enum insn_code icode;
12698 tree arglist;
12699 rtx target;
12701 rtx pat;
12702 tree arg0 = TREE_VALUE (arglist);
12703 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12704 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12705 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12707 if (! target
12708 || GET_MODE (target) != tmode
12709 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12710 target = gen_reg_rtx (tmode);
12712 if (VECTOR_MODE_P (mode0))
12713 op0 = safe_vector_operand (op0, mode0);
12715 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12716 op0 = copy_to_mode_reg (mode0, op0);
12718 op1 = op0;
12719 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12720 op1 = copy_to_mode_reg (mode0, op1);
12722 pat = GEN_FCN (icode) (target, op0, op1);
12723 if (! pat)
12724 return 0;
12725 emit_insn (pat);
12726 return target;
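/* For these three builtins the same register is used for both input
   operands, so only the low element is operated on and the remaining
   elements of the argument pass through unchanged -- the usual scalar
   single-precision semantics of sqrtss, rsqrtss and rcpss.  */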
12729 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12731 static rtx
12732 ix86_expand_sse_compare (d, arglist, target)
12733 const struct builtin_description *d;
12734 tree arglist;
12735 rtx target;
12737 rtx pat;
12738 tree arg0 = TREE_VALUE (arglist);
12739 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12740 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12741 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12742 rtx op2;
12743 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12744 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12745 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12746 enum rtx_code comparison = d->comparison;
12748 if (VECTOR_MODE_P (mode0))
12749 op0 = safe_vector_operand (op0, mode0);
12750 if (VECTOR_MODE_P (mode1))
12751 op1 = safe_vector_operand (op1, mode1);
12753 /* Swap operands if we have a comparison that isn't available in
12754 hardware. */
12755 if (d->flag)
12757 rtx tmp = gen_reg_rtx (mode1);
12758 emit_move_insn (tmp, op1);
12759 op1 = op0;
12760 op0 = tmp;
12763 if (! target
12764 || GET_MODE (target) != tmode
12765 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12766 target = gen_reg_rtx (tmode);
12768 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12769 op0 = copy_to_mode_reg (mode0, op0);
12770 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12771 op1 = copy_to_mode_reg (mode1, op1);
12773 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12774 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12775 if (! pat)
12776 return 0;
12777 emit_insn (pat);
12778 return target;
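/* The swap handles comparisons that have no direct hardware form.  For
   instance, the "__builtin_ia32_cmpgtpd" entry in bdesc_2arg is
   { ..., CODE_FOR_maskcmpv2df3, ..., IX86_BUILTIN_CMPGTPD, LT, 1 }:
   the flag of 1 makes us copy the second argument into a fresh
   register and emit the comparison with the operands exchanged, so a
   CMPLTPD pattern implements the greater-than test.  */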
12781 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12783 static rtx
12784 ix86_expand_sse_comi (d, arglist, target)
12785 const struct builtin_description *d;
12786 tree arglist;
12787 rtx target;
12789 rtx pat;
12790 tree arg0 = TREE_VALUE (arglist);
12791 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12792 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12793 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12794 rtx op2;
12795 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12796 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12797 enum rtx_code comparison = d->comparison;
12799 if (VECTOR_MODE_P (mode0))
12800 op0 = safe_vector_operand (op0, mode0);
12801 if (VECTOR_MODE_P (mode1))
12802 op1 = safe_vector_operand (op1, mode1);
12804 /* Swap operands if we have a comparison that isn't available in
12805 hardware. */
12806 if (d->flag)
12808 rtx tmp = op1;
12809 op1 = op0;
12810 op0 = tmp;
12813 target = gen_reg_rtx (SImode);
12814 emit_move_insn (target, const0_rtx);
12815 target = gen_rtx_SUBREG (QImode, target, 0);
12817 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12818 op0 = copy_to_mode_reg (mode0, op0);
12819 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12820 op1 = copy_to_mode_reg (mode1, op1);
12822 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12823 pat = GEN_FCN (d->icode) (op0, op1, op2);
12824 if (! pat)
12825 return 0;
12826 emit_insn (pat);
12827 emit_insn (gen_rtx_SET (VOIDmode,
12828 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12829 gen_rtx_fmt_ee (comparison, QImode,
12830 gen_rtx_REG (CCmode, FLAGS_REG),
12831 const0_rtx)));
12833 return SUBREG_REG (target);
12836 /* Expand an expression EXP that calls a built-in function,
12837 with result going to TARGET if that's convenient
12838 (and in mode MODE if that's convenient).
12839 SUBTARGET may be used as the target for computing one of EXP's operands.
12840 IGNORE is nonzero if the value is to be ignored. */
12842 rtx
12843 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12844 tree exp;
12845 rtx target;
12846 rtx subtarget ATTRIBUTE_UNUSED;
12847 enum machine_mode mode ATTRIBUTE_UNUSED;
12848 int ignore ATTRIBUTE_UNUSED;
12850 const struct builtin_description *d;
12851 size_t i;
12852 enum insn_code icode;
12853 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12854 tree arglist = TREE_OPERAND (exp, 1);
12855 tree arg0, arg1, arg2;
12856 rtx op0, op1, op2, pat;
12857 enum machine_mode tmode, mode0, mode1, mode2;
12858 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12860 switch (fcode)
12862 case IX86_BUILTIN_EMMS:
12863 emit_insn (gen_emms ());
12864 return 0;
12866 case IX86_BUILTIN_SFENCE:
12867 emit_insn (gen_sfence ());
12868 return 0;
12870 case IX86_BUILTIN_PEXTRW:
12871 case IX86_BUILTIN_PEXTRW128:
12872 icode = (fcode == IX86_BUILTIN_PEXTRW
12873 ? CODE_FOR_mmx_pextrw
12874 : CODE_FOR_sse2_pextrw);
12875 arg0 = TREE_VALUE (arglist);
12876 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12877 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12878 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12879 tmode = insn_data[icode].operand[0].mode;
12880 mode0 = insn_data[icode].operand[1].mode;
12881 mode1 = insn_data[icode].operand[2].mode;
12883 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12884 op0 = copy_to_mode_reg (mode0, op0);
12885 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12887 /* @@@ better error message */
12888 error ("selector must be an immediate");
12889 return gen_reg_rtx (tmode);
12891 if (target == 0
12892 || GET_MODE (target) != tmode
12893 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12894 target = gen_reg_rtx (tmode);
12895 pat = GEN_FCN (icode) (target, op0, op1);
12896 if (! pat)
12897 return 0;
12898 emit_insn (pat);
12899 return target;
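/* A minimal usage sketch for the pextrw case.  The __v4hi typedef is
   an assumption in the style of the MMX intrinsic headers, not part of
   this file.  */
#if 0
typedef int __v4hi __attribute__ ((mode (V4HI)));

int
example_pextrw (__v4hi v)
{
  /* The selector must be a compile-time constant; a variable second
     argument triggers the "selector must be an immediate" error
     above.  */
  return __builtin_ia32_pextrw (v, 2);
}
#endif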
12901 case IX86_BUILTIN_PINSRW:
12902 case IX86_BUILTIN_PINSRW128:
12903 icode = (fcode == IX86_BUILTIN_PINSRW
12904 ? CODE_FOR_mmx_pinsrw
12905 : CODE_FOR_sse2_pinsrw);
12906 arg0 = TREE_VALUE (arglist);
12907 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12908 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12909 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12910 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12911 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12912 tmode = insn_data[icode].operand[0].mode;
12913 mode0 = insn_data[icode].operand[1].mode;
12914 mode1 = insn_data[icode].operand[2].mode;
12915 mode2 = insn_data[icode].operand[3].mode;
12917 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12918 op0 = copy_to_mode_reg (mode0, op0);
12919 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12920 op1 = copy_to_mode_reg (mode1, op1);
12921 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12923 /* @@@ better error message */
12924 error ("selector must be an immediate");
12925 return const0_rtx;
12927 if (target == 0
12928 || GET_MODE (target) != tmode
12929 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12930 target = gen_reg_rtx (tmode);
12931 pat = GEN_FCN (icode) (target, op0, op1, op2);
12932 if (! pat)
12933 return 0;
12934 emit_insn (pat);
12935 return target;
12937 case IX86_BUILTIN_MASKMOVQ:
12938 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12939 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12940 : CODE_FOR_sse2_maskmovdqu);
12941 /* Note the arg order is different from the operand order. */
12942 arg1 = TREE_VALUE (arglist);
12943 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12944 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12945 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12946 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12947 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12948 mode0 = insn_data[icode].operand[0].mode;
12949 mode1 = insn_data[icode].operand[1].mode;
12950 mode2 = insn_data[icode].operand[2].mode;
12952 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12953 op0 = copy_to_mode_reg (mode0, op0);
12954 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12955 op1 = copy_to_mode_reg (mode1, op1);
12956 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12957 op2 = copy_to_mode_reg (mode2, op2);
12958 pat = GEN_FCN (icode) (op0, op1, op2);
12959 if (! pat)
12960 return 0;
12961 emit_insn (pat);
12962 return 0;
12964 case IX86_BUILTIN_SQRTSS:
12965 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12966 case IX86_BUILTIN_RSQRTSS:
12967 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12968 case IX86_BUILTIN_RCPSS:
12969 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12971 case IX86_BUILTIN_ANDPS:
12972 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12973 arglist, target);
12974 case IX86_BUILTIN_ANDNPS:
12975 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12976 arglist, target);
12977 case IX86_BUILTIN_ORPS:
12978 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12979 arglist, target);
12980 case IX86_BUILTIN_XORPS:
12981 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12982 arglist, target);
12984 case IX86_BUILTIN_LOADAPS:
12985 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12987 case IX86_BUILTIN_LOADUPS:
12988 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12990 case IX86_BUILTIN_STOREAPS:
12991 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12992 case IX86_BUILTIN_STOREUPS:
12993 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12995 case IX86_BUILTIN_LOADSS:
12996 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12998 case IX86_BUILTIN_STORESS:
12999 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13001 case IX86_BUILTIN_LOADHPS:
13002 case IX86_BUILTIN_LOADLPS:
13003 case IX86_BUILTIN_LOADHPD:
13004 case IX86_BUILTIN_LOADLPD:
13005 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13006 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13007 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13008 : CODE_FOR_sse2_movlpd);
13009 arg0 = TREE_VALUE (arglist);
13010 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13011 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13012 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13013 tmode = insn_data[icode].operand[0].mode;
13014 mode0 = insn_data[icode].operand[1].mode;
13015 mode1 = insn_data[icode].operand[2].mode;
13017 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13018 op0 = copy_to_mode_reg (mode0, op0);
13019 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13020 if (target == 0
13021 || GET_MODE (target) != tmode
13022 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13023 target = gen_reg_rtx (tmode);
13024 pat = GEN_FCN (icode) (target, op0, op1);
13025 if (! pat)
13026 return 0;
13027 emit_insn (pat);
13028 return target;
13030 case IX86_BUILTIN_STOREHPS:
13031 case IX86_BUILTIN_STORELPS:
13032 case IX86_BUILTIN_STOREHPD:
13033 case IX86_BUILTIN_STORELPD:
13034 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13035 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13036 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13037 : CODE_FOR_sse2_movlpd);
13038 arg0 = TREE_VALUE (arglist);
13039 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13040 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13041 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13042 mode0 = insn_data[icode].operand[1].mode;
13043 mode1 = insn_data[icode].operand[2].mode;
13045 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13046 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13047 op1 = copy_to_mode_reg (mode1, op1);
13049 pat = GEN_FCN (icode) (op0, op0, op1);
13050 if (! pat)
13051 return 0;
13052 emit_insn (pat);
13053 return 0;
13055 case IX86_BUILTIN_MOVNTPS:
13056 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13057 case IX86_BUILTIN_MOVNTQ:
13058 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13060 case IX86_BUILTIN_LDMXCSR:
13061 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13062 target = assign_386_stack_local (SImode, 0);
13063 emit_move_insn (target, op0);
13064 emit_insn (gen_ldmxcsr (target));
13065 return 0;
13067 case IX86_BUILTIN_STMXCSR:
13068 target = assign_386_stack_local (SImode, 0);
13069 emit_insn (gen_stmxcsr (target));
13070 return copy_to_mode_reg (SImode, target);
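/* Note that ldmxcsr/stmxcsr only accept memory operands, which is why the
value is bounced through a 32-bit stack slot in the two cases above.  */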
13072 case IX86_BUILTIN_SHUFPS:
13073 case IX86_BUILTIN_SHUFPD:
13074 icode = (fcode == IX86_BUILTIN_SHUFPS
13075 ? CODE_FOR_sse_shufps
13076 : CODE_FOR_sse2_shufpd);
13077 arg0 = TREE_VALUE (arglist);
13078 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13079 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13080 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13081 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13082 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13083 tmode = insn_data[icode].operand[0].mode;
13084 mode0 = insn_data[icode].operand[1].mode;
13085 mode1 = insn_data[icode].operand[2].mode;
13086 mode2 = insn_data[icode].operand[3].mode;
13088 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13089 op0 = copy_to_mode_reg (mode0, op0);
13090 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13091 op1 = copy_to_mode_reg (mode1, op1);
13092 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13093 {
13094 /* @@@ better error message */
13095 error ("mask must be an immediate");
13096 return gen_reg_rtx (tmode);
13097 }
13098 if (target == 0
13099 || GET_MODE (target) != tmode
13100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13101 target = gen_reg_rtx (tmode);
13102 pat = GEN_FCN (icode) (target, op0, op1, op2);
13103 if (! pat)
13104 return 0;
13105 emit_insn (pat);
13106 return target;
13108 case IX86_BUILTIN_PSHUFW:
13109 case IX86_BUILTIN_PSHUFD:
13110 case IX86_BUILTIN_PSHUFHW:
13111 case IX86_BUILTIN_PSHUFLW:
13112 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13113 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13114 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13115 : CODE_FOR_mmx_pshufw);
13116 arg0 = TREE_VALUE (arglist);
13117 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13118 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13119 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13120 tmode = insn_data[icode].operand[0].mode;
13121 mode1 = insn_data[icode].operand[1].mode;
13122 mode2 = insn_data[icode].operand[2].mode;
13124 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13125 op0 = copy_to_mode_reg (mode1, op0);
13126 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13127 {
13128 /* @@@ better error message */
13129 error ("mask must be an immediate");
13130 return const0_rtx;
13131 }
13132 if (target == 0
13133 || GET_MODE (target) != tmode
13134 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13135 target = gen_reg_rtx (tmode);
13136 pat = GEN_FCN (icode) (target, op0, op1);
13137 if (! pat)
13138 return 0;
13139 emit_insn (pat);
13140 return target;
13142 case IX86_BUILTIN_FEMMS:
13143 emit_insn (gen_femms ());
13144 return NULL_RTX;
13146 case IX86_BUILTIN_PAVGUSB:
13147 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13149 case IX86_BUILTIN_PF2ID:
13150 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13152 case IX86_BUILTIN_PFACC:
13153 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13155 case IX86_BUILTIN_PFADD:
13156 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13158 case IX86_BUILTIN_PFCMPEQ:
13159 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13161 case IX86_BUILTIN_PFCMPGE:
13162 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13164 case IX86_BUILTIN_PFCMPGT:
13165 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13167 case IX86_BUILTIN_PFMAX:
13168 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13170 case IX86_BUILTIN_PFMIN:
13171 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13173 case IX86_BUILTIN_PFMUL:
13174 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13176 case IX86_BUILTIN_PFRCP:
13177 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13179 case IX86_BUILTIN_PFRCPIT1:
13180 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13182 case IX86_BUILTIN_PFRCPIT2:
13183 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13185 case IX86_BUILTIN_PFRSQIT1:
13186 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13188 case IX86_BUILTIN_PFRSQRT:
13189 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13191 case IX86_BUILTIN_PFSUB:
13192 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13194 case IX86_BUILTIN_PFSUBR:
13195 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13197 case IX86_BUILTIN_PI2FD:
13198 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13200 case IX86_BUILTIN_PMULHRW:
13201 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13203 case IX86_BUILTIN_PF2IW:
13204 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13206 case IX86_BUILTIN_PFNACC:
13207 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13209 case IX86_BUILTIN_PFPNACC:
13210 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13212 case IX86_BUILTIN_PI2FW:
13213 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13215 case IX86_BUILTIN_PSWAPDSI:
13216 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13218 case IX86_BUILTIN_PSWAPDSF:
13219 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13221 case IX86_BUILTIN_SSE_ZERO:
13222 target = gen_reg_rtx (V4SFmode);
13223 emit_insn (gen_sse_clrv4sf (target));
13224 return target;
13226 case IX86_BUILTIN_MMX_ZERO:
13227 target = gen_reg_rtx (DImode);
13228 emit_insn (gen_mmx_clrdi (target));
13229 return target;
13231 case IX86_BUILTIN_SQRTSD:
13232 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13233 case IX86_BUILTIN_LOADAPD:
13234 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13235 case IX86_BUILTIN_LOADUPD:
13236 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13238 case IX86_BUILTIN_STOREAPD:
13239 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13240 case IX86_BUILTIN_STOREUPD:
13241 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13243 case IX86_BUILTIN_LOADSD:
13244 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13246 case IX86_BUILTIN_STORESD:
13247 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
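/* IX86_BUILTIN_SETPD1 and IX86_BUILTIN_SETPD have no single underlying
instruction: the scalar argument(s) are spilled to a stack slot, loaded
back as a vector, and (for SETPD1) the low element is duplicated with
shufpd.  */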
13249 case IX86_BUILTIN_SETPD1:
13250 target = assign_386_stack_local (DFmode, 0);
13251 arg0 = TREE_VALUE (arglist);
13252 emit_move_insn (adjust_address (target, DFmode, 0),
13253 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13254 op0 = gen_reg_rtx (V2DFmode);
13255 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13256 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13257 return op0;
13259 case IX86_BUILTIN_SETPD:
13260 target = assign_386_stack_local (V2DFmode, 0);
13261 arg0 = TREE_VALUE (arglist);
13262 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13263 emit_move_insn (adjust_address (target, DFmode, 0),
13264 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13265 emit_move_insn (adjust_address (target, DFmode, 8),
13266 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13267 op0 = gen_reg_rtx (V2DFmode);
13268 emit_insn (gen_sse2_movapd (op0, target));
13269 return op0;
13271 case IX86_BUILTIN_LOADRPD:
13272 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13273 gen_reg_rtx (V2DFmode), 1);
13274 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13275 return target;
13277 case IX86_BUILTIN_LOADPD1:
13278 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13279 gen_reg_rtx (V2DFmode), 1);
13280 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13281 return target;
13283 case IX86_BUILTIN_STOREPD1:
13284 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13285 case IX86_BUILTIN_STORERPD:
13286 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13288 case IX86_BUILTIN_MFENCE:
13289 emit_insn (gen_sse2_mfence ());
13290 return 0;
13291 case IX86_BUILTIN_LFENCE:
13292 emit_insn (gen_sse2_lfence ());
13293 return 0;
13295 case IX86_BUILTIN_CLFLUSH:
13296 arg0 = TREE_VALUE (arglist);
13297 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13298 icode = CODE_FOR_sse2_clflush;
13299 mode0 = insn_data[icode].operand[0].mode;
13300 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13301 op0 = copy_to_mode_reg (mode0, op0);
13303 emit_insn (gen_sse2_clflush (op0));
13304 return 0;
13306 case IX86_BUILTIN_MOVNTPD:
13307 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13308 case IX86_BUILTIN_MOVNTDQ:
13309 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13310 case IX86_BUILTIN_MOVNTI:
13311 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13313 default:
13314 break;
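/* Builtins without bespoke handling above are looked up in the generic
two-operand, one-operand and comi descriptor tables and expanded via the
shared helper routines.  */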
13317 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13318 if (d->code == fcode)
13320 /* Compares are treated specially. */
13321 if (d->icode == CODE_FOR_maskcmpv4sf3
13322 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13323 || d->icode == CODE_FOR_maskncmpv4sf3
13324 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13325 || d->icode == CODE_FOR_maskcmpv2df3
13326 || d->icode == CODE_FOR_vmmaskcmpv2df3
13327 || d->icode == CODE_FOR_maskncmpv2df3
13328 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13329 return ix86_expand_sse_compare (d, arglist, target);
13331 return ix86_expand_binop_builtin (d->icode, arglist, target);
13334 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13335 if (d->code == fcode)
13336 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13338 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13339 if (d->code == fcode)
13340 return ix86_expand_sse_comi (d, arglist, target);
13342 /* @@@ Should really do something sensible here. */
13343 return 0;
13346 /* Store OPERAND to the memory after reload is completed. This means
13347 that we can't easily use assign_stack_local. */
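/* A rough usage sketch (with hypothetical operand names), as seen from the
machine description:

      rtx slot = ix86_force_to_memory (SImode, operands[1]);
      ...emit an insn that reads SLOT instead of a register...
      ix86_free_from_memory (SImode);

With a 64-bit red zone the slot lives just below the stack pointer;
otherwise the value is pushed and the space is released again by
ix86_free_from_memory.  */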
13348 rtx
13349 ix86_force_to_memory (mode, operand)
13350 enum machine_mode mode;
13351 rtx operand;
13353 rtx result;
13354 if (!reload_completed)
13355 abort ();
13356 if (TARGET_64BIT && TARGET_RED_ZONE)
13358 result = gen_rtx_MEM (mode,
13359 gen_rtx_PLUS (Pmode,
13360 stack_pointer_rtx,
13361 GEN_INT (-RED_ZONE_SIZE)));
13362 emit_move_insn (result, operand);
13364 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13366 switch (mode)
13368 case HImode:
13369 case SImode:
13370 operand = gen_lowpart (DImode, operand);
13371 /* FALLTHRU */
13372 case DImode:
13373 emit_insn (
13374 gen_rtx_SET (VOIDmode,
13375 gen_rtx_MEM (DImode,
13376 gen_rtx_PRE_DEC (DImode,
13377 stack_pointer_rtx)),
13378 operand));
13379 break;
13380 default:
13381 abort ();
13383 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13385 else
13387 switch (mode)
13389 case DImode:
13391 rtx operands[2];
13392 split_di (&operand, 1, operands, operands + 1);
13393 emit_insn (
13394 gen_rtx_SET (VOIDmode,
13395 gen_rtx_MEM (SImode,
13396 gen_rtx_PRE_DEC (Pmode,
13397 stack_pointer_rtx)),
13398 operands[1]));
13399 emit_insn (
13400 gen_rtx_SET (VOIDmode,
13401 gen_rtx_MEM (SImode,
13402 gen_rtx_PRE_DEC (Pmode,
13403 stack_pointer_rtx)),
13404 operands[0]));
13406 break;
13407 case HImode:
13408 /* It is better to store HImodes as SImodes. */
13409 if (!TARGET_PARTIAL_REG_STALL)
13410 operand = gen_lowpart (SImode, operand);
13411 /* FALLTHRU */
13412 case SImode:
13413 emit_insn (
13414 gen_rtx_SET (VOIDmode,
13415 gen_rtx_MEM (GET_MODE (operand),
13416 gen_rtx_PRE_DEC (SImode,
13417 stack_pointer_rtx)),
13418 operand));
13419 break;
13420 default:
13421 abort ();
13423 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13425 return result;
13428 /* Free operand from the memory. */
13429 void
13430 ix86_free_from_memory (mode)
13431 enum machine_mode mode;
13433 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13435 int size;
13437 if (mode == DImode || TARGET_64BIT)
13438 size = 8;
13439 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13440 size = 2;
13441 else
13442 size = 4;
13443 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13444 to a pop or add instruction if registers are available.  */
13445 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13446 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13447 GEN_INT (size))));
13451 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13452 QImode must go into class Q_REGS.
13453 Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf
13454 to do mem-to-mem moves through integer regs.  */
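/* For example, reloading the DFmode constant 1.0 into FLOAT_REGS returns
the class unchanged (standard_80387_constant_p accepts 0.0 and 1.0),
whereas an arbitrary constant such as 2.5 ends up as NO_REGS and is
therefore forced into the constant pool.  */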
13455 enum reg_class
13456 ix86_preferred_reload_class (x, class)
13457 rtx x;
13458 enum reg_class class;
13460 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13462 /* SSE can't load any constant directly yet. */
13463 if (SSE_CLASS_P (class))
13464 return NO_REGS;
13465 /* Floats can load 0 and 1. */
13466 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13468 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13469 if (MAYBE_SSE_CLASS_P (class))
13470 return (reg_class_subset_p (class, GENERAL_REGS)
13471 ? GENERAL_REGS : FLOAT_REGS);
13472 else
13473 return class;
13475 /* General regs can load everything. */
13476 if (reg_class_subset_p (class, GENERAL_REGS))
13477 return GENERAL_REGS;
13478 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13479 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13480 return NO_REGS;
13482 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13483 return NO_REGS;
13484 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13485 return Q_REGS;
13486 return class;
13489 /* If we are copying between general and FP registers, we need a memory
13490 location. The same is true for SSE and MMX registers.
13492 The macro can't work reliably when one of the CLASSES is a class containing
13493 registers from multiple units (SSE, MMX, integer).  We avoid this by never
13494 combining those units in a single alternative in the machine description.
13495 Ensure that this constraint holds to avoid unexpected surprises.
13497 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13498 enforce these sanity checks. */
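/* Concretely: a DImode move between MMX_REGS and GENERAL_REGS must go
through memory, while an SImode move between SSE (or MMX) registers and
general registers does not, since movd handles that directly.  */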
13499 int
13500 ix86_secondary_memory_needed (class1, class2, mode, strict)
13501 enum reg_class class1, class2;
13502 enum machine_mode mode;
13503 int strict;
13505 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13506 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13507 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13508 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13509 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13510 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13512 if (strict)
13513 abort ();
13514 else
13515 return 1;
13517 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13518 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13519 && (mode) != SImode)
13520 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13521 && (mode) != SImode));
13523 /* Return the cost of moving data from a register in class CLASS1 to
13524 one in class CLASS2.
13526 It is not required that the cost always equal 2 when FROM is the same as TO;
13527 on some machines it is expensive to move between registers if they are not
13528 general registers. */
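/* For instance, moving DFmode between FLOAT_REGS and GENERAL_REGS needs
secondary memory, so the reported cost is the store cost plus the load
cost, with an extra penalty of 20 when the source class needs more hard
registers than the destination (the memory-size mismatch case described
below).  */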
13529 int
13530 ix86_register_move_cost (mode, class1, class2)
13531 enum machine_mode mode;
13532 enum reg_class class1, class2;
13534 /* In case we require secondary memory, compute the cost of the store
13535 followed by the load.  When copying from a general purpose register we may
13536 emit multiple stores followed by a single load, causing a memory-size
13537 mismatch stall.  Count this as an arbitrarily high cost of 20.  */
13538 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13540 int add_cost = 0;
13541 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13542 add_cost = 20;
13543 return (MEMORY_MOVE_COST (mode, class1, 0)
13544 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13546 /* Moves between SSE/MMX and integer unit are expensive. */
13547 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13548 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13549 return ix86_cost->mmxsse_to_integer;
13550 if (MAYBE_FLOAT_CLASS_P (class1))
13551 return ix86_cost->fp_move;
13552 if (MAYBE_SSE_CLASS_P (class1))
13553 return ix86_cost->sse_move;
13554 if (MAYBE_MMX_CLASS_P (class1))
13555 return ix86_cost->mmx_move;
13556 return 2;
13559 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
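/* For example, QImode is always allowed in the four registers that have a
QImode low part; in the remaining integer registers it is allowed only in
64-bit mode, during or after reload, or when the target does not suffer
from partial register stalls.  */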
13560 int
13561 ix86_hard_regno_mode_ok (regno, mode)
13562 int regno;
13563 enum machine_mode mode;
13565 /* Flags registers, and only flags registers, can hold CCmode values.  */
13566 if (CC_REGNO_P (regno))
13567 return GET_MODE_CLASS (mode) == MODE_CC;
13568 if (GET_MODE_CLASS (mode) == MODE_CC
13569 || GET_MODE_CLASS (mode) == MODE_RANDOM
13570 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13571 return 0;
13572 if (FP_REGNO_P (regno))
13573 return VALID_FP_MODE_P (mode);
13574 if (SSE_REGNO_P (regno))
13575 return VALID_SSE_REG_MODE (mode);
13576 if (MMX_REGNO_P (regno))
13577 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13578 /* We handle both integer and floating point values in the general purpose
13579 registers.  In the future we should be able to handle vector modes as well.  */
13580 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13581 return 0;
13582 /* Take care with QImode values - they can live in non-QI regs, but then
13583 they do cause partial register stalls.  */
13584 if (regno < 4 || mode != QImode || TARGET_64BIT)
13585 return 1;
13586 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13589 /* Return the cost of moving data of mode M between a
13590 register and memory. A value of 2 is the default; this cost is
13591 relative to those in `REGISTER_MOVE_COST'.
13593 If moving between registers and memory is more expensive than
13594 between two registers, you should define this macro to express the
13595 relative cost.
13597 Also model the increased cost of moving QImode registers in
13598 non-Q_REGS classes.
13599 */
13600 int
13601 ix86_memory_move_cost (mode, class, in)
13602 enum machine_mode mode;
13603 enum reg_class class;
13604 int in;
13606 if (FLOAT_CLASS_P (class))
13608 int index;
13609 switch (mode)
13611 case SFmode:
13612 index = 0;
13613 break;
13614 case DFmode:
13615 index = 1;
13616 break;
13617 case XFmode:
13618 case TFmode:
13619 index = 2;
13620 break;
13621 default:
13622 return 100;
13624 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13626 if (SSE_CLASS_P (class))
13628 int index;
13629 switch (GET_MODE_SIZE (mode))
13631 case 4:
13632 index = 0;
13633 break;
13634 case 8:
13635 index = 1;
13636 break;
13637 case 16:
13638 index = 2;
13639 break;
13640 default:
13641 return 100;
13643 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13645 if (MMX_CLASS_P (class))
13647 int index;
13648 switch (GET_MODE_SIZE (mode))
13650 case 4:
13651 index = 0;
13652 break;
13653 case 8:
13654 index = 1;
13655 break;
13656 default:
13657 return 100;
13659 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13661 switch (GET_MODE_SIZE (mode))
13663 case 1:
13664 if (in)
13665 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13666 : ix86_cost->movzbl_load);
13667 else
13668 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13669 : ix86_cost->int_store[0] + 4);
13670 break;
13671 case 2:
13672 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13673 default:
13674 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
13675 if (mode == TFmode)
13676 mode = XFmode;
13677 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13678 * (int) GET_MODE_SIZE (mode) / 4);
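/* As an illustration of the QImode handling above: storing a QImode value
from a register outside Q_REGS costs int_store[0] + 4, reflecting the
extra work of getting the byte out of a register that has no QImode
part.  */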
13682 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13683 static void
13684 ix86_svr3_asm_out_constructor (symbol, priority)
13685 rtx symbol;
13686 int priority ATTRIBUTE_UNUSED;
13688 init_section ();
13689 fputs ("\tpushl $", asm_out_file);
13690 assemble_name (asm_out_file, XSTR (symbol, 0));
13691 fputc ('\n', asm_out_file);
13693 #endif
13695 #if TARGET_MACHO
13697 static int current_machopic_label_num;
13699 /* Given a symbol name and its associated stub, write out the
13700 definition of the stub. */
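/* Roughly, the non-MACHOPIC_PURE flavour of the stub emitted below looks
like

	<stub>:		.indirect_symbol <symbol>
			jmp *L<n>$lz
	<binder>:	pushl $L<n>$lz
			jmp dyld_stub_binding_helper
	L<n>$lz:	.indirect_symbol <symbol>
			.long <binder>

so the first call resolves the symbol through dyld's binding helper and
later calls jump via the lazy pointer.  */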
13702 void
13703 machopic_output_stub (file, symb, stub)
13704 FILE *file;
13705 const char *symb, *stub;
13707 unsigned int length;
13708 char *binder_name, *symbol_name, lazy_ptr_name[32];
13709 int label = ++current_machopic_label_num;
13711 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13712 symb = (*targetm.strip_name_encoding) (symb);
13714 length = strlen (stub);
13715 binder_name = alloca (length + 32);
13716 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13718 length = strlen (symb);
13719 symbol_name = alloca (length + 32);
13720 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13722 sprintf (lazy_ptr_name, "L%d$lz", label);
13724 if (MACHOPIC_PURE)
13725 machopic_picsymbol_stub_section ();
13726 else
13727 machopic_symbol_stub_section ();
13729 fprintf (file, "%s:\n", stub);
13730 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13732 if (MACHOPIC_PURE)
13734 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13735 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13736 fprintf (file, "\tjmp %%edx\n");
13738 else
13739 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13741 fprintf (file, "%s:\n", binder_name);
13743 if (MACHOPIC_PURE)
13745 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13746 fprintf (file, "\tpushl %%eax\n");
13748 else
13749 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13751 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13753 machopic_lazy_symbol_ptr_section ();
13754 fprintf (file, "%s:\n", lazy_ptr_name);
13755 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13756 fprintf (file, "\t.long %s\n", binder_name);
13758 #endif /* TARGET_MACHO */
13760 /* Order the registers for register allocator. */
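/* The resulting order is: call-clobbered general registers, call-saved
general registers, then whichever of the x87 and SSE register files is
preferred for FP math, then the other one, and finally the MMX registers;
slots for registers we never allocate are filled with 0.  */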
13762 void
13763 x86_order_regs_for_local_alloc ()
13765 int pos = 0;
13766 int i;
13768 /* First allocate the local general purpose registers. */
13769 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13770 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13771 reg_alloc_order [pos++] = i;
13773 /* Global general purpose registers. */
13774 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13775 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13776 reg_alloc_order [pos++] = i;
13778 /* x87 registers come first in case we are doing FP math
13779 using them. */
13780 if (!TARGET_SSE_MATH)
13781 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13782 reg_alloc_order [pos++] = i;
13784 /* SSE registers. */
13785 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13786 reg_alloc_order [pos++] = i;
13787 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13788 reg_alloc_order [pos++] = i;
13790 /* x87 registers.  */
13791 if (TARGET_SSE_MATH)
13792 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13793 reg_alloc_order [pos++] = i;
13795 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13796 reg_alloc_order [pos++] = i;
13798 /* Initialize the rest of the array, as we do not allocate some registers
13799 at all.  */
13800 while (pos < FIRST_PSEUDO_REGISTER)
13801 reg_alloc_order [pos++] = 0;
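/* Output the assembly for an mi thunk: add DELTA to the incoming `this'
pointer, whose location depends on the register-passing convention and on
whether the return value is passed in memory, and then tail-jump to
FUNCTION, going through the GOT when generating PIC code.  */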
13804 void
13805 x86_output_mi_thunk (file, delta, function)
13806 FILE *file;
13807 int delta;
13808 tree function;
13810 tree parm;
13811 rtx xops[3];
13813 if (ix86_regparm > 0)
13814 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13815 else
13816 parm = NULL_TREE;
13817 for (; parm; parm = TREE_CHAIN (parm))
13818 if (TREE_VALUE (parm) == void_type_node)
13819 break;
13821 xops[0] = GEN_INT (delta);
13822 if (TARGET_64BIT)
13824 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13825 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13826 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13827 if (flag_pic)
13829 fprintf (file, "\tjmp *");
13830 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13831 fprintf (file, "@GOTPCREL(%%rip)\n");
13833 else
13835 fprintf (file, "\tjmp ");
13836 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13837 fprintf (file, "\n");
13840 else
13842 if (parm)
13843 xops[1] = gen_rtx_REG (SImode, 0);
13844 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13845 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13846 else
13847 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13848 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13850 if (flag_pic)
13852 xops[0] = pic_offset_table_rtx;
13853 xops[1] = gen_label_rtx ();
13854 xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
13856 if (ix86_regparm > 2)
13857 abort ();
13858 output_asm_insn ("push{l}\t%0", xops);
13859 output_asm_insn ("call\t%P1", xops);
13860 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13861 output_asm_insn ("pop{l}\t%0", xops);
13862 output_asm_insn
13863 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13864 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13865 output_asm_insn
13866 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13867 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13868 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13870 else
13872 fprintf (file, "\tjmp ");
13873 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13874 fprintf (file, "\n");
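/* Return the alignment to give a FIELD whose natural alignment is
COMPUTED bits: on 32-bit targets without -malign-double, double,
long long and similar scalar fields are capped at 32-bit alignment.  */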
13879 int
13880 x86_field_alignment (field, computed)
13881 tree field;
13882 int computed;
13884 enum machine_mode mode;
13885 tree type = TREE_TYPE (field);
13887 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13888 return computed;
13889 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13890 ? get_inner_array_type (type) : type);
13891 if (mode == DFmode || mode == DCmode
13892 || GET_MODE_CLASS (mode) == MODE_INT
13893 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13894 return MIN (32, computed);
13895 return computed;
13898 /* Implement machine specific optimizations.
13899 At the moment we implement a single transformation: AMD Athlon works faster
13900 when RET is not the destination of a conditional jump or directly preceded
13901 by another jump instruction.  We avoid the penalty by inserting a NOP just
13902 before the RET instructions in such cases.  */
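/* For example, a function tail such as

	jne	.L3
	ret

gets a nop inserted before the ret; the same happens when the ret starts
a basic block that is reached by a non-fallthru edge, i.e. when it is the
direct target of a jump.  */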
13903 void
13904 x86_machine_dependent_reorg (first)
13905 rtx first ATTRIBUTE_UNUSED;
13907 edge e;
13909 if (!TARGET_ATHLON || !optimize || optimize_size)
13910 return;
13911 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13913 basic_block bb = e->src;
13914 rtx ret = bb->end;
13915 rtx prev;
13916 bool insert = false;
13918 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13919 continue;
13920 prev = prev_nonnote_insn (ret);
13921 if (prev && GET_CODE (prev) == CODE_LABEL)
13923 edge e;
13924 for (e = bb->pred; e; e = e->pred_next)
13925 if (EDGE_FREQUENCY (e) && e->src->index > 0
13926 && !(e->flags & EDGE_FALLTHRU))
13927 insert = 1;
13929 if (!insert)
13931 prev = prev_real_insn (ret);
13932 if (prev && GET_CODE (prev) == JUMP_INSN
13933 && any_condjump_p (prev))
13934 insert = 1;
13936 if (insert)
13937 emit_insn_before (gen_nop (), ret);
13941 #include "gt-i386.h"