[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of storing fp registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of storing fp registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of storing fp registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of storing fp registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of storing fp registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of storing fp registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
348 const struct processor_costs *ix86_cost = &pentium_cost;
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
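/* Each of these masks is tested against the bit for the current tuning
   target.  As an illustrative sketch (the real TARGET_* wrappers live in
   i386.h), such a test looks roughly like

       #define CPUMASK            (1 << ix86_cpu)
       #define TARGET_USE_LEAVE   (x86_use_leave & CPUMASK)

   so a tuning feature is in effect whenever the bit for ix86_cpu is set
   in the corresponding constant above.  */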
400 /* If the average insn count for a single function invocation is
401 lower than this constant, emit fast (but longer) prologue and
402 epilogue code. */
403 #define FAST_PROLOGUE_INSN_COUNT 30
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
409 #define AT_BP(MODE) (gen_rtx_MEM ((MODE), hard_frame_pointer_rtx))
411 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
412 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
413 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
414 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
416 /* Array of the smallest class containing reg number REGNO, indexed by
417 REGNO. Used by REGNO_REG_CLASS in i386.h. */
419 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
421 /* ax, dx, cx, bx */
422 AREG, DREG, CREG, BREG,
423 /* si, di, bp, sp */
424 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
425 /* FP registers */
426 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
427 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
428 /* arg pointer */
429 NON_Q_REGS,
430 /* flags, fpsr, dirflag, frame */
431 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
432 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
433 SSE_REGS, SSE_REGS,
434 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
435 MMX_REGS, MMX_REGS,
436 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
437 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
438 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
439 SSE_REGS, SSE_REGS,
442 /* The "default" register map used in 32bit mode. */
444 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
446 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
447 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
448 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
449 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
450 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
451 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
452 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
455 static int const x86_64_int_parameter_registers[6] =
457 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
458 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
461 static int const x86_64_int_return_registers[4] =
463 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
466 /* The "default" register map used in 64bit mode. */
467 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
469 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
470 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
471 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
472 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
473 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
474 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
475 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
478 /* Define the register numbers to be used in Dwarf debugging information.
479 The SVR4 reference port C compiler uses the following register numbers
480 in its Dwarf output code:
481 0 for %eax (gcc regno = 0)
482 1 for %ecx (gcc regno = 2)
483 2 for %edx (gcc regno = 1)
484 3 for %ebx (gcc regno = 3)
485 4 for %esp (gcc regno = 7)
486 5 for %ebp (gcc regno = 6)
487 6 for %esi (gcc regno = 4)
488 7 for %edi (gcc regno = 5)
489 The following three DWARF register numbers are never generated by
490 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
491 believes these numbers have these meanings.
492 8 for %eip (no gcc equivalent)
493 9 for %eflags (gcc regno = 17)
494 10 for %trapno (no gcc equivalent)
495 It is not at all clear how we should number the FP stack registers
496 for the x86 architecture. If the version of SDB on x86/svr4 were
497 a bit less brain dead with respect to floating-point then we would
498 have a precedent to follow with respect to DWARF register numbers
499 for x86 FP registers, but the SDB on x86/svr4 is so completely
500 broken with respect to FP registers that it is hardly worth thinking
501 of it as something to strive for compatibility with.
502 The version of x86/svr4 SDB I have at the moment does (partially)
503 seem to believe that DWARF register number 11 is associated with
504 the x86 register %st(0), but that's about all. Higher DWARF
505 register numbers don't seem to be associated with anything in
506 particular, and even for DWARF regno 11, SDB only seems to under-
507 stand that it should say that a variable lives in %st(0) (when
508 asked via an `=' command) if we said it was in DWARF regno 11,
509 but SDB still prints garbage when asked for the value of the
510 variable in question (via a `/' command).
511 (Also note that the labels SDB prints for various FP stack regs
512 when doing an `x' command are all wrong.)
513 Note that these problems generally don't affect the native SVR4
514 C compiler because it doesn't allow the use of -O with -g and
515 because when it is *not* optimizing, it allocates a memory
516 location for each floating-point variable, and the memory
517 location is what gets described in the DWARF AT_location
518 attribute for the variable in question.
519 Regardless of the severe mental illness of the x86/svr4 SDB, we
520 do something sensible here and we use the following DWARF
521 register numbers. Note that these are all stack-top-relative
522 numbers.
523 11 for %st(0) (gcc regno = 8)
524 12 for %st(1) (gcc regno = 9)
525 13 for %st(2) (gcc regno = 10)
526 14 for %st(3) (gcc regno = 11)
527 15 for %st(4) (gcc regno = 12)
528 16 for %st(5) (gcc regno = 13)
529 17 for %st(6) (gcc regno = 14)
530 18 for %st(7) (gcc regno = 15)
532 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
534 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
535 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
536 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
537 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
538 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
539 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
540 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
543 /* Test and compare insns in i386.md store the information needed to
544 generate branch and scc insns here. */
546 rtx ix86_compare_op0 = NULL_RTX;
547 rtx ix86_compare_op1 = NULL_RTX;
549 /* The encoding characters for the four TLS models present in ELF. */
551 static char const tls_model_chars[] = " GLil";
553 #define MAX_386_STACK_LOCALS 3
554 /* Size of the register save area. */
555 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
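/* As a worked check of the size, assuming the usual 64-bit ABI values
   (REGPARM_MAX == 6 integer registers, SSE_REGPARM_MAX == 8 SSE registers,
   UNITS_PER_WORD == 8): 6 * 8 + 8 * 16 = 176 bytes of register save area,
   matching the x86-64 psABI.  */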
557 /* Define the structure for the machine field in struct function. */
558 struct machine_function GTY(())
560 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
561 const char *some_ld_name;
562 int save_varrargs_registers;
563 int accesses_prev_frame;
566 #define ix86_stack_locals (cfun->machine->stack_locals)
567 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
569 /* Structure describing stack frame layout.
570 Stack grows downward:
572 [arguments]
573 <- ARG_POINTER
574 saved pc
576 saved frame pointer if frame_pointer_needed
577 <- HARD_FRAME_POINTER
578 [saved regs]
580 [padding1] \
582 [va_arg registers] (
583 > to_allocate <- FRAME_POINTER
584 [frame] (
586 [padding2] /
588 struct ix86_frame
590 int nregs;
591 int padding1;
592 int va_arg_size;
593 HOST_WIDE_INT frame;
594 int padding2;
595 int outgoing_arguments_size;
596 int red_zone_size;
598 HOST_WIDE_INT to_allocate;
599 /* The offsets relative to ARG_POINTER. */
600 HOST_WIDE_INT frame_pointer_offset;
601 HOST_WIDE_INT hard_frame_pointer_offset;
602 HOST_WIDE_INT stack_pointer_offset;
605 /* Used to enable/disable debugging features. */
606 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
607 /* Code model option as passed by user. */
608 const char *ix86_cmodel_string;
609 /* Parsed value. */
610 enum cmodel ix86_cmodel;
611 /* Asm dialect. */
612 const char *ix86_asm_string;
613 enum asm_dialect ix86_asm_dialect = ASM_ATT;
614 /* TLS dialect. */
615 const char *ix86_tls_dialect_string;
616 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
618 /* Which unit we are generating floating point math for. */
619 enum fpmath_unit ix86_fpmath;
621 /* Which cpu we are scheduling for. */
622 enum processor_type ix86_cpu;
623 /* Which instruction set architecture to use. */
624 enum processor_type ix86_arch;
626 /* Strings to hold which cpu and instruction set architecture to use. */
627 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
628 const char *ix86_arch_string; /* for -march=<xxx> */
629 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
631 /* # of registers to use to pass arguments. */
632 const char *ix86_regparm_string;
634 /* True if the SSE prefetch instruction is not a NOP. */
635 int x86_prefetch_sse;
637 /* ix86_regparm_string as a number */
638 int ix86_regparm;
640 /* Alignment to use for loops and jumps: */
642 /* Power of two alignment for loops. */
643 const char *ix86_align_loops_string;
645 /* Power of two alignment for non-loop jumps. */
646 const char *ix86_align_jumps_string;
648 /* Power of two alignment for stack boundary in bytes. */
649 const char *ix86_preferred_stack_boundary_string;
651 /* Preferred alignment for stack boundary in bits. */
652 int ix86_preferred_stack_boundary;
654 /* Values 1-5: see jump.c */
655 int ix86_branch_cost;
656 const char *ix86_branch_cost_string;
658 /* Power of two alignment for functions. */
659 const char *ix86_align_funcs_string;
661 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
662 static char internal_label_prefix[16];
663 static int internal_label_prefix_len;
665 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
666 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
667 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
668 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
669 int, int, FILE *));
670 static const char *get_some_local_dynamic_name PARAMS ((void));
671 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
672 static rtx maybe_get_pool_constant PARAMS ((rtx));
673 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
674 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
675 rtx *, rtx *));
676 static rtx get_thread_pointer PARAMS ((void));
677 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
678 static rtx gen_push PARAMS ((rtx));
679 static int memory_address_length PARAMS ((rtx addr));
680 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
681 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
682 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
683 static void ix86_dump_ppro_packet PARAMS ((FILE *));
684 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
685 static struct machine_function * ix86_init_machine_status PARAMS ((void));
686 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
687 static int ix86_nsaved_regs PARAMS ((void));
688 static void ix86_emit_save_regs PARAMS ((void));
689 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
690 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
691 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
692 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
693 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
694 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
695 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
696 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
697 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
698 static int ix86_issue_rate PARAMS ((void));
699 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
700 static void ix86_sched_init PARAMS ((FILE *, int, int));
701 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
702 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
703 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
704 static int ia32_multipass_dfa_lookahead PARAMS ((void));
705 static void ix86_init_mmx_sse_builtins PARAMS ((void));
707 struct ix86_address
709 rtx base, index, disp;
710 HOST_WIDE_INT scale;
713 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
715 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
716 static const char *ix86_strip_name_encoding PARAMS ((const char *))
717 ATTRIBUTE_UNUSED;
719 struct builtin_description;
720 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
723 tree, rtx));
724 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
726 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
727 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
728 tree, rtx));
729 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
730 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
731 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
732 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
733 enum rtx_code *,
734 enum rtx_code *,
735 enum rtx_code *));
736 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
737 rtx *, rtx *));
738 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
740 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
741 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
742 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
743 static int ix86_save_reg PARAMS ((unsigned int, int));
744 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
745 static int ix86_comp_type_attributes PARAMS ((tree, tree));
746 const struct attribute_spec ix86_attribute_table[];
747 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
748 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
750 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
751 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
752 #endif
754 /* Register class used for passing a given 64-bit part of the argument.
755 These represent classes as documented by the PS ABI, with the exception
756 of the SSESF and SSEDF classes, which are basically the SSE class, except that
757 gcc uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
759 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
760 whenever possible (the upper half then contains only padding).
762 enum x86_64_reg_class
764 X86_64_NO_CLASS,
765 X86_64_INTEGER_CLASS,
766 X86_64_INTEGERSI_CLASS,
767 X86_64_SSE_CLASS,
768 X86_64_SSESF_CLASS,
769 X86_64_SSEDF_CLASS,
770 X86_64_SSEUP_CLASS,
771 X86_64_X87_CLASS,
772 X86_64_X87UP_CLASS,
773 X86_64_MEMORY_CLASS
775 static const char * const x86_64_reg_class_name[] =
776 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
778 #define MAX_CLASSES 4
779 static int classify_argument PARAMS ((enum machine_mode, tree,
780 enum x86_64_reg_class [MAX_CLASSES],
781 int));
782 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
783 int *));
784 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
785 const int *, int));
786 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
787 enum x86_64_reg_class));
789 /* Initialize the GCC target structure. */
790 #undef TARGET_ATTRIBUTE_TABLE
791 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
792 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
793 # undef TARGET_MERGE_DECL_ATTRIBUTES
794 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
795 #endif
797 #undef TARGET_COMP_TYPE_ATTRIBUTES
798 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
800 #undef TARGET_INIT_BUILTINS
801 #define TARGET_INIT_BUILTINS ix86_init_builtins
803 #undef TARGET_EXPAND_BUILTIN
804 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
806 #undef TARGET_ASM_FUNCTION_EPILOGUE
807 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
809 #undef TARGET_ASM_OPEN_PAREN
810 #define TARGET_ASM_OPEN_PAREN ""
811 #undef TARGET_ASM_CLOSE_PAREN
812 #define TARGET_ASM_CLOSE_PAREN ""
814 #undef TARGET_ASM_ALIGNED_HI_OP
815 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
816 #undef TARGET_ASM_ALIGNED_SI_OP
817 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
818 #ifdef ASM_QUAD
819 #undef TARGET_ASM_ALIGNED_DI_OP
820 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
821 #endif
823 #undef TARGET_ASM_UNALIGNED_HI_OP
824 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
825 #undef TARGET_ASM_UNALIGNED_SI_OP
826 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
827 #undef TARGET_ASM_UNALIGNED_DI_OP
828 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
830 #undef TARGET_SCHED_ADJUST_COST
831 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
832 #undef TARGET_SCHED_ISSUE_RATE
833 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
834 #undef TARGET_SCHED_VARIABLE_ISSUE
835 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
836 #undef TARGET_SCHED_INIT
837 #define TARGET_SCHED_INIT ix86_sched_init
838 #undef TARGET_SCHED_REORDER
839 #define TARGET_SCHED_REORDER ix86_sched_reorder
840 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
841 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
842 ia32_use_dfa_pipeline_interface
843 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
844 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
845 ia32_multipass_dfa_lookahead
847 #ifdef HAVE_AS_TLS
848 #undef TARGET_HAVE_TLS
849 #define TARGET_HAVE_TLS true
850 #endif
852 struct gcc_target targetm = TARGET_INITIALIZER;
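/* The #undef/#define pairs above populate the targetm vector; for example,
   the scheduler reaches ix86_adjust_cost through targetm.sched.adjust_cost
   (field layout as in target.h of this vintage; mentioned here only as an
   orientation aid).  */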
854 /* Sometimes certain combinations of command options do not make
855 sense on a particular target machine. You can define a macro
856 `OVERRIDE_OPTIONS' to take account of this. This macro, if
857 defined, is executed once just after all the command options have
858 been parsed.
860 Don't use this macro to turn on various extra optimizations for
861 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
863 void
864 override_options ()
866 int i;
867 /* Comes from final.c -- no real reason to change it. */
868 #define MAX_CODE_ALIGN 16
870 static struct ptt
872 const struct processor_costs *cost; /* Processor costs */
873 const int target_enable; /* Target flags to enable. */
874 const int target_disable; /* Target flags to disable. */
875 const int align_loop; /* Default alignments. */
876 const int align_loop_max_skip;
877 const int align_jump;
878 const int align_jump_max_skip;
879 const int align_func;
880 const int branch_cost;
882 const processor_target_table[PROCESSOR_max] =
884 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
885 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
886 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
887 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
888 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
889 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
890 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
893 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
894 static struct pta
896 const char *const name; /* processor name or nickname. */
897 const enum processor_type processor;
898 const enum pta_flags
900 PTA_SSE = 1,
901 PTA_SSE2 = 2,
902 PTA_MMX = 4,
903 PTA_PREFETCH_SSE = 8,
904 PTA_3DNOW = 16,
905 PTA_3DNOW_A = 64
906 } flags;
908 const processor_alias_table[] =
910 {"i386", PROCESSOR_I386, 0},
911 {"i486", PROCESSOR_I486, 0},
912 {"i586", PROCESSOR_PENTIUM, 0},
913 {"pentium", PROCESSOR_PENTIUM, 0},
914 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
915 {"i686", PROCESSOR_PENTIUMPRO, 0},
916 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
917 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
918 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
920 PTA_MMX | PTA_PREFETCH_SSE},
921 {"k6", PROCESSOR_K6, PTA_MMX},
922 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
924 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
925 | PTA_3DNOW_A},
926 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
927 | PTA_3DNOW | PTA_3DNOW_A},
928 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
929 | PTA_3DNOW_A | PTA_SSE},
930 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
931 | PTA_3DNOW_A | PTA_SSE},
932 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
933 | PTA_3DNOW_A | PTA_SSE},
936 int const pta_size = ARRAY_SIZE (processor_alias_table);
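/* As an example of how this table is consumed below: -march=pentium3 sets
   ix86_arch (and, absent an explicit -mcpu=, also ix86_cpu) to
   PROCESSOR_PENTIUMPRO and turns on MASK_MMX and MASK_SSE, unless the user
   already set or cleared those flags explicitly on the command line.  */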
938 #ifdef SUBTARGET_OVERRIDE_OPTIONS
939 SUBTARGET_OVERRIDE_OPTIONS;
940 #endif
942 if (!ix86_cpu_string && ix86_arch_string)
943 ix86_cpu_string = ix86_arch_string;
944 if (!ix86_cpu_string)
945 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
946 if (!ix86_arch_string)
947 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
949 if (ix86_cmodel_string != 0)
951 if (!strcmp (ix86_cmodel_string, "small"))
952 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
953 else if (flag_pic)
954 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
955 else if (!strcmp (ix86_cmodel_string, "32"))
956 ix86_cmodel = CM_32;
957 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
958 ix86_cmodel = CM_KERNEL;
959 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
960 ix86_cmodel = CM_MEDIUM;
961 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
962 ix86_cmodel = CM_LARGE;
963 else
964 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
966 else
968 ix86_cmodel = CM_32;
969 if (TARGET_64BIT)
970 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
972 if (ix86_asm_string != 0)
974 if (!strcmp (ix86_asm_string, "intel"))
975 ix86_asm_dialect = ASM_INTEL;
976 else if (!strcmp (ix86_asm_string, "att"))
977 ix86_asm_dialect = ASM_ATT;
978 else
979 error ("bad value (%s) for -masm= switch", ix86_asm_string);
981 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
982 error ("code model `%s' not supported in the %s bit mode",
983 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
984 if (ix86_cmodel == CM_LARGE)
985 sorry ("code model `large' not supported yet");
986 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
987 sorry ("%i-bit mode not compiled in",
988 (target_flags & MASK_64BIT) ? 64 : 32);
990 for (i = 0; i < pta_size; i++)
991 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
993 ix86_arch = processor_alias_table[i].processor;
994 /* Default cpu tuning to the architecture. */
995 ix86_cpu = ix86_arch;
996 if (processor_alias_table[i].flags & PTA_MMX
997 && !(target_flags & MASK_MMX_SET))
998 target_flags |= MASK_MMX;
999 if (processor_alias_table[i].flags & PTA_3DNOW
1000 && !(target_flags & MASK_3DNOW_SET))
1001 target_flags |= MASK_3DNOW;
1002 if (processor_alias_table[i].flags & PTA_3DNOW_A
1003 && !(target_flags & MASK_3DNOW_A_SET))
1004 target_flags |= MASK_3DNOW_A;
1005 if (processor_alias_table[i].flags & PTA_SSE
1006 && !(target_flags & MASK_SSE_SET))
1007 target_flags |= MASK_SSE;
1008 if (processor_alias_table[i].flags & PTA_SSE2
1009 && !(target_flags & MASK_SSE2_SET))
1010 target_flags |= MASK_SSE2;
1011 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1012 x86_prefetch_sse = true;
1013 break;
1016 if (i == pta_size)
1017 error ("bad value (%s) for -march= switch", ix86_arch_string);
1019 for (i = 0; i < pta_size; i++)
1020 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1022 ix86_cpu = processor_alias_table[i].processor;
1023 break;
1025 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1026 x86_prefetch_sse = true;
1027 if (i == pta_size)
1028 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1030 if (optimize_size)
1031 ix86_cost = &size_cost;
1032 else
1033 ix86_cost = processor_target_table[ix86_cpu].cost;
1034 target_flags |= processor_target_table[ix86_cpu].target_enable;
1035 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1037 /* Arrange to set up i386_stack_locals for all functions. */
1038 init_machine_status = ix86_init_machine_status;
1040 /* Validate -mregparm= value. */
1041 if (ix86_regparm_string)
1043 i = atoi (ix86_regparm_string);
1044 if (i < 0 || i > REGPARM_MAX)
1045 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1046 else
1047 ix86_regparm = i;
1049 else
1050 if (TARGET_64BIT)
1051 ix86_regparm = REGPARM_MAX;
1053 /* If the user has provided any of the -malign-* options,
1054 warn and use that value only if -falign-* is not set.
1055 Remove this code in GCC 3.2 or later. */
1056 if (ix86_align_loops_string)
1058 warning ("-malign-loops is obsolete, use -falign-loops");
1059 if (align_loops == 0)
1061 i = atoi (ix86_align_loops_string);
1062 if (i < 0 || i > MAX_CODE_ALIGN)
1063 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1064 else
1065 align_loops = 1 << i;
1069 if (ix86_align_jumps_string)
1071 warning ("-malign-jumps is obsolete, use -falign-jumps");
1072 if (align_jumps == 0)
1074 i = atoi (ix86_align_jumps_string);
1075 if (i < 0 || i > MAX_CODE_ALIGN)
1076 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1077 else
1078 align_jumps = 1 << i;
1082 if (ix86_align_funcs_string)
1084 warning ("-malign-functions is obsolete, use -falign-functions");
1085 if (align_functions == 0)
1087 i = atoi (ix86_align_funcs_string);
1088 if (i < 0 || i > MAX_CODE_ALIGN)
1089 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1090 else
1091 align_functions = 1 << i;
1095 /* Default align_* from the processor table. */
1096 if (align_loops == 0)
1098 align_loops = processor_target_table[ix86_cpu].align_loop;
1099 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1101 if (align_jumps == 0)
1103 align_jumps = processor_target_table[ix86_cpu].align_jump;
1104 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1106 if (align_functions == 0)
1108 align_functions = processor_target_table[ix86_cpu].align_func;
1111 /* Validate -mpreferred-stack-boundary= value, or provide default.
1112 The default of 128 bits is for Pentium III's SSE __m128, but we
1113 don't want additional code to keep the stack aligned when
1114 optimizing for code size. */
1115 ix86_preferred_stack_boundary = (optimize_size
1116 ? TARGET_64BIT ? 64 : 32
1117 : 128);
1118 if (ix86_preferred_stack_boundary_string)
1120 i = atoi (ix86_preferred_stack_boundary_string);
1121 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1122 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1123 TARGET_64BIT ? 3 : 2);
1124 else
1125 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
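/* For example, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
   stack.  */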
1128 /* Validate -mbranch-cost= value, or provide default. */
1129 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1130 if (ix86_branch_cost_string)
1132 i = atoi (ix86_branch_cost_string);
1133 if (i < 0 || i > 5)
1134 error ("-mbranch-cost=%d is not between 0 and 5", i);
1135 else
1136 ix86_branch_cost = i;
1139 if (ix86_tls_dialect_string)
1141 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1142 ix86_tls_dialect = TLS_DIALECT_GNU;
1143 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1144 ix86_tls_dialect = TLS_DIALECT_SUN;
1145 else
1146 error ("bad value (%s) for -mtls-dialect= switch",
1147 ix86_tls_dialect_string);
1150 /* Keep nonleaf frame pointers. */
1151 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1152 flag_omit_frame_pointer = 1;
1154 /* If we're doing fast math, we don't care about comparison order
1155 wrt NaNs. This lets us use a shorter comparison sequence. */
1156 if (flag_unsafe_math_optimizations)
1157 target_flags &= ~MASK_IEEE_FP;
1159 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1160 since the insns won't need emulation. */
1161 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1162 target_flags &= ~MASK_NO_FANCY_MATH_387;
1164 if (TARGET_64BIT)
1166 if (TARGET_ALIGN_DOUBLE)
1167 error ("-malign-double makes no sense in the 64bit mode");
1168 if (TARGET_RTD)
1169 error ("-mrtd calling convention not supported in the 64bit mode");
1170 /* Enable by default the SSE and MMX builtins. */
1171 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1172 ix86_fpmath = FPMATH_SSE;
1174 else
1175 ix86_fpmath = FPMATH_387;
1177 if (ix86_fpmath_string != 0)
1179 if (! strcmp (ix86_fpmath_string, "387"))
1180 ix86_fpmath = FPMATH_387;
1181 else if (! strcmp (ix86_fpmath_string, "sse"))
1183 if (!TARGET_SSE)
1185 warning ("SSE instruction set disabled, using 387 arithmetics");
1186 ix86_fpmath = FPMATH_387;
1188 else
1189 ix86_fpmath = FPMATH_SSE;
1191 else if (! strcmp (ix86_fpmath_string, "387,sse")
1192 || ! strcmp (ix86_fpmath_string, "sse,387"))
1194 if (!TARGET_SSE)
1196 warning ("SSE instruction set disabled, using 387 arithmetics");
1197 ix86_fpmath = FPMATH_387;
1199 else if (!TARGET_80387)
1201 warning ("387 instruction set disabled, using SSE arithmetics");
1202 ix86_fpmath = FPMATH_SSE;
1204 else
1205 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1207 else
1208 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1211 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1212 on by -msse. */
1213 if (TARGET_SSE)
1215 target_flags |= MASK_MMX;
1216 x86_prefetch_sse = true;
1219 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1220 if (TARGET_3DNOW)
1222 target_flags |= MASK_MMX;
1223 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1224 extensions it adds. */
1225 if (x86_3dnow_a & (1 << ix86_arch))
1226 target_flags |= MASK_3DNOW_A;
1228 if ((x86_accumulate_outgoing_args & CPUMASK)
1229 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1230 && !optimize_size)
1231 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1233 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1235 char *p;
1236 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1237 p = strchr (internal_label_prefix, 'X');
1238 internal_label_prefix_len = p - internal_label_prefix;
1239 *p = '\0';
1243 void
1244 optimization_options (level, size)
1245 int level;
1246 int size ATTRIBUTE_UNUSED;
1248 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1249 make the problem with not enough registers even worse. */
1250 #ifdef INSN_SCHEDULING
1251 if (level > 1)
1252 flag_schedule_insns = 0;
1253 #endif
1254 if (TARGET_64BIT && optimize >= 1)
1255 flag_omit_frame_pointer = 1;
1256 if (TARGET_64BIT)
1258 flag_pcc_struct_return = 0;
1259 flag_asynchronous_unwind_tables = 1;
1263 /* Table of valid machine attributes. */
1264 const struct attribute_spec ix86_attribute_table[] =
1266 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1267 /* Stdcall attribute says callee is responsible for popping arguments
1268 if they are not variable. */
1269 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1270 /* Cdecl attribute says the callee is a normal C declaration */
1271 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1272 /* Regparm attribute specifies how many integer arguments are to be
1273 passed in registers. */
1274 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1275 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1276 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1277 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1278 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1279 #endif
1280 { NULL, 0, 0, false, false, false, NULL }
1283 /* Handle a "cdecl" or "stdcall" attribute;
1284 arguments as in struct attribute_spec.handler. */
1285 static tree
1286 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1287 tree *node;
1288 tree name;
1289 tree args ATTRIBUTE_UNUSED;
1290 int flags ATTRIBUTE_UNUSED;
1291 bool *no_add_attrs;
1293 if (TREE_CODE (*node) != FUNCTION_TYPE
1294 && TREE_CODE (*node) != METHOD_TYPE
1295 && TREE_CODE (*node) != FIELD_DECL
1296 && TREE_CODE (*node) != TYPE_DECL)
1298 warning ("`%s' attribute only applies to functions",
1299 IDENTIFIER_POINTER (name));
1300 *no_add_attrs = true;
1303 if (TARGET_64BIT)
1305 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1306 *no_add_attrs = true;
1309 return NULL_TREE;
1312 /* Handle a "regparm" attribute;
1313 arguments as in struct attribute_spec.handler. */
1314 static tree
1315 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1316 tree *node;
1317 tree name;
1318 tree args;
1319 int flags ATTRIBUTE_UNUSED;
1320 bool *no_add_attrs;
1322 if (TREE_CODE (*node) != FUNCTION_TYPE
1323 && TREE_CODE (*node) != METHOD_TYPE
1324 && TREE_CODE (*node) != FIELD_DECL
1325 && TREE_CODE (*node) != TYPE_DECL)
1327 warning ("`%s' attribute only applies to functions",
1328 IDENTIFIER_POINTER (name));
1329 *no_add_attrs = true;
1331 else
1333 tree cst;
1335 cst = TREE_VALUE (args);
1336 if (TREE_CODE (cst) != INTEGER_CST)
1338 warning ("`%s' attribute requires an integer constant argument",
1339 IDENTIFIER_POINTER (name));
1340 *no_add_attrs = true;
1342 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1344 warning ("argument to `%s' attribute larger than %d",
1345 IDENTIFIER_POINTER (name), REGPARM_MAX);
1346 *no_add_attrs = true;
1350 return NULL_TREE;
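/* Illustrative (hypothetical) use from user code:

       int __attribute__ ((regparm (3))) f (int a, int b, int c);

   passes the three integer arguments in %eax, %edx and %ecx rather than on
   the stack; arguments larger than REGPARM_MAX draw a warning from the
   handler above and the attribute is dropped.  */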
1353 /* Return 0 if the attributes for two types are incompatible, 1 if they
1354 are compatible, and 2 if they are nearly compatible (which causes a
1355 warning to be generated). */
1357 static int
1358 ix86_comp_type_attributes (type1, type2)
1359 tree type1;
1360 tree type2;
1362 /* Check for mismatch of non-default calling convention. */
1363 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1365 if (TREE_CODE (type1) != FUNCTION_TYPE)
1366 return 1;
1368 /* Check for mismatched return types (cdecl vs stdcall). */
1369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1371 return 0;
1372 return 1;
1375 /* Value is the number of bytes of arguments automatically
1376 popped when returning from a subroutine call.
1377 FUNDECL is the declaration node of the function (as a tree),
1378 FUNTYPE is the data type of the function (as a tree),
1379 or for a library call it is an identifier node for the subroutine name.
1380 SIZE is the number of bytes of arguments passed on the stack.
1382 On the 80386, the RTD insn may be used to pop them if the number
1383 of args is fixed, but if the number is variable then the caller
1384 must pop them all. RTD can't be used for library calls now
1385 because the library is compiled with the Unix compiler.
1386 Use of RTD is a selectable option, since it is incompatible with
1387 standard Unix calling sequences. If the option is not selected,
1388 the caller must always pop the args.
1390 The attribute stdcall is equivalent to RTD on a per module basis. */
1393 ix86_return_pops_args (fundecl, funtype, size)
1394 tree fundecl;
1395 tree funtype;
1396 int size;
1398 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1400 /* Cdecl functions override -mrtd, and never pop the stack. */
1401 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1403 /* Stdcall functions will pop the stack if not variable args. */
1404 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1405 rtd = 1;
1407 if (rtd
1408 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1409 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1410 == void_type_node)))
1411 return size;
1414 /* Lose any fake structure return argument if it is passed on the stack. */
1415 if (aggregate_value_p (TREE_TYPE (funtype))
1416 && !TARGET_64BIT)
1418 int nregs = ix86_regparm;
1420 if (funtype)
1422 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1424 if (attr)
1425 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1428 if (!nregs)
1429 return GET_MODE_SIZE (Pmode);
1432 return 0;
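/* As a usage illustration of the conventions handled above (hypothetical
   declaration):

       void __attribute__ ((stdcall)) g (int a, int b);

   is expected to pop its own 8 bytes of arguments (the callee returns with
   `ret $8'), so ix86_return_pops_args returns 8 for it, whereas a plain
   cdecl function returns 0 and leaves the cleanup to the caller.  */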
1435 /* Argument support functions. */
1437 /* Return true when register may be used to pass function parameters. */
1438 bool
1439 ix86_function_arg_regno_p (regno)
1440 int regno;
1442 int i;
1443 if (!TARGET_64BIT)
1444 return (regno < REGPARM_MAX
1445 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1446 if (SSE_REGNO_P (regno) && TARGET_SSE)
1447 return true;
1448 /* RAX is used as hidden argument to va_arg functions. */
1449 if (!regno)
1450 return true;
1451 for (i = 0; i < REGPARM_MAX; i++)
1452 if (regno == x86_64_int_parameter_registers[i])
1453 return true;
1454 return false;
1457 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1458 for a call to a function whose data type is FNTYPE.
1459 For a library call, FNTYPE is 0. */
1461 void
1462 init_cumulative_args (cum, fntype, libname)
1463 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1464 tree fntype; /* tree ptr for function decl */
1465 rtx libname; /* SYMBOL_REF of library name or 0 */
1467 static CUMULATIVE_ARGS zero_cum;
1468 tree param, next_param;
1470 if (TARGET_DEBUG_ARG)
1472 fprintf (stderr, "\ninit_cumulative_args (");
1473 if (fntype)
1474 fprintf (stderr, "fntype code = %s, ret code = %s",
1475 tree_code_name[(int) TREE_CODE (fntype)],
1476 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1477 else
1478 fprintf (stderr, "no fntype");
1480 if (libname)
1481 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1484 *cum = zero_cum;
1486 /* Set up the number of registers to use for passing arguments. */
1487 cum->nregs = ix86_regparm;
1488 cum->sse_nregs = SSE_REGPARM_MAX;
1489 if (fntype && !TARGET_64BIT)
1491 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1493 if (attr)
1494 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1496 cum->maybe_vaarg = false;
1498 /* Determine if this function has variable arguments. This is
1499 indicated by the last argument being 'void_type_node' if there
1500 are no variable arguments. If there are variable arguments, then
1501 we won't pass anything in registers. */
1503 if (cum->nregs)
1505 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1506 param != 0; param = next_param)
1508 next_param = TREE_CHAIN (param);
1509 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1511 if (!TARGET_64BIT)
1512 cum->nregs = 0;
1513 cum->maybe_vaarg = true;
1517 if ((!fntype && !libname)
1518 || (fntype && !TYPE_ARG_TYPES (fntype)))
1519 cum->maybe_vaarg = 1;
1521 if (TARGET_DEBUG_ARG)
1522 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1524 return;
1527 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1528 of this code is to classify each eightbyte of the incoming argument by register
1529 class and assign registers accordingly. */
1531 /* Return the union class of CLASS1 and CLASS2.
1532 See the x86-64 PS ABI for details. */
1534 static enum x86_64_reg_class
1535 merge_classes (class1, class2)
1536 enum x86_64_reg_class class1, class2;
1538 /* Rule #1: If both classes are equal, this is the resulting class. */
1539 if (class1 == class2)
1540 return class1;
1542 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1543 the other class. */
1544 if (class1 == X86_64_NO_CLASS)
1545 return class2;
1546 if (class2 == X86_64_NO_CLASS)
1547 return class1;
1549 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1550 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1551 return X86_64_MEMORY_CLASS;
1553 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1554 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1555 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1556 return X86_64_INTEGERSI_CLASS;
1557 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1558 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1559 return X86_64_INTEGER_CLASS;
1561 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1562 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1563 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1564 return X86_64_MEMORY_CLASS;
1566 /* Rule #6: Otherwise class SSE is used. */
1567 return X86_64_SSE_CLASS;
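/* A worked example of the merging rules above: for

       struct s { float f; int i; };

   both fields share one eightbyte; the float classifies as SSESF and the
   int as INTEGERSI, and the INTEGERSI/SSESF special case of rule #4 merges
   them to INTEGERSI, so the whole struct is passed in a single integer
   register.  */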
1570 /* Classify the argument of type TYPE and mode MODE.
1571 CLASSES will be filled by the register class used to pass each word
1572 of the operand. The number of words is returned. In case the parameter
1573 should be passed in memory, 0 is returned. As a special case for zero
1574 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1576 BIT_OFFSET is used internally for handling records; it specifies the
1577 offset in bits modulo 256 to avoid overflow cases.
1579 See the x86-64 PS ABI for details.
1582 static int
1583 classify_argument (mode, type, classes, bit_offset)
1584 enum machine_mode mode;
1585 tree type;
1586 enum x86_64_reg_class classes[MAX_CLASSES];
1587 int bit_offset;
1589 int bytes =
1590 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1591 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1593 if (type && AGGREGATE_TYPE_P (type))
1595 int i;
1596 tree field;
1597 enum x86_64_reg_class subclasses[MAX_CLASSES];
1599 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1600 if (bytes > 16)
1601 return 0;
1603 for (i = 0; i < words; i++)
1604 classes[i] = X86_64_NO_CLASS;
1606 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
1607 signal the memory class, so handle this as a special case. */
1608 if (!words)
1610 classes[0] = X86_64_NO_CLASS;
1611 return 1;
1614 /* Classify each field of the record and merge the classes. */
1615 if (TREE_CODE (type) == RECORD_TYPE)
1617 /* For C++ classes, first merge in the fields of the base classes. */
1618 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1620 tree bases = TYPE_BINFO_BASETYPES (type);
1621 int n_bases = TREE_VEC_LENGTH (bases);
1622 int i;
1624 for (i = 0; i < n_bases; ++i)
1626 tree binfo = TREE_VEC_ELT (bases, i);
1627 int num;
1628 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1629 tree type = BINFO_TYPE (binfo);
1631 num = classify_argument (TYPE_MODE (type),
1632 type, subclasses,
1633 (offset + bit_offset) % 256);
1634 if (!num)
1635 return 0;
1636 for (i = 0; i < num; i++)
1638 int pos = (offset + bit_offset) / 8 / 8;
1639 classes[i + pos] =
1640 merge_classes (subclasses[i], classes[i + pos]);
1644 /* And now merge the fields of the structure. */
1645 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1647 if (TREE_CODE (field) == FIELD_DECL)
1649 int num;
1651 /* Bitfields are always classified as integer. Handle them
1652 early, since later code would consider them to be
1653 misaligned integers. */
1654 if (DECL_BIT_FIELD (field))
1656 for (i = int_bit_position (field) / 8 / 8;
1657 i < (int_bit_position (field)
1658 + tree_low_cst (DECL_SIZE (field), 0)
1659 + 63) / 8 / 8; i++)
1660 classes[i] =
1661 merge_classes (X86_64_INTEGER_CLASS,
1662 classes[i]);
1664 else
1666 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1667 TREE_TYPE (field), subclasses,
1668 (int_bit_position (field)
1669 + bit_offset) % 256);
1670 if (!num)
1671 return 0;
1672 for (i = 0; i < num; i++)
1674 int pos =
1675 (int_bit_position (field) + bit_offset) / 8 / 8;
1676 classes[i + pos] =
1677 merge_classes (subclasses[i], classes[i + pos]);
1683 /* Arrays are handled as small records. */
1684 else if (TREE_CODE (type) == ARRAY_TYPE)
1686 int num;
1687 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1688 TREE_TYPE (type), subclasses, bit_offset);
1689 if (!num)
1690 return 0;
1692 /* The partial classes are now full classes. */
1693 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1694 subclasses[0] = X86_64_SSE_CLASS;
1695 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1696 subclasses[0] = X86_64_INTEGER_CLASS;
1698 for (i = 0; i < words; i++)
1699 classes[i] = subclasses[i % num];
1701 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1702 else if (TREE_CODE (type) == UNION_TYPE
1703 || TREE_CODE (type) == QUAL_UNION_TYPE)
1705 /* For C++ classes, first merge in the fields of the base classes. */
1706 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1708 tree bases = TYPE_BINFO_BASETYPES (type);
1709 int n_bases = TREE_VEC_LENGTH (bases);
1710 int i;
1712 for (i = 0; i < n_bases; ++i)
1714 tree binfo = TREE_VEC_ELT (bases, i);
1715 int num;
1716 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1717 tree type = BINFO_TYPE (binfo);
1719 num = classify_argument (TYPE_MODE (type),
1720 type, subclasses,
1721 (offset + bit_offset) % 256);
1722 if (!num)
1723 return 0;
1724 for (i = 0; i < num; i++)
1726 int pos = (offset + bit_offset) / 8 / 8;
1727 classes[i + pos] =
1728 merge_classes (subclasses[i], classes[i + pos]);
1732 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1734 if (TREE_CODE (field) == FIELD_DECL)
1736 int num;
1737 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1738 TREE_TYPE (field), subclasses,
1739 bit_offset);
1740 if (!num)
1741 return 0;
1742 for (i = 0; i < num; i++)
1743 classes[i] = merge_classes (subclasses[i], classes[i]);
1747 else
1748 abort ();
1750 /* Final merger cleanup. */
1751 for (i = 0; i < words; i++)
1753 /* If one class is MEMORY, everything should be passed in
1754 memory. */
1755 if (classes[i] == X86_64_MEMORY_CLASS)
1756 return 0;
1758 /* The X86_64_SSEUP_CLASS should always be preceded by
1759 X86_64_SSE_CLASS. */
1760 if (classes[i] == X86_64_SSEUP_CLASS
1761 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1762 classes[i] = X86_64_SSE_CLASS;
1764 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1765 if (classes[i] == X86_64_X87UP_CLASS
1766 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1767 classes[i] = X86_64_SSE_CLASS;
1769 return words;
1772 /* Compute the alignment needed. We align all types to their natural boundaries, with
1773 the exception of XFmode, which is aligned to 64 bits. */
1774 if (mode != VOIDmode && mode != BLKmode)
1776 int mode_alignment = GET_MODE_BITSIZE (mode);
1778 if (mode == XFmode)
1779 mode_alignment = 128;
1780 else if (mode == XCmode)
1781 mode_alignment = 256;
1782 /* Misaligned fields are always returned in memory. */
1783 if (bit_offset % mode_alignment)
1784 return 0;
1787 /* Classification of atomic types. */
1788 switch (mode)
1790 case DImode:
1791 case SImode:
1792 case HImode:
1793 case QImode:
1794 case CSImode:
1795 case CHImode:
1796 case CQImode:
1797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1798 classes[0] = X86_64_INTEGERSI_CLASS;
1799 else
1800 classes[0] = X86_64_INTEGER_CLASS;
1801 return 1;
1802 case CDImode:
1803 case TImode:
1804 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1805 return 2;
1806 case CTImode:
1807 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1808 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1809 return 4;
1810 case SFmode:
1811 if (!(bit_offset % 64))
1812 classes[0] = X86_64_SSESF_CLASS;
1813 else
1814 classes[0] = X86_64_SSE_CLASS;
1815 return 1;
1816 case DFmode:
1817 classes[0] = X86_64_SSEDF_CLASS;
1818 return 1;
1819 case TFmode:
1820 classes[0] = X86_64_X87_CLASS;
1821 classes[1] = X86_64_X87UP_CLASS;
1822 return 2;
1823 case TCmode:
1824 classes[0] = X86_64_X87_CLASS;
1825 classes[1] = X86_64_X87UP_CLASS;
1826 classes[2] = X86_64_X87_CLASS;
1827 classes[3] = X86_64_X87UP_CLASS;
1828 return 4;
1829 case DCmode:
1830 classes[0] = X86_64_SSEDF_CLASS;
1831 classes[1] = X86_64_SSEDF_CLASS;
1832 return 2;
1833 case SCmode:
1834 classes[0] = X86_64_SSE_CLASS;
1835 return 1;
1836 case V4SFmode:
1837 case V4SImode:
1838 case V16QImode:
1839 case V8HImode:
1840 case V2DFmode:
1841 case V2DImode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 classes[1] = X86_64_SSEUP_CLASS;
1844 return 2;
1845 case V2SFmode:
1846 case V2SImode:
1847 case V4HImode:
1848 case V8QImode:
1849 classes[0] = X86_64_SSE_CLASS;
1850 return 1;
1851 case BLKmode:
1852 case VOIDmode:
1853 return 0;
1854 default:
1855 abort ();
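/* Worked example for the classification above (illustrative only, not part of
   the original source):
       struct s { double d; int a; int b; };
   is 16 bytes, i.e. two eightbytes.  The first eightbyte holds only D and is
   classified X86_64_SSEDF_CLASS; the second holds A and B, both classified as
   integers, and merges to X86_64_INTEGER_CLASS.  classify_argument therefore
   returns 2 with classes = { SSEDF, INTEGER }, so the value travels in one
   SSE register and one general-purpose register.  */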
1859 /* Examine the argument and set the number of registers required in each
1860 class. Return 0 iff the parameter should be passed in memory. */
1861 static int
1862 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1863 enum machine_mode mode;
1864 tree type;
1865 int *int_nregs, *sse_nregs;
1866 int in_return;
1868 enum x86_64_reg_class class[MAX_CLASSES];
1869 int n = classify_argument (mode, type, class, 0);
1871 *int_nregs = 0;
1872 *sse_nregs = 0;
1873 if (!n)
1874 return 0;
1875 for (n--; n >= 0; n--)
1876 switch (class[n])
1878 case X86_64_INTEGER_CLASS:
1879 case X86_64_INTEGERSI_CLASS:
1880 (*int_nregs)++;
1881 break;
1882 case X86_64_SSE_CLASS:
1883 case X86_64_SSESF_CLASS:
1884 case X86_64_SSEDF_CLASS:
1885 (*sse_nregs)++;
1886 break;
1887 case X86_64_NO_CLASS:
1888 case X86_64_SSEUP_CLASS:
1889 break;
1890 case X86_64_X87_CLASS:
1891 case X86_64_X87UP_CLASS:
1892 if (!in_return)
1893 return 0;
1894 break;
1895 case X86_64_MEMORY_CLASS:
1896 abort ();
1898 return 1;
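/* Usage sketch (illustrative, not part of the original source): for the
   16-byte struct in the example above, examine_argument sets *int_nregs = 1
   and *sse_nregs = 1 and returns 1; for a 32-byte structure classify_argument
   returns 0, so examine_argument reports that the value must live in memory.  */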
1900 /* Construct container for the argument used by GCC interface. See
1901 FUNCTION_ARG for the detailed description. */
1902 static rtx
1903 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1904 enum machine_mode mode;
1905 tree type;
1906 int in_return;
1907 int nintregs, nsseregs;
1908 const int * intreg;
1909 int sse_regno;
1911 enum machine_mode tmpmode;
1912 int bytes =
1913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1914 enum x86_64_reg_class class[MAX_CLASSES];
1915 int n;
1916 int i;
1917 int nexps = 0;
1918 int needed_sseregs, needed_intregs;
1919 rtx exp[MAX_CLASSES];
1920 rtx ret;
1922 n = classify_argument (mode, type, class, 0);
1923 if (TARGET_DEBUG_ARG)
1925 if (!n)
1926 fprintf (stderr, "Memory class\n");
1927 else
1929 fprintf (stderr, "Classes:");
1930 for (i = 0; i < n; i++)
1932 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1934 fprintf (stderr, "\n");
1937 if (!n)
1938 return NULL;
1939 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1940 return NULL;
1941 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1942 return NULL;
1944 /* First construct simple cases. Avoid SCmode, since we want to use
1945 a single register to pass this type. */
1946 if (n == 1 && mode != SCmode)
1947 switch (class[0])
1949 case X86_64_INTEGER_CLASS:
1950 case X86_64_INTEGERSI_CLASS:
1951 return gen_rtx_REG (mode, intreg[0]);
1952 case X86_64_SSE_CLASS:
1953 case X86_64_SSESF_CLASS:
1954 case X86_64_SSEDF_CLASS:
1955 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1956 case X86_64_X87_CLASS:
1957 return gen_rtx_REG (mode, FIRST_STACK_REG);
1958 case X86_64_NO_CLASS:
1959 /* Zero sized array, struct or class. */
1960 return NULL;
1961 default:
1962 abort ();
1964 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1965 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1966 if (n == 2
1967 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1968 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1969 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1970 && class[1] == X86_64_INTEGER_CLASS
1971 && (mode == CDImode || mode == TImode)
1972 && intreg[0] + 1 == intreg[1])
1973 return gen_rtx_REG (mode, intreg[0]);
1974 if (n == 4
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1976 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1977 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1979 /* Otherwise figure out the entries of the PARALLEL. */
1980 for (i = 0; i < n; i++)
1982 switch (class[i])
1984 case X86_64_NO_CLASS:
1985 break;
1986 case X86_64_INTEGER_CLASS:
1987 case X86_64_INTEGERSI_CLASS:
1988 /* Merge TImodes on aligned occasions here too. */
1989 if (i * 8 + 8 > bytes)
1990 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1991 else if (class[i] == X86_64_INTEGERSI_CLASS)
1992 tmpmode = SImode;
1993 else
1994 tmpmode = DImode;
1995 /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
1996 if (tmpmode == BLKmode)
1997 tmpmode = DImode;
1998 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
1999 gen_rtx_REG (tmpmode, *intreg),
2000 GEN_INT (i*8));
2001 intreg++;
2002 break;
2003 case X86_64_SSESF_CLASS:
2004 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2005 gen_rtx_REG (SFmode,
2006 SSE_REGNO (sse_regno)),
2007 GEN_INT (i*8));
2008 sse_regno++;
2009 break;
2010 case X86_64_SSEDF_CLASS:
2011 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2012 gen_rtx_REG (DFmode,
2013 SSE_REGNO (sse_regno)),
2014 GEN_INT (i*8));
2015 sse_regno++;
2016 break;
2017 case X86_64_SSE_CLASS:
2018 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2019 tmpmode = TImode, i++;
2020 else
2021 tmpmode = DImode;
2022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2023 gen_rtx_REG (tmpmode,
2024 SSE_REGNO (sse_regno)),
2025 GEN_INT (i*8));
2026 sse_regno++;
2027 break;
2028 default:
2029 abort ();
2032 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2033 for (i = 0; i < nexps; i++)
2034 XVECEXP (ret, 0, i) = exp [i];
2035 return ret;
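/* Illustrative note (not part of the original source): for the mixed
   SSE/integer struct from the classification example, construct_container
   builds a PARALLEL of EXPR_LISTs roughly of the shape
       (parallel [ (expr_list (reg:DF xmm0) (const_int 0))
                   (expr_list (reg:DI di)   (const_int 8)) ])
   i.e. each element records the register carrying one eightbyte and the byte
   offset of that eightbyte within the argument.  The exact registers depend
   on the INTREG and SSE_REGNO arguments supplied by the caller.  */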
2038 /* Update the data in CUM to advance over an argument
2039 of mode MODE and data type TYPE.
2040 (TYPE is null for libcalls where that information may not be available.) */
2042 void
2043 function_arg_advance (cum, mode, type, named)
2044 CUMULATIVE_ARGS *cum; /* current arg information */
2045 enum machine_mode mode; /* current arg mode */
2046 tree type; /* type of the argument or 0 if lib support */
2047 int named; /* whether or not the argument was named */
2049 int bytes =
2050 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2051 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2053 if (TARGET_DEBUG_ARG)
2054 fprintf (stderr,
2055 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2056 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2057 if (TARGET_64BIT)
2059 int int_nregs, sse_nregs;
2060 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2061 cum->words += words;
2062 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2064 cum->nregs -= int_nregs;
2065 cum->sse_nregs -= sse_nregs;
2066 cum->regno += int_nregs;
2067 cum->sse_regno += sse_nregs;
2069 else
2070 cum->words += words;
2072 else
2074 if (TARGET_SSE && mode == TImode)
2076 cum->sse_words += words;
2077 cum->sse_nregs -= 1;
2078 cum->sse_regno += 1;
2079 if (cum->sse_nregs <= 0)
2081 cum->sse_nregs = 0;
2082 cum->sse_regno = 0;
2085 else
2087 cum->words += words;
2088 cum->nregs -= words;
2089 cum->regno += words;
2091 if (cum->nregs <= 0)
2093 cum->nregs = 0;
2094 cum->regno = 0;
2098 return;
2101 /* Define where to put the arguments to a function.
2102 Value is zero to push the argument on the stack,
2103 or a hard register in which to store the argument.
2105 MODE is the argument's machine mode.
2106 TYPE is the data type of the argument (as a tree).
2107 This is null for libcalls where that information may
2108 not be available.
2109 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2110 the preceding args and about the function being called.
2111 NAMED is nonzero if this argument is a named parameter
2112 (otherwise it is an extra parameter matching an ellipsis). */
2115 function_arg (cum, mode, type, named)
2116 CUMULATIVE_ARGS *cum; /* current arg information */
2117 enum machine_mode mode; /* current arg mode */
2118 tree type; /* type of the argument or 0 if lib support */
2119 int named; /* != 0 for normal args, == 0 for ... args */
2121 rtx ret = NULL_RTX;
2122 int bytes =
2123 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2124 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2126 /* Handle a hidden AL argument containing the number of registers for varargs
2127 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2128 any AL settings. */
2129 if (mode == VOIDmode)
2131 if (TARGET_64BIT)
2132 return GEN_INT (cum->maybe_vaarg
2133 ? (cum->sse_nregs < 0
2134 ? SSE_REGPARM_MAX
2135 : cum->sse_regno)
2136 : -1);
2137 else
2138 return constm1_rtx;
2140 if (TARGET_64BIT)
2141 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2142 &x86_64_int_parameter_registers [cum->regno],
2143 cum->sse_regno);
2144 else
2145 switch (mode)
2147 /* For now, pass fp/complex values on the stack. */
2148 default:
2149 break;
2151 case BLKmode:
2152 case DImode:
2153 case SImode:
2154 case HImode:
2155 case QImode:
2156 if (words <= cum->nregs)
2157 ret = gen_rtx_REG (mode, cum->regno);
2158 break;
2159 case TImode:
2160 if (cum->sse_nregs)
2161 ret = gen_rtx_REG (mode, cum->sse_regno);
2162 break;
2165 if (TARGET_DEBUG_ARG)
2167 fprintf (stderr,
2168 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2169 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2171 if (ret)
2172 print_simple_rtl (stderr, ret);
2173 else
2174 fprintf (stderr, ", stack");
2176 fprintf (stderr, " )\n");
2179 return ret;
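/* Illustrative note (not part of the original source): the VOIDmode special
   case above implements the x86-64 ABI rule that a variadic callee receives
   in %al an upper bound on the number of vector registers actually used by
   the call.  E.g. for
       printf ("%f\n", 3.14);
   the caller loads %al with 1 before the call, and the callee's prologue
   (see ix86_setup_incoming_varargs) uses that value to decide how many SSE
   registers to spill into the register save area.  */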
2182 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2183 and type. */
2186 ix86_function_arg_boundary (mode, type)
2187 enum machine_mode mode;
2188 tree type;
2190 int align;
2191 if (!TARGET_64BIT)
2192 return PARM_BOUNDARY;
2193 if (type)
2194 align = TYPE_ALIGN (type);
2195 else
2196 align = GET_MODE_ALIGNMENT (mode);
2197 if (align < PARM_BOUNDARY)
2198 align = PARM_BOUNDARY;
2199 if (align > 128)
2200 align = 128;
2201 return align;
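/* Illustrative note (not part of the original source): on x86-64 this means a
   plain int argument is aligned to PARM_BOUNDARY (64 bits), while an argument
   whose type demands 16-byte alignment, e.g. __m128, gets the 128-bit
   boundary; alignment requests above 128 bits are clamped to 128.  */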
2204 /* Return true if N is a possible register number for a function value. */
2205 bool
2206 ix86_function_value_regno_p (regno)
2207 int regno;
2209 if (!TARGET_64BIT)
2211 return ((regno) == 0
2212 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2213 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2215 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2216 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2217 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2220 /* Define how to find the value returned by a function.
2221 VALTYPE is the data type of the value (as a tree).
2222 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2223 otherwise, FUNC is 0. */
2225 ix86_function_value (valtype)
2226 tree valtype;
2228 if (TARGET_64BIT)
2230 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2231 REGPARM_MAX, SSE_REGPARM_MAX,
2232 x86_64_int_return_registers, 0);
2233 /* For zero-sized structures, construct_container returns NULL, but we need
2234 to keep the rest of the compiler happy by returning a meaningful value. */
2235 if (!ret)
2236 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2237 return ret;
2239 else
2240 return gen_rtx_REG (TYPE_MODE (valtype), VALUE_REGNO (TYPE_MODE (valtype)));
2243 /* Return nonzero iff TYPE is returned in memory. */
2245 ix86_return_in_memory (type)
2246 tree type;
2248 int needed_intregs, needed_sseregs;
2249 if (TARGET_64BIT)
2251 return !examine_argument (TYPE_MODE (type), type, 1,
2252 &needed_intregs, &needed_sseregs);
2254 else
2256 if (TYPE_MODE (type) == BLKmode
2257 || (VECTOR_MODE_P (TYPE_MODE (type))
2258 && int_size_in_bytes (type) == 8)
2259 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2260 && TYPE_MODE (type) != TFmode
2261 && !VECTOR_MODE_P (TYPE_MODE (type))))
2262 return 1;
2263 return 0;
2267 /* Define how to find the value returned by a library function
2268 assuming the value has mode MODE. */
2270 ix86_libcall_value (mode)
2271 enum machine_mode mode;
2273 if (TARGET_64BIT)
2275 switch (mode)
2277 case SFmode:
2278 case SCmode:
2279 case DFmode:
2280 case DCmode:
2281 return gen_rtx_REG (mode, FIRST_SSE_REG);
2282 case TFmode:
2283 case TCmode:
2284 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2285 default:
2286 return gen_rtx_REG (mode, 0);
2289 else
2290 return gen_rtx_REG (mode, VALUE_REGNO (mode));
2293 /* Create the va_list data type. */
2295 tree
2296 ix86_build_va_list ()
2298 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2300 /* For i386 we use plain pointer to argument area. */
2301 if (!TARGET_64BIT)
2302 return build_pointer_type (char_type_node);
2304 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2305 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2307 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2308 unsigned_type_node);
2309 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2310 unsigned_type_node);
2311 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2312 ptr_type_node);
2313 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2314 ptr_type_node);
2316 DECL_FIELD_CONTEXT (f_gpr) = record;
2317 DECL_FIELD_CONTEXT (f_fpr) = record;
2318 DECL_FIELD_CONTEXT (f_ovf) = record;
2319 DECL_FIELD_CONTEXT (f_sav) = record;
2321 TREE_CHAIN (record) = type_decl;
2322 TYPE_NAME (record) = type_decl;
2323 TYPE_FIELDS (record) = f_gpr;
2324 TREE_CHAIN (f_gpr) = f_fpr;
2325 TREE_CHAIN (f_fpr) = f_ovf;
2326 TREE_CHAIN (f_ovf) = f_sav;
2328 layout_type (record);
2330 /* The correct type is an array type of one element. */
2331 return build_array_type (record, build_index_type (size_zero_node));
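/* Illustrative note (not part of the original source): the record built above
   corresponds to the C-level declaration mandated by the x86-64 ABI, roughly
       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];
   The array-of-one-element trick is what makes va_list decay to a pointer
   when passed to functions such as vprintf.  */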
2334 /* Perform any actions needed for a function that is receiving a
2335 variable number of arguments.
2337 CUM is as above.
2339 MODE and TYPE are the mode and type of the current parameter.
2341 PRETEND_SIZE is a variable that should be set to the amount of stack
2342 that must be pushed by the prolog to pretend that our caller pushed it.
2345 Normally, this macro will push all remaining incoming registers on the
2346 stack and set PRETEND_SIZE to the length of the registers pushed. */
2348 void
2349 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2350 CUMULATIVE_ARGS *cum;
2351 enum machine_mode mode;
2352 tree type;
2353 int *pretend_size ATTRIBUTE_UNUSED;
2354 int no_rtl;
2357 CUMULATIVE_ARGS next_cum;
2358 rtx save_area = NULL_RTX, mem;
2359 rtx label;
2360 rtx label_ref;
2361 rtx tmp_reg;
2362 rtx nsse_reg;
2363 int set;
2364 tree fntype;
2365 int stdarg_p;
2366 int i;
2368 if (!TARGET_64BIT)
2369 return;
2371 /* Indicate to allocate space on the stack for varargs save area. */
2372 ix86_save_varrargs_registers = 1;
2374 fntype = TREE_TYPE (current_function_decl);
2375 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2376 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2377 != void_type_node));
2379 /* For varargs, we do not want to skip the dummy va_dcl argument.
2380 For stdargs, we do want to skip the last named argument. */
2381 next_cum = *cum;
2382 if (stdarg_p)
2383 function_arg_advance (&next_cum, mode, type, 1);
2385 if (!no_rtl)
2386 save_area = frame_pointer_rtx;
2388 set = get_varargs_alias_set ();
2390 for (i = next_cum.regno; i < ix86_regparm; i++)
2392 mem = gen_rtx_MEM (Pmode,
2393 plus_constant (save_area, i * UNITS_PER_WORD));
2394 set_mem_alias_set (mem, set);
2395 emit_move_insn (mem, gen_rtx_REG (Pmode,
2396 x86_64_int_parameter_registers[i]));
2399 if (next_cum.sse_nregs)
2401 /* Now emit code to save SSE registers. The AX parameter contains the number
2402 of SSE parameter registers used to call this function. We use the
2403 sse_prologue_save insn template, which produces a computed jump across
2404 the SSE saves. We need some preparation work to get this working. */
2406 label = gen_label_rtx ();
2407 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2409 /* Compute address to jump to :
2410 label - 5*eax + nnamed_sse_arguments*5 */
2411 tmp_reg = gen_reg_rtx (Pmode);
2412 nsse_reg = gen_reg_rtx (Pmode);
2413 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2414 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2415 gen_rtx_MULT (Pmode, nsse_reg,
2416 GEN_INT (4))));
2417 if (next_cum.sse_regno)
2418 emit_move_insn
2419 (nsse_reg,
2420 gen_rtx_CONST (DImode,
2421 gen_rtx_PLUS (DImode,
2422 label_ref,
2423 GEN_INT (next_cum.sse_regno * 4))));
2424 else
2425 emit_move_insn (nsse_reg, label_ref);
2426 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2428 /* Compute the address of the memory block we save into. We always use a
2429 pointer pointing 127 bytes after the first byte to store - this keeps the
2430 instruction size limited to 4 bytes. */
2431 tmp_reg = gen_reg_rtx (Pmode);
2432 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2433 plus_constant (save_area,
2434 8 * REGPARM_MAX + 127)));
2435 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2436 set_mem_alias_set (mem, set);
2437 set_mem_align (mem, BITS_PER_WORD);
2439 /* And finally do the dirty job! */
2440 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2441 GEN_INT (next_cum.sse_regno), label));
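/* Illustrative note (not part of the original source): the code above lays
   out the register save area as 6 * 8 bytes for the integer argument
   registers (rdi, rsi, rdx, rcx, r8, r9) followed by 16 bytes per SSE
   register, and only the SSE registers that %al says were actually used get
   stored, via the computed jump into the sse_prologue_save pattern.  */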
2446 /* Implement va_start. */
2448 void
2449 ix86_va_start (valist, nextarg)
2450 tree valist;
2451 rtx nextarg;
2453 HOST_WIDE_INT words, n_gpr, n_fpr;
2454 tree f_gpr, f_fpr, f_ovf, f_sav;
2455 tree gpr, fpr, ovf, sav, t;
2457 /* Only 64bit target needs something special. */
2458 if (!TARGET_64BIT)
2460 std_expand_builtin_va_start (valist, nextarg);
2461 return;
2464 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2465 f_fpr = TREE_CHAIN (f_gpr);
2466 f_ovf = TREE_CHAIN (f_fpr);
2467 f_sav = TREE_CHAIN (f_ovf);
2469 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2470 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2471 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2472 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2473 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2475 /* Count number of gp and fp argument registers used. */
2476 words = current_function_args_info.words;
2477 n_gpr = current_function_args_info.regno;
2478 n_fpr = current_function_args_info.sse_regno;
2480 if (TARGET_DEBUG_ARG)
2481 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2482 (int) words, (int) n_gpr, (int) n_fpr);
2484 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2485 build_int_2 (n_gpr * 8, 0));
2486 TREE_SIDE_EFFECTS (t) = 1;
2487 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2489 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2490 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2491 TREE_SIDE_EFFECTS (t) = 1;
2492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2494 /* Find the overflow area. */
2495 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2496 if (words != 0)
2497 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2498 build_int_2 (words * UNITS_PER_WORD, 0));
2499 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2500 TREE_SIDE_EFFECTS (t) = 1;
2501 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2503 /* Find the register save area.
2504 The prologue of the function saves it right above the stack frame. */
2505 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2506 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2507 TREE_SIDE_EFFECTS (t) = 1;
2508 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
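/* Illustrative note (not part of the original source): for a function such as
       int f (int a, double b, ...)
   va_start initializes gp_offset = 1 * 8 = 8, fp_offset = 6 * 8 + 1 * 16 = 64,
   points overflow_arg_area just past the named stack arguments, and points
   reg_save_area at the block spilled by ix86_setup_incoming_varargs.  */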
2511 /* Implement va_arg. */
2513 ix86_va_arg (valist, type)
2514 tree valist, type;
2516 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2517 tree f_gpr, f_fpr, f_ovf, f_sav;
2518 tree gpr, fpr, ovf, sav, t;
2519 int size, rsize;
2520 rtx lab_false, lab_over = NULL_RTX;
2521 rtx addr_rtx, r;
2522 rtx container;
2524 /* Only 64bit target needs something special. */
2525 if (!TARGET_64BIT)
2527 return std_expand_builtin_va_arg (valist, type);
2530 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2531 f_fpr = TREE_CHAIN (f_gpr);
2532 f_ovf = TREE_CHAIN (f_fpr);
2533 f_sav = TREE_CHAIN (f_ovf);
2535 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2536 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2537 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2538 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2539 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2541 size = int_size_in_bytes (type);
2542 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2544 container = construct_container (TYPE_MODE (type), type, 0,
2545 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2547 /* Pull the value out of the saved registers ... */
2550 addr_rtx = gen_reg_rtx (Pmode);
2552 if (container)
2554 rtx int_addr_rtx, sse_addr_rtx;
2555 int needed_intregs, needed_sseregs;
2556 int need_temp;
2558 lab_over = gen_label_rtx ();
2559 lab_false = gen_label_rtx ();
2561 examine_argument (TYPE_MODE (type), type, 0,
2562 &needed_intregs, &needed_sseregs);
2565 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2566 || TYPE_ALIGN (type) > 128);
2568 /* In case we are passing a structure, verify that it is a consecutive block
2569 on the register save area. If not, we need to do moves. */
2570 if (!need_temp && !REG_P (container))
2572 /* Verify that all registers are strictly consecutive. */
2573 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2575 int i;
2577 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2579 rtx slot = XVECEXP (container, 0, i);
2580 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2581 || INTVAL (XEXP (slot, 1)) != i * 16)
2582 need_temp = 1;
2585 else
2587 int i;
2589 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2591 rtx slot = XVECEXP (container, 0, i);
2592 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2593 || INTVAL (XEXP (slot, 1)) != i * 8)
2594 need_temp = 1;
2598 if (!need_temp)
2600 int_addr_rtx = addr_rtx;
2601 sse_addr_rtx = addr_rtx;
2603 else
2605 int_addr_rtx = gen_reg_rtx (Pmode);
2606 sse_addr_rtx = gen_reg_rtx (Pmode);
2608 /* First ensure that we fit completely in registers. */
2609 if (needed_intregs)
2611 emit_cmp_and_jump_insns (expand_expr
2612 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2613 GEN_INT ((REGPARM_MAX - needed_intregs +
2614 1) * 8), GE, const1_rtx, SImode,
2615 1, lab_false);
2617 if (needed_sseregs)
2619 emit_cmp_and_jump_insns (expand_expr
2620 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2621 GEN_INT ((SSE_REGPARM_MAX -
2622 needed_sseregs + 1) * 16 +
2623 REGPARM_MAX * 8), GE, const1_rtx,
2624 SImode, 1, lab_false);
2627 /* Compute index to start of area used for integer regs. */
2628 if (needed_intregs)
2630 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2631 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2632 if (r != int_addr_rtx)
2633 emit_move_insn (int_addr_rtx, r);
2635 if (needed_sseregs)
2637 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2638 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2639 if (r != sse_addr_rtx)
2640 emit_move_insn (sse_addr_rtx, r);
2642 if (need_temp)
2644 int i;
2645 rtx mem;
2647 /* Never use the memory itself, as it has the alias set. */
2648 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2649 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2650 set_mem_alias_set (mem, get_varargs_alias_set ());
2651 set_mem_align (mem, BITS_PER_UNIT);
2653 for (i = 0; i < XVECLEN (container, 0); i++)
2655 rtx slot = XVECEXP (container, 0, i);
2656 rtx reg = XEXP (slot, 0);
2657 enum machine_mode mode = GET_MODE (reg);
2658 rtx src_addr;
2659 rtx src_mem;
2660 int src_offset;
2661 rtx dest_mem;
2663 if (SSE_REGNO_P (REGNO (reg)))
2665 src_addr = sse_addr_rtx;
2666 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2668 else
2670 src_addr = int_addr_rtx;
2671 src_offset = REGNO (reg) * 8;
2673 src_mem = gen_rtx_MEM (mode, src_addr);
2674 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2675 src_mem = adjust_address (src_mem, mode, src_offset);
2676 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2677 emit_move_insn (dest_mem, src_mem);
2681 if (needed_intregs)
2684 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2685 build_int_2 (needed_intregs * 8, 0));
2686 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2687 TREE_SIDE_EFFECTS (t) = 1;
2688 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2690 if (needed_sseregs)
2693 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2694 build_int_2 (needed_sseregs * 16, 0));
2695 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2696 TREE_SIDE_EFFECTS (t) = 1;
2697 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2700 emit_jump_insn (gen_jump (lab_over));
2701 emit_barrier ();
2702 emit_label (lab_false);
2705 /* ... otherwise out of the overflow area. */
2707 /* Care for on-stack alignment if needed. */
2708 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2709 t = ovf;
2710 else
2712 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2713 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2714 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2716 t = save_expr (t);
2718 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2719 if (r != addr_rtx)
2720 emit_move_insn (addr_rtx, r);
2723 build (PLUS_EXPR, TREE_TYPE (t), t,
2724 build_int_2 (rsize * UNITS_PER_WORD, 0));
2725 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2726 TREE_SIDE_EFFECTS (t) = 1;
2727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2729 if (container)
2730 emit_label (lab_over);
2732 return addr_rtx;
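/* Illustrative sketch (not part of the original source) of the code the
   function above emits for a register-classed type needing NEEDED_INTREGS
   general registers:

       if (gp_offset >= (6 - NEEDED_INTREGS + 1) * 8) goto stack;
       addr = reg_save_area + gp_offset;
       gp_offset += NEEDED_INTREGS * 8;
       goto done;
     stack:
       addr = align (overflow_arg_area);
       overflow_arg_area = addr + rounded_size;
     done:
       ... use the value at addr ...

   with an analogous fp_offset test when SSE registers are needed, and a
   temporary copy when the eightbytes are not contiguous in the save area.  */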
2735 /* Return nonzero if OP is general operand representable on x86_64. */
2738 x86_64_general_operand (op, mode)
2739 rtx op;
2740 enum machine_mode mode;
2742 if (!TARGET_64BIT)
2743 return general_operand (op, mode);
2744 if (nonimmediate_operand (op, mode))
2745 return 1;
2746 return x86_64_sign_extended_value (op);
2749 /* Return nonzero if OP is general operand representable on x86_64
2750 as either sign extended or zero extended constant. */
2753 x86_64_szext_general_operand (op, mode)
2754 rtx op;
2755 enum machine_mode mode;
2757 if (!TARGET_64BIT)
2758 return general_operand (op, mode);
2759 if (nonimmediate_operand (op, mode))
2760 return 1;
2761 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2764 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2767 x86_64_nonmemory_operand (op, mode)
2768 rtx op;
2769 enum machine_mode mode;
2771 if (!TARGET_64BIT)
2772 return nonmemory_operand (op, mode);
2773 if (register_operand (op, mode))
2774 return 1;
2775 return x86_64_sign_extended_value (op);
2778 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2781 x86_64_movabs_operand (op, mode)
2782 rtx op;
2783 enum machine_mode mode;
2785 if (!TARGET_64BIT || !flag_pic)
2786 return nonmemory_operand (op, mode);
2787 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2788 return 1;
2789 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2790 return 1;
2791 return 0;
2794 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2797 x86_64_szext_nonmemory_operand (op, mode)
2798 rtx op;
2799 enum machine_mode mode;
2801 if (!TARGET_64BIT)
2802 return nonmemory_operand (op, mode);
2803 if (register_operand (op, mode))
2804 return 1;
2805 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2808 /* Return nonzero if OP is immediate operand representable on x86_64. */
2811 x86_64_immediate_operand (op, mode)
2812 rtx op;
2813 enum machine_mode mode;
2815 if (!TARGET_64BIT)
2816 return immediate_operand (op, mode);
2817 return x86_64_sign_extended_value (op);
2820 /* Return nonzero if OP is immediate operand representable on x86_64. */
2823 x86_64_zext_immediate_operand (op, mode)
2824 rtx op;
2825 enum machine_mode mode ATTRIBUTE_UNUSED;
2827 return x86_64_zero_extended_value (op);
2830 /* Return nonzero if OP is (const_int 1), else return zero. */
2833 const_int_1_operand (op, mode)
2834 rtx op;
2835 enum machine_mode mode ATTRIBUTE_UNUSED;
2837 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2840 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2841 for shift & compare patterns, as shifting by 0 does not change flags),
2842 else return zero. */
2845 const_int_1_31_operand (op, mode)
2846 rtx op;
2847 enum machine_mode mode ATTRIBUTE_UNUSED;
2849 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2852 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2853 reference and a constant. */
2856 symbolic_operand (op, mode)
2857 register rtx op;
2858 enum machine_mode mode ATTRIBUTE_UNUSED;
2860 switch (GET_CODE (op))
2862 case SYMBOL_REF:
2863 case LABEL_REF:
2864 return 1;
2866 case CONST:
2867 op = XEXP (op, 0);
2868 if (GET_CODE (op) == SYMBOL_REF
2869 || GET_CODE (op) == LABEL_REF
2870 || (GET_CODE (op) == UNSPEC
2871 && (XINT (op, 1) == UNSPEC_GOT
2872 || XINT (op, 1) == UNSPEC_GOTOFF
2873 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2874 return 1;
2875 if (GET_CODE (op) != PLUS
2876 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2877 return 0;
2879 op = XEXP (op, 0);
2880 if (GET_CODE (op) == SYMBOL_REF
2881 || GET_CODE (op) == LABEL_REF)
2882 return 1;
2883 /* Only @GOTOFF gets offsets. */
2884 if (GET_CODE (op) != UNSPEC
2885 || XINT (op, 1) != UNSPEC_GOTOFF)
2886 return 0;
2888 op = XVECEXP (op, 0, 0);
2889 if (GET_CODE (op) == SYMBOL_REF
2890 || GET_CODE (op) == LABEL_REF)
2891 return 1;
2892 return 0;
2894 default:
2895 return 0;
2899 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2902 pic_symbolic_operand (op, mode)
2903 register rtx op;
2904 enum machine_mode mode ATTRIBUTE_UNUSED;
2906 if (GET_CODE (op) != CONST)
2907 return 0;
2908 op = XEXP (op, 0);
2909 if (TARGET_64BIT)
2911 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2912 return 1;
2914 else
2916 if (GET_CODE (op) == UNSPEC)
2917 return 1;
2918 if (GET_CODE (op) != PLUS
2919 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2920 return 0;
2921 op = XEXP (op, 0);
2922 if (GET_CODE (op) == UNSPEC)
2923 return 1;
2925 return 0;
2928 /* Return true if OP is a symbolic operand that resolves locally. */
2930 static int
2931 local_symbolic_operand (op, mode)
2932 rtx op;
2933 enum machine_mode mode ATTRIBUTE_UNUSED;
2935 if (GET_CODE (op) == LABEL_REF)
2936 return 1;
2938 if (GET_CODE (op) == CONST
2939 && GET_CODE (XEXP (op, 0)) == PLUS
2940 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2941 op = XEXP (XEXP (op, 0), 0);
2943 if (GET_CODE (op) != SYMBOL_REF)
2944 return 0;
2946 /* These we've been told are local by varasm and encode_section_info
2947 respectively. */
2948 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2949 return 1;
2951 /* There is, however, a not insubstantial body of code in the rest of
2952 the compiler that assumes it can just stick the results of
2953 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2954 /* ??? This is a hack. Should update the body of the compiler to
2955 always create a DECL and invoke targetm.encode_section_info. */
2956 if (strncmp (XSTR (op, 0), internal_label_prefix,
2957 internal_label_prefix_len) == 0)
2958 return 1;
2960 return 0;
2963 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2966 tls_symbolic_operand (op, mode)
2967 register rtx op;
2968 enum machine_mode mode ATTRIBUTE_UNUSED;
2970 const char *symbol_str;
2972 if (GET_CODE (op) != SYMBOL_REF)
2973 return 0;
2974 symbol_str = XSTR (op, 0);
2976 if (symbol_str[0] != '%')
2977 return 0;
2978 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
2981 static int
2982 tls_symbolic_operand_1 (op, kind)
2983 rtx op;
2984 enum tls_model kind;
2986 const char *symbol_str;
2988 if (GET_CODE (op) != SYMBOL_REF)
2989 return 0;
2990 symbol_str = XSTR (op, 0);
2992 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
2996 global_dynamic_symbolic_operand (op, mode)
2997 register rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
3000 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3004 local_dynamic_symbolic_operand (op, mode)
3005 register rtx op;
3006 enum machine_mode mode ATTRIBUTE_UNUSED;
3008 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3012 initial_exec_symbolic_operand (op, mode)
3013 register rtx op;
3014 enum machine_mode mode ATTRIBUTE_UNUSED;
3016 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3020 local_exec_symbolic_operand (op, mode)
3021 register rtx op;
3022 enum machine_mode mode ATTRIBUTE_UNUSED;
3024 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3027 /* Test for a valid operand for a call instruction. Don't allow the
3028 arg pointer register or virtual regs since they may decay into
3029 reg + const, which the patterns can't handle. */
3032 call_insn_operand (op, mode)
3033 rtx op;
3034 enum machine_mode mode ATTRIBUTE_UNUSED;
3036 /* Disallow indirect through a virtual register. This leads to
3037 compiler aborts when trying to eliminate them. */
3038 if (GET_CODE (op) == REG
3039 && (op == arg_pointer_rtx
3040 || op == frame_pointer_rtx
3041 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3042 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3043 return 0;
3045 /* Disallow `call 1234'. Due to varying assembler lameness this
3046 gets either rejected or translated to `call .+1234'. */
3047 if (GET_CODE (op) == CONST_INT)
3048 return 0;
3050 /* Explicitly allow SYMBOL_REF even if pic. */
3051 if (GET_CODE (op) == SYMBOL_REF)
3052 return 1;
3054 /* Otherwise we can allow any general_operand in the address. */
3055 return general_operand (op, Pmode);
3059 constant_call_address_operand (op, mode)
3060 rtx op;
3061 enum machine_mode mode ATTRIBUTE_UNUSED;
3063 if (GET_CODE (op) == CONST
3064 && GET_CODE (XEXP (op, 0)) == PLUS
3065 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3066 op = XEXP (XEXP (op, 0), 0);
3067 return GET_CODE (op) == SYMBOL_REF;
3070 /* Match exactly zero and one. */
3073 const0_operand (op, mode)
3074 register rtx op;
3075 enum machine_mode mode;
3077 return op == CONST0_RTX (mode);
3081 const1_operand (op, mode)
3082 register rtx op;
3083 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 return op == const1_rtx;
3088 /* Match 2, 4, or 8. Used for leal multiplicands. */
3091 const248_operand (op, mode)
3092 register rtx op;
3093 enum machine_mode mode ATTRIBUTE_UNUSED;
3095 return (GET_CODE (op) == CONST_INT
3096 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3099 /* True if this is a constant appropriate for an increment or decrement. */
3102 incdec_operand (op, mode)
3103 register rtx op;
3104 enum machine_mode mode ATTRIBUTE_UNUSED;
3106 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3107 flags register, since the carry flag is not set. */
3108 if (TARGET_PENTIUM4 && !optimize_size)
3109 return 0;
3110 return op == const1_rtx || op == constm1_rtx;
3113 /* Return nonzero if OP is acceptable as operand of DImode shift
3114 expander. */
3117 shiftdi_operand (op, mode)
3118 rtx op;
3119 enum machine_mode mode ATTRIBUTE_UNUSED;
3121 if (TARGET_64BIT)
3122 return nonimmediate_operand (op, mode);
3123 else
3124 return register_operand (op, mode);
3127 /* Return false if this is the stack pointer, or any other fake
3128 register eliminable to the stack pointer. Otherwise, this is
3129 a register operand.
3131 This is used to prevent esp from being used as an index reg.
3132 Which would only happen in pathological cases. */
3135 reg_no_sp_operand (op, mode)
3136 register rtx op;
3137 enum machine_mode mode;
3139 rtx t = op;
3140 if (GET_CODE (t) == SUBREG)
3141 t = SUBREG_REG (t);
3142 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3143 return 0;
3145 return register_operand (op, mode);
3149 mmx_reg_operand (op, mode)
3150 register rtx op;
3151 enum machine_mode mode ATTRIBUTE_UNUSED;
3153 return MMX_REG_P (op);
3156 /* Return false if this is any eliminable register. Otherwise
3157 general_operand. */
3160 general_no_elim_operand (op, mode)
3161 register rtx op;
3162 enum machine_mode mode;
3164 rtx t = op;
3165 if (GET_CODE (t) == SUBREG)
3166 t = SUBREG_REG (t);
3167 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3168 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3169 || t == virtual_stack_dynamic_rtx)
3170 return 0;
3171 if (REG_P (t)
3172 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3173 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3174 return 0;
3176 return general_operand (op, mode);
3179 /* Return false if this is any eliminable register. Otherwise
3180 register_operand or const_int. */
3183 nonmemory_no_elim_operand (op, mode)
3184 register rtx op;
3185 enum machine_mode mode;
3187 rtx t = op;
3188 if (GET_CODE (t) == SUBREG)
3189 t = SUBREG_REG (t);
3190 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3191 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3192 || t == virtual_stack_dynamic_rtx)
3193 return 0;
3195 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3198 /* Return true if op is a Q_REGS class register. */
3201 q_regs_operand (op, mode)
3202 register rtx op;
3203 enum machine_mode mode;
3205 if (mode != VOIDmode && GET_MODE (op) != mode)
3206 return 0;
3207 if (GET_CODE (op) == SUBREG)
3208 op = SUBREG_REG (op);
3209 return ANY_QI_REG_P (op);
3212 /* Return true if op is a NON_Q_REGS class register. */
3215 non_q_regs_operand (op, mode)
3216 register rtx op;
3217 enum machine_mode mode;
3219 if (mode != VOIDmode && GET_MODE (op) != mode)
3220 return 0;
3221 if (GET_CODE (op) == SUBREG)
3222 op = SUBREG_REG (op);
3223 return NON_QI_REG_P (op);
3226 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3227 insns. */
3229 sse_comparison_operator (op, mode)
3230 rtx op;
3231 enum machine_mode mode ATTRIBUTE_UNUSED;
3233 enum rtx_code code = GET_CODE (op);
3234 switch (code)
3236 /* Operations supported directly. */
3237 case EQ:
3238 case LT:
3239 case LE:
3240 case UNORDERED:
3241 case NE:
3242 case UNGE:
3243 case UNGT:
3244 case ORDERED:
3245 return 1;
3246 /* These are equivalent to ones above in non-IEEE comparisons. */
3247 case UNEQ:
3248 case UNLT:
3249 case UNLE:
3250 case LTGT:
3251 case GE:
3252 case GT:
3253 return !TARGET_IEEE_FP;
3254 default:
3255 return 0;
3258 /* Return 1 if OP is a valid comparison operator in valid mode. */
3260 ix86_comparison_operator (op, mode)
3261 register rtx op;
3262 enum machine_mode mode;
3264 enum machine_mode inmode;
3265 enum rtx_code code = GET_CODE (op);
3266 if (mode != VOIDmode && GET_MODE (op) != mode)
3267 return 0;
3268 if (GET_RTX_CLASS (code) != '<')
3269 return 0;
3270 inmode = GET_MODE (XEXP (op, 0));
3272 if (inmode == CCFPmode || inmode == CCFPUmode)
3274 enum rtx_code second_code, bypass_code;
3275 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3276 return (bypass_code == NIL && second_code == NIL);
3278 switch (code)
3280 case EQ: case NE:
3281 return 1;
3282 case LT: case GE:
3283 if (inmode == CCmode || inmode == CCGCmode
3284 || inmode == CCGOCmode || inmode == CCNOmode)
3285 return 1;
3286 return 0;
3287 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3288 if (inmode == CCmode)
3289 return 1;
3290 return 0;
3291 case GT: case LE:
3292 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3293 return 1;
3294 return 0;
3295 default:
3296 return 0;
3300 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3303 fcmov_comparison_operator (op, mode)
3304 register rtx op;
3305 enum machine_mode mode;
3307 enum machine_mode inmode;
3308 enum rtx_code code = GET_CODE (op);
3309 if (mode != VOIDmode && GET_MODE (op) != mode)
3310 return 0;
3311 if (GET_RTX_CLASS (code) != '<')
3312 return 0;
3313 inmode = GET_MODE (XEXP (op, 0));
3314 if (inmode == CCFPmode || inmode == CCFPUmode)
3316 enum rtx_code second_code, bypass_code;
3317 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3318 if (bypass_code != NIL || second_code != NIL)
3319 return 0;
3320 code = ix86_fp_compare_code_to_integer (code);
3322 /* The i387 supports just a limited number of condition codes. */
3323 switch (code)
3325 case LTU: case GTU: case LEU: case GEU:
3326 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3327 return 1;
3328 return 0;
3329 case ORDERED: case UNORDERED:
3330 case EQ: case NE:
3331 return 1;
3332 default:
3333 return 0;
3337 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3340 promotable_binary_operator (op, mode)
3341 register rtx op;
3342 enum machine_mode mode ATTRIBUTE_UNUSED;
3344 switch (GET_CODE (op))
3346 case MULT:
3347 /* Modern CPUs have same latency for HImode and SImode multiply,
3348 but 386 and 486 do HImode multiply faster. */
3349 return ix86_cpu > PROCESSOR_I486;
3350 case PLUS:
3351 case AND:
3352 case IOR:
3353 case XOR:
3354 case ASHIFT:
3355 return 1;
3356 default:
3357 return 0;
3361 /* Nearly general operand, but accept any const_double, since we wish
3362 to be able to drop them into memory rather than have them get pulled
3363 into registers. */
3366 cmp_fp_expander_operand (op, mode)
3367 register rtx op;
3368 enum machine_mode mode;
3370 if (mode != VOIDmode && mode != GET_MODE (op))
3371 return 0;
3372 if (GET_CODE (op) == CONST_DOUBLE)
3373 return 1;
3374 return general_operand (op, mode);
3377 /* Match an SI or HImode register for a zero_extract. */
3380 ext_register_operand (op, mode)
3381 register rtx op;
3382 enum machine_mode mode ATTRIBUTE_UNUSED;
3384 int regno;
3385 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3386 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3387 return 0;
3389 if (!register_operand (op, VOIDmode))
3390 return 0;
3392 /* Be careful to accept only registers having upper parts. */
3393 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3394 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3397 /* Return 1 if this is a valid binary floating-point operation.
3398 OP is the expression matched, and MODE is its mode. */
3401 binary_fp_operator (op, mode)
3402 register rtx op;
3403 enum machine_mode mode;
3405 if (mode != VOIDmode && mode != GET_MODE (op))
3406 return 0;
3408 switch (GET_CODE (op))
3410 case PLUS:
3411 case MINUS:
3412 case MULT:
3413 case DIV:
3414 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3416 default:
3417 return 0;
3422 mult_operator (op, mode)
3423 register rtx op;
3424 enum machine_mode mode ATTRIBUTE_UNUSED;
3426 return GET_CODE (op) == MULT;
3430 div_operator (op, mode)
3431 register rtx op;
3432 enum machine_mode mode ATTRIBUTE_UNUSED;
3434 return GET_CODE (op) == DIV;
3438 arith_or_logical_operator (op, mode)
3439 rtx op;
3440 enum machine_mode mode;
3442 return ((mode == VOIDmode || GET_MODE (op) == mode)
3443 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3444 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3447 /* Returns 1 if OP is memory operand with a displacement. */
3450 memory_displacement_operand (op, mode)
3451 register rtx op;
3452 enum machine_mode mode;
3454 struct ix86_address parts;
3456 if (! memory_operand (op, mode))
3457 return 0;
3459 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3460 abort ();
3462 return parts.disp != NULL_RTX;
3465 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3466 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3468 ??? It seems likely that this will only work because cmpsi is an
3469 expander, and no actual insns use this. */
3472 cmpsi_operand (op, mode)
3473 rtx op;
3474 enum machine_mode mode;
3476 if (nonimmediate_operand (op, mode))
3477 return 1;
3479 if (GET_CODE (op) == AND
3480 && GET_MODE (op) == SImode
3481 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3482 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3483 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3484 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3485 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3486 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3487 return 1;
3489 return 0;
3492 /* Returns 1 if OP is a memory operand that cannot be represented by the
3493 modRM array. */
3496 long_memory_operand (op, mode)
3497 register rtx op;
3498 enum machine_mode mode;
3500 if (! memory_operand (op, mode))
3501 return 0;
3503 return memory_address_length (op) != 0;
3506 /* Return nonzero if the rtx is known aligned. */
3509 aligned_operand (op, mode)
3510 rtx op;
3511 enum machine_mode mode;
3513 struct ix86_address parts;
3515 if (!general_operand (op, mode))
3516 return 0;
3518 /* Registers and immediate operands are always "aligned". */
3519 if (GET_CODE (op) != MEM)
3520 return 1;
3522 /* Don't even try to do any aligned optimizations with volatiles. */
3523 if (MEM_VOLATILE_P (op))
3524 return 0;
3526 op = XEXP (op, 0);
3528 /* Pushes and pops are only valid on the stack pointer. */
3529 if (GET_CODE (op) == PRE_DEC
3530 || GET_CODE (op) == POST_INC)
3531 return 1;
3533 /* Decode the address. */
3534 if (! ix86_decompose_address (op, &parts))
3535 abort ();
3537 if (parts.base && GET_CODE (parts.base) == SUBREG)
3538 parts.base = SUBREG_REG (parts.base);
3539 if (parts.index && GET_CODE (parts.index) == SUBREG)
3540 parts.index = SUBREG_REG (parts.index);
3542 /* Look for some component that isn't known to be aligned. */
3543 if (parts.index)
3545 if (parts.scale < 4
3546 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3547 return 0;
3549 if (parts.base)
3551 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3552 return 0;
3554 if (parts.disp)
3556 if (GET_CODE (parts.disp) != CONST_INT
3557 || (INTVAL (parts.disp) & 3) != 0)
3558 return 0;
3561 /* Didn't find one -- this must be an aligned address. */
3562 return 1;
3565 /* Return true if the constant is something that can be loaded with
3566 a special instruction. Only handle 0.0 and 1.0; others are less
3567 worthwhile. */
3570 standard_80387_constant_p (x)
3571 rtx x;
3573 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3574 return -1;
3575 /* Note that on the 80387 there are other constants, such as pi, that we should
3576 support too. On some machines, these are much slower to load as a standard
3577 constant than to load from doubles in memory. */
3578 if (x == CONST0_RTX (GET_MODE (x)))
3579 return 1;
3580 if (x == CONST1_RTX (GET_MODE (x)))
3581 return 2;
3582 return 0;
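/* Illustrative note (not part of the original source): the return values 1
   and 2 correspond to constants the i387 can materialize without a memory
   load, 0.0 via fldz and 1.0 via fld1; the move patterns select the
   instruction based on this value.  */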
3585 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
3588 standard_sse_constant_p (x)
3589 rtx x;
3591 if (GET_CODE (x) != CONST_DOUBLE)
3592 return -1;
3593 return (x == CONST0_RTX (GET_MODE (x)));
3596 /* Returns 1 if OP contains a symbol reference */
3599 symbolic_reference_mentioned_p (op)
3600 rtx op;
3602 register const char *fmt;
3603 register int i;
3605 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3606 return 1;
3608 fmt = GET_RTX_FORMAT (GET_CODE (op));
3609 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3611 if (fmt[i] == 'E')
3613 register int j;
3615 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3616 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3617 return 1;
3620 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3621 return 1;
3624 return 0;
3627 /* Return 1 if it is appropriate to emit `ret' instructions in the
3628 body of a function. Do this only if the epilogue is simple, needing a
3629 couple of insns. Prior to reloading, we can't tell how many registers
3630 must be saved, so return 0 then. Return 0 if there is no frame
3631 marker to de-allocate.
3633 If NON_SAVING_SETJMP is defined and true, then it is not possible
3634 for the epilogue to be simple, so return 0. This is a special case
3635 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3636 until final, but jump_optimize may need to know sooner if a
3637 `return' is OK. */
3640 ix86_can_use_return_insn_p ()
3642 struct ix86_frame frame;
3644 #ifdef NON_SAVING_SETJMP
3645 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3646 return 0;
3647 #endif
3649 if (! reload_completed || frame_pointer_needed)
3650 return 0;
3652 /* Don't allow popping more than 32K bytes of arguments, since that's all we
3653 can do with one instruction. */
3654 if (current_function_pops_args
3655 && current_function_args_size >= 32768)
3656 return 0;
3658 ix86_compute_frame_layout (&frame);
3659 return frame.to_allocate == 0 && frame.nregs == 0;
3662 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3664 x86_64_sign_extended_value (value)
3665 rtx value;
3667 switch (GET_CODE (value))
3669 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3670 to be at least 32 and thus all acceptable constants are
3671 represented as CONST_INT. */
3672 case CONST_INT:
3673 if (HOST_BITS_PER_WIDE_INT == 32)
3674 return 1;
3675 else
3677 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3678 return trunc_int_for_mode (val, SImode) == val;
3680 break;
3682 /* For certain code models, the symbolic references are known to fit. */
3683 case SYMBOL_REF:
3684 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3686 /* For certain code models, the code is near as well. */
3687 case LABEL_REF:
3688 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3690 /* We also may accept the offsetted memory references in certain special
3691 cases. */
3692 case CONST:
3693 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3694 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3695 return 1;
3696 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3698 rtx op1 = XEXP (XEXP (value, 0), 0);
3699 rtx op2 = XEXP (XEXP (value, 0), 1);
3700 HOST_WIDE_INT offset;
3702 if (ix86_cmodel == CM_LARGE)
3703 return 0;
3704 if (GET_CODE (op2) != CONST_INT)
3705 return 0;
3706 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3707 switch (GET_CODE (op1))
3709 case SYMBOL_REF:
3710 /* For CM_SMALL assume that the latest object is 1MB before the
3711 end of the 31-bit boundary. We may also accept pretty
3712 large negative constants knowing that all objects are
3713 in the positive half of the address space. */
3714 if (ix86_cmodel == CM_SMALL
3715 && offset < 1024*1024*1024
3716 && trunc_int_for_mode (offset, SImode) == offset)
3717 return 1;
3718 /* For CM_KERNEL we know that all objects reside in the
3719 negative half of the 32-bit address space. We may not
3720 accept negative offsets, since they may be just off
3721 and we may accept pretty large positive ones. */
3722 if (ix86_cmodel == CM_KERNEL
3723 && offset > 0
3724 && trunc_int_for_mode (offset, SImode) == offset)
3725 return 1;
3726 break;
3727 case LABEL_REF:
3728 /* These conditions are similar to SYMBOL_REF ones, just the
3729 constraints for code models differ. */
3730 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3731 && offset < 1024*1024*1024
3732 && trunc_int_for_mode (offset, SImode) == offset)
3733 return 1;
3734 if (ix86_cmodel == CM_KERNEL
3735 && offset > 0
3736 && trunc_int_for_mode (offset, SImode) == offset)
3737 return 1;
3738 break;
3739 default:
3740 return 0;
3743 return 0;
3744 default:
3745 return 0;
3749 /* Return 1 if VALUE can be stored in the zero-extended immediate field. */
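/* Illustrative only (assuming a 64-bit HOST_WIDE_INT host): 0xffffffff is
   accepted below because it has no bits above bit 31 set, while -1 and
   0x100000000 are rejected.  */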
3751 x86_64_zero_extended_value (value)
3752 rtx value;
3754 switch (GET_CODE (value))
3756 case CONST_DOUBLE:
3757 if (HOST_BITS_PER_WIDE_INT == 32)
3758 return (GET_MODE (value) == VOIDmode
3759 && !CONST_DOUBLE_HIGH (value));
3760 else
3761 return 0;
3762 case CONST_INT:
3763 if (HOST_BITS_PER_WIDE_INT == 32)
3764 return INTVAL (value) >= 0;
3765 else
3766 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3767 break;
3769 /* For certain code models, the symbolic references are known to fit. */
3770 case SYMBOL_REF:
3771 return ix86_cmodel == CM_SMALL;
3773 /* For certain code models, the code is near as well. */
3774 case LABEL_REF:
3775 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3777 /* We may also accept offsetted memory references in certain special
3778 cases. */
3779 case CONST:
3780 if (GET_CODE (XEXP (value, 0)) == PLUS)
3782 rtx op1 = XEXP (XEXP (value, 0), 0);
3783 rtx op2 = XEXP (XEXP (value, 0), 1);
3785 if (ix86_cmodel == CM_LARGE)
3786 return 0;
3787 switch (GET_CODE (op1))
3789 case SYMBOL_REF:
3790 return 0;
3791 /* For the small code model we may accept pretty large positive
3792 offsets, since one bit is available for free. Negative
3793 offsets are limited by the size of the NULL pointer area
3794 specified by the ABI. */
3795 if (ix86_cmodel == CM_SMALL
3796 && GET_CODE (op2) == CONST_INT
3797 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3798 && (trunc_int_for_mode (INTVAL (op2), SImode)
3799 == INTVAL (op2)))
3800 return 1;
3801 /* ??? For the kernel, we may accept adjustment of
3802 -0x10000000, since we know that it will just convert
3803 negative address space to positive, but perhaps this
3804 is not worthwhile. */
3805 break;
3806 case LABEL_REF:
3807 /* These conditions are similar to SYMBOL_REF ones, just the
3808 constraints for code models differ. */
3809 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3810 && GET_CODE (op2) == CONST_INT
3811 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3812 && (trunc_int_for_mode (INTVAL (op2), SImode)
3813 == INTVAL (op2)))
3814 return 1;
3815 break;
3816 default:
3817 return 0;
3820 return 0;
3821 default:
3822 return 0;
3826 /* Value should be nonzero if functions must have frame pointers.
3827 Zero means the frame pointer need not be set up (and parms may
3828 be accessed via the stack pointer) in functions that seem suitable. */
3831 ix86_frame_pointer_required ()
3833 /* If we accessed previous frames, then the generated code expects
3834 to be able to access the saved ebp value in our frame. */
3835 if (cfun->machine->accesses_prev_frame)
3836 return 1;
3838 /* Several x86 OSes need a frame pointer for other reasons,
3839 usually pertaining to setjmp. */
3840 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3841 return 1;
3843 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3844 the frame pointer by default. Turn it back on now if we've not
3845 got a leaf function. */
3846 if (TARGET_OMIT_LEAF_FRAME_POINTER
3847 && (!current_function_is_leaf || current_function_profile))
3848 return 1;
3850 return 0;
3853 /* Record that the current function accesses previous call frames. */
3855 void
3856 ix86_setup_frame_addresses ()
3858 cfun->machine->accesses_prev_frame = 1;
3861 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3862 # define USE_HIDDEN_LINKONCE 1
3863 #else
3864 # define USE_HIDDEN_LINKONCE 0
3865 #endif
3867 static int pic_labels_used;
3869 /* Fills in the label name that should be used for a pc thunk for
3870 the given register. */
3872 static void
3873 get_pc_thunk_name (name, regno)
3874 char name[32];
3875 unsigned int regno;
3877 if (USE_HIDDEN_LINKONCE)
3878 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3879 else
3880 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3884 /* This function emits the pc thunks used by -fpic: each one loads its
3885 register with the return address of the caller and then returns. */
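/* For example (illustrative, AT&T syntax), the thunk emitted for %ebx is

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. it copies its own return address into the register.  */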
3887 void
3888 ix86_asm_file_end (file)
3889 FILE *file;
3891 rtx xops[2];
3892 int regno;
3894 for (regno = 0; regno < 8; ++regno)
3896 char name[32];
3898 if (! ((pic_labels_used >> regno) & 1))
3899 continue;
3901 get_pc_thunk_name (name, regno);
3903 if (USE_HIDDEN_LINKONCE)
3905 tree decl;
3907 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3908 error_mark_node);
3909 TREE_PUBLIC (decl) = 1;
3910 TREE_STATIC (decl) = 1;
3911 DECL_ONE_ONLY (decl) = 1;
3913 (*targetm.asm_out.unique_section) (decl, 0);
3914 named_section (decl, NULL, 0);
3916 ASM_GLOBALIZE_LABEL (file, name);
3917 fputs ("\t.hidden\t", file);
3918 assemble_name (file, name);
3919 fputc ('\n', file);
3920 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3922 else
3924 text_section ();
3925 ASM_OUTPUT_LABEL (file, name);
3928 xops[0] = gen_rtx_REG (SImode, regno);
3929 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3930 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3931 output_asm_insn ("ret", xops);
3935 /* Emit code for the SET_GOT patterns. */
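/* Roughly (illustrative, AT&T syntax, DEST = %ebx), with -fpic and
   TARGET_DEEP_BRANCH_PREDICTION this emits

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   while without deep branch prediction a call to a local label followed
   by a pop is used to fetch the pc instead of the thunk.  */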
3937 const char *
3938 output_set_got (dest)
3939 rtx dest;
3941 rtx xops[3];
3943 xops[0] = dest;
3944 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3946 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3948 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3950 if (!flag_pic)
3951 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3952 else
3953 output_asm_insn ("call\t%a2", xops);
3955 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3956 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3958 if (flag_pic)
3959 output_asm_insn ("pop{l}\t%0", xops);
3961 else
3963 char name[32];
3964 get_pc_thunk_name (name, REGNO (dest));
3965 pic_labels_used |= 1 << REGNO (dest);
3967 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3968 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3969 output_asm_insn ("call\t%X2", xops);
3972 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
3973 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
3974 else
3975 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
3977 return "";
3980 /* Generate a "push" pattern for input ARG. */
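/* I.e. (illustrative), on 32-bit targets the RTL returned below is

	(set (mem:SI (pre_dec:SI (reg:SI esp))) ARG)

   which the push patterns recognize.  */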
3982 static rtx
3983 gen_push (arg)
3984 rtx arg;
3986 return gen_rtx_SET (VOIDmode,
3987 gen_rtx_MEM (Pmode,
3988 gen_rtx_PRE_DEC (Pmode,
3989 stack_pointer_rtx)),
3990 arg);
3993 /* Return >= 0 if there is an unused call-clobbered register available
3994 for the entire function. */
3996 static unsigned int
3997 ix86_select_alt_pic_regnum ()
3999 if (current_function_is_leaf && !current_function_profile)
4001 int i;
4002 for (i = 2; i >= 0; --i)
4003 if (!regs_ever_live[i])
4004 return i;
4007 return INVALID_REGNUM;
4010 /* Return 1 if we need to save REGNO. */
4011 static int
4012 ix86_save_reg (regno, maybe_eh_return)
4013 unsigned int regno;
4014 int maybe_eh_return;
4016 if (pic_offset_table_rtx
4017 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4018 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4019 || current_function_profile
4020 || current_function_calls_eh_return))
4022 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4023 return 0;
4024 return 1;
4027 if (current_function_calls_eh_return && maybe_eh_return)
4029 unsigned i;
4030 for (i = 0; ; i++)
4032 unsigned test = EH_RETURN_DATA_REGNO (i);
4033 if (test == INVALID_REGNUM)
4034 break;
4035 if (test == regno)
4036 return 1;
4040 return (regs_ever_live[regno]
4041 && !call_used_regs[regno]
4042 && !fixed_regs[regno]
4043 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4046 /* Return number of registers to be saved on the stack. */
4048 static int
4049 ix86_nsaved_regs ()
4051 int nregs = 0;
4052 int regno;
4054 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4055 if (ix86_save_reg (regno, true))
4056 nregs++;
4057 return nregs;
4060 /* Return the offset between two registers, one to be eliminated, and the other
4061 its replacement, at the start of a routine. */
4063 HOST_WIDE_INT
4064 ix86_initial_elimination_offset (from, to)
4065 int from;
4066 int to;
4068 struct ix86_frame frame;
4069 ix86_compute_frame_layout (&frame);
4071 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4072 return frame.hard_frame_pointer_offset;
4073 else if (from == FRAME_POINTER_REGNUM
4074 && to == HARD_FRAME_POINTER_REGNUM)
4075 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4076 else
4078 if (to != STACK_POINTER_REGNUM)
4079 abort ();
4080 else if (from == ARG_POINTER_REGNUM)
4081 return frame.stack_pointer_offset;
4082 else if (from != FRAME_POINTER_REGNUM)
4083 abort ();
4084 else
4085 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4089 /* Fill in FRAME (a struct ix86_frame) describing the frame layout of the function being compiled. */
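/* An illustrative (not normative) sketch of the layout computed below for
   the common 32-bit case with a frame pointer, from higher toward lower
   addresses:

	return address
	saved %ebp				<- hard frame pointer
	saved registers (nregs words)
	va-arg save area (64-bit varargs only)
	padding1 to align the locals
	local variables (get_frame_size ())	<- frame_pointer_offset
	outgoing argument area
	padding2 to align the stack		<- stack_pointer_offset

   to_allocate is the space the prologue still subtracts from the stack
   pointer after pushing the saved registers, less any red zone carved
   out for x86-64 leaf functions.  */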
4091 static void
4092 ix86_compute_frame_layout (frame)
4093 struct ix86_frame *frame;
4095 HOST_WIDE_INT total_size;
4096 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4097 int offset;
4098 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4099 HOST_WIDE_INT size = get_frame_size ();
4101 frame->nregs = ix86_nsaved_regs ();
4102 total_size = size;
4104 /* Skip return address and saved base pointer. */
4105 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4107 frame->hard_frame_pointer_offset = offset;
4109 /* Do some sanity checking of stack_alignment_needed and
4110 preferred_alignment, since the i386 port is the only one using these
4111 features, which may break easily. */
4113 if (size && !stack_alignment_needed)
4114 abort ();
4115 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4116 abort ();
4117 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4118 abort ();
4119 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4120 abort ();
4122 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4123 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4125 /* Register save area */
4126 offset += frame->nregs * UNITS_PER_WORD;
4128 /* Va-arg area */
4129 if (ix86_save_varrargs_registers)
4131 offset += X86_64_VARARGS_SIZE;
4132 frame->va_arg_size = X86_64_VARARGS_SIZE;
4134 else
4135 frame->va_arg_size = 0;
4137 /* Align start of frame for local function. */
4138 frame->padding1 = ((offset + stack_alignment_needed - 1)
4139 & -stack_alignment_needed) - offset;
4141 offset += frame->padding1;
4143 /* Frame pointer points here. */
4144 frame->frame_pointer_offset = offset;
4146 offset += size;
4148 /* Add outgoing arguments area. Can be skipped if we eliminated
4149 all the function calls as dead code. */
4150 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4152 offset += current_function_outgoing_args_size;
4153 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4155 else
4156 frame->outgoing_arguments_size = 0;
4158 /* Align stack boundary. Only needed if we're calling another function
4159 or using alloca. */
4160 if (!current_function_is_leaf || current_function_calls_alloca)
4161 frame->padding2 = ((offset + preferred_alignment - 1)
4162 & -preferred_alignment) - offset;
4163 else
4164 frame->padding2 = 0;
4166 offset += frame->padding2;
4168 /* We've reached end of stack frame. */
4169 frame->stack_pointer_offset = offset;
4171 /* Size the prologue needs to allocate. */
4172 frame->to_allocate =
4173 (size + frame->padding1 + frame->padding2
4174 + frame->outgoing_arguments_size + frame->va_arg_size);
4176 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4177 && current_function_is_leaf)
4179 frame->red_zone_size = frame->to_allocate;
4180 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4181 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4183 else
4184 frame->red_zone_size = 0;
4185 frame->to_allocate -= frame->red_zone_size;
4186 frame->stack_pointer_offset -= frame->red_zone_size;
4187 #if 0
4188 fprintf (stderr, "nregs: %i\n", frame->nregs);
4189 fprintf (stderr, "size: %i\n", size);
4190 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4191 fprintf (stderr, "padding1: %i\n", frame->padding1);
4192 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4193 fprintf (stderr, "padding2: %i\n", frame->padding2);
4194 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4195 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4196 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4197 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4198 frame->hard_frame_pointer_offset);
4199 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4200 #endif
4203 /* Emit code to save registers in the prologue. */
4205 static void
4206 ix86_emit_save_regs ()
4208 register int regno;
4209 rtx insn;
4211 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4212 if (ix86_save_reg (regno, true))
4214 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4215 RTX_FRAME_RELATED_P (insn) = 1;
4219 /* Emit code to save registers using MOV insns. The first register
4220 is saved at POINTER + OFFSET. */
4221 static void
4222 ix86_emit_save_regs_using_mov (pointer, offset)
4223 rtx pointer;
4224 HOST_WIDE_INT offset;
4226 int regno;
4227 rtx insn;
4229 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4230 if (ix86_save_reg (regno, true))
4232 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4233 Pmode, offset),
4234 gen_rtx_REG (Pmode, regno));
4235 RTX_FRAME_RELATED_P (insn) = 1;
4236 offset += UNITS_PER_WORD;
4240 /* Expand the prologue into a bunch of separate insns. */
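/* Illustratively, for a 32-bit function with a frame pointer and the
   default push-based register saves, the expansion below amounts to

	pushl	%ebp
	movl	%esp, %ebp
	pushl	<each saved register>
	subl	$to_allocate, %esp

   plus the pic register setup and the profiling blockage where needed.  */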
4242 void
4243 ix86_expand_prologue ()
4245 rtx insn;
4246 bool pic_reg_used;
4247 struct ix86_frame frame;
4248 int use_mov = 0;
4249 HOST_WIDE_INT allocate;
4251 if (!optimize_size)
4253 use_fast_prologue_epilogue
4254 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4255 if (TARGET_PROLOGUE_USING_MOVE)
4256 use_mov = use_fast_prologue_epilogue;
4258 ix86_compute_frame_layout (&frame);
4260 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4261 slower on all targets. Also sdb doesn't like it. */
4263 if (frame_pointer_needed)
4265 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4266 RTX_FRAME_RELATED_P (insn) = 1;
4268 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4269 RTX_FRAME_RELATED_P (insn) = 1;
4272 allocate = frame.to_allocate;
4273 /* In case we are dealing with only a single register and an empty frame,
4274 a push is equivalent to the mov+add sequence. */
4275 if (allocate == 0 && frame.nregs <= 1)
4276 use_mov = 0;
4278 if (!use_mov)
4279 ix86_emit_save_regs ();
4280 else
4281 allocate += frame.nregs * UNITS_PER_WORD;
4283 if (allocate == 0)
4285 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4287 insn = emit_insn (gen_pro_epilogue_adjust_stack
4288 (stack_pointer_rtx, stack_pointer_rtx,
4289 GEN_INT (-allocate)));
4290 RTX_FRAME_RELATED_P (insn) = 1;
4292 else
4294 /* ??? Is this only valid for Win32? */
4296 rtx arg0, sym;
4298 if (TARGET_64BIT)
4299 abort ();
4301 arg0 = gen_rtx_REG (SImode, 0);
4302 emit_move_insn (arg0, GEN_INT (allocate));
4304 sym = gen_rtx_MEM (FUNCTION_MODE,
4305 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4306 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4308 CALL_INSN_FUNCTION_USAGE (insn)
4309 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4310 CALL_INSN_FUNCTION_USAGE (insn));
4312 if (use_mov)
4314 if (!frame_pointer_needed || !frame.to_allocate)
4315 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4316 else
4317 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4318 -frame.nregs * UNITS_PER_WORD);
4321 #ifdef SUBTARGET_PROLOGUE
4322 SUBTARGET_PROLOGUE;
4323 #endif
4325 pic_reg_used = false;
4326 if (pic_offset_table_rtx
4327 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4328 || current_function_profile))
4330 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4332 if (alt_pic_reg_used != INVALID_REGNUM)
4333 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4335 pic_reg_used = true;
4338 if (pic_reg_used)
4340 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4342 /* Even with accurate pre-reload life analysis, we can wind up
4343 deleting all references to the pic register after reload.
4344 Consider if cross-jumping unifies two sides of a branch
4345 controlled by a comparison against the only read from a global;
4346 in that case, allow the set_got to be deleted, though we're
4347 too late to do anything about the ebx save in the prologue. */
4348 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4351 /* Prevent function calls from being scheduled before the call to mcount.
4352 In the pic_reg_used case, make sure that the got load isn't deleted. */
4353 if (current_function_profile)
4354 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4357 /* Emit code to restore saved registers using MOV insns. First register
4358 is restored from POINTER + OFFSET. */
4359 static void
4360 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4361 rtx pointer;
4362 int offset;
4363 int maybe_eh_return;
4365 int regno;
4367 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4368 if (ix86_save_reg (regno, maybe_eh_return))
4370 emit_move_insn (gen_rtx_REG (Pmode, regno),
4371 adjust_address (gen_rtx_MEM (Pmode, pointer),
4372 Pmode, offset));
4373 offset += UNITS_PER_WORD;
4377 /* Restore function stack, frame, and registers. */
4379 void
4380 ix86_expand_epilogue (style)
4381 int style;
4383 int regno;
4384 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4385 struct ix86_frame frame;
4386 HOST_WIDE_INT offset;
4388 ix86_compute_frame_layout (&frame);
4390 /* Calculate start of saved registers relative to ebp. Special care
4391 must be taken for the normal return case of a function using
4392 eh_return: the eax and edx registers are marked as saved, but not
4393 restored along this path. */
4394 offset = frame.nregs;
4395 if (current_function_calls_eh_return && style != 2)
4396 offset -= 2;
4397 offset *= -UNITS_PER_WORD;
4399 /* If we're only restoring one register and sp is not valid then
4400 use a move instruction to restore the register, since it's
4401 less work than reloading sp and popping the register.
4403 The default code results in a stack adjustment using an add/lea
4404 instruction, while this code results in a LEAVE instruction (or its
4405 discrete equivalent), so it is profitable in some other cases as well,
4406 especially when there are no registers to restore. We also use this
4407 code when TARGET_USE_LEAVE is set and there is exactly one register to
4408 pop. This heuristic may need some tuning in the future. */
4409 if ((!sp_valid && frame.nregs <= 1)
4410 || (TARGET_EPILOGUE_USING_MOVE
4411 && use_fast_prologue_epilogue
4412 && (frame.nregs > 1 || frame.to_allocate))
4413 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4414 || (frame_pointer_needed && TARGET_USE_LEAVE
4415 && use_fast_prologue_epilogue && frame.nregs == 1)
4416 || current_function_calls_eh_return)
4418 /* Restore registers. We can use ebp or esp to address the memory
4419 locations. If both are available, default to ebp, since offsets
4420 are known to be small. The only exception is esp pointing directly
4421 to the end of the block of saved registers, where we may simplify the
4422 addressing mode. */
4424 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4425 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4426 frame.to_allocate, style == 2);
4427 else
4428 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4429 offset, style == 2);
4431 /* eh_return epilogues need %ecx added to the stack pointer. */
4432 if (style == 2)
4434 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4436 if (frame_pointer_needed)
4438 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4439 tmp = plus_constant (tmp, UNITS_PER_WORD);
4440 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4442 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4443 emit_move_insn (hard_frame_pointer_rtx, tmp);
4445 emit_insn (gen_pro_epilogue_adjust_stack
4446 (stack_pointer_rtx, sa, const0_rtx));
4448 else
4450 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4451 tmp = plus_constant (tmp, (frame.to_allocate
4452 + frame.nregs * UNITS_PER_WORD));
4453 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4456 else if (!frame_pointer_needed)
4457 emit_insn (gen_pro_epilogue_adjust_stack
4458 (stack_pointer_rtx, stack_pointer_rtx,
4459 GEN_INT (frame.to_allocate
4460 + frame.nregs * UNITS_PER_WORD)));
4461 /* If not an i386, mov & pop is faster than "leave". */
4462 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4463 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4464 else
4466 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4467 hard_frame_pointer_rtx,
4468 const0_rtx));
4469 if (TARGET_64BIT)
4470 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4471 else
4472 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4475 else
4477 /* First step is to deallocate the stack frame so that we can
4478 pop the registers. */
4479 if (!sp_valid)
4481 if (!frame_pointer_needed)
4482 abort ();
4483 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4484 hard_frame_pointer_rtx,
4485 GEN_INT (offset)));
4487 else if (frame.to_allocate)
4488 emit_insn (gen_pro_epilogue_adjust_stack
4489 (stack_pointer_rtx, stack_pointer_rtx,
4490 GEN_INT (frame.to_allocate)));
4492 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4493 if (ix86_save_reg (regno, false))
4495 if (TARGET_64BIT)
4496 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4497 else
4498 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4500 if (frame_pointer_needed)
4502 /* Leave results in shorter dependency chains on CPUs that are
4503 able to grok it fast. */
4504 if (TARGET_USE_LEAVE)
4505 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4506 else if (TARGET_64BIT)
4507 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4508 else
4509 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4513 /* Sibcall epilogues don't want a return instruction. */
4514 if (style == 0)
4515 return;
4517 if (current_function_pops_args && current_function_args_size)
4519 rtx popc = GEN_INT (current_function_pops_args);
4521 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4522 return address, do an explicit add, and jump indirectly to the
4523 caller. */
4525 if (current_function_pops_args >= 65536)
4527 rtx ecx = gen_rtx_REG (SImode, 2);
4529 /* There is no "pascal" calling convention in the 64-bit ABI. */
4530 if (TARGET_64BIT)
4531 abort ();
4533 emit_insn (gen_popsi1 (ecx));
4534 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4535 emit_jump_insn (gen_return_indirect_internal (ecx));
4537 else
4538 emit_jump_insn (gen_return_pop_internal (popc));
4540 else
4541 emit_jump_insn (gen_return_internal ());
4544 /* Reset from the function's potential modifications. */
4546 static void
4547 ix86_output_function_epilogue (file, size)
4548 FILE *file ATTRIBUTE_UNUSED;
4549 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4551 if (pic_offset_table_rtx)
4552 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4555 /* Extract the parts of an RTL expression that is a valid memory address
4556 for an instruction. Return 0 if the structure of the address is
4557 grossly off. Return -1 if the address contains ASHIFT, so it is not
4558 strictly valid, but is still used for computing the length of a lea instruction. */
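/* A couple of illustrative decompositions (not exhaustive):

	(reg %eax)				-> base = %eax
	(plus (reg %eax) (const_int 8))		-> base = %eax, disp = 8
	(plus (mult (reg %eax) (const_int 4))
	      (reg %ebx))			-> index = %eax, scale = 4, base = %ebx  */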
4561 static int
4562 ix86_decompose_address (addr, out)
4563 register rtx addr;
4564 struct ix86_address *out;
4566 rtx base = NULL_RTX;
4567 rtx index = NULL_RTX;
4568 rtx disp = NULL_RTX;
4569 HOST_WIDE_INT scale = 1;
4570 rtx scale_rtx = NULL_RTX;
4571 int retval = 1;
4573 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4574 base = addr;
4575 else if (GET_CODE (addr) == PLUS)
4577 rtx op0 = XEXP (addr, 0);
4578 rtx op1 = XEXP (addr, 1);
4579 enum rtx_code code0 = GET_CODE (op0);
4580 enum rtx_code code1 = GET_CODE (op1);
4582 if (code0 == REG || code0 == SUBREG)
4584 if (code1 == REG || code1 == SUBREG)
4585 index = op0, base = op1; /* index + base */
4586 else
4587 base = op0, disp = op1; /* base + displacement */
4589 else if (code0 == MULT)
4591 index = XEXP (op0, 0);
4592 scale_rtx = XEXP (op0, 1);
4593 if (code1 == REG || code1 == SUBREG)
4594 base = op1; /* index*scale + base */
4595 else
4596 disp = op1; /* index*scale + disp */
4598 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4600 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4601 scale_rtx = XEXP (XEXP (op0, 0), 1);
4602 base = XEXP (op0, 1);
4603 disp = op1;
4605 else if (code0 == PLUS)
4607 index = XEXP (op0, 0); /* index + base + disp */
4608 base = XEXP (op0, 1);
4609 disp = op1;
4611 else
4612 return 0;
4614 else if (GET_CODE (addr) == MULT)
4616 index = XEXP (addr, 0); /* index*scale */
4617 scale_rtx = XEXP (addr, 1);
4619 else if (GET_CODE (addr) == ASHIFT)
4621 rtx tmp;
4623 /* We're called for lea too, which implements ashift on occasion. */
4624 index = XEXP (addr, 0);
4625 tmp = XEXP (addr, 1);
4626 if (GET_CODE (tmp) != CONST_INT)
4627 return 0;
4628 scale = INTVAL (tmp);
4629 if ((unsigned HOST_WIDE_INT) scale > 3)
4630 return 0;
4631 scale = 1 << scale;
4632 retval = -1;
4634 else
4635 disp = addr; /* displacement */
4637 /* Extract the integral value of scale. */
4638 if (scale_rtx)
4640 if (GET_CODE (scale_rtx) != CONST_INT)
4641 return 0;
4642 scale = INTVAL (scale_rtx);
4645 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4646 if (base && index && scale == 1
4647 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4648 || index == stack_pointer_rtx))
4650 rtx tmp = base;
4651 base = index;
4652 index = tmp;
4655 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4656 if ((base == hard_frame_pointer_rtx
4657 || base == frame_pointer_rtx
4658 || base == arg_pointer_rtx) && !disp)
4659 disp = const0_rtx;
4661 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4662 Avoid this by transforming to [%esi+0]. */
4663 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4664 && base && !index && !disp
4665 && REG_P (base)
4666 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4667 disp = const0_rtx;
4669 /* Special case: encode reg+reg instead of reg*2. */
4670 if (!base && index && scale && scale == 2)
4671 base = index, scale = 1;
4673 /* Special case: scaling cannot be encoded without base or displacement. */
4674 if (!base && !disp && index && scale != 1)
4675 disp = const0_rtx;
4677 out->base = base;
4678 out->index = index;
4679 out->disp = disp;
4680 out->scale = scale;
4682 return retval;
4685 /* Return the cost of the memory address X.
4686 For i386, it is better to use a complex address than let gcc copy
4687 the address into a reg and make a new pseudo. But not if the address
4688 requires two regs - that would mean more pseudos with longer
4689 lifetimes. */
4691 ix86_address_cost (x)
4692 rtx x;
4694 struct ix86_address parts;
4695 int cost = 1;
4697 if (!ix86_decompose_address (x, &parts))
4698 abort ();
4700 if (parts.base && GET_CODE (parts.base) == SUBREG)
4701 parts.base = SUBREG_REG (parts.base);
4702 if (parts.index && GET_CODE (parts.index) == SUBREG)
4703 parts.index = SUBREG_REG (parts.index);
4705 /* More complex memory references are better. */
4706 if (parts.disp && parts.disp != const0_rtx)
4707 cost--;
4709 /* Attempt to minimize number of registers in the address. */
4710 if ((parts.base
4711 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4712 || (parts.index
4713 && (!REG_P (parts.index)
4714 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4715 cost++;
4717 if (parts.base
4718 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4719 && parts.index
4720 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4721 && parts.base != parts.index)
4722 cost++;
4724 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4725 since its predecode logic can't detect the length of instructions
4726 and it degenerates to vector decoding. Increase the cost of such
4727 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4728 to split such addresses or even refuse them altogether.
4730 The following addressing modes are affected:
4731 [base+scale*index]
4732 [scale*index+disp]
4733 [base+index]
4735 The first and last case may be avoidable by explicitly coding the zero
4736 into the memory address, but I don't have an AMD-K6 machine handy to
4737 check this theory. */
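/* For instance (illustrative, with hard registers): on the K6 a plain
   [%eax] address leaves this routine with cost 1, while a [%eax+%ebx]
   base+index address picks up the penalty above and ends up with cost 11.  */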
4739 if (TARGET_K6
4740 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4741 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4742 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4743 cost += 10;
4745 return cost;
4748 /* If X is a machine specific address (i.e. a symbol or label being
4749 referenced as a displacement from the GOT implemented using an
4750 UNSPEC), then return the base term. Otherwise return X. */
4753 ix86_find_base_term (x)
4754 rtx x;
4756 rtx term;
4758 if (TARGET_64BIT)
4760 if (GET_CODE (x) != CONST)
4761 return x;
4762 term = XEXP (x, 0);
4763 if (GET_CODE (term) == PLUS
4764 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4765 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4766 term = XEXP (term, 0);
4767 if (GET_CODE (term) != UNSPEC
4768 || XINT (term, 1) != UNSPEC_GOTPCREL)
4769 return x;
4771 term = XVECEXP (term, 0, 0);
4773 if (GET_CODE (term) != SYMBOL_REF
4774 && GET_CODE (term) != LABEL_REF)
4775 return x;
4777 return term;
4780 if (GET_CODE (x) != PLUS
4781 || XEXP (x, 0) != pic_offset_table_rtx
4782 || GET_CODE (XEXP (x, 1)) != CONST)
4783 return x;
4785 term = XEXP (XEXP (x, 1), 0);
4787 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4788 term = XEXP (term, 0);
4790 if (GET_CODE (term) != UNSPEC
4791 || XINT (term, 1) != UNSPEC_GOTOFF)
4792 return x;
4794 term = XVECEXP (term, 0, 0);
4796 if (GET_CODE (term) != SYMBOL_REF
4797 && GET_CODE (term) != LABEL_REF)
4798 return x;
4800 return term;
4803 /* Determine if a given RTX is a valid constant. We already know this
4804 satisfies CONSTANT_P. */
4806 bool
4807 legitimate_constant_p (x)
4808 rtx x;
4810 rtx inner;
4812 switch (GET_CODE (x))
4814 case SYMBOL_REF:
4815 /* TLS symbols are not constant. */
4816 if (tls_symbolic_operand (x, Pmode))
4817 return false;
4818 break;
4820 case CONST:
4821 inner = XEXP (x, 0);
4823 /* Offsets of TLS symbols are never valid.
4824 Discourage CSE from creating them. */
4825 if (GET_CODE (inner) == PLUS
4826 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4827 return false;
4829 /* Only some unspecs are valid as "constants". */
4830 if (GET_CODE (inner) == UNSPEC)
4831 switch (XINT (inner, 1))
4833 case UNSPEC_TPOFF:
4834 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4835 case UNSPEC_TP:
4836 return true;
4837 default:
4838 return false;
4840 break;
4842 default:
4843 break;
4846 /* Otherwise we handle everything else in the move patterns. */
4847 return true;
4850 /* Determine if a given RTX is a valid constant address. */
4852 bool
4853 constant_address_p (x)
4854 rtx x;
4856 switch (GET_CODE (x))
4858 case LABEL_REF:
4859 case CONST_INT:
4860 return true;
4862 case CONST_DOUBLE:
4863 return TARGET_64BIT;
4865 case CONST:
4866 case SYMBOL_REF:
4867 return !flag_pic && legitimate_constant_p (x);
4869 default:
4870 return false;
4874 /* Nonzero if the constant value X is a legitimate general operand
4875 when generating PIC code. It is given that flag_pic is on and
4876 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4878 bool
4879 legitimate_pic_operand_p (x)
4880 rtx x;
4882 rtx inner;
4884 switch (GET_CODE (x))
4886 case CONST:
4887 inner = XEXP (x, 0);
4889 /* Only some unspecs are valid as "constants". */
4890 if (GET_CODE (inner) == UNSPEC)
4891 switch (XINT (inner, 1))
4893 case UNSPEC_TPOFF:
4894 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4895 case UNSPEC_TP:
4896 return true;
4897 default:
4898 return false;
4900 /* FALLTHRU */
4902 case SYMBOL_REF:
4903 case LABEL_REF:
4904 return legitimate_pic_address_disp_p (x);
4906 default:
4907 return true;
4911 /* Determine if a given CONST RTX is a valid memory displacement
4912 in PIC mode. */
4915 legitimate_pic_address_disp_p (disp)
4916 register rtx disp;
4918 bool saw_plus;
4920 /* In 64bit mode we can allow direct addresses of symbols and labels
4921 when they are not dynamic symbols. */
4922 if (TARGET_64BIT)
4924 rtx x = disp;
4925 if (GET_CODE (disp) == CONST)
4926 x = XEXP (disp, 0);
4927 /* ??? Handle PIC code models */
4928 if (GET_CODE (x) == PLUS
4929 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4930 && ix86_cmodel == CM_SMALL_PIC
4931 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4932 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4933 x = XEXP (x, 0);
4934 if (local_symbolic_operand (x, Pmode))
4935 return 1;
4937 if (GET_CODE (disp) != CONST)
4938 return 0;
4939 disp = XEXP (disp, 0);
4941 if (TARGET_64BIT)
4943 /* It is not safe to allow PLUS expressions here, given the limited
4944 allowed distance of GOT tables. We should not need these anyway. */
4945 if (GET_CODE (disp) != UNSPEC
4946 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4947 return 0;
4949 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4950 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4951 return 0;
4952 return 1;
4955 saw_plus = false;
4956 if (GET_CODE (disp) == PLUS)
4958 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4959 return 0;
4960 disp = XEXP (disp, 0);
4961 saw_plus = true;
4964 if (GET_CODE (disp) != UNSPEC)
4965 return 0;
4967 switch (XINT (disp, 1))
4969 case UNSPEC_GOT:
4970 if (saw_plus)
4971 return false;
4972 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
4973 case UNSPEC_GOTOFF:
4974 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4975 case UNSPEC_GOTTPOFF:
4976 if (saw_plus)
4977 return false;
4978 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4979 case UNSPEC_NTPOFF:
4980 /* ??? Could support offset here. */
4981 if (saw_plus)
4982 return false;
4983 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4984 case UNSPEC_DTPOFF:
4985 /* ??? Could support offset here. */
4986 if (saw_plus)
4987 return false;
4988 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
4991 return 0;
4994 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
4995 memory address for an instruction. The MODE argument is the machine mode
4996 for the MEM expression that wants to use this address.
4998 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
4999 convert common non-canonical forms to canonical form so that they will
5000 be recognized. */
5003 legitimate_address_p (mode, addr, strict)
5004 enum machine_mode mode;
5005 register rtx addr;
5006 int strict;
5008 struct ix86_address parts;
5009 rtx base, index, disp;
5010 HOST_WIDE_INT scale;
5011 const char *reason = NULL;
5012 rtx reason_rtx = NULL_RTX;
5014 if (TARGET_DEBUG_ADDR)
5016 fprintf (stderr,
5017 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5018 GET_MODE_NAME (mode), strict);
5019 debug_rtx (addr);
5022 if (ix86_decompose_address (addr, &parts) <= 0)
5024 reason = "decomposition failed";
5025 goto report_error;
5028 base = parts.base;
5029 index = parts.index;
5030 disp = parts.disp;
5031 scale = parts.scale;
5033 /* Validate base register.
5035 Don't allow SUBREGs here; they can lead to spill failures when the base
5036 is one word out of a two word structure, which is represented internally
5037 as a DImode int. */
5039 if (base)
5041 rtx reg;
5042 reason_rtx = base;
5044 if (GET_CODE (base) == SUBREG)
5045 reg = SUBREG_REG (base);
5046 else
5047 reg = base;
5049 if (GET_CODE (reg) != REG)
5051 reason = "base is not a register";
5052 goto report_error;
5055 if (GET_MODE (base) != Pmode)
5057 reason = "base is not in Pmode";
5058 goto report_error;
5061 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5062 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5064 reason = "base is not valid";
5065 goto report_error;
5069 /* Validate index register.
5071 Don't allow SUBREGs here; they can lead to spill failures when the index
5072 is one word out of a two word structure, which is represented internally
5073 as a DImode int. */
5075 if (index)
5077 rtx reg;
5078 reason_rtx = index;
5080 if (GET_CODE (index) == SUBREG)
5081 reg = SUBREG_REG (index);
5082 else
5083 reg = index;
5085 if (GET_CODE (reg) != REG)
5087 reason = "index is not a register";
5088 goto report_error;
5091 if (GET_MODE (index) != Pmode)
5093 reason = "index is not in Pmode";
5094 goto report_error;
5097 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5098 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5100 reason = "index is not valid";
5101 goto report_error;
5105 /* Validate scale factor. */
5106 if (scale != 1)
5108 reason_rtx = GEN_INT (scale);
5109 if (!index)
5111 reason = "scale without index";
5112 goto report_error;
5115 if (scale != 2 && scale != 4 && scale != 8)
5117 reason = "scale is not a valid multiplier";
5118 goto report_error;
5122 /* Validate displacement. */
5123 if (disp)
5125 reason_rtx = disp;
5127 if (TARGET_64BIT)
5129 if (!x86_64_sign_extended_value (disp))
5131 reason = "displacement is out of range";
5132 goto report_error;
5135 else
5137 if (GET_CODE (disp) == CONST_DOUBLE)
5139 reason = "displacement is a const_double";
5140 goto report_error;
5144 if (GET_CODE (disp) == CONST
5145 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5146 switch (XINT (XEXP (disp, 0), 1))
5148 case UNSPEC_GOT:
5149 case UNSPEC_GOTOFF:
5150 case UNSPEC_GOTPCREL:
5151 if (!flag_pic)
5152 abort ();
5153 goto is_legitimate_pic;
5155 case UNSPEC_GOTTPOFF:
5156 case UNSPEC_NTPOFF:
5157 case UNSPEC_DTPOFF:
5158 break;
5160 default:
5161 reason = "invalid address unspec";
5162 goto report_error;
5165 else if (flag_pic && SYMBOLIC_CONST (disp))
5167 is_legitimate_pic:
5168 if (TARGET_64BIT && (index || base))
5170 reason = "non-constant pic memory reference";
5171 goto report_error;
5173 if (! legitimate_pic_address_disp_p (disp))
5175 reason = "displacement is an invalid pic construct";
5176 goto report_error;
5179 /* This code used to verify that a symbolic pic displacement
5180 includes the pic_offset_table_rtx register.
5182 While this is a good idea, unfortunately these constructs may
5183 be created by the "adds using lea" optimization for incorrect
5184 code like:
5186 int a;
5187 int foo(int i)
5189 return *(&a+i);
5192 This code is nonsensical, but results in addressing the
5193 GOT table with a pic_offset_table_rtx base. We can't
5194 just refuse it easily, since it gets matched by the
5195 "addsi3" pattern, which later gets split to lea when the
5196 output register differs from the input. While this
5197 could be handled by a separate addsi pattern for this case
5198 that never results in lea, disabling this test seems to be
5199 the easier and correct fix for the crash. */
5201 else if (!CONSTANT_ADDRESS_P (disp))
5203 reason = "displacement is not constant";
5204 goto report_error;
5208 /* Everything looks valid. */
5209 if (TARGET_DEBUG_ADDR)
5210 fprintf (stderr, "Success.\n");
5211 return TRUE;
5213 report_error:
5214 if (TARGET_DEBUG_ADDR)
5216 fprintf (stderr, "Error: %s\n", reason);
5217 debug_rtx (reason_rtx);
5219 return FALSE;
5222 /* Return a unique alias set for the GOT. */
5224 static HOST_WIDE_INT
5225 ix86_GOT_alias_set ()
5227 static HOST_WIDE_INT set = -1;
5228 if (set == -1)
5229 set = new_alias_set ();
5230 return set;
5233 /* Return a legitimate reference for ORIG (an address) using the
5234 register REG. If REG is 0, a new pseudo is generated.
5236 There are two types of references that must be handled:
5238 1. Global data references must load the address from the GOT, via
5239 the PIC reg. An insn is emitted to do this load, and the reg is
5240 returned.
5242 2. Static data references, constant pool addresses, and code labels
5243 compute the address as an offset from the GOT, whose base is in
5244 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5245 differentiate them from global data objects. The returned
5246 address is the PIC reg + an unspec constant.
5248 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5249 reg also appears in the address. */
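/* For instance (illustrative), on 32-bit with flag_pic a reference to a
   global symbol `foo' is rewritten below into a load of the form

	(mem (plus (reg pic) (const (unspec [foo] UNSPEC_GOT))))

   while a local symbol `bar' becomes the offset form

	(plus (reg pic) (const (unspec [bar] UNSPEC_GOTOFF)))  */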
5252 legitimize_pic_address (orig, reg)
5253 rtx orig;
5254 rtx reg;
5256 rtx addr = orig;
5257 rtx new = orig;
5258 rtx base;
5260 if (local_symbolic_operand (addr, Pmode))
5262 /* In 64bit mode we can address such objects directly. */
5263 if (TARGET_64BIT)
5264 new = addr;
5265 else
5267 /* This symbol may be referenced via a displacement from the PIC
5268 base address (@GOTOFF). */
5270 if (reload_in_progress)
5271 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5272 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5273 new = gen_rtx_CONST (Pmode, new);
5274 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5276 if (reg != 0)
5278 emit_move_insn (reg, new);
5279 new = reg;
5283 else if (GET_CODE (addr) == SYMBOL_REF)
5285 if (TARGET_64BIT)
5287 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5288 new = gen_rtx_CONST (Pmode, new);
5289 new = gen_rtx_MEM (Pmode, new);
5290 RTX_UNCHANGING_P (new) = 1;
5291 set_mem_alias_set (new, ix86_GOT_alias_set ());
5293 if (reg == 0)
5294 reg = gen_reg_rtx (Pmode);
5295 /* Use gen_movsi directly, otherwise the address is loaded
5296 into a register for CSE. We don't want to CSE these addresses;
5297 instead we CSE addresses from the GOT table, so skip this. */
5298 emit_insn (gen_movsi (reg, new));
5299 new = reg;
5301 else
5303 /* This symbol must be referenced via a load from the
5304 Global Offset Table (@GOT). */
5306 if (reload_in_progress)
5307 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5308 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5309 new = gen_rtx_CONST (Pmode, new);
5310 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5311 new = gen_rtx_MEM (Pmode, new);
5312 RTX_UNCHANGING_P (new) = 1;
5313 set_mem_alias_set (new, ix86_GOT_alias_set ());
5315 if (reg == 0)
5316 reg = gen_reg_rtx (Pmode);
5317 emit_move_insn (reg, new);
5318 new = reg;
5321 else
5323 if (GET_CODE (addr) == CONST)
5325 addr = XEXP (addr, 0);
5327 /* We must match stuff we generate before. Assume the only
5328 unspecs that can get here are ours. Not that we could do
5329 anything with them anyway... */
5330 if (GET_CODE (addr) == UNSPEC
5331 || (GET_CODE (addr) == PLUS
5332 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5333 return orig;
5334 if (GET_CODE (addr) != PLUS)
5335 abort ();
5337 if (GET_CODE (addr) == PLUS)
5339 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5341 /* Check first to see if this is a constant offset from a @GOTOFF
5342 symbol reference. */
5343 if (local_symbolic_operand (op0, Pmode)
5344 && GET_CODE (op1) == CONST_INT)
5346 if (!TARGET_64BIT)
5348 if (reload_in_progress)
5349 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5350 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5351 UNSPEC_GOTOFF);
5352 new = gen_rtx_PLUS (Pmode, new, op1);
5353 new = gen_rtx_CONST (Pmode, new);
5354 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5356 if (reg != 0)
5358 emit_move_insn (reg, new);
5359 new = reg;
5362 else
5364 /* ??? We need to limit offsets here. */
5367 else
5369 base = legitimize_pic_address (XEXP (addr, 0), reg);
5370 new = legitimize_pic_address (XEXP (addr, 1),
5371 base == reg ? NULL_RTX : reg);
5373 if (GET_CODE (new) == CONST_INT)
5374 new = plus_constant (base, INTVAL (new));
5375 else
5377 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5379 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5380 new = XEXP (new, 1);
5382 new = gen_rtx_PLUS (Pmode, base, new);
5387 return new;
5390 static void
5391 ix86_encode_section_info (decl, first)
5392 tree decl;
5393 int first ATTRIBUTE_UNUSED;
5395 bool local_p = (*targetm.binds_local_p) (decl);
5396 rtx rtl, symbol;
5398 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5399 if (GET_CODE (rtl) != MEM)
5400 return;
5401 symbol = XEXP (rtl, 0);
5402 if (GET_CODE (symbol) != SYMBOL_REF)
5403 return;
5405 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5406 symbol so that we may access it directly in the GOT. */
5408 if (flag_pic)
5409 SYMBOL_REF_FLAG (symbol) = local_p;
5411 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5412 "local dynamic", "initial exec" or "local exec" TLS models
5413 respectively. */
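/* For instance (illustrative), with the initial-exec model chosen below a
   symbol named "foo" is rewritten to "%ifoo"; ix86_strip_name_encoding
   strips the prefix again before the name is printed.  */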
5415 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5417 const char *symbol_str;
5418 char *newstr;
5419 size_t len;
5420 enum tls_model kind;
5422 if (!flag_pic)
5424 if (local_p)
5425 kind = TLS_MODEL_LOCAL_EXEC;
5426 else
5427 kind = TLS_MODEL_INITIAL_EXEC;
5429 /* Local dynamic is inefficient when we're not combining the
5430 parts of the address. */
5431 else if (optimize && local_p)
5432 kind = TLS_MODEL_LOCAL_DYNAMIC;
5433 else
5434 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5435 if (kind < flag_tls_default)
5436 kind = flag_tls_default;
5438 symbol_str = XSTR (symbol, 0);
5440 if (symbol_str[0] == '%')
5442 if (symbol_str[1] == tls_model_chars[kind])
5443 return;
5444 symbol_str += 2;
5446 len = strlen (symbol_str) + 1;
5447 newstr = alloca (len + 2);
5449 newstr[0] = '%';
5450 newstr[1] = tls_model_chars[kind];
5451 memcpy (newstr + 2, symbol_str, len);
5453 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5457 /* Undo the above when printing symbol names. */
5459 static const char *
5460 ix86_strip_name_encoding (str)
5461 const char *str;
5463 if (str[0] == '%')
5464 str += 2;
5465 if (str [0] == '*')
5466 str += 1;
5467 return str;
5470 /* Load the thread pointer into a register. */
5472 static rtx
5473 get_thread_pointer ()
5475 rtx tp;
5477 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5478 tp = gen_rtx_CONST (Pmode, tp);
5479 tp = force_reg (Pmode, tp);
5481 return tp;
5484 /* Try machine-dependent ways of modifying an illegitimate address
5485 to be legitimate. If we find one, return the new, valid address.
5486 This macro is used in only one place: `memory_address' in explow.c.
5488 OLDX is the address as it was before break_out_memory_refs was called.
5489 In some cases it is useful to look at this to decide what needs to be done.
5491 MODE and WIN are passed so that this macro can use
5492 GO_IF_LEGITIMATE_ADDRESS.
5494 It is always safe for this macro to do nothing. It exists to recognize
5495 opportunities to optimize the output.
5497 For the 80386, we handle X+REG by loading X into a register R and
5498 using R+REG. R will go in a general reg and indexing will be used.
5499 However, if REG is a broken-out memory address or multiplication,
5500 nothing needs to be done because REG can certainly go in a general reg.
5502 When -fpic is used, special handling is needed for symbolic references.
5503 See comments by legitimize_pic_address in i386.c for details. */
5506 legitimize_address (x, oldx, mode)
5507 register rtx x;
5508 register rtx oldx ATTRIBUTE_UNUSED;
5509 enum machine_mode mode;
5511 int changed = 0;
5512 unsigned log;
5514 if (TARGET_DEBUG_ADDR)
5516 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5517 GET_MODE_NAME (mode));
5518 debug_rtx (x);
5521 log = tls_symbolic_operand (x, mode);
5522 if (log)
5524 rtx dest, base, off, pic;
5526 switch (log)
5528 case TLS_MODEL_GLOBAL_DYNAMIC:
5529 dest = gen_reg_rtx (Pmode);
5530 emit_insn (gen_tls_global_dynamic (dest, x));
5531 break;
5533 case TLS_MODEL_LOCAL_DYNAMIC:
5534 base = gen_reg_rtx (Pmode);
5535 emit_insn (gen_tls_local_dynamic_base (base));
5537 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5538 off = gen_rtx_CONST (Pmode, off);
5540 return gen_rtx_PLUS (Pmode, base, off);
5542 case TLS_MODEL_INITIAL_EXEC:
5543 if (flag_pic)
5545 if (reload_in_progress)
5546 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5547 pic = pic_offset_table_rtx;
5549 else
5551 pic = gen_reg_rtx (Pmode);
5552 emit_insn (gen_set_got (pic));
5555 base = get_thread_pointer ();
5557 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5558 off = gen_rtx_CONST (Pmode, off);
5559 off = gen_rtx_PLUS (Pmode, pic, off);
5560 off = gen_rtx_MEM (Pmode, off);
5561 RTX_UNCHANGING_P (off) = 1;
5562 set_mem_alias_set (off, ix86_GOT_alias_set ());
5564 /* Damn Sun for specifying a set of dynamic relocations without
5565 considering the two-operand nature of the architecture!
5566 We'd be much better off with a "GOTNTPOFF" relocation that
5567 already contained the negated constant. */
5568 /* ??? Using negl and reg+reg addressing appears to be a loss
5569 size-wise. The negl is two bytes, just like the extra movl
5570 incurred by the two-operand subl, but reg+reg addressing
5571 uses the two-byte modrm form, unlike plain reg. */
5573 dest = gen_reg_rtx (Pmode);
5574 emit_insn (gen_subsi3 (dest, base, off));
5575 break;
5577 case TLS_MODEL_LOCAL_EXEC:
5578 base = get_thread_pointer ();
5580 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5581 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5582 off = gen_rtx_CONST (Pmode, off);
5584 if (TARGET_GNU_TLS)
5585 return gen_rtx_PLUS (Pmode, base, off);
5586 else
5588 dest = gen_reg_rtx (Pmode);
5589 emit_insn (gen_subsi3 (dest, base, off));
5591 break;
5593 default:
5594 abort ();
5597 return dest;
5600 if (flag_pic && SYMBOLIC_CONST (x))
5601 return legitimize_pic_address (x, 0);
5603 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5604 if (GET_CODE (x) == ASHIFT
5605 && GET_CODE (XEXP (x, 1)) == CONST_INT
5606 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5608 changed = 1;
5609 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5610 GEN_INT (1 << log));
5613 if (GET_CODE (x) == PLUS)
5615 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5617 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5618 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5619 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5621 changed = 1;
5622 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5623 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5624 GEN_INT (1 << log));
5627 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5628 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5629 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5631 changed = 1;
5632 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5633 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5634 GEN_INT (1 << log));
5637 /* Put multiply first if it isn't already. */
5638 if (GET_CODE (XEXP (x, 1)) == MULT)
5640 rtx tmp = XEXP (x, 0);
5641 XEXP (x, 0) = XEXP (x, 1);
5642 XEXP (x, 1) = tmp;
5643 changed = 1;
5646 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5647 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5648 created by virtual register instantiation, register elimination, and
5649 similar optimizations. */
5650 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5652 changed = 1;
5653 x = gen_rtx_PLUS (Pmode,
5654 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5655 XEXP (XEXP (x, 1), 0)),
5656 XEXP (XEXP (x, 1), 1));
5659 /* Canonicalize
5660 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5661 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5662 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5663 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5664 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5665 && CONSTANT_P (XEXP (x, 1)))
5667 rtx constant;
5668 rtx other = NULL_RTX;
5670 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5672 constant = XEXP (x, 1);
5673 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5675 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5677 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5678 other = XEXP (x, 1);
5680 else
5681 constant = 0;
5683 if (constant)
5685 changed = 1;
5686 x = gen_rtx_PLUS (Pmode,
5687 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5688 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5689 plus_constant (other, INTVAL (constant)));
5693 if (changed && legitimate_address_p (mode, x, FALSE))
5694 return x;
5696 if (GET_CODE (XEXP (x, 0)) == MULT)
5698 changed = 1;
5699 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5702 if (GET_CODE (XEXP (x, 1)) == MULT)
5704 changed = 1;
5705 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5708 if (changed
5709 && GET_CODE (XEXP (x, 1)) == REG
5710 && GET_CODE (XEXP (x, 0)) == REG)
5711 return x;
5713 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5715 changed = 1;
5716 x = legitimize_pic_address (x, 0);
5719 if (changed && legitimate_address_p (mode, x, FALSE))
5720 return x;
5722 if (GET_CODE (XEXP (x, 0)) == REG)
5724 register rtx temp = gen_reg_rtx (Pmode);
5725 register rtx val = force_operand (XEXP (x, 1), temp);
5726 if (val != temp)
5727 emit_move_insn (temp, val);
5729 XEXP (x, 1) = temp;
5730 return x;
5733 else if (GET_CODE (XEXP (x, 1)) == REG)
5735 register rtx temp = gen_reg_rtx (Pmode);
5736 register rtx val = force_operand (XEXP (x, 0), temp);
5737 if (val != temp)
5738 emit_move_insn (temp, val);
5740 XEXP (x, 0) = temp;
5741 return x;
5745 return x;
5748 /* Print an integer constant expression in assembler syntax. Addition
5749 and subtraction are the only arithmetic that may appear in these
5750 expressions. FILE is the stdio stream to write to, X is the rtx, and
5751 CODE is the operand print code from the output string. */
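/* E.g. (illustrative), a SYMBOL_REF for `foo' wrapped in an UNSPEC_GOTOFF
   is printed as "foo@GOTOFF"; when such a term sits inside a PLUS with a
   constant, the constant is emitted first, as in "4+foo@GOTOFF".  */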
5753 static void
5754 output_pic_addr_const (file, x, code)
5755 FILE *file;
5756 rtx x;
5757 int code;
5759 char buf[256];
5761 switch (GET_CODE (x))
5763 case PC:
5764 if (flag_pic)
5765 putc ('.', file);
5766 else
5767 abort ();
5768 break;
5770 case SYMBOL_REF:
5771 assemble_name (file, XSTR (x, 0));
5772 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
5773 fputs ("@PLT", file);
5774 break;
5776 case LABEL_REF:
5777 x = XEXP (x, 0);
5778 /* FALLTHRU */
5779 case CODE_LABEL:
5780 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5781 assemble_name (asm_out_file, buf);
5782 break;
5784 case CONST_INT:
5785 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5786 break;
5788 case CONST:
5789 /* This used to output parentheses around the expression,
5790 but that does not work on the 386 (either ATT or BSD assembler). */
5791 output_pic_addr_const (file, XEXP (x, 0), code);
5792 break;
5794 case CONST_DOUBLE:
5795 if (GET_MODE (x) == VOIDmode)
5797 /* We can use %d if the number is <32 bits and positive. */
5798 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5799 fprintf (file, "0x%lx%08lx",
5800 (unsigned long) CONST_DOUBLE_HIGH (x),
5801 (unsigned long) CONST_DOUBLE_LOW (x));
5802 else
5803 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5805 else
5806 /* We can't handle floating point constants;
5807 PRINT_OPERAND must handle them. */
5808 output_operand_lossage ("floating constant misused");
5809 break;
5811 case PLUS:
5812 /* Some assemblers need integer constants to appear first. */
5813 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5815 output_pic_addr_const (file, XEXP (x, 0), code);
5816 putc ('+', file);
5817 output_pic_addr_const (file, XEXP (x, 1), code);
5819 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5821 output_pic_addr_const (file, XEXP (x, 1), code);
5822 putc ('+', file);
5823 output_pic_addr_const (file, XEXP (x, 0), code);
5825 else
5826 abort ();
5827 break;
5829 case MINUS:
5830 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5831 output_pic_addr_const (file, XEXP (x, 0), code);
5832 putc ('-', file);
5833 output_pic_addr_const (file, XEXP (x, 1), code);
5834 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5835 break;
5837 case UNSPEC:
5838 if (XVECLEN (x, 0) != 1)
5839 abort ();
5840 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5841 switch (XINT (x, 1))
5843 case UNSPEC_GOT:
5844 fputs ("@GOT", file);
5845 break;
5846 case UNSPEC_GOTOFF:
5847 fputs ("@GOTOFF", file);
5848 break;
5849 case UNSPEC_GOTPCREL:
5850 fputs ("@GOTPCREL(%rip)", file);
5851 break;
5852 case UNSPEC_GOTTPOFF:
5853 fputs ("@GOTTPOFF", file);
5854 break;
5855 case UNSPEC_TPOFF:
5856 fputs ("@TPOFF", file);
5857 break;
5858 case UNSPEC_NTPOFF:
5859 fputs ("@NTPOFF", file);
5860 break;
5861 case UNSPEC_DTPOFF:
5862 fputs ("@DTPOFF", file);
5863 break;
5864 default:
5865 output_operand_lossage ("invalid UNSPEC as operand");
5866 break;
5868 break;
5870 default:
5871 output_operand_lossage ("invalid expression as operand");
5875 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5876 We need to handle our special PIC relocations. */
5878 void
5879 i386_dwarf_output_addr_const (file, x)
5880 FILE *file;
5881 rtx x;
5883 #ifdef ASM_QUAD
5884 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5885 #else
5886 if (TARGET_64BIT)
5887 abort ();
5888 fprintf (file, "%s", ASM_LONG);
5889 #endif
5890 if (flag_pic)
5891 output_pic_addr_const (file, x, '\0');
5892 else
5893 output_addr_const (file, x);
5894 fputc ('\n', file);
5897 /* In the name of slightly smaller debug output, and to cater to
5898 general assembler lossage, recognize PIC+GOTOFF and turn it back
5899 into a direct symbol reference. */
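/* Illustrative example (added for exposition; the RTL below is a sketch,
   not taken from a real dump).  Given a reference such as
       (plus:SI (reg:SI %ebx)
                (const:SI (unspec:SI [(symbol_ref:SI "foo")] UNSPEC_GOTOFF)))
   this routine hands back plain (symbol_ref:SI "foo"), and for the scaled
   form %ebx + %reg*4 + foo@GOTOFF it returns (plus (mult %reg 4) foo),
   which is what the dwarf2 output machinery would rather see.  */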
5902 i386_simplify_dwarf_addr (orig_x)
5903 rtx orig_x;
5905 rtx x = orig_x, y;
5907 if (GET_CODE (x) == MEM)
5908 x = XEXP (x, 0);
5910 if (TARGET_64BIT)
5912 if (GET_CODE (x) != CONST
5913 || GET_CODE (XEXP (x, 0)) != UNSPEC
5914 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5915 || GET_CODE (orig_x) != MEM)
5916 return orig_x;
5917 return XVECEXP (XEXP (x, 0), 0, 0);
5920 if (GET_CODE (x) != PLUS
5921 || GET_CODE (XEXP (x, 1)) != CONST)
5922 return orig_x;
5924 if (GET_CODE (XEXP (x, 0)) == REG
5925 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5926 /* %ebx + GOT/GOTOFF */
5927 y = NULL;
5928 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5930 /* %ebx + %reg * scale + GOT/GOTOFF */
5931 y = XEXP (x, 0);
5932 if (GET_CODE (XEXP (y, 0)) == REG
5933 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5934 y = XEXP (y, 1);
5935 else if (GET_CODE (XEXP (y, 1)) == REG
5936 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5937 y = XEXP (y, 0);
5938 else
5939 return orig_x;
5940 if (GET_CODE (y) != REG
5941 && GET_CODE (y) != MULT
5942 && GET_CODE (y) != ASHIFT)
5943 return orig_x;
5945 else
5946 return orig_x;
5948 x = XEXP (XEXP (x, 1), 0);
5949 if (GET_CODE (x) == UNSPEC
5950 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5951 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
5953 if (y)
5954 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
5955 return XVECEXP (x, 0, 0);
5958 if (GET_CODE (x) == PLUS
5959 && GET_CODE (XEXP (x, 0)) == UNSPEC
5960 && GET_CODE (XEXP (x, 1)) == CONST_INT
5961 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
5962 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
5963 && GET_CODE (orig_x) != MEM)))
5965 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
5966 if (y)
5967 return gen_rtx_PLUS (Pmode, y, x);
5968 return x;
5971 return orig_x;
5974 static void
5975 put_condition_code (code, mode, reverse, fp, file)
5976 enum rtx_code code;
5977 enum machine_mode mode;
5978 int reverse, fp;
5979 FILE *file;
5981 const char *suffix;
5983 if (mode == CCFPmode || mode == CCFPUmode)
5985 enum rtx_code second_code, bypass_code;
5986 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
5987 if (bypass_code != NIL || second_code != NIL)
5988 abort ();
5989 code = ix86_fp_compare_code_to_integer (code);
5990 mode = CCmode;
5992 if (reverse)
5993 code = reverse_condition (code);
5995 switch (code)
5997 case EQ:
5998 suffix = "e";
5999 break;
6000 case NE:
6001 suffix = "ne";
6002 break;
6003 case GT:
6004 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6005 abort ();
6006 suffix = "g";
6007 break;
6008 case GTU:
6009 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6010 Those same assemblers have the same but opposite lossage on cmov. */
6011 if (mode != CCmode)
6012 abort ();
6013 suffix = fp ? "nbe" : "a";
6014 break;
6015 case LT:
6016 if (mode == CCNOmode || mode == CCGOCmode)
6017 suffix = "s";
6018 else if (mode == CCmode || mode == CCGCmode)
6019 suffix = "l";
6020 else
6021 abort ();
6022 break;
6023 case LTU:
6024 if (mode != CCmode)
6025 abort ();
6026 suffix = "b";
6027 break;
6028 case GE:
6029 if (mode == CCNOmode || mode == CCGOCmode)
6030 suffix = "ns";
6031 else if (mode == CCmode || mode == CCGCmode)
6032 suffix = "ge";
6033 else
6034 abort ();
6035 break;
6036 case GEU:
6037 /* ??? As above. */
6038 if (mode != CCmode)
6039 abort ();
6040 suffix = fp ? "nb" : "ae";
6041 break;
6042 case LE:
6043 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6044 abort ();
6045 suffix = "le";
6046 break;
6047 case LEU:
6048 if (mode != CCmode)
6049 abort ();
6050 suffix = "be";
6051 break;
6052 case UNORDERED:
6053 suffix = fp ? "u" : "p";
6054 break;
6055 case ORDERED:
6056 suffix = fp ? "nu" : "np";
6057 break;
6058 default:
6059 abort ();
6061 fputs (suffix, file);
6064 void
6065 print_reg (x, code, file)
6066 rtx x;
6067 int code;
6068 FILE *file;
6070 if (REGNO (x) == ARG_POINTER_REGNUM
6071 || REGNO (x) == FRAME_POINTER_REGNUM
6072 || REGNO (x) == FLAGS_REG
6073 || REGNO (x) == FPSR_REG)
6074 abort ();
6076 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6077 putc ('%', file);
6079 if (code == 'w' || MMX_REG_P (x))
6080 code = 2;
6081 else if (code == 'b')
6082 code = 1;
6083 else if (code == 'k')
6084 code = 4;
6085 else if (code == 'q')
6086 code = 8;
6087 else if (code == 'y')
6088 code = 3;
6089 else if (code == 'h')
6090 code = 0;
6091 else
6092 code = GET_MODE_SIZE (GET_MODE (x));
6094 /* Irritatingly, AMD extended registers use a different naming convention
6095 from the normal registers. */
6096 if (REX_INT_REG_P (x))
6098 if (!TARGET_64BIT)
6099 abort ();
6100 switch (code)
6102 case 0:
6103 error ("extended registers have no high halves");
6104 break;
6105 case 1:
6106 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6107 break;
6108 case 2:
6109 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6110 break;
6111 case 4:
6112 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6113 break;
6114 case 8:
6115 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6116 break;
6117 default:
6118 error ("unsupported operand size for extended register");
6119 break;
6121 return;
6123 switch (code)
6125 case 3:
6126 if (STACK_TOP_P (x))
6128 fputs ("st(0)", file);
6129 break;
6131 /* FALLTHRU */
6132 case 8:
6133 case 4:
6134 case 12:
6135 if (! ANY_FP_REG_P (x))
6136 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6137 /* FALLTHRU */
6138 case 16:
6139 case 2:
6140 fputs (hi_reg_name[REGNO (x)], file);
6141 break;
6142 case 1:
6143 fputs (qi_reg_name[REGNO (x)], file);
6144 break;
6145 case 0:
6146 fputs (qi_high_reg_name[REGNO (x)], file);
6147 break;
6148 default:
6149 abort ();
6153 /* Locate some local-dynamic symbol still in use by this function
6154 so that we can print its name in some tls_local_dynamic_base
6155 pattern. */
6157 static const char *
6158 get_some_local_dynamic_name ()
6160 rtx insn;
6162 if (cfun->machine->some_ld_name)
6163 return cfun->machine->some_ld_name;
6165 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6166 if (INSN_P (insn)
6167 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6168 return cfun->machine->some_ld_name;
6170 abort ();
6173 static int
6174 get_some_local_dynamic_name_1 (px, data)
6175 rtx *px;
6176 void *data ATTRIBUTE_UNUSED;
6178 rtx x = *px;
6180 if (GET_CODE (x) == SYMBOL_REF
6181 && local_dynamic_symbolic_operand (x, Pmode))
6183 cfun->machine->some_ld_name = XSTR (x, 0);
6184 return 1;
6187 return 0;
6190 /* Meaning of CODE:
6191 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6192 C -- print opcode suffix for set/cmov insn.
6193 c -- like C, but print reversed condition
6194 F,f -- likewise, but for floating-point.
6195 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6196 nothing
6197 R -- print the prefix for register names.
6198 z -- print the opcode suffix for the size of the current operand.
6199 * -- print a star (in certain assembler syntax)
6200 A -- print an absolute memory reference.
6201 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6202 s -- print a shift double count, followed by the assembler's argument
6203 delimiter.
6204 b -- print the QImode name of the register for the indicated operand.
6205 %b0 would print %al if operands[0] is reg 0.
6206 w -- likewise, print the HImode name of the register.
6207 k -- likewise, print the SImode name of the register.
6208 q -- likewise, print the DImode name of the register.
6209 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6210 y -- print "st(0)" instead of "st" as a register.
6211 D -- print condition for SSE cmp instruction.
6212 P -- if PIC, print an @PLT suffix.
6213 X -- don't print any sort of PIC '@' suffix for a symbol.
6214 & -- print some in-use local-dynamic symbol name.
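/* A few worked examples (added for exposition; the register numbers are
   only illustrative).  With operands[0] = (reg:SI 0), i.e. %eax:
     %k0 -> "%eax" in AT&T syntax (no "%" for a typical Intel-syntax setup)
     %w0 -> "%ax",  %b0 -> "%al",  %h0 -> "%ah"
   and for a DFmode memory operand, %z1 emits the "l" size suffix, so a
   template like "fadd%z1" comes out as "faddl" under the AT&T dialect.  */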
6217 void
6218 print_operand (file, x, code)
6219 FILE *file;
6220 rtx x;
6221 int code;
6223 if (code)
6225 switch (code)
6227 case '*':
6228 if (ASSEMBLER_DIALECT == ASM_ATT)
6229 putc ('*', file);
6230 return;
6232 case '&':
6233 assemble_name (file, get_some_local_dynamic_name ());
6234 return;
6236 case 'A':
6237 if (ASSEMBLER_DIALECT == ASM_ATT)
6238 putc ('*', file);
6239 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6241 /* Intel syntax. For absolute addresses, registers should not
6242 be surrounded by braces. */
6243 if (GET_CODE (x) != REG)
6245 putc ('[', file);
6246 PRINT_OPERAND (file, x, 0);
6247 putc (']', file);
6248 return;
6251 else
6252 abort ();
6254 PRINT_OPERAND (file, x, 0);
6255 return;
6258 case 'L':
6259 if (ASSEMBLER_DIALECT == ASM_ATT)
6260 putc ('l', file);
6261 return;
6263 case 'W':
6264 if (ASSEMBLER_DIALECT == ASM_ATT)
6265 putc ('w', file);
6266 return;
6268 case 'B':
6269 if (ASSEMBLER_DIALECT == ASM_ATT)
6270 putc ('b', file);
6271 return;
6273 case 'Q':
6274 if (ASSEMBLER_DIALECT == ASM_ATT)
6275 putc ('l', file);
6276 return;
6278 case 'S':
6279 if (ASSEMBLER_DIALECT == ASM_ATT)
6280 putc ('s', file);
6281 return;
6283 case 'T':
6284 if (ASSEMBLER_DIALECT == ASM_ATT)
6285 putc ('t', file);
6286 return;
6288 case 'z':
6289 /* 387 opcodes don't get size suffixes if the operands are
6290 registers. */
6291 if (STACK_REG_P (x))
6292 return;
6294 /* Likewise if using Intel opcodes. */
6295 if (ASSEMBLER_DIALECT == ASM_INTEL)
6296 return;
6298 /* This is the size of op from size of operand. */
6299 switch (GET_MODE_SIZE (GET_MODE (x)))
6301 case 2:
6302 #ifdef HAVE_GAS_FILDS_FISTS
6303 putc ('s', file);
6304 #endif
6305 return;
6307 case 4:
6308 if (GET_MODE (x) == SFmode)
6310 putc ('s', file);
6311 return;
6313 else
6314 putc ('l', file);
6315 return;
6317 case 12:
6318 case 16:
6319 putc ('t', file);
6320 return;
6322 case 8:
6323 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6325 #ifdef GAS_MNEMONICS
6326 putc ('q', file);
6327 #else
6328 putc ('l', file);
6329 putc ('l', file);
6330 #endif
6332 else
6333 putc ('l', file);
6334 return;
6336 default:
6337 abort ();
6340 case 'b':
6341 case 'w':
6342 case 'k':
6343 case 'q':
6344 case 'h':
6345 case 'y':
6346 case 'X':
6347 case 'P':
6348 break;
6350 case 's':
6351 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6353 PRINT_OPERAND (file, x, 0);
6354 putc (',', file);
6356 return;
6358 case 'D':
6359 /* Little bit of braindamage here. The SSE compare instructions
6360 use completely different names for the comparisons than the
6361 fp conditional moves do. */
6362 switch (GET_CODE (x))
6364 case EQ:
6365 case UNEQ:
6366 fputs ("eq", file);
6367 break;
6368 case LT:
6369 case UNLT:
6370 fputs ("lt", file);
6371 break;
6372 case LE:
6373 case UNLE:
6374 fputs ("le", file);
6375 break;
6376 case UNORDERED:
6377 fputs ("unord", file);
6378 break;
6379 case NE:
6380 case LTGT:
6381 fputs ("neq", file);
6382 break;
6383 case UNGE:
6384 case GE:
6385 fputs ("nlt", file);
6386 break;
6387 case UNGT:
6388 case GT:
6389 fputs ("nle", file);
6390 break;
6391 case ORDERED:
6392 fputs ("ord", file);
6393 break;
6394 default:
6395 abort ();
6396 break;
6398 return;
6399 case 'O':
6400 #ifdef CMOV_SUN_AS_SYNTAX
6401 if (ASSEMBLER_DIALECT == ASM_ATT)
6403 switch (GET_MODE (x))
6405 case HImode: putc ('w', file); break;
6406 case SImode:
6407 case SFmode: putc ('l', file); break;
6408 case DImode:
6409 case DFmode: putc ('q', file); break;
6410 default: abort ();
6412 putc ('.', file);
6414 #endif
6415 return;
6416 case 'C':
6417 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6418 return;
6419 case 'F':
6420 #ifdef CMOV_SUN_AS_SYNTAX
6421 if (ASSEMBLER_DIALECT == ASM_ATT)
6422 putc ('.', file);
6423 #endif
6424 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6425 return;
6427 /* Like above, but reverse condition */
6428 case 'c':
6429 /* Check to see if argument to %c is really a constant
6430 and not a condition code which needs to be reversed. */
6431 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6433 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6434 return;
6436 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6437 return;
6438 case 'f':
6439 #ifdef CMOV_SUN_AS_SYNTAX
6440 if (ASSEMBLER_DIALECT == ASM_ATT)
6441 putc ('.', file);
6442 #endif
6443 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6444 return;
6445 case '+':
6447 rtx x;
6449 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6450 return;
6452 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6453 if (x)
6455 int pred_val = INTVAL (XEXP (x, 0));
6457 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6458 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6460 int taken = pred_val > REG_BR_PROB_BASE / 2;
6461 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6463 /* Emit hints only when the default branch prediction
6464 heuristics would fail. */
6465 if (taken != cputaken)
6467 /* We use 3e (DS) prefix for taken branches and
6468 2e (CS) prefix for not taken branches. */
6469 if (taken)
6470 fputs ("ds ; ", file);
6471 else
6472 fputs ("cs ; ", file);
6476 return;
6478 default:
6479 output_operand_lossage ("invalid operand code `%c'", code);
6483 if (GET_CODE (x) == REG)
6485 PRINT_REG (x, code, file);
6488 else if (GET_CODE (x) == MEM)
6490 /* No `byte ptr' prefix for call instructions. */
6491 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6493 const char * size;
6494 switch (GET_MODE_SIZE (GET_MODE (x)))
6496 case 1: size = "BYTE"; break;
6497 case 2: size = "WORD"; break;
6498 case 4: size = "DWORD"; break;
6499 case 8: size = "QWORD"; break;
6500 case 12: size = "XWORD"; break;
6501 case 16: size = "XMMWORD"; break;
6502 default:
6503 abort ();
6506 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6507 if (code == 'b')
6508 size = "BYTE";
6509 else if (code == 'w')
6510 size = "WORD";
6511 else if (code == 'k')
6512 size = "DWORD";
6514 fputs (size, file);
6515 fputs (" PTR ", file);
6518 x = XEXP (x, 0);
6519 if (flag_pic && CONSTANT_ADDRESS_P (x))
6520 output_pic_addr_const (file, x, code);
6521 /* Avoid (%rip) for call operands. */
6522 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6523 && GET_CODE (x) != CONST_INT)
6524 output_addr_const (file, x);
6525 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6526 output_operand_lossage ("invalid constraints for operand");
6527 else
6528 output_address (x);
6531 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6533 REAL_VALUE_TYPE r;
6534 long l;
6536 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6537 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6539 if (ASSEMBLER_DIALECT == ASM_ATT)
6540 putc ('$', file);
6541 fprintf (file, "0x%lx", l);
6544 /* These float cases don't actually occur as immediate operands. */
6545 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6547 REAL_VALUE_TYPE r;
6548 char dstr[30];
6550 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6551 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6552 fprintf (file, "%s", dstr);
6555 else if (GET_CODE (x) == CONST_DOUBLE
6556 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6558 REAL_VALUE_TYPE r;
6559 char dstr[30];
6561 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6562 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6563 fprintf (file, "%s", dstr);
6566 else if (GET_CODE (x) == CONST
6567 && GET_CODE (XEXP (x, 0)) == UNSPEC
6568 && XINT (XEXP (x, 0), 1) == UNSPEC_TP)
6570 if (ASSEMBLER_DIALECT == ASM_INTEL)
6571 fputs ("DWORD PTR ", file);
6572 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6573 putc ('%', file);
6574 fputs ("gs:0", file);
6577 else
6579 if (code != 'P')
6581 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6583 if (ASSEMBLER_DIALECT == ASM_ATT)
6584 putc ('$', file);
6586 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6587 || GET_CODE (x) == LABEL_REF)
6589 if (ASSEMBLER_DIALECT == ASM_ATT)
6590 putc ('$', file);
6591 else
6592 fputs ("OFFSET FLAT:", file);
6595 if (GET_CODE (x) == CONST_INT)
6596 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6597 else if (flag_pic)
6598 output_pic_addr_const (file, x, code);
6599 else
6600 output_addr_const (file, x);
6604 /* Print a memory operand whose address is ADDR. */
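/* Example of the two dialects this routine produces (illustrative only):
   with base %eax, index %ebx, scale 4 and displacement 16 it prints
       AT&T:   16(%eax,%ebx,4)
       Intel:  [eax+16+ebx*4]
   while a bare symbolic displacement under -fPIC is routed through
   output_pic_addr_const so that any @GOT/@GOTOFF decoration is kept.  */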
6606 void
6607 print_operand_address (file, addr)
6608 FILE *file;
6609 register rtx addr;
6611 struct ix86_address parts;
6612 rtx base, index, disp;
6613 int scale;
6615 if (! ix86_decompose_address (addr, &parts))
6616 abort ();
6618 base = parts.base;
6619 index = parts.index;
6620 disp = parts.disp;
6621 scale = parts.scale;
6623 if (!base && !index)
6625 /* Displacement only requires special attention. */
6627 if (GET_CODE (disp) == CONST_INT)
6629 if (ASSEMBLER_DIALECT == ASM_INTEL)
6631 if (USER_LABEL_PREFIX[0] == 0)
6632 putc ('%', file);
6633 fputs ("ds:", file);
6635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6637 else if (flag_pic)
6638 output_pic_addr_const (file, addr, 0);
6639 else
6640 output_addr_const (file, addr);
6642 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6643 if (TARGET_64BIT
6644 && (GET_CODE (addr) == SYMBOL_REF
6645 || GET_CODE (addr) == LABEL_REF
6646 || (GET_CODE (addr) == CONST
6647 && GET_CODE (XEXP (addr, 0)) == PLUS
6648 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6649 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6650 fputs ("(%rip)", file);
6652 else
6654 if (ASSEMBLER_DIALECT == ASM_ATT)
6656 if (disp)
6658 if (flag_pic)
6659 output_pic_addr_const (file, disp, 0);
6660 else if (GET_CODE (disp) == LABEL_REF)
6661 output_asm_label (disp);
6662 else
6663 output_addr_const (file, disp);
6666 putc ('(', file);
6667 if (base)
6668 PRINT_REG (base, 0, file);
6669 if (index)
6671 putc (',', file);
6672 PRINT_REG (index, 0, file);
6673 if (scale != 1)
6674 fprintf (file, ",%d", scale);
6676 putc (')', file);
6678 else
6680 rtx offset = NULL_RTX;
6682 if (disp)
6684 /* Pull out the offset of a symbol; print any symbol itself. */
6685 if (GET_CODE (disp) == CONST
6686 && GET_CODE (XEXP (disp, 0)) == PLUS
6687 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6689 offset = XEXP (XEXP (disp, 0), 1);
6690 disp = gen_rtx_CONST (VOIDmode,
6691 XEXP (XEXP (disp, 0), 0));
6694 if (flag_pic)
6695 output_pic_addr_const (file, disp, 0);
6696 else if (GET_CODE (disp) == LABEL_REF)
6697 output_asm_label (disp);
6698 else if (GET_CODE (disp) == CONST_INT)
6699 offset = disp;
6700 else
6701 output_addr_const (file, disp);
6704 putc ('[', file);
6705 if (base)
6707 PRINT_REG (base, 0, file);
6708 if (offset)
6710 if (INTVAL (offset) >= 0)
6711 putc ('+', file);
6712 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6715 else if (offset)
6716 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6717 else
6718 putc ('0', file);
6720 if (index)
6722 putc ('+', file);
6723 PRINT_REG (index, 0, file);
6724 if (scale != 1)
6725 fprintf (file, "*%d", scale);
6727 putc (']', file);
6732 bool
6733 output_addr_const_extra (file, x)
6734 FILE *file;
6735 rtx x;
6737 rtx op;
6739 if (GET_CODE (x) != UNSPEC)
6740 return false;
6742 op = XVECEXP (x, 0, 0);
6743 switch (XINT (x, 1))
6745 case UNSPEC_GOTTPOFF:
6746 output_addr_const (file, op);
6747 fputs ("@GOTTPOFF", file);
6748 break;
6749 case UNSPEC_TPOFF:
6750 output_addr_const (file, op);
6751 fputs ("@TPOFF", file);
6752 break;
6753 case UNSPEC_NTPOFF:
6754 output_addr_const (file, op);
6755 fputs ("@NTPOFF", file);
6756 break;
6757 case UNSPEC_DTPOFF:
6758 output_addr_const (file, op);
6759 fputs ("@DTPOFF", file);
6760 break;
6762 default:
6763 return false;
6766 return true;
6769 /* Split one or more DImode RTL references into pairs of SImode
6770 references. The RTL can be REG, offsettable MEM, integer constant, or
6771 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6772 split and "num" is its length. lo_half and hi_half are output arrays
6773 that parallel "operands". */
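/* For instance (purely illustrative): a pseudo (reg:DI 100) splits into
   (subreg:SI (reg:DI 100) 0) and (subreg:SI (reg:DI 100) 4), a memory
   operand (mem:DI (reg:SI %eax)) into (mem:SI (reg:SI %eax)) and
   (mem:SI (plus (reg:SI %eax) (const_int 4))), and an integer constant
   into its low and high 32-bit words.  */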
6775 void
6776 split_di (operands, num, lo_half, hi_half)
6777 rtx operands[];
6778 int num;
6779 rtx lo_half[], hi_half[];
6781 while (num--)
6783 rtx op = operands[num];
6785 /* simplify_subreg refuses to split volatile memory addresses,
6786 but we still have to handle them. */
6787 if (GET_CODE (op) == MEM)
6789 lo_half[num] = adjust_address (op, SImode, 0);
6790 hi_half[num] = adjust_address (op, SImode, 4);
6792 else
6794 lo_half[num] = simplify_gen_subreg (SImode, op,
6795 GET_MODE (op) == VOIDmode
6796 ? DImode : GET_MODE (op), 0);
6797 hi_half[num] = simplify_gen_subreg (SImode, op,
6798 GET_MODE (op) == VOIDmode
6799 ? DImode : GET_MODE (op), 4);
6803 /* Split one or more TImode RTL references into pairs of DImode
6804 references. The RTL can be REG, offsettable MEM, integer constant, or
6805 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6806 split and "num" is its length. lo_half and hi_half are output arrays
6807 that parallel "operands". */
6809 void
6810 split_ti (operands, num, lo_half, hi_half)
6811 rtx operands[];
6812 int num;
6813 rtx lo_half[], hi_half[];
6815 while (num--)
6817 rtx op = operands[num];
6819 /* simplify_subreg refuses to split volatile memory addresses, but we
6820 still have to handle them. */
6821 if (GET_CODE (op) == MEM)
6823 lo_half[num] = adjust_address (op, DImode, 0);
6824 hi_half[num] = adjust_address (op, DImode, 8);
6826 else
6828 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6829 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6834 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6835 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6836 is the expression of the binary operation. The output may either be
6837 emitted here, or returned to the caller, like all output_* functions.
6839 There is no guarantee that the operands are the same mode, as they
6840 might be within FLOAT or FLOAT_EXTEND expressions. */
6842 #ifndef SYSV386_COMPAT
6843 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6844 wants to fix the assemblers because that causes incompatibility
6845 with gcc. No-one wants to fix gcc because that causes
6846 incompatibility with assemblers... You can use the option of
6847 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6848 #define SYSV386_COMPAT 1
6849 #endif
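/* Two illustrative expansions (added for exposition, not exhaustive):
   with DFmode SSE operands the function simply returns
   "addsd\t{%2, %0|%0, %2}" (or the subsd/mulsd/divsd variants), while for
   a 387 PLUS whose second operand is memory it returns "fadd%z2\t%2",
   which print_operand later turns into e.g. "faddl" for DFmode.  The
   SYSV386_COMPAT setting only matters for the fsub/fdiv forms whose
   destination is not %st(0), where AT&T-derived assemblers swap the
   r/non-r mnemonics.  */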
6851 const char *
6852 output_387_binary_op (insn, operands)
6853 rtx insn;
6854 rtx *operands;
6856 static char buf[30];
6857 const char *p;
6858 const char *ssep;
6859 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6861 #ifdef ENABLE_CHECKING
6862 /* Even if we do not want to check the inputs, this documents input
6863 constraints. Which helps in understanding the following code. */
6864 if (STACK_REG_P (operands[0])
6865 && ((REG_P (operands[1])
6866 && REGNO (operands[0]) == REGNO (operands[1])
6867 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6868 || (REG_P (operands[2])
6869 && REGNO (operands[0]) == REGNO (operands[2])
6870 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6871 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6872 ; /* ok */
6873 else if (!is_sse)
6874 abort ();
6875 #endif
6877 switch (GET_CODE (operands[3]))
6879 case PLUS:
6880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6882 p = "fiadd";
6883 else
6884 p = "fadd";
6885 ssep = "add";
6886 break;
6888 case MINUS:
6889 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6890 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6891 p = "fisub";
6892 else
6893 p = "fsub";
6894 ssep = "sub";
6895 break;
6897 case MULT:
6898 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6899 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6900 p = "fimul";
6901 else
6902 p = "fmul";
6903 ssep = "mul";
6904 break;
6906 case DIV:
6907 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6908 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6909 p = "fidiv";
6910 else
6911 p = "fdiv";
6912 ssep = "div";
6913 break;
6915 default:
6916 abort ();
6919 if (is_sse)
6921 strcpy (buf, ssep);
6922 if (GET_MODE (operands[0]) == SFmode)
6923 strcat (buf, "ss\t{%2, %0|%0, %2}");
6924 else
6925 strcat (buf, "sd\t{%2, %0|%0, %2}");
6926 return buf;
6928 strcpy (buf, p);
6930 switch (GET_CODE (operands[3]))
6932 case MULT:
6933 case PLUS:
6934 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6936 rtx temp = operands[2];
6937 operands[2] = operands[1];
6938 operands[1] = temp;
6941 /* know operands[0] == operands[1]. */
6943 if (GET_CODE (operands[2]) == MEM)
6945 p = "%z2\t%2";
6946 break;
6949 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6951 if (STACK_TOP_P (operands[0]))
6952 /* How is it that we are storing to a dead operand[2]?
6953 Well, presumably operands[1] is dead too. We can't
6954 store the result to st(0) as st(0) gets popped on this
6955 instruction. Instead store to operands[2] (which I
6956 think has to be st(1)). st(1) will be popped later.
6957 gcc <= 2.8.1 didn't have this check and generated
6958 assembly code that the Unixware assembler rejected. */
6959 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6960 else
6961 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
6962 break;
6965 if (STACK_TOP_P (operands[0]))
6966 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
6967 else
6968 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
6969 break;
6971 case MINUS:
6972 case DIV:
6973 if (GET_CODE (operands[1]) == MEM)
6975 p = "r%z1\t%1";
6976 break;
6979 if (GET_CODE (operands[2]) == MEM)
6981 p = "%z2\t%2";
6982 break;
6985 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6987 #if SYSV386_COMPAT
6988 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
6989 derived assemblers, confusingly reverse the direction of
6990 the operation for fsub{r} and fdiv{r} when the
6991 destination register is not st(0). The Intel assembler
6992 doesn't have this brain damage. Read !SYSV386_COMPAT to
6993 figure out what the hardware really does. */
6994 if (STACK_TOP_P (operands[0]))
6995 p = "{p\t%0, %2|rp\t%2, %0}";
6996 else
6997 p = "{rp\t%2, %0|p\t%0, %2}";
6998 #else
6999 if (STACK_TOP_P (operands[0]))
7000 /* As above for fmul/fadd, we can't store to st(0). */
7001 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7002 else
7003 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7004 #endif
7005 break;
7008 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7010 #if SYSV386_COMPAT
7011 if (STACK_TOP_P (operands[0]))
7012 p = "{rp\t%0, %1|p\t%1, %0}";
7013 else
7014 p = "{p\t%1, %0|rp\t%0, %1}";
7015 #else
7016 if (STACK_TOP_P (operands[0]))
7017 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7018 else
7019 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7020 #endif
7021 break;
7024 if (STACK_TOP_P (operands[0]))
7026 if (STACK_TOP_P (operands[1]))
7027 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7028 else
7029 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7030 break;
7032 else if (STACK_TOP_P (operands[1]))
7034 #if SYSV386_COMPAT
7035 p = "{\t%1, %0|r\t%0, %1}";
7036 #else
7037 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7038 #endif
7040 else
7042 #if SYSV386_COMPAT
7043 p = "{r\t%2, %0|\t%0, %2}";
7044 #else
7045 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7046 #endif
7048 break;
7050 default:
7051 abort ();
7054 strcat (buf, p);
7055 return buf;
7058 /* Output code to initialize control word copies used by
7059 trunc?f?i patterns. NORMAL is set to the current control word, while
7060 ROUND_DOWN is set to the control word rounding downwards. */
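/* Background note (added; hardware detail, not from the original comment):
   bits 10-11 of the i387 control word are the rounding-control field, so
   OR-ing in 0xc00 -- or, presumably, inserting 0x0c into the high byte via
   the insv pattern -- selects the 11b "round toward zero" setting that the
   fist-based truncation patterns rely on.  */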
7061 void
7062 emit_i387_cw_initialization (normal, round_down)
7063 rtx normal, round_down;
7065 rtx reg = gen_reg_rtx (HImode);
7067 emit_insn (gen_x86_fnstcw_1 (normal));
7068 emit_move_insn (reg, normal);
7069 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7070 && !TARGET_64BIT)
7071 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7072 else
7073 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7074 emit_move_insn (round_down, reg);
7077 /* Output code for INSN to convert a float to a signed int. OPERANDS
7078 are the insn operands. The output may be [HSD]Imode and the input
7079 operand may be [SDX]Fmode. */
7081 const char *
7082 output_fix_trunc (insn, operands)
7083 rtx insn;
7084 rtx *operands;
7086 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7087 int dimode_p = GET_MODE (operands[0]) == DImode;
7089 /* Jump through a hoop or two for DImode, since the hardware has no
7090 non-popping instruction. We used to do this a different way, but
7091 that was somewhat fragile and broke with post-reload splitters. */
7092 if (dimode_p && !stack_top_dies)
7093 output_asm_insn ("fld\t%y1", operands);
7095 if (!STACK_TOP_P (operands[1]))
7096 abort ();
7098 if (GET_CODE (operands[0]) != MEM)
7099 abort ();
7101 output_asm_insn ("fldcw\t%3", operands);
7102 if (stack_top_dies || dimode_p)
7103 output_asm_insn ("fistp%z0\t%0", operands);
7104 else
7105 output_asm_insn ("fist%z0\t%0", operands);
7106 output_asm_insn ("fldcw\t%2", operands);
7108 return "";
7111 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7112 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7113 when fucom should be used. */
7115 const char *
7116 output_fp_compare (insn, operands, eflags_p, unordered_p)
7117 rtx insn;
7118 rtx *operands;
7119 int eflags_p, unordered_p;
7121 int stack_top_dies;
7122 rtx cmp_op0 = operands[0];
7123 rtx cmp_op1 = operands[1];
7124 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7126 if (eflags_p == 2)
7128 cmp_op0 = cmp_op1;
7129 cmp_op1 = operands[2];
7131 if (is_sse)
7133 if (GET_MODE (operands[0]) == SFmode)
7134 if (unordered_p)
7135 return "ucomiss\t{%1, %0|%0, %1}";
7136 else
7137 return "comiss\t{%1, %0|%0, %y}";
7138 else
7139 if (unordered_p)
7140 return "ucomisd\t{%1, %0|%0, %1}";
7141 else
7142 return "comisd\t{%1, %0|%0, %y}";
7145 if (! STACK_TOP_P (cmp_op0))
7146 abort ();
7148 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7150 if (STACK_REG_P (cmp_op1)
7151 && stack_top_dies
7152 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7153 && REGNO (cmp_op1) != FIRST_STACK_REG)
7155 /* If the top of the 387 stack dies, and the other operand
7156 is also a stack register that dies, then this must be a
7157 `fcompp' float compare */
7159 if (eflags_p == 1)
7161 /* There is no double popping fcomi variant. Fortunately,
7162 eflags is immune from the fstp's cc clobbering. */
7163 if (unordered_p)
7164 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7165 else
7166 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7167 return "fstp\t%y0";
7169 else
7171 if (eflags_p == 2)
7173 if (unordered_p)
7174 return "fucompp\n\tfnstsw\t%0";
7175 else
7176 return "fcompp\n\tfnstsw\t%0";
7178 else
7180 if (unordered_p)
7181 return "fucompp";
7182 else
7183 return "fcompp";
7187 else
7189 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7191 static const char * const alt[24] =
7193 "fcom%z1\t%y1",
7194 "fcomp%z1\t%y1",
7195 "fucom%z1\t%y1",
7196 "fucomp%z1\t%y1",
7198 "ficom%z1\t%y1",
7199 "ficomp%z1\t%y1",
7200 NULL,
7201 NULL,
7203 "fcomi\t{%y1, %0|%0, %y1}",
7204 "fcomip\t{%y1, %0|%0, %y1}",
7205 "fucomi\t{%y1, %0|%0, %y1}",
7206 "fucomip\t{%y1, %0|%0, %y1}",
7208 NULL,
7209 NULL,
7210 NULL,
7211 NULL,
7213 "fcom%z2\t%y2\n\tfnstsw\t%0",
7214 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7215 "fucom%z2\t%y2\n\tfnstsw\t%0",
7216 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7218 "ficom%z2\t%y2\n\tfnstsw\t%0",
7219 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7220 NULL,
7221 NULL
7224 int mask;
7225 const char *ret;
7227 mask = eflags_p << 3;
7228 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7229 mask |= unordered_p << 1;
7230 mask |= stack_top_dies;
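/* Worked example (for exposition): an fcomi compare against a live FP
   register whose stack top dies gives eflags_p == 1, a non-integer
   operand, unordered_p == 0 and stack_top_dies == 1, so
   mask == (1 << 3) | 1 == 9 and alt[9] == "fcomip\t{%y1, %0|%0, %y1}".  */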
7232 if (mask >= 24)
7233 abort ();
7234 ret = alt[mask];
7235 if (ret == NULL)
7236 abort ();
7238 return ret;
7242 void
7243 ix86_output_addr_vec_elt (file, value)
7244 FILE *file;
7245 int value;
7247 const char *directive = ASM_LONG;
7249 if (TARGET_64BIT)
7251 #ifdef ASM_QUAD
7252 directive = ASM_QUAD;
7253 #else
7254 abort ();
7255 #endif
7258 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7261 void
7262 ix86_output_addr_diff_elt (file, value, rel)
7263 FILE *file;
7264 int value, rel;
7266 if (TARGET_64BIT)
7267 fprintf (file, "%s%s%d-%s%d\n",
7268 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7269 else if (HAVE_AS_GOTOFF_IN_DATA)
7270 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7271 else
7272 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7273 ASM_LONG, LPREFIX, value);
7276 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7277 for the target. */
7279 void
7280 ix86_expand_clear (dest)
7281 rtx dest;
7283 rtx tmp;
7285 /* We play register width games, which are only valid after reload. */
7286 if (!reload_completed)
7287 abort ();
7289 /* Avoid HImode and its attendant prefix byte. */
7290 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7291 dest = gen_rtx_REG (SImode, REGNO (dest));
7293 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7295 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7296 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7298 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7299 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7302 emit_insn (tmp);
7305 /* X is an unchanging MEM. If it is a constant pool reference, return
7306 the constant pool rtx, else NULL. */
7308 static rtx
7309 maybe_get_pool_constant (x)
7310 rtx x;
7312 x = XEXP (x, 0);
7314 if (flag_pic)
7316 if (GET_CODE (x) != PLUS)
7317 return NULL_RTX;
7318 if (XEXP (x, 0) != pic_offset_table_rtx)
7319 return NULL_RTX;
7320 x = XEXP (x, 1);
7321 if (GET_CODE (x) != CONST)
7322 return NULL_RTX;
7323 x = XEXP (x, 0);
7324 if (GET_CODE (x) != UNSPEC)
7325 return NULL_RTX;
7326 if (XINT (x, 1) != UNSPEC_GOTOFF)
7327 return NULL_RTX;
7328 x = XVECEXP (x, 0, 0);
7331 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7332 return get_pool_constant (x);
7334 return NULL_RTX;
7337 void
7338 ix86_expand_move (mode, operands)
7339 enum machine_mode mode;
7340 rtx operands[];
7342 int strict = (reload_in_progress || reload_completed);
7343 rtx insn, op0, op1, tmp;
7345 op0 = operands[0];
7346 op1 = operands[1];
7348 /* ??? We have a slight problem. We need to say that tls symbols are
7349 not legitimate constants so that reload does not helpfully reload
7350 these constants from a REG_EQUIV, which we cannot handle. (Recall
7351 that general- and local-dynamic address resolution requires a
7352 function call.)
7354 However, if we say that tls symbols are not legitimate constants,
7355 then emit_move_insn helpfully drops them into the constant pool.
7357 It is far easier to work around emit_move_insn than reload. Recognize
7358 the MEM that we would have created and extract the symbol_ref. */
7360 if (mode == Pmode
7361 && GET_CODE (op1) == MEM
7362 && RTX_UNCHANGING_P (op1))
7364 tmp = maybe_get_pool_constant (op1);
7365 /* Note that we only care about symbolic constants here, which
7366 unlike CONST_INT will always have a proper mode. */
7367 if (tmp && GET_MODE (tmp) == Pmode)
7368 op1 = tmp;
7371 if (tls_symbolic_operand (op1, Pmode))
7373 op1 = legitimize_address (op1, op1, VOIDmode);
7374 if (GET_CODE (op0) == MEM)
7376 tmp = gen_reg_rtx (mode);
7377 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7378 op1 = tmp;
7381 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7383 if (GET_CODE (op0) == MEM)
7384 op1 = force_reg (Pmode, op1);
7385 else
7387 rtx temp = op0;
7388 if (GET_CODE (temp) != REG)
7389 temp = gen_reg_rtx (Pmode);
7390 temp = legitimize_pic_address (op1, temp);
7391 if (temp == op0)
7392 return;
7393 op1 = temp;
7396 else
7398 if (GET_CODE (op0) == MEM
7399 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7400 || !push_operand (op0, mode))
7401 && GET_CODE (op1) == MEM)
7402 op1 = force_reg (mode, op1);
7404 if (push_operand (op0, mode)
7405 && ! general_no_elim_operand (op1, mode))
7406 op1 = copy_to_mode_reg (mode, op1);
7408 /* Force large constants in 64bit compilation into register
7409 to get them CSEed. */
7410 if (TARGET_64BIT && mode == DImode
7411 && immediate_operand (op1, mode)
7412 && !x86_64_zero_extended_value (op1)
7413 && !register_operand (op0, mode)
7414 && optimize && !reload_completed && !reload_in_progress)
7415 op1 = copy_to_mode_reg (mode, op1);
7417 if (FLOAT_MODE_P (mode))
7419 /* If we are loading a floating point constant to a register,
7420 force the value to memory now, since we'll get better code
7421 out the back end. */
7423 if (strict)
7425 else if (GET_CODE (op1) == CONST_DOUBLE
7426 && register_operand (op0, mode))
7427 op1 = validize_mem (force_const_mem (mode, op1));
7431 insn = gen_rtx_SET (VOIDmode, op0, op1);
7433 emit_insn (insn);
7436 void
7437 ix86_expand_vector_move (mode, operands)
7438 enum machine_mode mode;
7439 rtx operands[];
7441 /* Force constants other than zero into memory. We do not know how
7442 the instructions used to build constants modify the upper 64 bits
7443 of the register; once we have that information we may be able
7444 to handle some of them more efficiently. */
7445 if ((reload_in_progress | reload_completed) == 0
7446 && register_operand (operands[0], mode)
7447 && CONSTANT_P (operands[1]))
7449 rtx addr = gen_reg_rtx (Pmode);
7450 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7451 operands[1] = gen_rtx_MEM (mode, addr);
7454 /* Make operand1 a register if it isn't already. */
7455 if ((reload_in_progress | reload_completed) == 0
7456 && !register_operand (operands[0], mode)
7457 && !register_operand (operands[1], mode)
7458 && operands[1] != CONST0_RTX (mode))
7460 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7461 emit_move_insn (operands[0], temp);
7462 return;
7465 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7468 /* Attempt to expand a binary operator. Make the expansion closer to the
7469 actual machine than just general_operand, which would allow 3 separate
7470 memory references (one output, two input) in a single insn. */
7472 void
7473 ix86_expand_binary_operator (code, mode, operands)
7474 enum rtx_code code;
7475 enum machine_mode mode;
7476 rtx operands[];
7478 int matching_memory;
7479 rtx src1, src2, dst, op, clob;
7481 dst = operands[0];
7482 src1 = operands[1];
7483 src2 = operands[2];
7485 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7486 if (GET_RTX_CLASS (code) == 'c'
7487 && (rtx_equal_p (dst, src2)
7488 || immediate_operand (src1, mode)))
7490 rtx temp = src1;
7491 src1 = src2;
7492 src2 = temp;
7495 /* If the destination is memory, and we do not have matching source
7496 operands, do things in registers. */
7497 matching_memory = 0;
7498 if (GET_CODE (dst) == MEM)
7500 if (rtx_equal_p (dst, src1))
7501 matching_memory = 1;
7502 else if (GET_RTX_CLASS (code) == 'c'
7503 && rtx_equal_p (dst, src2))
7504 matching_memory = 2;
7505 else
7506 dst = gen_reg_rtx (mode);
7509 /* Both source operands cannot be in memory. */
7510 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7512 if (matching_memory != 2)
7513 src2 = force_reg (mode, src2);
7514 else
7515 src1 = force_reg (mode, src1);
7518 /* If the operation is not commutable, source 1 cannot be a constant
7519 or non-matching memory. */
7520 if ((CONSTANT_P (src1)
7521 || (!matching_memory && GET_CODE (src1) == MEM))
7522 && GET_RTX_CLASS (code) != 'c')
7523 src1 = force_reg (mode, src1);
7525 /* If optimizing, copy to regs to improve CSE */
7526 if (optimize && ! no_new_pseudos)
7528 if (GET_CODE (dst) == MEM)
7529 dst = gen_reg_rtx (mode);
7530 if (GET_CODE (src1) == MEM)
7531 src1 = force_reg (mode, src1);
7532 if (GET_CODE (src2) == MEM)
7533 src2 = force_reg (mode, src2);
7536 /* Emit the instruction. */
7538 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7539 if (reload_in_progress)
7541 /* Reload doesn't know about the flags register, and doesn't know that
7542 it doesn't want to clobber it. We can only do this with PLUS. */
7543 if (code != PLUS)
7544 abort ();
7545 emit_insn (op);
7547 else
7549 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7550 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7553 /* Fix up the destination if needed. */
7554 if (dst != operands[0])
7555 emit_move_insn (operands[0], dst);
7558 /* Return TRUE or FALSE depending on whether the binary operator meets the
7559 appropriate constraints. */
7562 ix86_binary_operator_ok (code, mode, operands)
7563 enum rtx_code code;
7564 enum machine_mode mode ATTRIBUTE_UNUSED;
7565 rtx operands[3];
7567 /* Both source operands cannot be in memory. */
7568 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7569 return 0;
7570 /* If the operation is not commutable, source 1 cannot be a constant. */
7571 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7572 return 0;
7573 /* If the destination is memory, we must have a matching source operand. */
7574 if (GET_CODE (operands[0]) == MEM
7575 && ! (rtx_equal_p (operands[0], operands[1])
7576 || (GET_RTX_CLASS (code) == 'c'
7577 && rtx_equal_p (operands[0], operands[2]))))
7578 return 0;
7579 /* If the operation is not commutable and the source 1 is memory, we must
7580 have a matching destination. */
7581 if (GET_CODE (operands[1]) == MEM
7582 && GET_RTX_CLASS (code) != 'c'
7583 && ! rtx_equal_p (operands[0], operands[1]))
7584 return 0;
7585 return 1;
7588 /* Attempt to expand a unary operator. Make the expansion closer to the
7589 actual machine than just general_operand, which would allow 2 separate
7590 memory references (one output, one input) in a single insn. */
7592 void
7593 ix86_expand_unary_operator (code, mode, operands)
7594 enum rtx_code code;
7595 enum machine_mode mode;
7596 rtx operands[];
7598 int matching_memory;
7599 rtx src, dst, op, clob;
7601 dst = operands[0];
7602 src = operands[1];
7604 /* If the destination is memory, and we do not have matching source
7605 operands, do things in registers. */
7606 matching_memory = 0;
7607 if (GET_CODE (dst) == MEM)
7609 if (rtx_equal_p (dst, src))
7610 matching_memory = 1;
7611 else
7612 dst = gen_reg_rtx (mode);
7615 /* When source operand is memory, destination must match. */
7616 if (!matching_memory && GET_CODE (src) == MEM)
7617 src = force_reg (mode, src);
7619 /* If optimizing, copy to regs to improve CSE */
7620 if (optimize && ! no_new_pseudos)
7622 if (GET_CODE (dst) == MEM)
7623 dst = gen_reg_rtx (mode);
7624 if (GET_CODE (src) == MEM)
7625 src = force_reg (mode, src);
7628 /* Emit the instruction. */
7630 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7631 if (reload_in_progress || code == NOT)
7633 /* Reload doesn't know about the flags register, and doesn't know that
7634 it doesn't want to clobber it. */
7635 if (code != NOT)
7636 abort ();
7637 emit_insn (op);
7639 else
7641 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7642 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7645 /* Fix up the destination if needed. */
7646 if (dst != operands[0])
7647 emit_move_insn (operands[0], dst);
7650 /* Return TRUE or FALSE depending on whether the unary operator meets the
7651 appropriate constraints. */
7654 ix86_unary_operator_ok (code, mode, operands)
7655 enum rtx_code code ATTRIBUTE_UNUSED;
7656 enum machine_mode mode ATTRIBUTE_UNUSED;
7657 rtx operands[2] ATTRIBUTE_UNUSED;
7659 /* If one of operands is memory, source and destination must match. */
7660 if ((GET_CODE (operands[0]) == MEM
7661 || GET_CODE (operands[1]) == MEM)
7662 && ! rtx_equal_p (operands[0], operands[1]))
7663 return FALSE;
7664 return TRUE;
7667 /* Return TRUE or FALSE depending on whether the first SET in INSN
7668 has source and destination with matching CC modes, and that the
7669 CC mode is at least as constrained as REQ_MODE. */
7672 ix86_match_ccmode (insn, req_mode)
7673 rtx insn;
7674 enum machine_mode req_mode;
7676 rtx set;
7677 enum machine_mode set_mode;
7679 set = PATTERN (insn);
7680 if (GET_CODE (set) == PARALLEL)
7681 set = XVECEXP (set, 0, 0);
7682 if (GET_CODE (set) != SET)
7683 abort ();
7684 if (GET_CODE (SET_SRC (set)) != COMPARE)
7685 abort ();
7687 set_mode = GET_MODE (SET_DEST (set));
7688 switch (set_mode)
7690 case CCNOmode:
7691 if (req_mode != CCNOmode
7692 && (req_mode != CCmode
7693 || XEXP (SET_SRC (set), 1) != const0_rtx))
7694 return 0;
7695 break;
7696 case CCmode:
7697 if (req_mode == CCGCmode)
7698 return 0;
7699 /* FALLTHRU */
7700 case CCGCmode:
7701 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7702 return 0;
7703 /* FALLTHRU */
7704 case CCGOCmode:
7705 if (req_mode == CCZmode)
7706 return 0;
7707 /* FALLTHRU */
7708 case CCZmode:
7709 break;
7711 default:
7712 abort ();
7715 return (GET_MODE (SET_SRC (set)) == set_mode);
7718 /* Generate insn patterns to do an integer compare of OPERANDS. */
7720 static rtx
7721 ix86_expand_int_compare (code, op0, op1)
7722 enum rtx_code code;
7723 rtx op0, op1;
7725 enum machine_mode cmpmode;
7726 rtx tmp, flags;
7728 cmpmode = SELECT_CC_MODE (code, op0, op1);
7729 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7731 /* This is very simple, but making the interface the same as in the
7732 FP case makes the rest of the code easier. */
7733 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7734 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7736 /* Return the test that should be put into the flags user, i.e.
7737 the bcc, scc, or cmov instruction. */
7738 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7741 /* Figure out whether to use ordered or unordered fp comparisons.
7742 Return the appropriate mode to use. */
7744 enum machine_mode
7745 ix86_fp_compare_mode (code)
7746 enum rtx_code code ATTRIBUTE_UNUSED;
7748 /* ??? In order to make all comparisons reversible, we do all comparisons
7749 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7750 all forms of trapping and nontrapping comparisons, we can make inequality
7751 comparisons trapping again, since it results in better code when using
7752 FCOM based compares. */
7753 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7756 enum machine_mode
7757 ix86_cc_mode (code, op0, op1)
7758 enum rtx_code code;
7759 rtx op0, op1;
7761 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7762 return ix86_fp_compare_mode (code);
7763 switch (code)
7765 /* Only zero flag is needed. */
7766 case EQ: /* ZF=0 */
7767 case NE: /* ZF!=0 */
7768 return CCZmode;
7769 /* Codes needing carry flag. */
7770 case GEU: /* CF=0 */
7771 case GTU: /* CF=0 & ZF=0 */
7772 case LTU: /* CF=1 */
7773 case LEU: /* CF=1 | ZF=1 */
7774 return CCmode;
7775 /* Codes possibly doable only with sign flag when
7776 comparing against zero. */
7777 case GE: /* SF=OF or SF=0 */
7778 case LT: /* SF<>OF or SF=1 */
7779 if (op1 == const0_rtx)
7780 return CCGOCmode;
7781 else
7782 /* For other cases Carry flag is not required. */
7783 return CCGCmode;
7784 /* Codes doable only with sign flag when comparing
7785 against zero, but we lack a jump instruction for it,
7786 so we need to use relational tests against overflow,
7787 which thus needs to be zero. */
7788 case GT: /* ZF=0 & SF=OF */
7789 case LE: /* ZF=1 | SF<>OF */
7790 if (op1 == const0_rtx)
7791 return CCNOmode;
7792 else
7793 return CCGCmode;
7794 /* The strcmp pattern does (use flags), and combine may ask us for the
7795 proper mode. */
7796 case USE:
7797 return CCmode;
7798 default:
7799 abort ();
7803 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7806 ix86_use_fcomi_compare (code)
7807 enum rtx_code code ATTRIBUTE_UNUSED;
7809 enum rtx_code swapped_code = swap_condition (code);
7810 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7811 || (ix86_fp_comparison_cost (swapped_code)
7812 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7815 /* Swap, force into registers, or otherwise massage the two operands
7816 to a fp comparison. The operands are updated in place; the new
7817 comparison code is returned. */
7819 static enum rtx_code
7820 ix86_prepare_fp_compare_args (code, pop0, pop1)
7821 enum rtx_code code;
7822 rtx *pop0, *pop1;
7824 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7825 rtx op0 = *pop0, op1 = *pop1;
7826 enum machine_mode op_mode = GET_MODE (op0);
7827 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7829 /* All of the unordered compare instructions only work on registers.
7830 The same is true of the XFmode compare instructions. The same is
7831 true of the fcomi compare instructions. */
7833 if (!is_sse
7834 && (fpcmp_mode == CCFPUmode
7835 || op_mode == XFmode
7836 || op_mode == TFmode
7837 || ix86_use_fcomi_compare (code)))
7839 op0 = force_reg (op_mode, op0);
7840 op1 = force_reg (op_mode, op1);
7842 else
7844 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7845 things around if they appear profitable, otherwise force op0
7846 into a register. */
7848 if (standard_80387_constant_p (op0) == 0
7849 || (GET_CODE (op0) == MEM
7850 && ! (standard_80387_constant_p (op1) == 0
7851 || GET_CODE (op1) == MEM)))
7853 rtx tmp;
7854 tmp = op0, op0 = op1, op1 = tmp;
7855 code = swap_condition (code);
7858 if (GET_CODE (op0) != REG)
7859 op0 = force_reg (op_mode, op0);
7861 if (CONSTANT_P (op1))
7863 if (standard_80387_constant_p (op1))
7864 op1 = force_reg (op_mode, op1);
7865 else
7866 op1 = validize_mem (force_const_mem (op_mode, op1));
7870 /* Try to rearrange the comparison to make it cheaper. */
7871 if (ix86_fp_comparison_cost (code)
7872 > ix86_fp_comparison_cost (swap_condition (code))
7873 && (GET_CODE (op1) == REG || !no_new_pseudos))
7875 rtx tmp;
7876 tmp = op0, op0 = op1, op1 = tmp;
7877 code = swap_condition (code);
7878 if (GET_CODE (op0) != REG)
7879 op0 = force_reg (op_mode, op0);
7882 *pop0 = op0;
7883 *pop1 = op1;
7884 return code;
7887 /* Convert comparison codes we use to represent FP comparison to integer
7888 code that will result in proper branch. Return UNKNOWN if no such code
7889 is available. */
7890 static enum rtx_code
7891 ix86_fp_compare_code_to_integer (code)
7892 enum rtx_code code;
7894 switch (code)
7896 case GT:
7897 return GTU;
7898 case GE:
7899 return GEU;
7900 case ORDERED:
7901 case UNORDERED:
7902 return code;
7903 break;
7904 case UNEQ:
7905 return EQ;
7906 break;
7907 case UNLT:
7908 return LTU;
7909 break;
7910 case UNLE:
7911 return LEU;
7912 break;
7913 case LTGT:
7914 return NE;
7915 break;
7916 default:
7917 return UNKNOWN;
7921 /* Split comparison code CODE into comparisons we can do using branch
7922 instructions. BYPASS_CODE is comparison code for branch that will
7923 branch around FIRST_CODE and SECOND_CODE. If one of the branches
7924 is not required, its value is set to NIL.
7925 We never require more than two branches. */
7926 static void
7927 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
7928 enum rtx_code code, *bypass_code, *first_code, *second_code;
7930 *first_code = code;
7931 *bypass_code = NIL;
7932 *second_code = NIL;
7934 /* The fcomi comparison sets flags as follows:
7936 cmp ZF PF CF
7937 > 0 0 0
7938 < 0 0 1
7939 = 1 0 0
7940 un 1 1 1 */
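/* Example of why the splitting is needed (added for exposition): a plain
   "jb" after fcomi would also be taken for unordered operands, since those
   set CF as well, so under TARGET_IEEE_FP the LT case below becomes
   first_code = UNLT guarded by bypass_code = UNORDERED, i.e. branch around
   on PF before testing CF.  */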
7942 switch (code)
7944 case GT: /* GTU - CF=0 & ZF=0 */
7945 case GE: /* GEU - CF=0 */
7946 case ORDERED: /* PF=0 */
7947 case UNORDERED: /* PF=1 */
7948 case UNEQ: /* EQ - ZF=1 */
7949 case UNLT: /* LTU - CF=1 */
7950 case UNLE: /* LEU - CF=1 | ZF=1 */
7951 case LTGT: /* EQ - ZF=0 */
7952 break;
7953 case LT: /* LTU - CF=1 - fails on unordered */
7954 *first_code = UNLT;
7955 *bypass_code = UNORDERED;
7956 break;
7957 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
7958 *first_code = UNLE;
7959 *bypass_code = UNORDERED;
7960 break;
7961 case EQ: /* EQ - ZF=1 - fails on unordered */
7962 *first_code = UNEQ;
7963 *bypass_code = UNORDERED;
7964 break;
7965 case NE: /* NE - ZF=0 - fails on unordered */
7966 *first_code = LTGT;
7967 *second_code = UNORDERED;
7968 break;
7969 case UNGE: /* GEU - CF=0 - fails on unordered */
7970 *first_code = GE;
7971 *second_code = UNORDERED;
7972 break;
7973 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
7974 *first_code = GT;
7975 *second_code = UNORDERED;
7976 break;
7977 default:
7978 abort ();
7980 if (!TARGET_IEEE_FP)
7982 *second_code = NIL;
7983 *bypass_code = NIL;
7987 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
7988 All the following functions use the number of instructions as the cost metric.
7989 In the future this should be tweaked to compute bytes for optimize_size and
7990 to take into account the performance of various instructions on various CPUs. */
7991 static int
7992 ix86_fp_comparison_arithmetics_cost (code)
7993 enum rtx_code code;
7995 if (!TARGET_IEEE_FP)
7996 return 4;
7997 /* The cost of code output by ix86_expand_fp_compare. */
7998 switch (code)
8000 case UNLE:
8001 case UNLT:
8002 case LTGT:
8003 case GT:
8004 case GE:
8005 case UNORDERED:
8006 case ORDERED:
8007 case UNEQ:
8008 return 4;
8009 break;
8010 case LT:
8011 case NE:
8012 case EQ:
8013 case UNGE:
8014 return 5;
8015 break;
8016 case LE:
8017 case UNGT:
8018 return 6;
8019 break;
8020 default:
8021 abort ();
8025 /* Return cost of comparison done using fcomi operation.
8026 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8027 static int
8028 ix86_fp_comparison_fcomi_cost (code)
8029 enum rtx_code code;
8031 enum rtx_code bypass_code, first_code, second_code;
8032 /* Return an arbitrarily high cost when the instruction is not supported -
8033 this prevents gcc from using it. */
8034 if (!TARGET_CMOVE)
8035 return 1024;
8036 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8037 return (bypass_code != NIL || second_code != NIL) + 2;
8040 /* Return cost of comparison done using sahf operation.
8041 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8042 static int
8043 ix86_fp_comparison_sahf_cost (code)
8044 enum rtx_code code;
8046 enum rtx_code bypass_code, first_code, second_code;
8047 /* Return an arbitrarily high cost when the instruction is not preferred -
8048 this keeps gcc from using it. */
8049 if (!TARGET_USE_SAHF && !optimize_size)
8050 return 1024;
8051 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8052 return (bypass_code != NIL || second_code != NIL) + 3;
8055 /* Compute cost of the comparison done using any method.
8056 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8057 static int
8058 ix86_fp_comparison_cost (code)
8059 enum rtx_code code;
8061 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8062 int min;
8064 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8065 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8067 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8068 if (min > sahf_cost)
8069 min = sahf_cost;
8070 if (min > fcomi_cost)
8071 min = fcomi_cost;
8072 return min;
8075 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8077 static rtx
8078 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8079 enum rtx_code code;
8080 rtx op0, op1, scratch;
8081 rtx *second_test;
8082 rtx *bypass_test;
8084 enum machine_mode fpcmp_mode, intcmp_mode;
8085 rtx tmp, tmp2;
8086 int cost = ix86_fp_comparison_cost (code);
8087 enum rtx_code bypass_code, first_code, second_code;
8089 fpcmp_mode = ix86_fp_compare_mode (code);
8090 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8092 if (second_test)
8093 *second_test = NULL_RTX;
8094 if (bypass_test)
8095 *bypass_test = NULL_RTX;
8097 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8099 /* Do fcomi/sahf based test when profitable. */
8100 if ((bypass_code == NIL || bypass_test)
8101 && (second_code == NIL || second_test)
8102 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8104 if (TARGET_CMOVE)
8106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8107 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8108 tmp);
8109 emit_insn (tmp);
8111 else
8113 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8114 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8115 if (!scratch)
8116 scratch = gen_reg_rtx (HImode);
8117 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8118 emit_insn (gen_x86_sahf_1 (scratch));
8121 /* The FP codes work out to act like unsigned. */
8122 intcmp_mode = fpcmp_mode;
8123 code = first_code;
8124 if (bypass_code != NIL)
8125 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8127 const0_rtx);
8128 if (second_code != NIL)
8129 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8130 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8131 const0_rtx);
8133 else
8135 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8136 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8137 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8138 if (!scratch)
8139 scratch = gen_reg_rtx (HImode);
8140 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8142 /* In the unordered case, we have to check C2 for NaN's, which
8143 doesn't happen to work out to anything nice combination-wise.
8144 So do some bit twiddling on the value we've got in AH to come
8145 up with an appropriate set of condition codes. */
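/* For reference: after fnstsw the x87 condition bits land in AH as
   C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45 mask used below selects
   all three and 0x44 selects C3|C2; sahf then maps C0 to CF, C2 to PF and
   C3 to ZF.  (Explanatory note for the magic constants below.)  */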
8147 intcmp_mode = CCNOmode;
8148 switch (code)
8150 case GT:
8151 case UNGT:
8152 if (code == GT || !TARGET_IEEE_FP)
8154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8155 code = EQ;
8157 else
8159 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8160 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8161 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8162 intcmp_mode = CCmode;
8163 code = GEU;
8165 break;
8166 case LT:
8167 case UNLT:
8168 if (code == LT && TARGET_IEEE_FP)
8170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8171 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8172 intcmp_mode = CCmode;
8173 code = EQ;
8175 else
8177 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8178 code = NE;
8180 break;
8181 case GE:
8182 case UNGE:
8183 if (code == GE || !TARGET_IEEE_FP)
8185 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8186 code = EQ;
8188 else
8190 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8191 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8192 GEN_INT (0x01)));
8193 code = NE;
8195 break;
8196 case LE:
8197 case UNLE:
8198 if (code == LE && TARGET_IEEE_FP)
8200 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8201 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8202 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8203 intcmp_mode = CCmode;
8204 code = LTU;
8206 else
8208 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8209 code = NE;
8211 break;
8212 case EQ:
8213 case UNEQ:
8214 if (code == EQ && TARGET_IEEE_FP)
8216 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8218 intcmp_mode = CCmode;
8219 code = EQ;
8221 else
8223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8224 code = NE;
8225 break;
8227 break;
8228 case NE:
8229 case LTGT:
8230 if (code == NE && TARGET_IEEE_FP)
8232 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8233 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8234 GEN_INT (0x40)));
8235 code = NE;
8237 else
8239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8240 code = EQ;
8242 break;
8244 case UNORDERED:
8245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8246 code = NE;
8247 break;
8248 case ORDERED:
8249 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8250 code = EQ;
8251 break;
8253 default:
8254 abort ();
8258 /* Return the test that should be put into the flags user, i.e.
8259 the bcc, scc, or cmov instruction. */
8260 return gen_rtx_fmt_ee (code, VOIDmode,
8261 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8262 const0_rtx);
8266 ix86_expand_compare (code, second_test, bypass_test)
8267 enum rtx_code code;
8268 rtx *second_test, *bypass_test;
8270 rtx op0, op1, ret;
8271 op0 = ix86_compare_op0;
8272 op1 = ix86_compare_op1;
8274 if (second_test)
8275 *second_test = NULL_RTX;
8276 if (bypass_test)
8277 *bypass_test = NULL_RTX;
8279 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8280 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8281 second_test, bypass_test);
8282 else
8283 ret = ix86_expand_int_compare (code, op0, op1);
8285 return ret;
8288 /* Return true if the CODE will result in nontrivial jump sequence. */
8289 bool
8290 ix86_fp_jump_nontrivial_p (code)
8291 enum rtx_code code;
8293 enum rtx_code bypass_code, first_code, second_code;
8294 if (!TARGET_CMOVE)
8295 return true;
8296 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8297 return bypass_code != NIL || second_code != NIL;
8300 void
8301 ix86_expand_branch (code, label)
8302 enum rtx_code code;
8303 rtx label;
8305 rtx tmp;
8307 switch (GET_MODE (ix86_compare_op0))
8309 case QImode:
8310 case HImode:
8311 case SImode:
8312 simple:
8313 tmp = ix86_expand_compare (code, NULL, NULL);
8314 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8315 gen_rtx_LABEL_REF (VOIDmode, label),
8316 pc_rtx);
8317 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8318 return;
8320 case SFmode:
8321 case DFmode:
8322 case XFmode:
8323 case TFmode:
8325 rtvec vec;
8326 int use_fcomi;
8327 enum rtx_code bypass_code, first_code, second_code;
8329 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8330 &ix86_compare_op1);
8332 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8334 /* Check whether we will use the natural sequence with one jump. If
8335 so, we can expand the jump early. Otherwise delay expansion by
8336 creating a compound insn so as not to confuse the optimizers. */
8337 if (bypass_code == NIL && second_code == NIL
8338 && TARGET_CMOVE)
8340 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8341 gen_rtx_LABEL_REF (VOIDmode, label),
8342 pc_rtx, NULL_RTX);
8344 else
8346 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8347 ix86_compare_op0, ix86_compare_op1);
8348 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8349 gen_rtx_LABEL_REF (VOIDmode, label),
8350 pc_rtx);
8351 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8353 use_fcomi = ix86_use_fcomi_compare (code);
8354 vec = rtvec_alloc (3 + !use_fcomi);
8355 RTVEC_ELT (vec, 0) = tmp;
8356 RTVEC_ELT (vec, 1)
8357 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8358 RTVEC_ELT (vec, 2)
8359 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8360 if (! use_fcomi)
8361 RTVEC_ELT (vec, 3)
8362 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8364 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8366 return;
8369 case DImode:
8370 if (TARGET_64BIT)
8371 goto simple;
8372 /* Expand DImode branch into multiple compare+branch. */
8374 rtx lo[2], hi[2], label2;
8375 enum rtx_code code1, code2, code3;
8377 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8379 tmp = ix86_compare_op0;
8380 ix86_compare_op0 = ix86_compare_op1;
8381 ix86_compare_op1 = tmp;
8382 code = swap_condition (code);
8384 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8385 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8387 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8388 avoid two branches. This costs one extra insn, so disable when
8389 optimizing for size. */
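/* Illustrative example: to branch on a == b we compute
   t = (hi(a) ^ hi(b)) | (lo(a) ^ lo(b)) and test t against zero, so a
   single compare-and-branch replaces two of them.  */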
8391 if ((code == EQ || code == NE)
8392 && (!optimize_size
8393 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8395 rtx xor0, xor1;
8397 xor1 = hi[0];
8398 if (hi[1] != const0_rtx)
8399 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8400 NULL_RTX, 0, OPTAB_WIDEN);
8402 xor0 = lo[0];
8403 if (lo[1] != const0_rtx)
8404 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8405 NULL_RTX, 0, OPTAB_WIDEN);
8407 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8408 NULL_RTX, 0, OPTAB_WIDEN);
8410 ix86_compare_op0 = tmp;
8411 ix86_compare_op1 = const0_rtx;
8412 ix86_expand_branch (code, label);
8413 return;
8416 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8417 op1 is a constant and the low word is zero, then we can just
8418 examine the high word. */
8420 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8421 switch (code)
8423 case LT: case LTU: case GE: case GEU:
8424 ix86_compare_op0 = hi[0];
8425 ix86_compare_op1 = hi[1];
8426 ix86_expand_branch (code, label);
8427 return;
8428 default:
8429 break;
8432 /* Otherwise, we need two or three jumps. */
8434 label2 = gen_label_rtx ();
8436 code1 = code;
8437 code2 = swap_condition (code);
8438 code3 = unsigned_condition (code);
8440 switch (code)
8442 case LT: case GT: case LTU: case GTU:
8443 break;
8445 case LE: code1 = LT; code2 = GT; break;
8446 case GE: code1 = GT; code2 = LT; break;
8447 case LEU: code1 = LTU; code2 = GTU; break;
8448 case GEU: code1 = GTU; code2 = LTU; break;
8450 case EQ: code1 = NIL; code2 = NE; break;
8451 case NE: code2 = NIL; break;
8453 default:
8454 abort ();
8458 * a < b =>
8459 * if (hi(a) < hi(b)) goto true;
8460 * if (hi(a) > hi(b)) goto false;
8461 * if (lo(a) < lo(b)) goto true;
8462 * false:
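* Note (added explanation): once the high halves are known equal, the low
* halves compare as unsigned quantities, which is why code3 below is the
* unsigned variant of the requested condition.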
8465 ix86_compare_op0 = hi[0];
8466 ix86_compare_op1 = hi[1];
8468 if (code1 != NIL)
8469 ix86_expand_branch (code1, label);
8470 if (code2 != NIL)
8471 ix86_expand_branch (code2, label2);
8473 ix86_compare_op0 = lo[0];
8474 ix86_compare_op1 = lo[1];
8475 ix86_expand_branch (code3, label);
8477 if (code2 != NIL)
8478 emit_label (label2);
8479 return;
8482 default:
8483 abort ();
8487 /* Split branch based on floating point condition. */
8488 void
8489 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8490 enum rtx_code code;
8491 rtx op1, op2, target1, target2, tmp;
8493 rtx second, bypass;
8494 rtx label = NULL_RTX;
8495 rtx condition;
8496 int bypass_probability = -1, second_probability = -1, probability = -1;
8497 rtx i;
8499 if (target2 != pc_rtx)
8501 rtx tmp = target2;
8502 code = reverse_condition_maybe_unordered (code);
8503 target2 = target1;
8504 target1 = tmp;
8507 condition = ix86_expand_fp_compare (code, op1, op2,
8508 tmp, &second, &bypass);
8510 if (split_branch_probability >= 0)
8512 /* Distribute the probabilities across the jumps.
8513 Assume that BYPASS and SECOND always test
8514 for UNORDERED. */
8515 probability = split_branch_probability;
8517 /* A value of 1 is low enough that the probability does not need
8518 to be updated. Later we may run some experiments and see
8519 whether unordered values are more frequent in practice. */
8520 if (bypass)
8521 bypass_probability = 1;
8522 if (second)
8523 second_probability = 1;
8525 if (bypass != NULL_RTX)
8527 label = gen_label_rtx ();
8528 i = emit_jump_insn (gen_rtx_SET
8529 (VOIDmode, pc_rtx,
8530 gen_rtx_IF_THEN_ELSE (VOIDmode,
8531 bypass,
8532 gen_rtx_LABEL_REF (VOIDmode,
8533 label),
8534 pc_rtx)));
8535 if (bypass_probability >= 0)
8536 REG_NOTES (i)
8537 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8538 GEN_INT (bypass_probability),
8539 REG_NOTES (i));
8541 i = emit_jump_insn (gen_rtx_SET
8542 (VOIDmode, pc_rtx,
8543 gen_rtx_IF_THEN_ELSE (VOIDmode,
8544 condition, target1, target2)));
8545 if (probability >= 0)
8546 REG_NOTES (i)
8547 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8548 GEN_INT (probability),
8549 REG_NOTES (i));
8550 if (second != NULL_RTX)
8552 i = emit_jump_insn (gen_rtx_SET
8553 (VOIDmode, pc_rtx,
8554 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8555 target2)));
8556 if (second_probability >= 0)
8557 REG_NOTES (i)
8558 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8559 GEN_INT (second_probability),
8560 REG_NOTES (i));
8562 if (label != NULL_RTX)
8563 emit_label (label);
8567 ix86_expand_setcc (code, dest)
8568 enum rtx_code code;
8569 rtx dest;
8571 rtx ret, tmp, tmpreg;
8572 rtx second_test, bypass_test;
8574 if (GET_MODE (ix86_compare_op0) == DImode
8575 && !TARGET_64BIT)
8576 return 0; /* FAIL */
8578 if (GET_MODE (dest) != QImode)
8579 abort ();
8581 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8582 PUT_MODE (ret, QImode);
8584 tmp = dest;
8585 tmpreg = dest;
8587 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
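/* Explanatory note: when the FP comparison needs a second or bypass test
   (see ix86_fp_comparison_codes), the code below combines the two partial
   setcc results.  A second test is IORed in, while a bypass test is
   reversed and ANDed in so the result is forced to zero in the bypassed
   case.  */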
8588 if (bypass_test || second_test)
8590 rtx test = second_test;
8591 int bypass = 0;
8592 rtx tmp2 = gen_reg_rtx (QImode);
8593 if (bypass_test)
8595 if (second_test)
8596 abort ();
8597 test = bypass_test;
8598 bypass = 1;
8599 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8601 PUT_MODE (test, QImode);
8602 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8604 if (bypass)
8605 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8606 else
8607 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8610 return 1; /* DONE */
8614 ix86_expand_int_movcc (operands)
8615 rtx operands[];
8617 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8618 rtx compare_seq, compare_op;
8619 rtx second_test, bypass_test;
8620 enum machine_mode mode = GET_MODE (operands[0]);
8622 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
8623 When the comparison is done with an immediate, we can convert it to LTU or
8624 GEU by adjusting the constant. */
8626 if ((code == LEU || code == GTU)
8627 && GET_CODE (ix86_compare_op1) == CONST_INT
8628 && mode != HImode
8629 && INTVAL (ix86_compare_op1) != -1
8630 /* For x86-64, the immediate field in the instruction is 32-bit
8631 signed, so we can't increment a DImode value above 0x7fffffff. */
8632 && (!TARGET_64BIT
8633 || GET_MODE (ix86_compare_op0) != DImode
8634 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8635 && GET_CODE (operands[2]) == CONST_INT
8636 && GET_CODE (operands[3]) == CONST_INT)
8638 if (code == LEU)
8639 code = LTU;
8640 else
8641 code = GEU;
8642 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8643 GET_MODE (ix86_compare_op0));
8646 start_sequence ();
8647 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8648 compare_seq = get_insns ();
8649 end_sequence ();
8651 compare_code = GET_CODE (compare_op);
8653 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8654 HImode insns, we'd be swallowed in word prefix ops. */
8656 if (mode != HImode
8657 && (mode != DImode || TARGET_64BIT)
8658 && GET_CODE (operands[2]) == CONST_INT
8659 && GET_CODE (operands[3]) == CONST_INT)
8661 rtx out = operands[0];
8662 HOST_WIDE_INT ct = INTVAL (operands[2]);
8663 HOST_WIDE_INT cf = INTVAL (operands[3]);
8664 HOST_WIDE_INT diff;
8666 if ((compare_code == LTU || compare_code == GEU)
8667 && !second_test && !bypass_test)
8670 /* Detect overlap between destination and compare sources. */
8671 rtx tmp = out;
8673 /* To simplify rest of code, restrict to the GEU case. */
8674 if (compare_code == LTU)
8676 int tmp = ct;
8677 ct = cf;
8678 cf = tmp;
8679 compare_code = reverse_condition (compare_code);
8680 code = reverse_condition (code);
8682 diff = ct - cf;
8684 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8685 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8686 tmp = gen_reg_rtx (mode);
8688 emit_insn (compare_seq);
8689 if (mode == DImode)
8690 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8691 else
8692 emit_insn (gen_x86_movsicc_0_m1 (tmp));
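/* At this point tmp holds -1 when the carry flag is set (op0 < op1
   unsigned) and 0 otherwise; this is the classic sbb reg,reg idiom.
   Illustrative example: for (op0 >= op1 ? 5 : 4) the whole sequence is the
   compare above, sbbl tmp,tmp and addl $5,tmp (the diff == 1 case).  */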
8694 if (diff == 1)
8697 * cmpl op0,op1
8698 * sbbl dest,dest
8699 * [addl dest, ct]
8701 * Size 5 - 8.
8703 if (ct)
8704 tmp = expand_simple_binop (mode, PLUS,
8705 tmp, GEN_INT (ct),
8706 tmp, 1, OPTAB_DIRECT);
8708 else if (cf == -1)
8711 * cmpl op0,op1
8712 * sbbl dest,dest
8713 * orl $ct, dest
8715 * Size 8.
8717 tmp = expand_simple_binop (mode, IOR,
8718 tmp, GEN_INT (ct),
8719 tmp, 1, OPTAB_DIRECT);
8721 else if (diff == -1 && ct)
8724 * cmpl op0,op1
8725 * sbbl dest,dest
8726 * xorl $-1, dest
8727 * [addl dest, cf]
8729 * Size 8 - 11.
8731 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8732 if (cf)
8733 tmp = expand_simple_binop (mode, PLUS,
8734 tmp, GEN_INT (cf),
8735 tmp, 1, OPTAB_DIRECT);
8737 else
8740 * cmpl op0,op1
8741 * sbbl dest,dest
8742 * andl cf - ct, dest
8743 * [addl dest, ct]
8745 * Size 8 - 11.
8747 tmp = expand_simple_binop (mode, AND,
8748 tmp,
8749 gen_int_mode (cf - ct, mode),
8750 tmp, 1, OPTAB_DIRECT);
8751 if (ct)
8752 tmp = expand_simple_binop (mode, PLUS,
8753 tmp, GEN_INT (ct),
8754 tmp, 1, OPTAB_DIRECT);
8757 if (tmp != out)
8758 emit_move_insn (out, tmp);
8760 return 1; /* DONE */
8763 diff = ct - cf;
8764 if (diff < 0)
8766 HOST_WIDE_INT tmp;
8767 tmp = ct, ct = cf, cf = tmp;
8768 diff = -diff;
8769 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8771 /* We may be reversing an unordered compare to a normal compare, which
8772 is not valid in general (we may convert a non-trapping condition
8773 into a trapping one); however, on i386 we currently emit all
8774 comparisons unordered. */
8775 compare_code = reverse_condition_maybe_unordered (compare_code);
8776 code = reverse_condition_maybe_unordered (code);
8778 else
8780 compare_code = reverse_condition (compare_code);
8781 code = reverse_condition (code);
8785 compare_code = NIL;
8786 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8787 && GET_CODE (ix86_compare_op1) == CONST_INT)
8789 if (ix86_compare_op1 == const0_rtx
8790 && (code == LT || code == GE))
8791 compare_code = code;
8792 else if (ix86_compare_op1 == constm1_rtx)
8794 if (code == LE)
8795 compare_code = LT;
8796 else if (code == GT)
8797 compare_code = GE;
8801 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8802 if (compare_code != NIL
8803 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8804 && (cf == -1 || ct == -1))
8806 /* If lea code below could be used, only optimize
8807 if it results in a 2 insn sequence. */
8809 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8810 || diff == 3 || diff == 5 || diff == 9)
8811 || (compare_code == LT && ct == -1)
8812 || (compare_code == GE && cf == -1))
8815 * notl op1 (if necessary)
8816 * sarl $31, op1
8817 * orl cf, op1
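* Illustrative example: sarl $31 turns a negative op1 into all ones and a
* non-negative one into zero, so (x < 0 ? -1 : 7) becomes simply
* sarl $31, x ; orl $7, x.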
8819 if (ct != -1)
8821 cf = ct;
8822 ct = -1;
8823 code = reverse_condition (code);
8826 out = emit_store_flag (out, code, ix86_compare_op0,
8827 ix86_compare_op1, VOIDmode, 0, -1);
8829 out = expand_simple_binop (mode, IOR,
8830 out, GEN_INT (cf),
8831 out, 1, OPTAB_DIRECT);
8832 if (out != operands[0])
8833 emit_move_insn (operands[0], out);
8835 return 1; /* DONE */
8839 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8840 || diff == 3 || diff == 5 || diff == 9)
8841 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8844 * xorl dest,dest
8845 * cmpl op1,op2
8846 * setcc dest
8847 * lea cf(dest*(ct-cf)),dest
8849 * Size 14.
8851 * This also catches the degenerate setcc-only case.
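* Worked example (illustrative): for (a < b ? 7 : 3) the sequence is
* xorl dest,dest ; cmpl ... ; setl dest ; leal 3(,dest,4),dest, i.e. the
* 0/1 setcc result is scaled by diff == ct - cf and offset by cf.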
8854 rtx tmp;
8855 int nops;
8857 out = emit_store_flag (out, code, ix86_compare_op0,
8858 ix86_compare_op1, VOIDmode, 0, 1);
8860 nops = 0;
8861 /* On x86_64 the lea instruction operates on Pmode, so we need the arithmetic
8862 done in the proper mode to match. */
8863 if (diff == 1)
8864 tmp = out;
8865 else
8867 rtx out1;
8868 out1 = out;
8869 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8870 nops++;
8871 if (diff & 1)
8873 tmp = gen_rtx_PLUS (mode, tmp, out1);
8874 nops++;
8877 if (cf != 0)
8879 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8880 nops++;
8882 if (tmp != out
8883 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8885 if (nops == 1)
8887 rtx clob;
8889 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8890 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8892 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8893 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8894 emit_insn (tmp);
8896 else
8897 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8899 if (out != operands[0])
8900 emit_move_insn (operands[0], out);
8902 return 1; /* DONE */
8906 * General case: Jumpful:
8907 * xorl dest,dest cmpl op1, op2
8908 * cmpl op1, op2 movl ct, dest
8909 * setcc dest jcc 1f
8910 * decl dest movl cf, dest
8911 * andl (cf-ct),dest 1:
8912 * addl ct,dest
8914 * Size 20. Size 14.
8916 * This is reasonably steep, but branch mispredict costs are
8917 * high on modern cpus, so consider failing only if optimizing
8918 * for space.
8920 * %%% Parameterize branch_cost on the tuning architecture, then
8921 * use that. The 80386 couldn't care less about mispredicts.
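*
* Why the jumpless sequence above is correct (illustrative walk-through):
* setcc leaves 1 for the true case and 0 for the false one, decl maps that
* to 0 / -1, andl (cf-ct) gives 0 / (cf-ct), and the final addl ct yields
* ct / cf respectively.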
8924 if (!optimize_size && !TARGET_CMOVE)
8926 if (ct == 0)
8928 ct = cf;
8929 cf = 0;
8930 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8931 /* We may be reversing an unordered compare to a normal compare,
8932 which is not valid in general (we may convert a non-trapping
8933 condition into a trapping one); however, on i386 we currently
8934 emit all comparisons unordered. */
8935 code = reverse_condition_maybe_unordered (code);
8936 else
8938 code = reverse_condition (code);
8939 if (compare_code != NIL)
8940 compare_code = reverse_condition (compare_code);
8944 if (compare_code != NIL)
8946 /* notl op1 (if needed)
8947 sarl $31, op1
8948 andl (cf-ct), op1
8949 addl ct, op1
8951 For x < 0 (resp. x <= -1) there will be no notl,
8952 so if possible swap the constants to get rid of the
8953 complement.
8954 True/false will be -1/0 while code below (store flag
8955 followed by decrement) is 0/-1, so the constants need
8956 to be exchanged once more. */
8958 if (compare_code == GE || !cf)
8960 code = reverse_condition (code);
8961 compare_code = LT;
8963 else
8965 HOST_WIDE_INT tmp = cf;
8966 cf = ct;
8967 ct = tmp;
8970 out = emit_store_flag (out, code, ix86_compare_op0,
8971 ix86_compare_op1, VOIDmode, 0, -1);
8973 else
8975 out = emit_store_flag (out, code, ix86_compare_op0,
8976 ix86_compare_op1, VOIDmode, 0, 1);
8978 out = expand_simple_binop (mode, PLUS,
8979 out, constm1_rtx,
8980 out, 1, OPTAB_DIRECT);
8983 out = expand_simple_binop (mode, AND,
8984 out,
8985 gen_int_mode (cf - ct, mode),
8986 out, 1, OPTAB_DIRECT);
8987 out = expand_simple_binop (mode, PLUS,
8988 out, GEN_INT (ct),
8989 out, 1, OPTAB_DIRECT);
8990 if (out != operands[0])
8991 emit_move_insn (operands[0], out);
8993 return 1; /* DONE */
8997 if (!TARGET_CMOVE)
8999 /* Try a few things more with specific constants and a variable. */
9001 optab op;
9002 rtx var, orig_out, out, tmp;
9004 if (optimize_size)
9005 return 0; /* FAIL */
9007 /* If one of the two operands is an interesting constant, load a
9008 constant with the above and mask it in with a logical operation. */
9010 if (GET_CODE (operands[2]) == CONST_INT)
9012 var = operands[3];
9013 if (INTVAL (operands[2]) == 0)
9014 operands[3] = constm1_rtx, op = and_optab;
9015 else if (INTVAL (operands[2]) == -1)
9016 operands[3] = const0_rtx, op = ior_optab;
9017 else
9018 return 0; /* FAIL */
9020 else if (GET_CODE (operands[3]) == CONST_INT)
9022 var = operands[2];
9023 if (INTVAL (operands[3]) == 0)
9024 operands[2] = constm1_rtx, op = and_optab;
9025 else if (INTVAL (operands[3]) == -1)
9026 operands[2] = const0_rtx, op = ior_optab;
9027 else
9028 return 0; /* FAIL */
9030 else
9031 return 0; /* FAIL */
9033 orig_out = operands[0];
9034 tmp = gen_reg_rtx (mode);
9035 operands[0] = tmp;
9037 /* Recurse to get the constant loaded. */
9038 if (ix86_expand_int_movcc (operands) == 0)
9039 return 0; /* FAIL */
9041 /* Mask in the interesting variable. */
9042 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9043 OPTAB_WIDEN);
9044 if (out != orig_out)
9045 emit_move_insn (orig_out, out);
9047 return 1; /* DONE */
9051 * For comparison with above,
9053 * movl cf,dest
9054 * movl ct,tmp
9055 * cmpl op1,op2
9056 * cmovcc tmp,dest
9058 * Size 15.
9061 if (! nonimmediate_operand (operands[2], mode))
9062 operands[2] = force_reg (mode, operands[2]);
9063 if (! nonimmediate_operand (operands[3], mode))
9064 operands[3] = force_reg (mode, operands[3]);
9066 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9068 rtx tmp = gen_reg_rtx (mode);
9069 emit_move_insn (tmp, operands[3]);
9070 operands[3] = tmp;
9072 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9074 rtx tmp = gen_reg_rtx (mode);
9075 emit_move_insn (tmp, operands[2]);
9076 operands[2] = tmp;
9078 if (! register_operand (operands[2], VOIDmode)
9079 && ! register_operand (operands[3], VOIDmode))
9080 operands[2] = force_reg (mode, operands[2]);
9082 emit_insn (compare_seq);
9083 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9084 gen_rtx_IF_THEN_ELSE (mode,
9085 compare_op, operands[2],
9086 operands[3])));
9087 if (bypass_test)
9088 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9089 gen_rtx_IF_THEN_ELSE (mode,
9090 bypass_test,
9091 operands[3],
9092 operands[0])));
9093 if (second_test)
9094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9095 gen_rtx_IF_THEN_ELSE (mode,
9096 second_test,
9097 operands[2],
9098 operands[0])));
9100 return 1; /* DONE */
9104 ix86_expand_fp_movcc (operands)
9105 rtx operands[];
9107 enum rtx_code code;
9108 rtx tmp;
9109 rtx compare_op, second_test, bypass_test;
9111 /* For SF/DFmode conditional moves based on comparisons
9112 in the same mode, we may want to use SSE min/max instructions. */
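/* Illustrative example: (a < b ? a : b) in SFmode can become a single
   minss, and the GT case maps onto maxss, provided the comparison operands
   match the conditional move operands as checked below.  */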
9113 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9114 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9115 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9116 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9117 && (!TARGET_IEEE_FP
9118 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9119 /* We may be called from the post-reload splitter. */
9120 && (!REG_P (operands[0])
9121 || SSE_REG_P (operands[0])
9122 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9124 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9125 code = GET_CODE (operands[1]);
9127 /* See if we have (cross) match between comparison operands and
9128 conditional move operands. */
9129 if (rtx_equal_p (operands[2], op1))
9131 rtx tmp = op0;
9132 op0 = op1;
9133 op1 = tmp;
9134 code = reverse_condition_maybe_unordered (code);
9136 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9138 /* Check for min operation. */
9139 if (code == LT)
9141 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9142 if (memory_operand (op0, VOIDmode))
9143 op0 = force_reg (GET_MODE (operands[0]), op0);
9144 if (GET_MODE (operands[0]) == SFmode)
9145 emit_insn (gen_minsf3 (operands[0], op0, op1));
9146 else
9147 emit_insn (gen_mindf3 (operands[0], op0, op1));
9148 return 1;
9150 /* Check for max operation. */
9151 if (code == GT)
9153 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9154 if (memory_operand (op0, VOIDmode))
9155 op0 = force_reg (GET_MODE (operands[0]), op0);
9156 if (GET_MODE (operands[0]) == SFmode)
9157 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9158 else
9159 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9160 return 1;
9163 /* Arrange for the condition to be an sse_comparison_operator. In case we
9164 are in non-IEEE mode, try to canonicalize the destination operand
9165 to be first in the comparison - this helps reload avoid extra
9166 moves. */
9167 if (!sse_comparison_operator (operands[1], VOIDmode)
9168 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9170 rtx tmp = ix86_compare_op0;
9171 ix86_compare_op0 = ix86_compare_op1;
9172 ix86_compare_op1 = tmp;
9173 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9174 VOIDmode, ix86_compare_op0,
9175 ix86_compare_op1);
9177 /* Similarly, try to arrange for the result to be the first operand of the
9178 conditional move. We also don't support the NE comparison on SSE, so try
9179 to avoid it. */
9180 if ((rtx_equal_p (operands[0], operands[3])
9181 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9182 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9184 rtx tmp = operands[2];
9185 operands[2] = operands[3];
9186 operands[3] = tmp;
9187 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9188 (GET_CODE (operands[1])),
9189 VOIDmode, ix86_compare_op0,
9190 ix86_compare_op1);
9192 if (GET_MODE (operands[0]) == SFmode)
9193 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9194 operands[2], operands[3],
9195 ix86_compare_op0, ix86_compare_op1));
9196 else
9197 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9198 operands[2], operands[3],
9199 ix86_compare_op0, ix86_compare_op1));
9200 return 1;
9203 /* The floating point conditional move instructions don't directly
9204 support conditions resulting from a signed integer comparison. */
9206 code = GET_CODE (operands[1]);
9207 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9209 /* The floating point conditional move instructions don't directly
9210 support signed integer comparisons. */
9212 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9214 if (second_test != NULL || bypass_test != NULL)
9215 abort ();
9216 tmp = gen_reg_rtx (QImode);
9217 ix86_expand_setcc (code, tmp);
9218 code = NE;
9219 ix86_compare_op0 = tmp;
9220 ix86_compare_op1 = const0_rtx;
9221 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9223 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9225 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9226 emit_move_insn (tmp, operands[3]);
9227 operands[3] = tmp;
9229 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9231 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9232 emit_move_insn (tmp, operands[2]);
9233 operands[2] = tmp;
9236 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9237 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9238 compare_op,
9239 operands[2],
9240 operands[3])));
9241 if (bypass_test)
9242 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9243 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9244 bypass_test,
9245 operands[3],
9246 operands[0])));
9247 if (second_test)
9248 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9249 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9250 second_test,
9251 operands[2],
9252 operands[0])));
9254 return 1;
9257 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9258 works for floating point operands and non-offsettable memories.
9259 For pushes, it returns just stack offsets; the values will be saved
9260 in the right order. At most three parts are generated. */
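/* For example (on !TARGET_64BIT), a DFmode operand is returned as two
   SImode parts and an XFmode/TFmode one as three; on TARGET_64BIT an
   XFmode/TFmode operand becomes a DImode part plus an SImode part.
   (Illustrative note.)  */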
9262 static int
9263 ix86_split_to_parts (operand, parts, mode)
9264 rtx operand;
9265 rtx *parts;
9266 enum machine_mode mode;
9268 int size;
9270 if (!TARGET_64BIT)
9271 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9272 else
9273 size = (GET_MODE_SIZE (mode) + 4) / 8;
9275 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9276 abort ();
9277 if (size < 2 || size > 3)
9278 abort ();
9280 /* Optimize constant pool references into immediates. This is used by fp
9281 moves, which force all constants to memory to allow combining. */
9282 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9284 rtx tmp = maybe_get_pool_constant (operand);
9285 if (tmp)
9286 operand = tmp;
9289 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9291 /* The only non-offsettable memories we handle are pushes. */
9292 if (! push_operand (operand, VOIDmode))
9293 abort ();
9295 operand = copy_rtx (operand);
9296 PUT_MODE (operand, Pmode);
9297 parts[0] = parts[1] = parts[2] = operand;
9299 else if (!TARGET_64BIT)
9301 if (mode == DImode)
9302 split_di (&operand, 1, &parts[0], &parts[1]);
9303 else
9305 if (REG_P (operand))
9307 if (!reload_completed)
9308 abort ();
9309 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9310 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9311 if (size == 3)
9312 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9314 else if (offsettable_memref_p (operand))
9316 operand = adjust_address (operand, SImode, 0);
9317 parts[0] = operand;
9318 parts[1] = adjust_address (operand, SImode, 4);
9319 if (size == 3)
9320 parts[2] = adjust_address (operand, SImode, 8);
9322 else if (GET_CODE (operand) == CONST_DOUBLE)
9324 REAL_VALUE_TYPE r;
9325 long l[4];
9327 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9328 switch (mode)
9330 case XFmode:
9331 case TFmode:
9332 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9333 parts[2] = gen_int_mode (l[2], SImode);
9334 break;
9335 case DFmode:
9336 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9337 break;
9338 default:
9339 abort ();
9341 parts[1] = gen_int_mode (l[1], SImode);
9342 parts[0] = gen_int_mode (l[0], SImode);
9344 else
9345 abort ();
9348 else
9350 if (mode == TImode)
9351 split_ti (&operand, 1, &parts[0], &parts[1]);
9352 if (mode == XFmode || mode == TFmode)
9354 if (REG_P (operand))
9356 if (!reload_completed)
9357 abort ();
9358 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9359 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9361 else if (offsettable_memref_p (operand))
9363 operand = adjust_address (operand, DImode, 0);
9364 parts[0] = operand;
9365 parts[1] = adjust_address (operand, SImode, 8);
9367 else if (GET_CODE (operand) == CONST_DOUBLE)
9369 REAL_VALUE_TYPE r;
9370 long l[3];
9372 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9373 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9374 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9375 if (HOST_BITS_PER_WIDE_INT >= 64)
9376 parts[0]
9377 = gen_int_mode
9378 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9379 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9380 DImode);
9381 else
9382 parts[0] = immed_double_const (l[0], l[1], DImode);
9383 parts[1] = gen_int_mode (l[2], SImode);
9385 else
9386 abort ();
9390 return size;
9393 /* Emit insns to perform a move or push of DI, DF, and XF values.
9394 Return false when normal moves are needed; true when all required
9395 insns have been emitted. Operands 2-4 contain the input values
9396 in the correct order; operands 5-7 contain the output values. */
9398 void
9399 ix86_split_long_move (operands)
9400 rtx operands[];
9402 rtx part[2][3];
9403 int nparts;
9404 int push = 0;
9405 int collisions = 0;
9406 enum machine_mode mode = GET_MODE (operands[0]);
9408 /* The DFmode expanders may ask us to move a double.
9409 For a 64bit target this is a single move. By hiding the fact
9410 here we simplify the i386.md splitters. */
9411 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9413 /* Optimize constant pool references into immediates. This is used by
9414 fp moves, which force all constants to memory to allow combining. */
9416 if (GET_CODE (operands[1]) == MEM
9417 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9418 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9419 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9420 if (push_operand (operands[0], VOIDmode))
9422 operands[0] = copy_rtx (operands[0]);
9423 PUT_MODE (operands[0], Pmode);
9425 else
9426 operands[0] = gen_lowpart (DImode, operands[0]);
9427 operands[1] = gen_lowpart (DImode, operands[1]);
9428 emit_move_insn (operands[0], operands[1]);
9429 return;
9432 /* The only non-offsettable memory we handle is push. */
9433 if (push_operand (operands[0], VOIDmode))
9434 push = 1;
9435 else if (GET_CODE (operands[0]) == MEM
9436 && ! offsettable_memref_p (operands[0]))
9437 abort ();
9439 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9440 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9442 /* When emitting a push, take care of source operands on the stack. */
9443 if (push && GET_CODE (operands[1]) == MEM
9444 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9446 if (nparts == 3)
9447 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9448 XEXP (part[1][2], 0));
9449 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9450 XEXP (part[1][1], 0));
9453 /* We need to do the copy in the right order in case an address register
9454 of the source overlaps the destination. */
9455 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9457 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9458 collisions++;
9459 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9460 collisions++;
9461 if (nparts == 3
9462 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9463 collisions++;
9465 /* Collision in the middle part can be handled by reordering. */
9466 if (collisions == 1 && nparts == 3
9467 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9469 rtx tmp;
9470 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9471 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9474 /* If there are more collisions, we can't handle them by reordering.
9475 Do an lea to the last part and use only one colliding move. */
9476 else if (collisions > 1)
9478 collisions = 1;
9479 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9480 XEXP (part[1][0], 0)));
9481 part[1][0] = change_address (part[1][0],
9482 TARGET_64BIT ? DImode : SImode,
9483 part[0][nparts - 1]);
9484 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9485 if (nparts == 3)
9486 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9490 if (push)
9492 if (!TARGET_64BIT)
9494 if (nparts == 3)
9496 /* We use only the first 12 bytes of the TFmode value, but for pushing we
9497 are required to adjust the stack as if we were pushing a real 16-byte
9498 value. */
9499 if (mode == TFmode && !TARGET_64BIT)
9500 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9501 GEN_INT (-4)));
9502 emit_move_insn (part[0][2], part[1][2]);
9505 else
9507 /* In 64bit mode we don't have a 32bit push available. In case this is a
9508 register, that is OK - we will just use the larger counterpart. We also
9509 retype the memory - this comes from an attempt to avoid the REX prefix
9510 on moving the second half of a TFmode value. */
9511 if (GET_MODE (part[1][1]) == SImode)
9513 if (GET_CODE (part[1][1]) == MEM)
9514 part[1][1] = adjust_address (part[1][1], DImode, 0);
9515 else if (REG_P (part[1][1]))
9516 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9517 else
9518 abort ();
9519 if (GET_MODE (part[1][0]) == SImode)
9520 part[1][0] = part[1][1];
9523 emit_move_insn (part[0][1], part[1][1]);
9524 emit_move_insn (part[0][0], part[1][0]);
9525 return;
9528 /* Choose correct order to not overwrite the source before it is copied. */
9529 if ((REG_P (part[0][0])
9530 && REG_P (part[1][1])
9531 && (REGNO (part[0][0]) == REGNO (part[1][1])
9532 || (nparts == 3
9533 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9534 || (collisions > 0
9535 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9537 if (nparts == 3)
9539 operands[2] = part[0][2];
9540 operands[3] = part[0][1];
9541 operands[4] = part[0][0];
9542 operands[5] = part[1][2];
9543 operands[6] = part[1][1];
9544 operands[7] = part[1][0];
9546 else
9548 operands[2] = part[0][1];
9549 operands[3] = part[0][0];
9550 operands[5] = part[1][1];
9551 operands[6] = part[1][0];
9554 else
9556 if (nparts == 3)
9558 operands[2] = part[0][0];
9559 operands[3] = part[0][1];
9560 operands[4] = part[0][2];
9561 operands[5] = part[1][0];
9562 operands[6] = part[1][1];
9563 operands[7] = part[1][2];
9565 else
9567 operands[2] = part[0][0];
9568 operands[3] = part[0][1];
9569 operands[5] = part[1][0];
9570 operands[6] = part[1][1];
9573 emit_move_insn (operands[2], operands[5]);
9574 emit_move_insn (operands[3], operands[6]);
9575 if (nparts == 3)
9576 emit_move_insn (operands[4], operands[7]);
9578 return;
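/* Explanatory note on the DImode shift splitters below: they operate on
   the SImode halves.  As an illustration, a constant left shift by 40
   becomes: move the low half into the high half, clear the low half, then
   shift the high half left by 8.  Variable counts use shld/shrd plus a
   fixup for counts >= 32.  */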
9581 void
9582 ix86_split_ashldi (operands, scratch)
9583 rtx *operands, scratch;
9585 rtx low[2], high[2];
9586 int count;
9588 if (GET_CODE (operands[2]) == CONST_INT)
9590 split_di (operands, 2, low, high);
9591 count = INTVAL (operands[2]) & 63;
9593 if (count >= 32)
9595 emit_move_insn (high[0], low[1]);
9596 emit_move_insn (low[0], const0_rtx);
9598 if (count > 32)
9599 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9601 else
9603 if (!rtx_equal_p (operands[0], operands[1]))
9604 emit_move_insn (operands[0], operands[1]);
9605 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9606 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9609 else
9611 if (!rtx_equal_p (operands[0], operands[1]))
9612 emit_move_insn (operands[0], operands[1]);
9614 split_di (operands, 1, low, high);
9616 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9617 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9619 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9621 if (! no_new_pseudos)
9622 scratch = force_reg (SImode, const0_rtx);
9623 else
9624 emit_move_insn (scratch, const0_rtx);
9626 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9627 scratch));
9629 else
9630 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9634 void
9635 ix86_split_ashrdi (operands, scratch)
9636 rtx *operands, scratch;
9638 rtx low[2], high[2];
9639 int count;
9641 if (GET_CODE (operands[2]) == CONST_INT)
9643 split_di (operands, 2, low, high);
9644 count = INTVAL (operands[2]) & 63;
9646 if (count >= 32)
9648 emit_move_insn (low[0], high[1]);
9650 if (! reload_completed)
9651 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9652 else
9654 emit_move_insn (high[0], low[0]);
9655 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9658 if (count > 32)
9659 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9661 else
9663 if (!rtx_equal_p (operands[0], operands[1]))
9664 emit_move_insn (operands[0], operands[1]);
9665 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9666 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9669 else
9671 if (!rtx_equal_p (operands[0], operands[1]))
9672 emit_move_insn (operands[0], operands[1]);
9674 split_di (operands, 1, low, high);
9676 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9677 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9679 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9681 if (! no_new_pseudos)
9682 scratch = gen_reg_rtx (SImode);
9683 emit_move_insn (scratch, high[0]);
9684 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9685 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9686 scratch));
9688 else
9689 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9693 void
9694 ix86_split_lshrdi (operands, scratch)
9695 rtx *operands, scratch;
9697 rtx low[2], high[2];
9698 int count;
9700 if (GET_CODE (operands[2]) == CONST_INT)
9702 split_di (operands, 2, low, high);
9703 count = INTVAL (operands[2]) & 63;
9705 if (count >= 32)
9707 emit_move_insn (low[0], high[1]);
9708 emit_move_insn (high[0], const0_rtx);
9710 if (count > 32)
9711 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9713 else
9715 if (!rtx_equal_p (operands[0], operands[1]))
9716 emit_move_insn (operands[0], operands[1]);
9717 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9718 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9721 else
9723 if (!rtx_equal_p (operands[0], operands[1]))
9724 emit_move_insn (operands[0], operands[1]);
9726 split_di (operands, 1, low, high);
9728 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9729 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9731 /* Heh. By reversing the arguments, we can reuse this pattern. */
9732 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9734 if (! no_new_pseudos)
9735 scratch = force_reg (SImode, const0_rtx);
9736 else
9737 emit_move_insn (scratch, const0_rtx);
9739 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9740 scratch));
9742 else
9743 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9747 /* Helper function for the string operations below. Test VARIABLE to see
9748 whether it is aligned to VALUE bytes. If so, jump to the label. */
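/* For example, ix86_expand_aligntest (destreg, 2) emits the equivalent of
   "if ((destreg & 2) == 0) goto label", which the callers below use to
   skip a fixup copy when that bit of the address is already clear.
   (Illustrative note.)  */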
9749 static rtx
9750 ix86_expand_aligntest (variable, value)
9751 rtx variable;
9752 int value;
9754 rtx label = gen_label_rtx ();
9755 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9756 if (GET_MODE (variable) == DImode)
9757 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9758 else
9759 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9760 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9761 1, label);
9762 return label;
9765 /* Adjust COUNTER by the VALUE. */
9766 static void
9767 ix86_adjust_counter (countreg, value)
9768 rtx countreg;
9769 HOST_WIDE_INT value;
9771 if (GET_MODE (countreg) == DImode)
9772 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9773 else
9774 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9777 /* Zero extend possibly SImode EXP to Pmode register. */
9779 ix86_zero_extend_to_Pmode (exp)
9780 rtx exp;
9782 rtx r;
9783 if (GET_MODE (exp) == VOIDmode)
9784 return force_reg (Pmode, exp);
9785 if (GET_MODE (exp) == Pmode)
9786 return copy_to_mode_reg (Pmode, exp);
9787 r = gen_reg_rtx (Pmode);
9788 emit_insn (gen_zero_extendsidi2 (r, exp));
9789 return r;
9792 /* Expand string move (memcpy) operation. Use i386 string operations when
9793 profitable. expand_clrstr contains similar code. */
9795 ix86_expand_movstr (dst, src, count_exp, align_exp)
9796 rtx dst, src, count_exp, align_exp;
9798 rtx srcreg, destreg, countreg;
9799 enum machine_mode counter_mode;
9800 HOST_WIDE_INT align = 0;
9801 unsigned HOST_WIDE_INT count = 0;
9802 rtx insns;
9804 start_sequence ();
9806 if (GET_CODE (align_exp) == CONST_INT)
9807 align = INTVAL (align_exp);
9809 /* This simple hack avoids all inlining code and simplifies code below. */
9810 if (!TARGET_ALIGN_STRINGOPS)
9811 align = 64;
9813 if (GET_CODE (count_exp) == CONST_INT)
9814 count = INTVAL (count_exp);
9816 /* Figure out the proper mode for the counter. For 32bits it is always SImode,
9817 for 64bits use SImode when possible, otherwise DImode.
9818 Set count to the number of bytes copied when known at compile time. */
9819 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9820 || x86_64_zero_extended_value (count_exp))
9821 counter_mode = SImode;
9822 else
9823 counter_mode = DImode;
9825 if (counter_mode != SImode && counter_mode != DImode)
9826 abort ();
9828 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9829 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9831 emit_insn (gen_cld ());
9833 /* When optimizing for size emit simple rep ; movsb instruction for
9834 counts not divisible by 4. */
9836 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9838 countreg = ix86_zero_extend_to_Pmode (count_exp);
9839 if (TARGET_64BIT)
9840 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9841 destreg, srcreg, countreg));
9842 else
9843 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9844 destreg, srcreg, countreg));
9847 /* For constant aligned (or small unaligned) copies use rep movsl
9848 followed by code copying the rest. For PentiumPro ensure 8 byte
9849 alignment to allow rep movsl acceleration. */
9851 else if (count != 0
9852 && (align >= 8
9853 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9854 || optimize_size || count < (unsigned int) 64))
9856 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9857 if (count & ~(size - 1))
9859 countreg = copy_to_mode_reg (counter_mode,
9860 GEN_INT ((count >> (size == 4 ? 2 : 3))
9861 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9862 countreg = ix86_zero_extend_to_Pmode (countreg);
9863 if (size == 4)
9865 if (TARGET_64BIT)
9866 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9867 destreg, srcreg, countreg));
9868 else
9869 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9870 destreg, srcreg, countreg));
9872 else
9873 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9874 destreg, srcreg, countreg));
9876 if (size == 8 && (count & 0x04))
9877 emit_insn (gen_strmovsi (destreg, srcreg));
9878 if (count & 0x02)
9879 emit_insn (gen_strmovhi (destreg, srcreg));
9880 if (count & 0x01)
9881 emit_insn (gen_strmovqi (destreg, srcreg));
9883 /* The generic code based on the glibc implementation:
9884 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9885 allowing accelerated copying there)
9886 - copy the data using rep movsl
9887 - copy the rest. */
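/* Illustrative shape of the code emitted below for an unknown count: test
   the low bits of the destination and do 1/2/4-byte fixup copies, use
   rep movsl (rep movsq on 64-bit) for the bulk, then do tail copies
   guarded by tests of the remaining count bits.  */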
9888 else
9890 rtx countreg2;
9891 rtx label = NULL;
9892 int desired_alignment = (TARGET_PENTIUMPRO
9893 && (count == 0 || count >= (unsigned int) 260)
9894 ? 8 : UNITS_PER_WORD);
9896 /* In case we don't know anything about the alignment, default to
9897 the library version, since it is usually equally fast and results in
9898 shorter code. */
9899 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9901 end_sequence ();
9902 return 0;
9905 if (TARGET_SINGLE_STRINGOP)
9906 emit_insn (gen_cld ());
9908 countreg2 = gen_reg_rtx (Pmode);
9909 countreg = copy_to_mode_reg (counter_mode, count_exp);
9911 /* We don't use loops to align the destination or to copy parts smaller
9912 than 4 bytes, because gcc is able to optimize such code better (in
9913 the case the destination or the count really is aligned, gcc is often
9914 able to predict the branches) and also it is friendlier to
9915 hardware branch prediction.
9917 Using loops is beneficial for the generic case, because we can
9918 handle small counts using the loops. Many CPUs (such as Athlon)
9919 have large REP prefix setup costs.
9921 This is quite costly. Maybe we can revisit this decision later or
9922 add some customizability to this code. */
9924 if (count == 0 && align < desired_alignment)
9926 label = gen_label_rtx ();
9927 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
9928 LEU, 0, counter_mode, 1, label);
9930 if (align <= 1)
9932 rtx label = ix86_expand_aligntest (destreg, 1);
9933 emit_insn (gen_strmovqi (destreg, srcreg));
9934 ix86_adjust_counter (countreg, 1);
9935 emit_label (label);
9936 LABEL_NUSES (label) = 1;
9938 if (align <= 2)
9940 rtx label = ix86_expand_aligntest (destreg, 2);
9941 emit_insn (gen_strmovhi (destreg, srcreg));
9942 ix86_adjust_counter (countreg, 2);
9943 emit_label (label);
9944 LABEL_NUSES (label) = 1;
9946 if (align <= 4 && desired_alignment > 4)
9948 rtx label = ix86_expand_aligntest (destreg, 4);
9949 emit_insn (gen_strmovsi (destreg, srcreg));
9950 ix86_adjust_counter (countreg, 4);
9951 emit_label (label);
9952 LABEL_NUSES (label) = 1;
9955 if (label && desired_alignment > 4 && !TARGET_64BIT)
9957 emit_label (label);
9958 LABEL_NUSES (label) = 1;
9959 label = NULL_RTX;
9961 if (!TARGET_SINGLE_STRINGOP)
9962 emit_insn (gen_cld ());
9963 if (TARGET_64BIT)
9965 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
9966 GEN_INT (3)));
9967 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
9968 destreg, srcreg, countreg2));
9970 else
9972 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
9973 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
9974 destreg, srcreg, countreg2));
9977 if (label)
9979 emit_label (label);
9980 LABEL_NUSES (label) = 1;
9982 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
9983 emit_insn (gen_strmovsi (destreg, srcreg));
9984 if ((align <= 4 || count == 0) && TARGET_64BIT)
9986 rtx label = ix86_expand_aligntest (countreg, 4);
9987 emit_insn (gen_strmovsi (destreg, srcreg));
9988 emit_label (label);
9989 LABEL_NUSES (label) = 1;
9991 if (align > 2 && count != 0 && (count & 2))
9992 emit_insn (gen_strmovhi (destreg, srcreg));
9993 if (align <= 2 || count == 0)
9995 rtx label = ix86_expand_aligntest (countreg, 2);
9996 emit_insn (gen_strmovhi (destreg, srcreg));
9997 emit_label (label);
9998 LABEL_NUSES (label) = 1;
10000 if (align > 1 && count != 0 && (count & 1))
10001 emit_insn (gen_strmovqi (destreg, srcreg));
10002 if (align <= 1 || count == 0)
10004 rtx label = ix86_expand_aligntest (countreg, 1);
10005 emit_insn (gen_strmovqi (destreg, srcreg));
10006 emit_label (label);
10007 LABEL_NUSES (label) = 1;
10011 insns = get_insns ();
10012 end_sequence ();
10014 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10015 emit_insn (insns);
10016 return 1;
10019 /* Expand string clear operation (bzero). Use i386 string operations when
10020 profitable. expand_movstr contains similar code. */
10022 ix86_expand_clrstr (src, count_exp, align_exp)
10023 rtx src, count_exp, align_exp;
10025 rtx destreg, zeroreg, countreg;
10026 enum machine_mode counter_mode;
10027 HOST_WIDE_INT align = 0;
10028 unsigned HOST_WIDE_INT count = 0;
10030 if (GET_CODE (align_exp) == CONST_INT)
10031 align = INTVAL (align_exp);
10033 /* This simple hack avoids all inlining code and simplifies code below. */
10034 if (!TARGET_ALIGN_STRINGOPS)
10035 align = 32;
10037 if (GET_CODE (count_exp) == CONST_INT)
10038 count = INTVAL (count_exp);
10039 /* Figure out the proper mode for the counter. For 32bits it is always SImode,
10040 for 64bits use SImode when possible, otherwise DImode.
10041 Set count to the number of bytes to clear when known at compile time. */
10042 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10043 || x86_64_zero_extended_value (count_exp))
10044 counter_mode = SImode;
10045 else
10046 counter_mode = DImode;
10048 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10050 emit_insn (gen_cld ());
10052 /* When optimizing for size emit simple rep ; movsb instruction for
10053 counts not divisible by 4. */
10055 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10057 countreg = ix86_zero_extend_to_Pmode (count_exp);
10058 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10059 if (TARGET_64BIT)
10060 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10061 destreg, countreg));
10062 else
10063 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10064 destreg, countreg));
10066 else if (count != 0
10067 && (align >= 8
10068 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10069 || optimize_size || count < (unsigned int) 64))
10071 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10072 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10073 if (count & ~(size - 1))
10075 countreg = copy_to_mode_reg (counter_mode,
10076 GEN_INT ((count >> (size == 4 ? 2 : 3))
10077 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10078 countreg = ix86_zero_extend_to_Pmode (countreg);
10079 if (size == 4)
10081 if (TARGET_64BIT)
10082 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10083 destreg, countreg));
10084 else
10085 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10086 destreg, countreg));
10088 else
10089 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10090 destreg, countreg));
10092 if (size == 8 && (count & 0x04))
10093 emit_insn (gen_strsetsi (destreg,
10094 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10095 if (count & 0x02)
10096 emit_insn (gen_strsethi (destreg,
10097 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10098 if (count & 0x01)
10099 emit_insn (gen_strsetqi (destreg,
10100 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10102 else
10104 rtx countreg2;
10105 rtx label = NULL;
10106 /* Compute desired alignment of the string operation. */
10107 int desired_alignment = (TARGET_PENTIUMPRO
10108 && (count == 0 || count >= (unsigned int) 260)
10109 ? 8 : UNITS_PER_WORD);
10111 /* In case we don't know anything about the alignment, default to
10112 the library version, since it is usually equally fast and results in
10113 shorter code. */
10114 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10115 return 0;
10117 if (TARGET_SINGLE_STRINGOP)
10118 emit_insn (gen_cld ());
10120 countreg2 = gen_reg_rtx (Pmode);
10121 countreg = copy_to_mode_reg (counter_mode, count_exp);
10122 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10124 if (count == 0 && align < desired_alignment)
10126 label = gen_label_rtx ();
10127 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10128 LEU, 0, counter_mode, 1, label);
10130 if (align <= 1)
10132 rtx label = ix86_expand_aligntest (destreg, 1);
10133 emit_insn (gen_strsetqi (destreg,
10134 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10135 ix86_adjust_counter (countreg, 1);
10136 emit_label (label);
10137 LABEL_NUSES (label) = 1;
10139 if (align <= 2)
10141 rtx label = ix86_expand_aligntest (destreg, 2);
10142 emit_insn (gen_strsethi (destreg,
10143 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10144 ix86_adjust_counter (countreg, 2);
10145 emit_label (label);
10146 LABEL_NUSES (label) = 1;
10148 if (align <= 4 && desired_alignment > 4)
10150 rtx label = ix86_expand_aligntest (destreg, 4);
10151 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10152 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10153 : zeroreg)));
10154 ix86_adjust_counter (countreg, 4);
10155 emit_label (label);
10156 LABEL_NUSES (label) = 1;
10159 if (label && desired_alignment > 4 && !TARGET_64BIT)
10161 emit_label (label);
10162 LABEL_NUSES (label) = 1;
10163 label = NULL_RTX;
10166 if (!TARGET_SINGLE_STRINGOP)
10167 emit_insn (gen_cld ());
10168 if (TARGET_64BIT)
10170 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10171 GEN_INT (3)));
10172 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10173 destreg, countreg2));
10175 else
10177 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10178 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10179 destreg, countreg2));
10181 if (label)
10183 emit_label (label);
10184 LABEL_NUSES (label) = 1;
10187 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10188 emit_insn (gen_strsetsi (destreg,
10189 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10190 if (TARGET_64BIT && (align <= 4 || count == 0))
10192 rtx label = ix86_expand_aligntest (countreg, 2);
10193 emit_insn (gen_strsetsi (destreg,
10194 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10195 emit_label (label);
10196 LABEL_NUSES (label) = 1;
10198 if (align > 2 && count != 0 && (count & 2))
10199 emit_insn (gen_strsethi (destreg,
10200 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10201 if (align <= 2 || count == 0)
10203 rtx label = ix86_expand_aligntest (countreg, 2);
10204 emit_insn (gen_strsethi (destreg,
10205 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10206 emit_label (label);
10207 LABEL_NUSES (label) = 1;
10209 if (align > 1 && count != 0 && (count & 1))
10210 emit_insn (gen_strsetqi (destreg,
10211 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10212 if (align <= 1 || count == 0)
10214 rtx label = ix86_expand_aligntest (countreg, 1);
10215 emit_insn (gen_strsetqi (destreg,
10216 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10217 emit_label (label);
10218 LABEL_NUSES (label) = 1;
10221 return 1;
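/* Illustrative sketch, not part of GCC: for a compile-time-known count the
   expansion above is roughly equivalent to the following plain C, assuming
   the 32-bit (size == 4) path -- clear whole words with rep stos, then
   finish the 1-3 byte tail with single stores.  The function name and
   parameters are hypothetical.  */
static void
sketch_clear_block (unsigned char *dst, unsigned long count)
{
  unsigned long i, words = count >> 2;   /* countreg = count >> 2 */

  for (i = 0; i < words; i++)            /* rep stosl clears whole words */
    {
      dst[0] = dst[1] = dst[2] = dst[3] = 0;
      dst += 4;
    }
  if (count & 2)                         /* strsethi clears a halfword */
    {
      dst[0] = dst[1] = 0;
      dst += 2;
    }
  if (count & 1)                         /* strsetqi clears the last byte */
    dst[0] = 0;
}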
10223 /* Expand strlen. */
10225 ix86_expand_strlen (out, src, eoschar, align)
10226 rtx out, src, eoschar, align;
10228 rtx addr, scratch1, scratch2, scratch3, scratch4;
10230 /* The generic case of the strlen expander is long. Avoid expanding it
10231 unless TARGET_INLINE_ALL_STRINGOPS. */
10233 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10234 && !TARGET_INLINE_ALL_STRINGOPS
10235 && !optimize_size
10236 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10237 return 0;
10239 addr = force_reg (Pmode, XEXP (src, 0));
10240 scratch1 = gen_reg_rtx (Pmode);
10242 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10243 && !optimize_size)
10245 /* It seems that some optimizers do not combine a call like
10246 foo(strlen(bar), strlen(bar));
10247 when the move and the subtraction are done here; the length is
10248 calculated just once when these instructions are emitted inside
10249 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
10250 and this uses one fewer register for the lifetime of
10251 output_strlen_unroll(), this is better. */
10253 emit_move_insn (out, addr);
10255 ix86_expand_strlensi_unroll_1 (out, align);
10257 /* strlensi_unroll_1 returns the address of the zero at the end of
10258 the string, like memchr(), so compute the length by subtracting
10259 the start address. */
10260 if (TARGET_64BIT)
10261 emit_insn (gen_subdi3 (out, out, addr));
10262 else
10263 emit_insn (gen_subsi3 (out, out, addr));
10265 else
10267 scratch2 = gen_reg_rtx (Pmode);
10268 scratch3 = gen_reg_rtx (Pmode);
10269 scratch4 = force_reg (Pmode, constm1_rtx);
10271 emit_move_insn (scratch3, addr);
10272 eoschar = force_reg (QImode, eoschar);
10274 emit_insn (gen_cld ());
10275 if (TARGET_64BIT)
10277 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10278 align, scratch4, scratch3));
10279 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10280 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10282 else
10284 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10285 align, scratch4, scratch3));
10286 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10287 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10290 return 1;
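/* Illustrative sketch, not part of GCC: the arithmetic behind the
   repnz ; scasb path above.  The count register is preloaded with -1
   (scratch4 = constm1_rtx); scasb then scans length + 1 bytes, including
   the terminator, leaving the counter at -(length + 2).  The one_cmpl and
   add -1 insns emitted above therefore recover the length as
   ~counter - 1.  The function name is hypothetical.  */
static unsigned long
sketch_strlen_from_counter (unsigned long counter_after_scas)
{
  return ~counter_after_scas - 1;
}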
10293 /* Expand the appropriate insns for doing strlen if not just doing
10294 repnz; scasb
10296 out = result, initialized with the start address
10297 align_rtx = alignment of the address.
10298 scratch = scratch register, initialized with the start address when
10299 not aligned, otherwise undefined
10301 This is just the body. It needs the initialisations mentioned above and
10302 some address computing at the end. These things are done in i386.md. */
10304 static void
10305 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10306 rtx out, align_rtx;
10308 int align;
10309 rtx tmp;
10310 rtx align_2_label = NULL_RTX;
10311 rtx align_3_label = NULL_RTX;
10312 rtx align_4_label = gen_label_rtx ();
10313 rtx end_0_label = gen_label_rtx ();
10314 rtx mem;
10315 rtx tmpreg = gen_reg_rtx (SImode);
10316 rtx scratch = gen_reg_rtx (SImode);
10318 align = 0;
10319 if (GET_CODE (align_rtx) == CONST_INT)
10320 align = INTVAL (align_rtx);
10322 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10324 /* Is there a known alignment and is it less than 4? */
10325 if (align < 4)
10327 rtx scratch1 = gen_reg_rtx (Pmode);
10328 emit_move_insn (scratch1, out);
10329 /* Is there a known alignment and is it not 2? */
10330 if (align != 2)
10332 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10333 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10335 /* Leave just the 3 lower bits. */
10336 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10337 NULL_RTX, 0, OPTAB_WIDEN);
10339 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10340 Pmode, 1, align_4_label);
10341 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10342 Pmode, 1, align_2_label);
10343 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10344 Pmode, 1, align_3_label);
10346 else
10348 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10349 check whether the pointer is already 4-byte aligned. */
10351 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10352 NULL_RTX, 0, OPTAB_WIDEN);
10354 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10355 Pmode, 1, align_4_label);
10358 mem = gen_rtx_MEM (QImode, out);
10360 /* Now compare the bytes. */
10362 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10363 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10364 QImode, 1, end_0_label);
10366 /* Increment the address. */
10367 if (TARGET_64BIT)
10368 emit_insn (gen_adddi3 (out, out, const1_rtx));
10369 else
10370 emit_insn (gen_addsi3 (out, out, const1_rtx));
10372 /* Not needed with an alignment of 2 */
10373 if (align != 2)
10375 emit_label (align_2_label);
10377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10378 end_0_label);
10380 if (TARGET_64BIT)
10381 emit_insn (gen_adddi3 (out, out, const1_rtx));
10382 else
10383 emit_insn (gen_addsi3 (out, out, const1_rtx));
10385 emit_label (align_3_label);
10388 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10389 end_0_label);
10391 if (TARGET_64BIT)
10392 emit_insn (gen_adddi3 (out, out, const1_rtx));
10393 else
10394 emit_insn (gen_addsi3 (out, out, const1_rtx));
10397 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10398 align this loop; doing so only makes programs bigger and does not
10399 speed them up. */
10400 emit_label (align_4_label);
10402 mem = gen_rtx_MEM (SImode, out);
10403 emit_move_insn (scratch, mem);
10404 if (TARGET_64BIT)
10405 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10406 else
10407 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10409 /* This formula yields a nonzero result iff one of the bytes is zero.
10410 This saves three branches inside the loop and many cycles. (See the
illustrative sketch after this function.) */
10412 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10413 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10414 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10415 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10416 gen_int_mode (0x80808080, SImode)));
10417 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10418 align_4_label);
10420 if (TARGET_CMOVE)
10422 rtx reg = gen_reg_rtx (SImode);
10423 rtx reg2 = gen_reg_rtx (Pmode);
10424 emit_move_insn (reg, tmpreg);
10425 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10427 /* If zero is not in the first two bytes, move two bytes forward. */
10428 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10429 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10430 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10431 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10432 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10433 reg,
10434 tmpreg)));
10435 /* Emit lea manually to avoid clobbering of flags. */
10436 emit_insn (gen_rtx_SET (SImode, reg2,
10437 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10439 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10440 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10441 emit_insn (gen_rtx_SET (VOIDmode, out,
10442 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10443 reg2,
10444 out)));
10447 else
10449 rtx end_2_label = gen_label_rtx ();
10450 /* Is zero in the first two bytes? */
10452 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10453 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10454 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10455 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10456 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10457 pc_rtx);
10458 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10459 JUMP_LABEL (tmp) = end_2_label;
10461 /* Not in the first two. Move two bytes forward. */
10462 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10463 if (TARGET_64BIT)
10464 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10465 else
10466 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10468 emit_label (end_2_label);
10472 /* Avoid branch in fixing the byte. */
10473 tmpreg = gen_lowpart (QImode, tmpreg);
10474 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10475 if (TARGET_64BIT)
10476 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10477 else
10478 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10480 emit_label (end_0_label);
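/* Illustrative sketch, not part of GCC: the zero-byte test emitted in the
   loop above, written as plain C.  If no byte of X is zero, the subtraction
   never borrows between bytes, so a byte's high bit can be set in
   (x - 0x01010101) while being clear in x only when that byte was zero;
   the 0x80808080 mask extracts exactly those per-byte sign bits, so the
   result is nonzero iff some byte of X is zero.  The function name is
   hypothetical.  */
static int
sketch_has_zero_byte (unsigned int x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}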
10483 void
10484 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10485 rtx retval, fnaddr, callarg1, callarg2, pop;
10487 rtx use = NULL, call;
10489 if (pop == const0_rtx)
10490 pop = NULL;
10491 if (TARGET_64BIT && pop)
10492 abort ();
10494 /* Static functions and indirect calls don't need the pic register. */
10495 if (! TARGET_64BIT && flag_pic
10496 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10497 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10498 use_reg (&use, pic_offset_table_rtx);
10500 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10502 rtx al = gen_rtx_REG (QImode, 0);
10503 emit_move_insn (al, callarg2);
10504 use_reg (&use, al);
10507 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10509 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10510 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10513 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10514 if (retval)
10515 call = gen_rtx_SET (VOIDmode, retval, call);
10516 if (pop)
10518 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10519 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10520 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10523 call = emit_call_insn (call);
10524 if (use)
10525 CALL_INSN_FUNCTION_USAGE (call) = use;
10529 /* Clear stack slot assignments remembered from previous functions.
10530 This is called from INIT_EXPANDERS once before RTL is emitted for each
10531 function. */
10533 static struct machine_function *
10534 ix86_init_machine_status ()
10536 return ggc_alloc_cleared (sizeof (struct machine_function));
10539 /* Return a MEM corresponding to a stack slot with mode MODE.
10540 Allocate a new slot if necessary.
10542 The RTL for a function can have several slots available: N is
10543 which slot to use. */
10546 assign_386_stack_local (mode, n)
10547 enum machine_mode mode;
10548 int n;
10550 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10551 abort ();
10553 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10554 ix86_stack_locals[(int) mode][n]
10555 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10557 return ix86_stack_locals[(int) mode][n];
10560 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10562 static GTY(()) rtx ix86_tls_symbol;
10564 ix86_tls_get_addr ()
10567 if (!ix86_tls_symbol)
10569 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10570 ? "___tls_get_addr"
10571 : "__tls_get_addr"));
10574 return ix86_tls_symbol;
10577 /* Calculate the length of the memory address in the instruction
10578 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10580 static int
10581 memory_address_length (addr)
10582 rtx addr;
10584 struct ix86_address parts;
10585 rtx base, index, disp;
10586 int len;
10588 if (GET_CODE (addr) == PRE_DEC
10589 || GET_CODE (addr) == POST_INC
10590 || GET_CODE (addr) == PRE_MODIFY
10591 || GET_CODE (addr) == POST_MODIFY)
10592 return 0;
10594 if (! ix86_decompose_address (addr, &parts))
10595 abort ();
10597 base = parts.base;
10598 index = parts.index;
10599 disp = parts.disp;
10600 len = 0;
10602 /* Register Indirect. */
10603 if (base && !index && !disp)
10605 /* Special cases: ebp and esp need the two-byte modrm form. */
10606 if (addr == stack_pointer_rtx
10607 || addr == arg_pointer_rtx
10608 || addr == frame_pointer_rtx
10609 || addr == hard_frame_pointer_rtx)
10610 len = 1;
10613 /* Direct Addressing. */
10614 else if (disp && !base && !index)
10615 len = 4;
10617 else
10619 /* Find the length of the displacement constant. */
10620 if (disp)
10622 if (GET_CODE (disp) == CONST_INT
10623 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10624 len = 1;
10625 else
10626 len = 4;
10629 /* An index requires the two-byte modrm form. */
10630 if (index)
10631 len += 1;
10634 return len;
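/* Illustrative worked examples, not part of GCC: applying the rules above
   to a few common 32-bit addressing modes (lengths exclude the modrm,
   opcode and prefix bytes, as stated before the function):
     (%eax)           -> 0  (register indirect)
     (%esp)           -> 1  (esp needs the two-byte modrm/SIB form)
     symbol           -> 4  (disp32 only)
     8(%ebx)          -> 1  (disp8)
     8(%ebx,%esi,4)   -> 2  (disp8 plus a SIB byte for the index)  */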
10637 /* Compute the default value for the "length_immediate" attribute. When
10638 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
10640 ix86_attr_length_immediate_default (insn, shortform)
10641 rtx insn;
10642 int shortform;
10644 int len = 0;
10645 int i;
10646 extract_insn_cached (insn);
10647 for (i = recog_data.n_operands - 1; i >= 0; --i)
10648 if (CONSTANT_P (recog_data.operand[i]))
10650 if (len)
10651 abort ();
10652 if (shortform
10653 && GET_CODE (recog_data.operand[i]) == CONST_INT
10654 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10655 len = 1;
10656 else
10658 switch (get_attr_mode (insn))
10660 case MODE_QI:
10661 len+=1;
10662 break;
10663 case MODE_HI:
10664 len+=2;
10665 break;
10666 case MODE_SI:
10667 len+=4;
10668 break;
10669 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10670 case MODE_DI:
10671 len+=4;
10672 break;
10673 default:
10674 fatal_insn ("unknown insn mode", insn);
10678 return len;
10680 /* Compute default value for "length_address" attribute. */
10682 ix86_attr_length_address_default (insn)
10683 rtx insn;
10685 int i;
10686 extract_insn_cached (insn);
10687 for (i = recog_data.n_operands - 1; i >= 0; --i)
10688 if (GET_CODE (recog_data.operand[i]) == MEM)
10690 return memory_address_length (XEXP (recog_data.operand[i], 0));
10691 break;
10693 return 0;
10696 /* Return the maximum number of instructions a cpu can issue. */
10698 static int
10699 ix86_issue_rate ()
10701 switch (ix86_cpu)
10703 case PROCESSOR_PENTIUM:
10704 case PROCESSOR_K6:
10705 return 2;
10707 case PROCESSOR_PENTIUMPRO:
10708 case PROCESSOR_PENTIUM4:
10709 case PROCESSOR_ATHLON:
10710 return 3;
10712 default:
10713 return 1;
10717 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10718 by DEP_INSN and nothing else set by DEP_INSN. */
10720 static int
10721 ix86_flags_dependant (insn, dep_insn, insn_type)
10722 rtx insn, dep_insn;
10723 enum attr_type insn_type;
10725 rtx set, set2;
10727 /* Simplify the test for uninteresting insns. */
10728 if (insn_type != TYPE_SETCC
10729 && insn_type != TYPE_ICMOV
10730 && insn_type != TYPE_FCMOV
10731 && insn_type != TYPE_IBR)
10732 return 0;
10734 if ((set = single_set (dep_insn)) != 0)
10736 set = SET_DEST (set);
10737 set2 = NULL_RTX;
10739 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10740 && XVECLEN (PATTERN (dep_insn), 0) == 2
10741 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10742 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10744 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10745 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10747 else
10748 return 0;
10750 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10751 return 0;
10753 /* This test is true if the dependent insn reads the flags but
10754 not any other potentially set register. */
10755 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10756 return 0;
10758 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10759 return 0;
10761 return 1;
10764 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10765 address with operands set by DEP_INSN. */
10767 static int
10768 ix86_agi_dependant (insn, dep_insn, insn_type)
10769 rtx insn, dep_insn;
10770 enum attr_type insn_type;
10772 rtx addr;
10774 if (insn_type == TYPE_LEA
10775 && TARGET_PENTIUM)
10777 addr = PATTERN (insn);
10778 if (GET_CODE (addr) == SET)
10780 else if (GET_CODE (addr) == PARALLEL
10781 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10782 addr = XVECEXP (addr, 0, 0);
10783 else
10784 abort ();
10785 addr = SET_SRC (addr);
10787 else
10789 int i;
10790 extract_insn_cached (insn);
10791 for (i = recog_data.n_operands - 1; i >= 0; --i)
10792 if (GET_CODE (recog_data.operand[i]) == MEM)
10794 addr = XEXP (recog_data.operand[i], 0);
10795 goto found;
10797 return 0;
10798 found:;
10801 return modified_in_p (addr, dep_insn);
10804 static int
10805 ix86_adjust_cost (insn, link, dep_insn, cost)
10806 rtx insn, link, dep_insn;
10807 int cost;
10809 enum attr_type insn_type, dep_insn_type;
10810 enum attr_memory memory, dep_memory;
10811 rtx set, set2;
10812 int dep_insn_code_number;
10814 /* Anti and output dependencies have zero cost on all CPUs. */
10815 if (REG_NOTE_KIND (link) != 0)
10816 return 0;
10818 dep_insn_code_number = recog_memoized (dep_insn);
10820 /* If we can't recognize the insns, we can't really do anything. */
10821 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10822 return cost;
10824 insn_type = get_attr_type (insn);
10825 dep_insn_type = get_attr_type (dep_insn);
10827 switch (ix86_cpu)
10829 case PROCESSOR_PENTIUM:
10830 /* Address Generation Interlock adds a cycle of latency. */
10831 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10832 cost += 1;
10834 /* ??? Compares pair with jump/setcc. */
10835 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10836 cost = 0;
10838 /* Floating point stores require the value to be ready one cycle earlier. */
10839 if (insn_type == TYPE_FMOV
10840 && get_attr_memory (insn) == MEMORY_STORE
10841 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10842 cost += 1;
10843 break;
10845 case PROCESSOR_PENTIUMPRO:
10846 memory = get_attr_memory (insn);
10847 dep_memory = get_attr_memory (dep_insn);
10849 /* Since we can't represent delayed latencies of load+operation,
10850 increase the cost here for non-imov insns. */
10851 if (dep_insn_type != TYPE_IMOV
10852 && dep_insn_type != TYPE_FMOV
10853 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10854 cost += 1;
10856 /* INT->FP conversion is expensive. */
10857 if (get_attr_fp_int_src (dep_insn))
10858 cost += 5;
10860 /* There is one cycle extra latency between an FP op and a store. */
10861 if (insn_type == TYPE_FMOV
10862 && (set = single_set (dep_insn)) != NULL_RTX
10863 && (set2 = single_set (insn)) != NULL_RTX
10864 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10865 && GET_CODE (SET_DEST (set2)) == MEM)
10866 cost += 1;
10868 /* Show the ability of the reorder buffer to hide the latency of a load
10869 by executing it in parallel with the previous instruction when the
10870 previous instruction is not needed to compute the address. */
10871 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10872 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10874 /* Claim moves to take one cycle, as the core can issue one load
10875 at a time and the next load can start a cycle later. */
10876 if (dep_insn_type == TYPE_IMOV
10877 || dep_insn_type == TYPE_FMOV)
10878 cost = 1;
10879 else if (cost > 1)
10880 cost--;
10882 break;
10884 case PROCESSOR_K6:
10885 memory = get_attr_memory (insn);
10886 dep_memory = get_attr_memory (dep_insn);
10887 /* The esp dependency is resolved before the instruction is really
10888 finished. */
10889 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10890 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10891 return 1;
10893 /* Since we can't represent delayed latencies of load+operation,
10894 increase the cost here for non-imov insns. */
10895 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10896 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10898 /* INT->FP conversion is expensive. */
10899 if (get_attr_fp_int_src (dep_insn))
10900 cost += 5;
10902 /* Show the ability of the reorder buffer to hide the latency of a load
10903 by executing it in parallel with the previous instruction when the
10904 previous instruction is not needed to compute the address. */
10905 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10906 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10908 /* Claim moves to take one cycle, as the core can issue one load
10909 at a time and the next load can start a cycle later. */
10910 if (dep_insn_type == TYPE_IMOV
10911 || dep_insn_type == TYPE_FMOV)
10912 cost = 1;
10913 else if (cost > 2)
10914 cost -= 2;
10915 else
10916 cost = 1;
10918 break;
10920 case PROCESSOR_ATHLON:
10921 memory = get_attr_memory (insn);
10922 dep_memory = get_attr_memory (dep_insn);
10924 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10926 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
10927 cost += 2;
10928 else
10929 cost += 3;
10931 /* Show the ability of the reorder buffer to hide the latency of a load
10932 by executing it in parallel with the previous instruction when the
10933 previous instruction is not needed to compute the address. */
10934 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10935 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10937 /* Claim moves to take one cycle, as the core can issue one load
10938 at a time and the next load can start a cycle later. */
10939 if (dep_insn_type == TYPE_IMOV
10940 || dep_insn_type == TYPE_FMOV)
10941 cost = 0;
10942 else if (cost >= 3)
10943 cost -= 3;
10944 else
10945 cost = 0;
10948 default:
10949 break;
10952 return cost;
10955 static union
10957 struct ppro_sched_data
10959 rtx decode[3];
10960 int issued_this_cycle;
10961 } ppro;
10962 } ix86_sched_data;
10964 static enum attr_ppro_uops
10965 ix86_safe_ppro_uops (insn)
10966 rtx insn;
10968 if (recog_memoized (insn) >= 0)
10969 return get_attr_ppro_uops (insn);
10970 else
10971 return PPRO_UOPS_MANY;
10974 static void
10975 ix86_dump_ppro_packet (dump)
10976 FILE *dump;
10978 if (ix86_sched_data.ppro.decode[0])
10980 fprintf (dump, "PPRO packet: %d",
10981 INSN_UID (ix86_sched_data.ppro.decode[0]));
10982 if (ix86_sched_data.ppro.decode[1])
10983 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
10984 if (ix86_sched_data.ppro.decode[2])
10985 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
10986 fputc ('\n', dump);
10990 /* We're beginning a new block. Initialize data structures as necessary. */
10992 static void
10993 ix86_sched_init (dump, sched_verbose, veclen)
10994 FILE *dump ATTRIBUTE_UNUSED;
10995 int sched_verbose ATTRIBUTE_UNUSED;
10996 int veclen ATTRIBUTE_UNUSED;
10998 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11001 /* Shift INSN to SLOT, and shift everything else down. */
11003 static void
11004 ix86_reorder_insn (insnp, slot)
11005 rtx *insnp, *slot;
11007 if (insnp != slot)
11009 rtx insn = *insnp;
11011 insnp[0] = insnp[1];
11012 while (++insnp != slot);
11013 *insnp = insn;
11017 static void
11018 ix86_sched_reorder_ppro (ready, e_ready)
11019 rtx *ready;
11020 rtx *e_ready;
11022 rtx decode[3];
11023 enum attr_ppro_uops cur_uops;
11024 int issued_this_cycle;
11025 rtx *insnp;
11026 int i;
11028 /* At this point .ppro.decode contains the state of the three
11029 decoders from last "cycle". That is, those insns that were
11030 actually independent. But here we're scheduling for the
11031 decoder, and we may find things that are decodable in the
11032 same cycle. */
11034 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11035 issued_this_cycle = 0;
11037 insnp = e_ready;
11038 cur_uops = ix86_safe_ppro_uops (*insnp);
11040 /* If the decoders are empty, and we have a complex insn at the
11041 head of the priority queue, let it issue without complaint. */
11042 if (decode[0] == NULL)
11044 if (cur_uops == PPRO_UOPS_MANY)
11046 decode[0] = *insnp;
11047 goto ppro_done;
11050 /* Otherwise, search for a 2-4 uop insn to issue. */
11051 while (cur_uops != PPRO_UOPS_FEW)
11053 if (insnp == ready)
11054 break;
11055 cur_uops = ix86_safe_ppro_uops (*--insnp);
11058 /* If so, move it to the head of the line. */
11059 if (cur_uops == PPRO_UOPS_FEW)
11060 ix86_reorder_insn (insnp, e_ready);
11062 /* Issue the head of the queue. */
11063 issued_this_cycle = 1;
11064 decode[0] = *e_ready--;
11067 /* Look for simple insns to fill in the other two slots. */
11068 for (i = 1; i < 3; ++i)
11069 if (decode[i] == NULL)
11071 if (ready > e_ready)
11072 goto ppro_done;
11074 insnp = e_ready;
11075 cur_uops = ix86_safe_ppro_uops (*insnp);
11076 while (cur_uops != PPRO_UOPS_ONE)
11078 if (insnp == ready)
11079 break;
11080 cur_uops = ix86_safe_ppro_uops (*--insnp);
11083 /* Found one. Move it to the head of the queue and issue it. */
11084 if (cur_uops == PPRO_UOPS_ONE)
11086 ix86_reorder_insn (insnp, e_ready);
11087 decode[i] = *e_ready--;
11088 issued_this_cycle++;
11089 continue;
11092 /* ??? Didn't find one. Ideally, here we would do a lazy split
11093 of 2-uop insns, issue one and queue the other. */
11096 ppro_done:
11097 if (issued_this_cycle == 0)
11098 issued_this_cycle = 1;
11099 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11102 /* We are about to begin issuing insns for this clock cycle.
11103 Override the default sort algorithm to better slot instructions. */
11104 static int
11105 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11106 FILE *dump ATTRIBUTE_UNUSED;
11107 int sched_verbose ATTRIBUTE_UNUSED;
11108 rtx *ready;
11109 int *n_readyp;
11110 int clock_var ATTRIBUTE_UNUSED;
11112 int n_ready = *n_readyp;
11113 rtx *e_ready = ready + n_ready - 1;
11115 /* Make sure to go ahead and initialize key items in
11116 ix86_sched_data if we are not going to bother trying to
11117 reorder the ready queue. */
11118 if (n_ready < 2)
11120 ix86_sched_data.ppro.issued_this_cycle = 1;
11121 goto out;
11124 switch (ix86_cpu)
11126 default:
11127 break;
11129 case PROCESSOR_PENTIUMPRO:
11130 ix86_sched_reorder_ppro (ready, e_ready);
11131 break;
11134 out:
11135 return ix86_issue_rate ();
11138 /* We are about to issue INSN. Return the number of insns left on the
11139 ready queue that can be issued this cycle. */
11141 static int
11142 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11143 FILE *dump;
11144 int sched_verbose;
11145 rtx insn;
11146 int can_issue_more;
11148 int i;
11149 switch (ix86_cpu)
11151 default:
11152 return can_issue_more - 1;
11154 case PROCESSOR_PENTIUMPRO:
11156 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11158 if (uops == PPRO_UOPS_MANY)
11160 if (sched_verbose)
11161 ix86_dump_ppro_packet (dump);
11162 ix86_sched_data.ppro.decode[0] = insn;
11163 ix86_sched_data.ppro.decode[1] = NULL;
11164 ix86_sched_data.ppro.decode[2] = NULL;
11165 if (sched_verbose)
11166 ix86_dump_ppro_packet (dump);
11167 ix86_sched_data.ppro.decode[0] = NULL;
11169 else if (uops == PPRO_UOPS_FEW)
11171 if (sched_verbose)
11172 ix86_dump_ppro_packet (dump);
11173 ix86_sched_data.ppro.decode[0] = insn;
11174 ix86_sched_data.ppro.decode[1] = NULL;
11175 ix86_sched_data.ppro.decode[2] = NULL;
11177 else
11179 for (i = 0; i < 3; ++i)
11180 if (ix86_sched_data.ppro.decode[i] == NULL)
11182 ix86_sched_data.ppro.decode[i] = insn;
11183 break;
11185 if (i == 3)
11186 abort ();
11187 if (i == 2)
11189 if (sched_verbose)
11190 ix86_dump_ppro_packet (dump);
11191 ix86_sched_data.ppro.decode[0] = NULL;
11192 ix86_sched_data.ppro.decode[1] = NULL;
11193 ix86_sched_data.ppro.decode[2] = NULL;
11197 return --ix86_sched_data.ppro.issued_this_cycle;
11201 static int
11202 ia32_use_dfa_pipeline_interface ()
11204 if (ix86_cpu == PROCESSOR_PENTIUM)
11205 return 1;
11206 return 0;
11209 /* How many alternative schedules to try. This should be as wide as the
11210 scheduling freedom in the DFA, but no wider. Making this value too
11211 large results in extra work for the scheduler. */
11213 static int
11214 ia32_multipass_dfa_lookahead ()
11216 if (ix86_cpu == PROCESSOR_PENTIUM)
11217 return 2;
11218 else
11219 return 0;
11223 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11224 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11225 appropriate. */
11227 void
11228 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11229 rtx insns;
11230 rtx dstref, srcref, dstreg, srcreg;
11232 rtx insn;
11234 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11235 if (INSN_P (insn))
11236 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11237 dstreg, srcreg);
11240 /* Subroutine of above to actually do the updating by recursively walking
11241 the rtx. */
11243 static void
11244 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11245 rtx x;
11246 rtx dstref, srcref, dstreg, srcreg;
11248 enum rtx_code code = GET_CODE (x);
11249 const char *format_ptr = GET_RTX_FORMAT (code);
11250 int i, j;
11252 if (code == MEM && XEXP (x, 0) == dstreg)
11253 MEM_COPY_ATTRIBUTES (x, dstref);
11254 else if (code == MEM && XEXP (x, 0) == srcreg)
11255 MEM_COPY_ATTRIBUTES (x, srcref);
11257 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11259 if (*format_ptr == 'e')
11260 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11261 dstreg, srcreg);
11262 else if (*format_ptr == 'E')
11263 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11264 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11265 dstreg, srcreg);
11269 /* Compute the alignment given to a constant that is being placed in memory.
11270 EXP is the constant and ALIGN is the alignment that the object would
11271 ordinarily have.
11272 The value of this function is used instead of that alignment to align
11273 the object. */
11276 ix86_constant_alignment (exp, align)
11277 tree exp;
11278 int align;
11280 if (TREE_CODE (exp) == REAL_CST)
11282 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11283 return 64;
11284 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11285 return 128;
11287 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11288 && align < 256)
11289 return 256;
11291 return align;
11294 /* Compute the alignment for a static variable.
11295 TYPE is the data type, and ALIGN is the alignment that
11296 the object would ordinarily have. The value of this function is used
11297 instead of that alignment to align the object. */
11300 ix86_data_alignment (type, align)
11301 tree type;
11302 int align;
11304 if (AGGREGATE_TYPE_P (type)
11305 && TYPE_SIZE (type)
11306 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11307 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11308 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11309 return 256;
11311 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11312 to a 16-byte boundary. */
11313 if (TARGET_64BIT)
11315 if (AGGREGATE_TYPE_P (type)
11316 && TYPE_SIZE (type)
11317 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11318 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11319 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11320 return 128;
11323 if (TREE_CODE (type) == ARRAY_TYPE)
11325 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11326 return 64;
11327 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11328 return 128;
11330 else if (TREE_CODE (type) == COMPLEX_TYPE)
11333 if (TYPE_MODE (type) == DCmode && align < 64)
11334 return 64;
11335 if (TYPE_MODE (type) == XCmode && align < 128)
11336 return 128;
11338 else if ((TREE_CODE (type) == RECORD_TYPE
11339 || TREE_CODE (type) == UNION_TYPE
11340 || TREE_CODE (type) == QUAL_UNION_TYPE)
11341 && TYPE_FIELDS (type))
11343 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11344 return 64;
11345 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11346 return 128;
11348 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11349 || TREE_CODE (type) == INTEGER_TYPE)
11351 if (TYPE_MODE (type) == DFmode && align < 64)
11352 return 64;
11353 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11354 return 128;
11357 return align;
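/* Illustrative worked examples, not part of GCC: how the rules above apply
   to a few hypothetical declarations (alignments in bits; sizes come from
   TYPE_SIZE, which is in bits):
     static char buf[64];   -> 256  (aggregate of at least 256 bits)
     static double d[2];    -> 128 on x86-64 (aggregate of 128 bits),
                                64 on ia32 (DFmode array element)
     static double x;       -> 64   (DFmode scalar)  */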
11360 /* Compute the alignment for a local variable.
11361 TYPE is the data type, and ALIGN is the alignment that
11362 the object would ordinarily have. The value of this macro is used
11363 instead of that alignment to align the object. */
11366 ix86_local_alignment (type, align)
11367 tree type;
11368 int align;
11370 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11371 to a 16-byte boundary. */
11372 if (TARGET_64BIT)
11374 if (AGGREGATE_TYPE_P (type)
11375 && TYPE_SIZE (type)
11376 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11377 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11378 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11379 return 128;
11381 if (TREE_CODE (type) == ARRAY_TYPE)
11383 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11384 return 64;
11385 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11386 return 128;
11388 else if (TREE_CODE (type) == COMPLEX_TYPE)
11390 if (TYPE_MODE (type) == DCmode && align < 64)
11391 return 64;
11392 if (TYPE_MODE (type) == XCmode && align < 128)
11393 return 128;
11395 else if ((TREE_CODE (type) == RECORD_TYPE
11396 || TREE_CODE (type) == UNION_TYPE
11397 || TREE_CODE (type) == QUAL_UNION_TYPE)
11398 && TYPE_FIELDS (type))
11400 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11401 return 64;
11402 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11403 return 128;
11405 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11406 || TREE_CODE (type) == INTEGER_TYPE)
11409 if (TYPE_MODE (type) == DFmode && align < 64)
11410 return 64;
11411 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11412 return 128;
11414 return align;
11417 /* Emit RTL insns to initialize the variable parts of a trampoline.
11418 FNADDR is an RTX for the address of the function's pure code.
11419 CXT is an RTX for the static chain value for the function. */
11420 void
11421 x86_initialize_trampoline (tramp, fnaddr, cxt)
11422 rtx tramp, fnaddr, cxt;
11424 if (!TARGET_64BIT)
11426 /* Compute offset from the end of the jmp to the target function. */
11427 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11428 plus_constant (tramp, 10),
11429 NULL_RTX, 1, OPTAB_DIRECT);
11430 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11431 gen_int_mode (0xb9, QImode));
11432 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11433 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11434 gen_int_mode (0xe9, QImode));
11435 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11437 else
11439 int offset = 0;
11440 /* Try to load the address using the shorter movl instead of movabs.
11441 We may want to support movq for kernel mode, but the kernel does not
11442 use trampolines at the moment. */
11443 if (x86_64_zero_extended_value (fnaddr))
11445 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11446 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11447 gen_int_mode (0xbb41, HImode));
11448 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11449 gen_lowpart (SImode, fnaddr));
11450 offset += 6;
11452 else
11454 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11455 gen_int_mode (0xbb49, HImode));
11456 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11457 fnaddr);
11458 offset += 10;
11460 /* Load static chain using movabs to r10. */
11461 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11462 gen_int_mode (0xba49, HImode));
11463 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11464 cxt);
11465 offset += 10;
11466 /* Jump to r11. */
11467 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11468 gen_int_mode (0xff49, HImode));
11469 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11470 gen_int_mode (0xe3, QImode));
11471 offset += 3;
11472 if (offset > TRAMPOLINE_SIZE)
11473 abort ();
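/* Illustrative byte layout, not part of GCC: the trampolines built above,
   assuming the encodings implied by the immediates stored with the
   little-endian HImode/QImode moves.

   ia32 (10 bytes):
     b9 <cxt:4>          movl   $cxt, %ecx
     e9 <disp:4>         jmp    fnaddr        (disp == fnaddr - (tramp + 10))

   x86-64, movabs form (23 bytes):
     49 bb <fnaddr:8>    movabs $fnaddr, %r11
     49 ba <cxt:8>       movabs $cxt, %r10
     49 ff e3            jmp    *%r11

   When FNADDR is a zero-extended 32-bit value the first instruction is
   instead the 6-byte form
     41 bb <fnaddr:4>    movl   $fnaddr, %r11d  */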
11477 #define def_builtin(MASK, NAME, TYPE, CODE) \
11478 do { \
11479 if ((MASK) & target_flags) \
11480 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11481 NULL, NULL_TREE); \
11482 } while (0)
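/* Illustrative sketch, not part of GCC's own source below: def_builtin is
   meant to be invoked once per builtin while registering the MMX/SSE
   builtins, guarded by the target mask, e.g. (the type node name here is
   hypothetical):

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);  */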
11484 struct builtin_description
11486 const unsigned int mask;
11487 const enum insn_code icode;
11488 const char *const name;
11489 const enum ix86_builtins code;
11490 const enum rtx_code comparison;
11491 const unsigned int flag;
11494 /* Used for builtins that are enabled both by -msse and -msse2. */
11495 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11497 static const struct builtin_description bdesc_comi[] =
11499 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11500 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11501 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11502 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11503 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11504 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11505 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11506 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11507 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11508 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11509 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11510 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11511 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11512 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11513 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11514 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11515 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11516 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11517 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11518 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11519 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11520 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11521 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11522 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11525 static const struct builtin_description bdesc_2arg[] =
11527 /* SSE */
11528 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11529 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11530 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11531 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11532 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11533 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11534 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11535 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11537 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11538 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11539 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11540 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11541 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11542 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11543 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11544 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11545 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11546 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11547 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11548 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11549 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11550 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11551 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11552 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11553 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11554 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11555 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11556 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11557 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11558 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11559 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11560 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11562 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11563 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11564 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11565 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11567 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11568 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11569 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11570 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11571 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11573 /* MMX */
11574 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11575 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11576 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11577 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11578 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11579 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11581 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11582 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11583 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11584 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11585 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11586 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11587 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11588 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11590 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11591 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11592 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11594 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11595 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11596 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11597 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11600 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11602 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11603 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11604 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11605 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11606 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11607 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11609 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11610 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11611 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11612 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11614 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11615 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11616 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11617 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11618 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11619 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11621 /* Special. */
11622 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11623 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11624 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11626 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11627 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11629 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11630 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11631 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11632 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11633 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11634 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11636 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11637 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11638 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11639 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11640 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11641 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11643 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11644 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11645 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11646 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11648 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11649 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11651 /* SSE2 */
11652 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11653 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11654 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11655 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11656 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11657 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11658 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11659 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11661 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11662 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11663 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11664 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11665 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11666 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11667 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11668 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11669 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11670 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11671 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11672 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11673 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11674 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11675 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11676 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11677 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11678 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11679 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11680 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11681 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11682 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11683 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11684 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11686 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11687 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11688 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11689 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11691 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11692 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11693 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11694 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11696 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11697 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11698 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11700 /* SSE2 MMX */
11701 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11702 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11703 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11704 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11705 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11706 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11707 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11708 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11710 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11711 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11712 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11713 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11714 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11715 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11716 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11717 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11719 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11720 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11721 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11722 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11724 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11725 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11726 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11727 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11729 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11730 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11732 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11733 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11734 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11735 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11736 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11737 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11739 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11740 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11741 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11742 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11744 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11745 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11746 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11747 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11748 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11749 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11751 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11752 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11755 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11756 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11758 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11759 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11760 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11761 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11762 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11763 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11765 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11766 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11767 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11768 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11769 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11770 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11772 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11773 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11774 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11775 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11777 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11779 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11780 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11781 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11784 static const struct builtin_description bdesc_1arg[] =
11786 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11787 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11789 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11790 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11791 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11793 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11794 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11795 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11796 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11799 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11800 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11802 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11804 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11805 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11807 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11809 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11810 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11813 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11815 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11818 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11819 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11820 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
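/* A note on the bdesc_* tables above: each entry is, in order, the target
   option mask, the insn code, the builtin's name (0 when the builtin is
   instead registered by hand in ix86_init_mmx_sse_builtins below), the
   IX86_BUILTIN_* code, the comparison code and the swap-operands flag,
   matching the d->mask, d->icode, d->name, d->code, d->comparison and
   d->flag accesses in the routines that follow.  */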
11823 void
11824 ix86_init_builtins ()
11826 if (TARGET_MMX)
11827 ix86_init_mmx_sse_builtins ();
11830 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11831 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
11832 builtins. */
11833 static void
11834 ix86_init_mmx_sse_builtins ()
11836 const struct builtin_description * d;
11837 size_t i;
11839 tree pchar_type_node = build_pointer_type (char_type_node);
11840 tree pfloat_type_node = build_pointer_type (float_type_node);
11841 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11842 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11843 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11845 /* Comparisons. */
11846 tree int_ftype_v4sf_v4sf
11847 = build_function_type_list (integer_type_node,
11848 V4SF_type_node, V4SF_type_node, NULL_TREE);
11849 tree v4si_ftype_v4sf_v4sf
11850 = build_function_type_list (V4SI_type_node,
11851 V4SF_type_node, V4SF_type_node, NULL_TREE);
11852 /* MMX/SSE/integer conversions. */
11853 tree int_ftype_v4sf
11854 = build_function_type_list (integer_type_node,
11855 V4SF_type_node, NULL_TREE);
11856 tree int_ftype_v8qi
11857 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11858 tree v4sf_ftype_v4sf_int
11859 = build_function_type_list (V4SF_type_node,
11860 V4SF_type_node, integer_type_node, NULL_TREE);
11861 tree v4sf_ftype_v4sf_v2si
11862 = build_function_type_list (V4SF_type_node,
11863 V4SF_type_node, V2SI_type_node, NULL_TREE);
11864 tree int_ftype_v4hi_int
11865 = build_function_type_list (integer_type_node,
11866 V4HI_type_node, integer_type_node, NULL_TREE);
11867 tree v4hi_ftype_v4hi_int_int
11868 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11869 integer_type_node, integer_type_node,
11870 NULL_TREE);
11871 /* Miscellaneous. */
11872 tree v8qi_ftype_v4hi_v4hi
11873 = build_function_type_list (V8QI_type_node,
11874 V4HI_type_node, V4HI_type_node, NULL_TREE);
11875 tree v4hi_ftype_v2si_v2si
11876 = build_function_type_list (V4HI_type_node,
11877 V2SI_type_node, V2SI_type_node, NULL_TREE);
11878 tree v4sf_ftype_v4sf_v4sf_int
11879 = build_function_type_list (V4SF_type_node,
11880 V4SF_type_node, V4SF_type_node,
11881 integer_type_node, NULL_TREE);
11882 tree v2si_ftype_v4hi_v4hi
11883 = build_function_type_list (V2SI_type_node,
11884 V4HI_type_node, V4HI_type_node, NULL_TREE);
11885 tree v4hi_ftype_v4hi_int
11886 = build_function_type_list (V4HI_type_node,
11887 V4HI_type_node, integer_type_node, NULL_TREE);
11888 tree v4hi_ftype_v4hi_di
11889 = build_function_type_list (V4HI_type_node,
11890 V4HI_type_node, long_long_unsigned_type_node,
11891 NULL_TREE);
11892 tree v2si_ftype_v2si_di
11893 = build_function_type_list (V2SI_type_node,
11894 V2SI_type_node, long_long_unsigned_type_node,
11895 NULL_TREE);
11896 tree void_ftype_void
11897 = build_function_type (void_type_node, void_list_node);
11898 tree void_ftype_unsigned
11899 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
11900 tree unsigned_ftype_void
11901 = build_function_type (unsigned_type_node, void_list_node);
11902 tree di_ftype_void
11903 = build_function_type (long_long_unsigned_type_node, void_list_node);
11904 tree v4sf_ftype_void
11905 = build_function_type (V4SF_type_node, void_list_node);
11906 tree v2si_ftype_v4sf
11907 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
11908 /* Loads/stores. */
11909 tree void_ftype_v8qi_v8qi_pchar
11910 = build_function_type_list (void_type_node,
11911 V8QI_type_node, V8QI_type_node,
11912 pchar_type_node, NULL_TREE);
11913 tree v4sf_ftype_pfloat
11914 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
11915 /* @@@ the type is bogus */
11916 tree v4sf_ftype_v4sf_pv2si
11917 = build_function_type_list (V4SF_type_node,
11918 V4SF_type_node, pv2di_type_node, NULL_TREE);
11919 tree void_ftype_pv2si_v4sf
11920 = build_function_type_list (void_type_node,
11921 pv2di_type_node, V4SF_type_node, NULL_TREE);
11922 tree void_ftype_pfloat_v4sf
11923 = build_function_type_list (void_type_node,
11924 pfloat_type_node, V4SF_type_node, NULL_TREE);
11925 tree void_ftype_pdi_di
11926 = build_function_type_list (void_type_node,
11927 pdi_type_node, long_long_unsigned_type_node,
11928 NULL_TREE);
11929 tree void_ftype_pv2di_v2di
11930 = build_function_type_list (void_type_node,
11931 pv2di_type_node, V2DI_type_node, NULL_TREE);
11932 /* Normal vector unops. */
11933 tree v4sf_ftype_v4sf
11934 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
11936 /* Normal vector binops. */
11937 tree v4sf_ftype_v4sf_v4sf
11938 = build_function_type_list (V4SF_type_node,
11939 V4SF_type_node, V4SF_type_node, NULL_TREE);
11940 tree v8qi_ftype_v8qi_v8qi
11941 = build_function_type_list (V8QI_type_node,
11942 V8QI_type_node, V8QI_type_node, NULL_TREE);
11943 tree v4hi_ftype_v4hi_v4hi
11944 = build_function_type_list (V4HI_type_node,
11945 V4HI_type_node, V4HI_type_node, NULL_TREE);
11946 tree v2si_ftype_v2si_v2si
11947 = build_function_type_list (V2SI_type_node,
11948 V2SI_type_node, V2SI_type_node, NULL_TREE);
11949 tree di_ftype_di_di
11950 = build_function_type_list (long_long_unsigned_type_node,
11951 long_long_unsigned_type_node,
11952 long_long_unsigned_type_node, NULL_TREE);
11954 tree v2si_ftype_v2sf
11955 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
11956 tree v2sf_ftype_v2si
11957 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
11958 tree v2si_ftype_v2si
11959 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
11960 tree v2sf_ftype_v2sf
11961 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
11962 tree v2sf_ftype_v2sf_v2sf
11963 = build_function_type_list (V2SF_type_node,
11964 V2SF_type_node, V2SF_type_node, NULL_TREE);
11965 tree v2si_ftype_v2sf_v2sf
11966 = build_function_type_list (V2SI_type_node,
11967 V2SF_type_node, V2SF_type_node, NULL_TREE);
11968 tree pint_type_node = build_pointer_type (integer_type_node);
11969 tree pdouble_type_node = build_pointer_type (double_type_node);
11970 tree int_ftype_v2df_v2df
11971 = build_function_type_list (integer_type_node,
11972 V2DF_type_node, V2DF_type_node, NULL_TREE);
11974 tree ti_ftype_void
11975 = build_function_type (intTI_type_node, void_list_node);
11976 tree ti_ftype_ti_ti
11977 = build_function_type_list (intTI_type_node,
11978 intTI_type_node, intTI_type_node, NULL_TREE);
11979 tree void_ftype_pvoid
11980 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
11981 tree v2di_ftype_di
11982 = build_function_type_list (V2DI_type_node,
11983 long_long_unsigned_type_node, NULL_TREE);
11984 tree v4sf_ftype_v4si
11985 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
11986 tree v4si_ftype_v4sf
11987 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
11988 tree v2df_ftype_v4si
11989 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
11990 tree v4si_ftype_v2df
11991 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
11992 tree v2si_ftype_v2df
11993 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
11994 tree v4sf_ftype_v2df
11995 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
11996 tree v2df_ftype_v2si
11997 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
11998 tree v2df_ftype_v4sf
11999 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12000 tree int_ftype_v2df
12001 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12002 tree v2df_ftype_v2df_int
12003 = build_function_type_list (V2DF_type_node,
12004 V2DF_type_node, integer_type_node, NULL_TREE);
12005 tree v4sf_ftype_v4sf_v2df
12006 = build_function_type_list (V4SF_type_node,
12007 V4SF_type_node, V2DF_type_node, NULL_TREE);
12008 tree v2df_ftype_v2df_v4sf
12009 = build_function_type_list (V2DF_type_node,
12010 V2DF_type_node, V4SF_type_node, NULL_TREE);
12011 tree v2df_ftype_v2df_v2df_int
12012 = build_function_type_list (V2DF_type_node,
12013 V2DF_type_node, V2DF_type_node,
12014 integer_type_node,
12015 NULL_TREE);
12016 tree v2df_ftype_v2df_pv2si
12017 = build_function_type_list (V2DF_type_node,
12018 V2DF_type_node, pv2si_type_node, NULL_TREE);
12019 tree void_ftype_pv2si_v2df
12020 = build_function_type_list (void_type_node,
12021 pv2si_type_node, V2DF_type_node, NULL_TREE);
12022 tree void_ftype_pdouble_v2df
12023 = build_function_type_list (void_type_node,
12024 pdouble_type_node, V2DF_type_node, NULL_TREE);
12025 tree void_ftype_pint_int
12026 = build_function_type_list (void_type_node,
12027 pint_type_node, integer_type_node, NULL_TREE);
12028 tree void_ftype_v16qi_v16qi_pchar
12029 = build_function_type_list (void_type_node,
12030 V16QI_type_node, V16QI_type_node,
12031 pchar_type_node, NULL_TREE);
12032 tree v2df_ftype_pdouble
12033 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12034 tree v2df_ftype_v2df_v2df
12035 = build_function_type_list (V2DF_type_node,
12036 V2DF_type_node, V2DF_type_node, NULL_TREE);
12037 tree v16qi_ftype_v16qi_v16qi
12038 = build_function_type_list (V16QI_type_node,
12039 V16QI_type_node, V16QI_type_node, NULL_TREE);
12040 tree v8hi_ftype_v8hi_v8hi
12041 = build_function_type_list (V8HI_type_node,
12042 V8HI_type_node, V8HI_type_node, NULL_TREE);
12043 tree v4si_ftype_v4si_v4si
12044 = build_function_type_list (V4SI_type_node,
12045 V4SI_type_node, V4SI_type_node, NULL_TREE);
12046 tree v2di_ftype_v2di_v2di
12047 = build_function_type_list (V2DI_type_node,
12048 V2DI_type_node, V2DI_type_node, NULL_TREE);
12049 tree v2di_ftype_v2df_v2df
12050 = build_function_type_list (V2DI_type_node,
12051 V2DF_type_node, V2DF_type_node, NULL_TREE);
12052 tree v2df_ftype_v2df
12053 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12054 tree v2df_ftype_double
12055 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12056 tree v2df_ftype_double_double
12057 = build_function_type_list (V2DF_type_node,
12058 double_type_node, double_type_node, NULL_TREE);
12059 tree int_ftype_v8hi_int
12060 = build_function_type_list (integer_type_node,
12061 V8HI_type_node, integer_type_node, NULL_TREE);
12062 tree v8hi_ftype_v8hi_int_int
12063 = build_function_type_list (V8HI_type_node,
12064 V8HI_type_node, integer_type_node,
12065 integer_type_node, NULL_TREE);
12066 tree v2di_ftype_v2di_int
12067 = build_function_type_list (V2DI_type_node,
12068 V2DI_type_node, integer_type_node, NULL_TREE);
12069 tree v4si_ftype_v4si_int
12070 = build_function_type_list (V4SI_type_node,
12071 V4SI_type_node, integer_type_node, NULL_TREE);
12072 tree v8hi_ftype_v8hi_int
12073 = build_function_type_list (V8HI_type_node,
12074 V8HI_type_node, integer_type_node, NULL_TREE);
12075 tree v8hi_ftype_v8hi_v2di
12076 = build_function_type_list (V8HI_type_node,
12077 V8HI_type_node, V2DI_type_node, NULL_TREE);
12078 tree v4si_ftype_v4si_v2di
12079 = build_function_type_list (V4SI_type_node,
12080 V4SI_type_node, V2DI_type_node, NULL_TREE);
12081 tree v4si_ftype_v8hi_v8hi
12082 = build_function_type_list (V4SI_type_node,
12083 V8HI_type_node, V8HI_type_node, NULL_TREE);
12084 tree di_ftype_v8qi_v8qi
12085 = build_function_type_list (long_long_unsigned_type_node,
12086 V8QI_type_node, V8QI_type_node, NULL_TREE);
12087 tree v2di_ftype_v16qi_v16qi
12088 = build_function_type_list (V2DI_type_node,
12089 V16QI_type_node, V16QI_type_node, NULL_TREE);
12090 tree int_ftype_v16qi
12091 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12093 /* Add all builtins that are more or less simple operations on two
12094 operands. */
12095 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12097 /* Use one of the operands; the target can have a different mode for
12098 mask-generating compares. */
12099 enum machine_mode mode;
12100 tree type;
12102 if (d->name == 0)
12103 continue;
12104 mode = insn_data[d->icode].operand[1].mode;
12106 switch (mode)
12108 case V16QImode:
12109 type = v16qi_ftype_v16qi_v16qi;
12110 break;
12111 case V8HImode:
12112 type = v8hi_ftype_v8hi_v8hi;
12113 break;
12114 case V4SImode:
12115 type = v4si_ftype_v4si_v4si;
12116 break;
12117 case V2DImode:
12118 type = v2di_ftype_v2di_v2di;
12119 break;
12120 case V2DFmode:
12121 type = v2df_ftype_v2df_v2df;
12122 break;
12123 case TImode:
12124 type = ti_ftype_ti_ti;
12125 break;
12126 case V4SFmode:
12127 type = v4sf_ftype_v4sf_v4sf;
12128 break;
12129 case V8QImode:
12130 type = v8qi_ftype_v8qi_v8qi;
12131 break;
12132 case V4HImode:
12133 type = v4hi_ftype_v4hi_v4hi;
12134 break;
12135 case V2SImode:
12136 type = v2si_ftype_v2si_v2si;
12137 break;
12138 case DImode:
12139 type = di_ftype_di_di;
12140 break;
12142 default:
12143 abort ();
12146 /* Override for comparisons. */
12147 if (d->icode == CODE_FOR_maskcmpv4sf3
12148 || d->icode == CODE_FOR_maskncmpv4sf3
12149 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12150 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12151 type = v4si_ftype_v4sf_v4sf;
12153 if (d->icode == CODE_FOR_maskcmpv2df3
12154 || d->icode == CODE_FOR_maskncmpv2df3
12155 || d->icode == CODE_FOR_vmmaskcmpv2df3
12156 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12157 type = v2di_ftype_v2df_v2df;
12159 def_builtin (d->mask, d->name, type, d->code);
12162 /* Add the remaining MMX insns with somewhat more complicated types. */
12163 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12164 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12165 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12166 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12167 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12168 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12169 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12171 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12172 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12173 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12175 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12176 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12178 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12179 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12181 /* comi/ucomi insns. */
12182 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12183 if (d->mask == MASK_SSE2)
12184 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12185 else
12186 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12188 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12189 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12190 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12192 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12193 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12194 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12195 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12196 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12197 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12199 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12200 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12201 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12202 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12204 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12205 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12207 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12209 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12210 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12211 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12212 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12213 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12214 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12216 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12217 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12218 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12219 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12221 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12222 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12223 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12224 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12226 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12228 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12230 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12231 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12232 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12233 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12234 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12235 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12237 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12239 /* Original 3DNow! */
12240 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12241 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12242 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12243 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12244 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12245 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12256 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12257 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12258 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12259 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12261 /* 3DNow! extension as used in the Athlon CPU. */
12262 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12263 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12264 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12265 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12266 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12267 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12269 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12271 /* SSE2 */
12272 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12273 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12275 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12276 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12278 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12279 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12280 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12281 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12282 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12283 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12285 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12286 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12287 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12288 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12290 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12291 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12292 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12293 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12294 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12296 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12297 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12298 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12299 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12301 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12302 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12304 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12306 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12307 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12309 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12310 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12311 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12312 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12313 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12315 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12317 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12318 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12320 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12321 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12322 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12324 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12325 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12326 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12328 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12329 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12330 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12331 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12332 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12333 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12334 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12336 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12337 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12338 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12340 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12341 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12342 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12344 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12345 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12346 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12348 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12349 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12351 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12352 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12353 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12355 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12356 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12357 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12359 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12360 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12362 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
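/* A small usage sketch (example names only): with -msse2, user code can
   call the builtins registered above directly, e.g.

       typedef int __v4si __attribute__ ((mode (V4SI)));

       __v4si
       add4 (__v4si a, __v4si b)
       {
         return __builtin_ia32_paddd128 (a, b);
       }

   such a call is routed through ix86_expand_builtin below, which picks up
   the insn code from the bdesc_2arg table.  */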
12365 /* Errors in the source file can cause expand_expr to return const0_rtx
12366 where we expect a vector. To avoid crashing, use one of the vector
12367 clear instructions. */
12368 static rtx
12369 safe_vector_operand (x, mode)
12370 rtx x;
12371 enum machine_mode mode;
12373 if (x != const0_rtx)
12374 return x;
12375 x = gen_reg_rtx (mode);
12377 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12378 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12379 : gen_rtx_SUBREG (DImode, x, 0)));
12380 else
12381 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12382 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12383 return x;
12386 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12388 static rtx
12389 ix86_expand_binop_builtin (icode, arglist, target)
12390 enum insn_code icode;
12391 tree arglist;
12392 rtx target;
12394 rtx pat;
12395 tree arg0 = TREE_VALUE (arglist);
12396 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12397 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12398 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12399 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12400 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12401 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12403 if (VECTOR_MODE_P (mode0))
12404 op0 = safe_vector_operand (op0, mode0);
12405 if (VECTOR_MODE_P (mode1))
12406 op1 = safe_vector_operand (op1, mode1);
12408 if (! target
12409 || GET_MODE (target) != tmode
12410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12411 target = gen_reg_rtx (tmode);
12413 /* In case the insn wants input operands in modes different from
12414 the result, abort. */
12415 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12416 abort ();
12418 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12419 op0 = copy_to_mode_reg (mode0, op0);
12420 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12421 op1 = copy_to_mode_reg (mode1, op1);
12423 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12424 yet one of the two must not be a memory. This is normally enforced
12425 by expanders, but we didn't bother to create one here. */
12426 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12427 op0 = copy_to_mode_reg (mode0, op0);
12429 pat = GEN_FCN (icode) (target, op0, op1);
12430 if (! pat)
12431 return 0;
12432 emit_insn (pat);
12433 return target;
12436 /* In type_for_mode we restrict the ability to create TImode types
12437 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12438 to have a V4SFmode signature. Convert them in-place to TImode. */
12440 static rtx
12441 ix86_expand_timode_binop_builtin (icode, arglist, target)
12442 enum insn_code icode;
12443 tree arglist;
12444 rtx target;
12446 rtx pat;
12447 tree arg0 = TREE_VALUE (arglist);
12448 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12449 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12450 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12452 op0 = gen_lowpart (TImode, op0);
12453 op1 = gen_lowpart (TImode, op1);
12454 target = gen_reg_rtx (TImode);
12456 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12457 op0 = copy_to_mode_reg (TImode, op0);
12458 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12459 op1 = copy_to_mode_reg (TImode, op1);
12461 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12462 yet one of the two must not be a memory. This is normally enforced
12463 by expanders, but we didn't bother to create one here. */
12464 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12465 op0 = copy_to_mode_reg (TImode, op0);
12467 pat = GEN_FCN (icode) (target, op0, op1);
12468 if (! pat)
12469 return 0;
12470 emit_insn (pat);
12472 return gen_lowpart (V4SFmode, target);
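/* For instance, __builtin_ia32_andps is declared above with a
   v4sf (v4sf, v4sf) signature but is dispatched to this helper with
   CODE_FOR_sse_andti3 (see the IX86_BUILTIN_ANDPS case in
   ix86_expand_builtin below): both operands are punned to TImode with
   gen_lowpart, the TImode logical insn is emitted, and the result is
   punned back to V4SFmode for the caller.  */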
12475 /* Subroutine of ix86_expand_builtin to take care of stores. */
12477 static rtx
12478 ix86_expand_store_builtin (icode, arglist)
12479 enum insn_code icode;
12480 tree arglist;
12482 rtx pat;
12483 tree arg0 = TREE_VALUE (arglist);
12484 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12485 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12486 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12487 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12488 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12490 if (VECTOR_MODE_P (mode1))
12491 op1 = safe_vector_operand (op1, mode1);
12493 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12495 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12496 op1 = copy_to_mode_reg (mode1, op1);
12498 pat = GEN_FCN (icode) (op0, op1);
12499 if (pat)
12500 emit_insn (pat);
12501 return 0;
12504 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12506 static rtx
12507 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12508 enum insn_code icode;
12509 tree arglist;
12510 rtx target;
12511 int do_load;
12513 rtx pat;
12514 tree arg0 = TREE_VALUE (arglist);
12515 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12516 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12517 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12519 if (! target
12520 || GET_MODE (target) != tmode
12521 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12522 target = gen_reg_rtx (tmode);
12523 if (do_load)
12524 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12525 else
12527 if (VECTOR_MODE_P (mode0))
12528 op0 = safe_vector_operand (op0, mode0);
12530 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12531 op0 = copy_to_mode_reg (mode0, op0);
12534 pat = GEN_FCN (icode) (target, op0);
12535 if (! pat)
12536 return 0;
12537 emit_insn (pat);
12538 return target;
12541 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12542 sqrtss, rsqrtss, rcpss. */
12544 static rtx
12545 ix86_expand_unop1_builtin (icode, arglist, target)
12546 enum insn_code icode;
12547 tree arglist;
12548 rtx target;
12550 rtx pat;
12551 tree arg0 = TREE_VALUE (arglist);
12552 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12553 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12554 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12556 if (! target
12557 || GET_MODE (target) != tmode
12558 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12559 target = gen_reg_rtx (tmode);
12561 if (VECTOR_MODE_P (mode0))
12562 op0 = safe_vector_operand (op0, mode0);
12564 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12565 op0 = copy_to_mode_reg (mode0, op0);
12567 op1 = op0;
12568 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12569 op1 = copy_to_mode_reg (mode0, op1);
12571 pat = GEN_FCN (icode) (target, op0, op1);
12572 if (! pat)
12573 return 0;
12574 emit_insn (pat);
12575 return target;
12578 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12580 static rtx
12581 ix86_expand_sse_compare (d, arglist, target)
12582 const struct builtin_description *d;
12583 tree arglist;
12584 rtx target;
12586 rtx pat;
12587 tree arg0 = TREE_VALUE (arglist);
12588 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12589 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12590 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12591 rtx op2;
12592 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12593 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12594 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12595 enum rtx_code comparison = d->comparison;
12597 if (VECTOR_MODE_P (mode0))
12598 op0 = safe_vector_operand (op0, mode0);
12599 if (VECTOR_MODE_P (mode1))
12600 op1 = safe_vector_operand (op1, mode1);
12602 /* Swap operands if we have a comparison that isn't available in
12603 hardware. */
12604 if (d->flag)
12606 rtx tmp = gen_reg_rtx (mode1);
12607 emit_move_insn (tmp, op1);
12608 op1 = op0;
12609 op0 = tmp;
12612 if (! target
12613 || GET_MODE (target) != tmode
12614 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12615 target = gen_reg_rtx (tmode);
12617 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12618 op0 = copy_to_mode_reg (mode0, op0);
12619 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12620 op1 = copy_to_mode_reg (mode1, op1);
12622 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12623 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12624 if (! pat)
12625 return 0;
12626 emit_insn (pat);
12627 return target;
12630 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12632 static rtx
12633 ix86_expand_sse_comi (d, arglist, target)
12634 const struct builtin_description *d;
12635 tree arglist;
12636 rtx target;
12638 rtx pat;
12639 tree arg0 = TREE_VALUE (arglist);
12640 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12641 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12642 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12643 rtx op2;
12644 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12645 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12646 enum rtx_code comparison = d->comparison;
12648 if (VECTOR_MODE_P (mode0))
12649 op0 = safe_vector_operand (op0, mode0);
12650 if (VECTOR_MODE_P (mode1))
12651 op1 = safe_vector_operand (op1, mode1);
12653 /* Swap operands if we have a comparison that isn't available in
12654 hardware. */
12655 if (d->flag)
12657 rtx tmp = op1;
12658 op1 = op0;
12659 op0 = tmp;
12662 target = gen_reg_rtx (SImode);
12663 emit_move_insn (target, const0_rtx);
12664 target = gen_rtx_SUBREG (QImode, target, 0);
12666 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12667 op0 = copy_to_mode_reg (mode0, op0);
12668 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12669 op1 = copy_to_mode_reg (mode1, op1);
12671 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12672 pat = GEN_FCN (d->icode) (op0, op1, op2);
12673 if (! pat)
12674 return 0;
12675 emit_insn (pat);
12676 emit_insn (gen_rtx_SET (VOIDmode,
12677 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12678 gen_rtx_fmt_ee (comparison, QImode,
12679 gen_rtx_REG (CCmode, FLAGS_REG),
12680 const0_rtx)));
12682 return SUBREG_REG (target);
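/* Roughly, the sequence emitted above for a comi/ucomi builtin is

       (set (reg:SI tmp) (const_int 0))
       ... the comi pattern itself, setting the flags register ...
       (set (strict_low_part (subreg:QI (reg:SI tmp) 0))
            (comparison:QI (reg:CC flags) (const_int 0)))

   so the caller receives the SImode pseudo whose low byte holds the 0/1
   result and whose upper bits were cleared by the initial move ("tmp" and
   "comparison" stand for the fresh pseudo and d->comparison here).  */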
12685 /* Expand an expression EXP that calls a built-in function,
12686 with result going to TARGET if that's convenient
12687 (and in mode MODE if that's convenient).
12688 SUBTARGET may be used as the target for computing one of EXP's operands.
12689 IGNORE is nonzero if the value is to be ignored. */
12691 rtx
12692 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12693 tree exp;
12694 rtx target;
12695 rtx subtarget ATTRIBUTE_UNUSED;
12696 enum machine_mode mode ATTRIBUTE_UNUSED;
12697 int ignore ATTRIBUTE_UNUSED;
12699 const struct builtin_description *d;
12700 size_t i;
12701 enum insn_code icode;
12702 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12703 tree arglist = TREE_OPERAND (exp, 1);
12704 tree arg0, arg1, arg2;
12705 rtx op0, op1, op2, pat;
12706 enum machine_mode tmode, mode0, mode1, mode2;
12707 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12709 switch (fcode)
12711 case IX86_BUILTIN_EMMS:
12712 emit_insn (gen_emms ());
12713 return 0;
12715 case IX86_BUILTIN_SFENCE:
12716 emit_insn (gen_sfence ());
12717 return 0;
12719 case IX86_BUILTIN_PEXTRW:
12720 case IX86_BUILTIN_PEXTRW128:
12721 icode = (fcode == IX86_BUILTIN_PEXTRW
12722 ? CODE_FOR_mmx_pextrw
12723 : CODE_FOR_sse2_pextrw);
12724 arg0 = TREE_VALUE (arglist);
12725 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12726 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12727 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12728 tmode = insn_data[icode].operand[0].mode;
12729 mode0 = insn_data[icode].operand[1].mode;
12730 mode1 = insn_data[icode].operand[2].mode;
12732 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12733 op0 = copy_to_mode_reg (mode0, op0);
12734 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12736 /* @@@ better error message */
12737 error ("selector must be an immediate");
12738 return gen_reg_rtx (tmode);
12740 if (target == 0
12741 || GET_MODE (target) != tmode
12742 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12743 target = gen_reg_rtx (tmode);
12744 pat = GEN_FCN (icode) (target, op0, op1);
12745 if (! pat)
12746 return 0;
12747 emit_insn (pat);
12748 return target;
12750 case IX86_BUILTIN_PINSRW:
12751 case IX86_BUILTIN_PINSRW128:
12752 icode = (fcode == IX86_BUILTIN_PINSRW
12753 ? CODE_FOR_mmx_pinsrw
12754 : CODE_FOR_sse2_pinsrw);
12755 arg0 = TREE_VALUE (arglist);
12756 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12757 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12758 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12759 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12760 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12761 tmode = insn_data[icode].operand[0].mode;
12762 mode0 = insn_data[icode].operand[1].mode;
12763 mode1 = insn_data[icode].operand[2].mode;
12764 mode2 = insn_data[icode].operand[3].mode;
12766 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12767 op0 = copy_to_mode_reg (mode0, op0);
12768 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12769 op1 = copy_to_mode_reg (mode1, op1);
12770 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12772 /* @@@ better error message */
12773 error ("selector must be an immediate");
12774 return const0_rtx;
12776 if (target == 0
12777 || GET_MODE (target) != tmode
12778 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12779 target = gen_reg_rtx (tmode);
12780 pat = GEN_FCN (icode) (target, op0, op1, op2);
12781 if (! pat)
12782 return 0;
12783 emit_insn (pat);
12784 return target;
12786 case IX86_BUILTIN_MASKMOVQ:
12787 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12788 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12789 : CODE_FOR_sse2_maskmovdqu);
12790 /* Note the arg order is different from the operand order. */
12791 arg1 = TREE_VALUE (arglist);
12792 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12793 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12794 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12795 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12796 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12797 mode0 = insn_data[icode].operand[0].mode;
12798 mode1 = insn_data[icode].operand[1].mode;
12799 mode2 = insn_data[icode].operand[2].mode;
12801 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12802 op0 = copy_to_mode_reg (mode0, op0);
12803 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12804 op1 = copy_to_mode_reg (mode1, op1);
12805 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12806 op2 = copy_to_mode_reg (mode2, op2);
12807 pat = GEN_FCN (icode) (op0, op1, op2);
12808 if (! pat)
12809 return 0;
12810 emit_insn (pat);
12811 return 0;
12813 case IX86_BUILTIN_SQRTSS:
12814 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12815 case IX86_BUILTIN_RSQRTSS:
12816 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12817 case IX86_BUILTIN_RCPSS:
12818 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12820 case IX86_BUILTIN_ANDPS:
12821 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12822 arglist, target);
12823 case IX86_BUILTIN_ANDNPS:
12824 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12825 arglist, target);
12826 case IX86_BUILTIN_ORPS:
12827 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12828 arglist, target);
12829 case IX86_BUILTIN_XORPS:
12830 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12831 arglist, target);
12833 case IX86_BUILTIN_LOADAPS:
12834 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12836 case IX86_BUILTIN_LOADUPS:
12837 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12839 case IX86_BUILTIN_STOREAPS:
12840 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12841 case IX86_BUILTIN_STOREUPS:
12842 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12844 case IX86_BUILTIN_LOADSS:
12845 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12847 case IX86_BUILTIN_STORESS:
12848 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12850 case IX86_BUILTIN_LOADHPS:
12851 case IX86_BUILTIN_LOADLPS:
12852 case IX86_BUILTIN_LOADHPD:
12853 case IX86_BUILTIN_LOADLPD:
12854 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12855 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12856 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12857 : CODE_FOR_sse2_movlpd);
12858 arg0 = TREE_VALUE (arglist);
12859 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12860 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12861 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12862 tmode = insn_data[icode].operand[0].mode;
12863 mode0 = insn_data[icode].operand[1].mode;
12864 mode1 = insn_data[icode].operand[2].mode;
12866 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12867 op0 = copy_to_mode_reg (mode0, op0);
12868 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12869 if (target == 0
12870 || GET_MODE (target) != tmode
12871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12872 target = gen_reg_rtx (tmode);
12873 pat = GEN_FCN (icode) (target, op0, op1);
12874 if (! pat)
12875 return 0;
12876 emit_insn (pat);
12877 return target;
12879 case IX86_BUILTIN_STOREHPS:
12880 case IX86_BUILTIN_STORELPS:
12881 case IX86_BUILTIN_STOREHPD:
12882 case IX86_BUILTIN_STORELPD:
12883 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12884 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12885 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12886 : CODE_FOR_sse2_movlpd);
12887 arg0 = TREE_VALUE (arglist);
12888 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12889 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12890 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12891 mode0 = insn_data[icode].operand[1].mode;
12892 mode1 = insn_data[icode].operand[2].mode;
12894 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12895 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12896 op1 = copy_to_mode_reg (mode1, op1);
12898 pat = GEN_FCN (icode) (op0, op0, op1);
12899 if (! pat)
12900 return 0;
12901 emit_insn (pat);
12902 return 0;
12904 case IX86_BUILTIN_MOVNTPS:
12905 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
12906 case IX86_BUILTIN_MOVNTQ:
12907 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
12909 case IX86_BUILTIN_LDMXCSR:
12910 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
12911 target = assign_386_stack_local (SImode, 0);
12912 emit_move_insn (target, op0);
12913 emit_insn (gen_ldmxcsr (target));
12914 return 0;
12916 case IX86_BUILTIN_STMXCSR:
12917 target = assign_386_stack_local (SImode, 0);
12918 emit_insn (gen_stmxcsr (target));
12919 return copy_to_mode_reg (SImode, target);
12921 case IX86_BUILTIN_SHUFPS:
12922 case IX86_BUILTIN_SHUFPD:
12923 icode = (fcode == IX86_BUILTIN_SHUFPS
12924 ? CODE_FOR_sse_shufps
12925 : CODE_FOR_sse2_shufpd);
12926 arg0 = TREE_VALUE (arglist);
12927 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12928 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12929 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12930 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12931 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12932 tmode = insn_data[icode].operand[0].mode;
12933 mode0 = insn_data[icode].operand[1].mode;
12934 mode1 = insn_data[icode].operand[2].mode;
12935 mode2 = insn_data[icode].operand[3].mode;
12937 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12938 op0 = copy_to_mode_reg (mode0, op0);
12939 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12940 op1 = copy_to_mode_reg (mode1, op1);
12941 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12943 /* @@@ better error message */
12944 error ("mask must be an immediate");
12945 return gen_reg_rtx (tmode);
12947 if (target == 0
12948 || GET_MODE (target) != tmode
12949 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12950 target = gen_reg_rtx (tmode);
12951 pat = GEN_FCN (icode) (target, op0, op1, op2);
12952 if (! pat)
12953 return 0;
12954 emit_insn (pat);
12955 return target;
12957 case IX86_BUILTIN_PSHUFW:
12958 case IX86_BUILTIN_PSHUFD:
12959 case IX86_BUILTIN_PSHUFHW:
12960 case IX86_BUILTIN_PSHUFLW:
12961 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
12962 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
12963 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
12964 : CODE_FOR_mmx_pshufw);
12965 arg0 = TREE_VALUE (arglist);
12966 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12967 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12968 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12969 tmode = insn_data[icode].operand[0].mode;
12970 mode1 = insn_data[icode].operand[1].mode;
12971 mode2 = insn_data[icode].operand[2].mode;
12973 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12974 op0 = copy_to_mode_reg (mode1, op0);
12975 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12977 /* @@@ better error message */
12978 error ("mask must be an immediate");
12979 return const0_rtx;
12981 if (target == 0
12982 || GET_MODE (target) != tmode
12983 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12984 target = gen_reg_rtx (tmode);
12985 pat = GEN_FCN (icode) (target, op0, op1);
12986 if (! pat)
12987 return 0;
12988 emit_insn (pat);
12989 return target;
12991 case IX86_BUILTIN_FEMMS:
12992 emit_insn (gen_femms ());
12993 return NULL_RTX;
12995 case IX86_BUILTIN_PAVGUSB:
12996 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
12998 case IX86_BUILTIN_PF2ID:
12999 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13001 case IX86_BUILTIN_PFACC:
13002 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13004 case IX86_BUILTIN_PFADD:
13005 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13007 case IX86_BUILTIN_PFCMPEQ:
13008 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13010 case IX86_BUILTIN_PFCMPGE:
13011 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13013 case IX86_BUILTIN_PFCMPGT:
13014 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13016 case IX86_BUILTIN_PFMAX:
13017 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13019 case IX86_BUILTIN_PFMIN:
13020 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13022 case IX86_BUILTIN_PFMUL:
13023 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13025 case IX86_BUILTIN_PFRCP:
13026 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13028 case IX86_BUILTIN_PFRCPIT1:
13029 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13031 case IX86_BUILTIN_PFRCPIT2:
13032 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13034 case IX86_BUILTIN_PFRSQIT1:
13035 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13037 case IX86_BUILTIN_PFRSQRT:
13038 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13040 case IX86_BUILTIN_PFSUB:
13041 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13043 case IX86_BUILTIN_PFSUBR:
13044 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13046 case IX86_BUILTIN_PI2FD:
13047 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13049 case IX86_BUILTIN_PMULHRW:
13050 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13052 case IX86_BUILTIN_PF2IW:
13053 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13055 case IX86_BUILTIN_PFNACC:
13056 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13058 case IX86_BUILTIN_PFPNACC:
13059 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13061 case IX86_BUILTIN_PI2FW:
13062 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13064 case IX86_BUILTIN_PSWAPDSI:
13065 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13067 case IX86_BUILTIN_PSWAPDSF:
13068 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13070 case IX86_BUILTIN_SSE_ZERO:
13071 target = gen_reg_rtx (V4SFmode);
13072 emit_insn (gen_sse_clrv4sf (target));
13073 return target;
13075 case IX86_BUILTIN_MMX_ZERO:
13076 target = gen_reg_rtx (DImode);
13077 emit_insn (gen_mmx_clrdi (target));
13078 return target;
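/* The two zeroing builtins expand to register-clear patterns, which are
   typically assembled as an xor of the register with itself, e.g.
   "xorps %xmm0, %xmm0" for the SSE case and "pxor %mm0, %mm0" for MMX.  */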
13080 case IX86_BUILTIN_SQRTSD:
13081 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13082 case IX86_BUILTIN_LOADAPD:
13083 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13084 case IX86_BUILTIN_LOADUPD:
13085 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13087 case IX86_BUILTIN_STOREAPD:
13088 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13089 case IX86_BUILTIN_STOREUPD:
13090 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13092 case IX86_BUILTIN_LOADSD:
13093 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13095 case IX86_BUILTIN_STORESD:
13096 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13098 case IX86_BUILTIN_SETPD1:
13099 target = assign_386_stack_local (DFmode, 0);
13100 arg0 = TREE_VALUE (arglist);
13101 emit_move_insn (adjust_address (target, DFmode, 0),
13102 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13103 op0 = gen_reg_rtx (V2DFmode);
13104 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13105 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13106 return op0;
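/* This builds the vector { x, x } from the scalar argument: the value is
   spilled to a stack slot, loaded into the low half of a V2DF register
   with loadsd, and then shufpd with selector 0 copies that low element
   into both halves.  */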
13108 case IX86_BUILTIN_SETPD:
13109 target = assign_386_stack_local (V2DFmode, 0);
13110 arg0 = TREE_VALUE (arglist);
13111 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13112 emit_move_insn (adjust_address (target, DFmode, 0),
13113 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13114 emit_move_insn (adjust_address (target, DFmode, 8),
13115 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13116 op0 = gen_reg_rtx (V2DFmode);
13117 emit_insn (gen_sse2_movapd (op0, target));
13118 return op0;
13120 case IX86_BUILTIN_LOADRPD:
13121 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13122 gen_reg_rtx (V2DFmode), 1);
13123 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13124 return target;
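/* For the reversed load, shufpd with selector 1 applied to a register and
   itself swaps the two DFmode elements, so the result is
   { mem[1], mem[0] }.  */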
13126 case IX86_BUILTIN_LOADPD1:
13127 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13128 gen_reg_rtx (V2DFmode), 1);
13129 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13130 return target;
13132 case IX86_BUILTIN_STOREPD1:
13133 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13134 case IX86_BUILTIN_STORERPD:
13135 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13137 case IX86_BUILTIN_MFENCE:
13138 emit_insn (gen_sse2_mfence ());
13139 return 0;
13140 case IX86_BUILTIN_LFENCE:
13141 emit_insn (gen_sse2_lfence ());
13142 return 0;
13144 case IX86_BUILTIN_CLFLUSH:
13145 arg0 = TREE_VALUE (arglist);
13146 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13147 icode = CODE_FOR_sse2_clflush;
13148 mode0 = insn_data[icode].operand[0].mode;
13149 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13150 op0 = copy_to_mode_reg (mode0, op0);
13152 emit_insn (gen_sse2_clflush (op0));
13153 return 0;
13155 case IX86_BUILTIN_MOVNTPD:
13156 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13157 case IX86_BUILTIN_MOVNTDQ:
13158 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13159 case IX86_BUILTIN_MOVNTI:
13160 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13162 default:
13163 break;
13164 }
13166 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13167 if (d->code == fcode)
13168 {
13169 /* Compares are treated specially. */
13170 if (d->icode == CODE_FOR_maskcmpv4sf3
13171 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13172 || d->icode == CODE_FOR_maskncmpv4sf3
13173 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13174 || d->icode == CODE_FOR_maskcmpv2df3
13175 || d->icode == CODE_FOR_vmmaskcmpv2df3
13176 || d->icode == CODE_FOR_maskncmpv2df3
13177 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13178 return ix86_expand_sse_compare (d, arglist, target);
13180 return ix86_expand_binop_builtin (d->icode, arglist, target);
13181 }
13183 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13184 if (d->code == fcode)
13185 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13187 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13188 if (d->code == fcode)
13189 return ix86_expand_sse_comi (d, arglist, target);
13191 /* @@@ Should really do something sensible here. */
13192 return 0;
13193 }
13195 /* Store OPERAND to memory after reload has completed. This means
13196 that we can't easily use assign_stack_local. */
13197 rtx
13198 ix86_force_to_memory (mode, operand)
13199 enum machine_mode mode;
13200 rtx operand;
13201 {
13202 rtx result;
13203 if (!reload_completed)
13204 abort ();
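/* With the red zone available (on the x86-64 ABI the 128 bytes below the
   stack pointer may be used freely by leaf code), the operand is simply
   stored at sp - RED_ZONE_SIZE without adjusting the stack pointer; the
   branches below push it with an explicit pre-decrement instead.  */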
13205 if (TARGET_64BIT && TARGET_RED_ZONE)
13206 {
13207 result = gen_rtx_MEM (mode,
13208 gen_rtx_PLUS (Pmode,
13209 stack_pointer_rtx,
13210 GEN_INT (-RED_ZONE_SIZE)));
13211 emit_move_insn (result, operand);
13212 }
13213 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13214 {
13215 switch (mode)
13216 {
13217 case HImode:
13218 case SImode:
13219 operand = gen_lowpart (DImode, operand);
13220 /* FALLTHRU */
13221 case DImode:
13222 emit_insn (
13223 gen_rtx_SET (VOIDmode,
13224 gen_rtx_MEM (DImode,
13225 gen_rtx_PRE_DEC (DImode,
13226 stack_pointer_rtx)),
13227 operand));
13228 break;
13229 default:
13230 abort ();
13231 }
13232 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13233 }
13234 else
13235 {
13236 switch (mode)
13237 {
13238 case DImode:
13239 {
13240 rtx operands[2];
13241 split_di (&operand, 1, operands, operands + 1);
13242 emit_insn (
13243 gen_rtx_SET (VOIDmode,
13244 gen_rtx_MEM (SImode,
13245 gen_rtx_PRE_DEC (Pmode,
13246 stack_pointer_rtx)),
13247 operands[1]));
13248 emit_insn (
13249 gen_rtx_SET (VOIDmode,
13250 gen_rtx_MEM (SImode,
13251 gen_rtx_PRE_DEC (Pmode,
13252 stack_pointer_rtx)),
13253 operands[0]));
13254 }
13255 break;
13256 case HImode:
13257 /* It is better to store HImodes as SImodes. */
13258 if (!TARGET_PARTIAL_REG_STALL)
13259 operand = gen_lowpart (SImode, operand);
13260 /* FALLTHRU */
13261 case SImode:
13262 emit_insn (
13263 gen_rtx_SET (VOIDmode,
13264 gen_rtx_MEM (GET_MODE (operand),
13265 gen_rtx_PRE_DEC (SImode,
13266 stack_pointer_rtx)),
13267 operand));
13268 break;
13269 default:
13270 abort ();
13271 }
13272 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13273 }
13274 return result;
13275 }
13277 /* Free the operand previously stored to memory by ix86_force_to_memory. */
13278 void
13279 ix86_free_from_memory (mode)
13280 enum machine_mode mode;
13281 {
13282 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13283 {
13284 int size;
13286 if (mode == DImode || TARGET_64BIT)
13287 size = 8;
13288 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13289 size = 2;
13290 else
13291 size = 4;
13292 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13293 to a pop or add instruction if registers are available. */
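/* The adjustment below is emitted as a bare SET with no flags clobber, so
   until peephole2 runs it can be output as "lea N(%esp), %esp", which
   unlike "add" leaves the condition codes untouched.  */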
13294 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13295 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13296 GEN_INT (size))));
13297 }
13298 }
13300 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13301 QImode must go into class Q_REGS.
13302 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13303 movdf to do mem-to-mem moves through integer regs. */
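/* For instance, asking to reload a DFmode CONST_DOUBLE into an SSE class
   yields NO_REGS below, so reload places the constant in the constant pool
   and loads it from memory; an x87 class is kept only when
   standard_80387_constant_p accepts the value (0 or 1).  */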
13304 enum reg_class
13305 ix86_preferred_reload_class (x, class)
13306 rtx x;
13307 enum reg_class class;
13308 {
13309 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13310 {
13311 /* SSE can't load any constant directly yet. */
13312 if (SSE_CLASS_P (class))
13313 return NO_REGS;
13314 /* Floats can load 0 and 1. */
13315 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13316 return class;
13317 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13318 if (MAYBE_SSE_CLASS_P (class))
13319 return (reg_class_subset_p (class, GENERAL_REGS)
13320 ? GENERAL_REGS : FLOAT_REGS);
13321 else
13322 return class;
13323 }
13324 /* General regs can load everything. */
13325 if (reg_class_subset_p (class, GENERAL_REGS))
13326 return GENERAL_REGS;
13327 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13328 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13329 return NO_REGS;
13331 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13332 return NO_REGS;
13333 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13334 return Q_REGS;
13335 return class;
13336 }
13338 /* If we are copying between general and FP registers, we need a memory
13339 location. The same is true for SSE and MMX registers.
13341 The macro can't work reliably when one of the CLASSES is a class containing
13342 registers from multiple units (SSE, MMX, integer). We avoid this by never
13343 combining those units in a single alternative in the machine description.
13344 Ensure that this constraint holds to avoid unexpected surprises.
13346 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13347 enforce these sanity checks. */
13348 int
13349 ix86_secondary_memory_needed (class1, class2, mode, strict)
13350 enum reg_class class1, class2;
13351 enum machine_mode mode;
13352 int strict;
13353 {
13354 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13355 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13356 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13357 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13358 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13359 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13360 {
13361 if (strict)
13362 abort ();
13363 else
13364 return 1;
13365 }
13366 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13367 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13368 && (mode) != SImode)
13369 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13370 && (mode) != SImode));
13371 }
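/* The SImode exceptions above reflect that a 32-bit value can be moved
   directly between a general register and an SSE or MMX register (movd),
   whereas wider values have to take a trip through memory.  */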
13372 /* Return the cost of moving data from a register in class CLASS1 to
13373 one in class CLASS2.
13375 It is not required that the cost always equal 2 when FROM is the same as TO;
13376 on some machines it is expensive to move between registers if they are not
13377 general registers. */
13378 int
13379 ix86_register_move_cost (mode, class1, class2)
13380 enum machine_mode mode;
13381 enum reg_class class1, class2;
13382 {
13383 /* If we require secondary memory, compute the cost of the store followed
13384 by the load. When copying from a general purpose register we may emit
13385 multiple stores followed by a single load, causing a memory size
13386 mismatch stall. Count this as an arbitrarily high cost of 20. */
13387 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13388 {
13389 int add_cost = 0;
13390 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13391 add_cost = 20;
13392 return (MEMORY_MOVE_COST (mode, class1, 0)
13393 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13394 }
13395 /* Moves between SSE/MMX and integer unit are expensive. */
13396 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13397 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13398 return ix86_cost->mmxsse_to_integer;
13399 if (MAYBE_FLOAT_CLASS_P (class1))
13400 return ix86_cost->fp_move;
13401 if (MAYBE_SSE_CLASS_P (class1))
13402 return ix86_cost->sse_move;
13403 if (MAYBE_MMX_CLASS_P (class1))
13404 return ix86_cost->mmx_move;
13405 return 2;
13406 }
13408 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13409 int
13410 ix86_hard_regno_mode_ok (regno, mode)
13411 int regno;
13412 enum machine_mode mode;
13413 {
13414 /* Only the flags register can hold CCmode values, and it can hold only CCmode values. */
13415 if (CC_REGNO_P (regno))
13416 return GET_MODE_CLASS (mode) == MODE_CC;
13417 if (GET_MODE_CLASS (mode) == MODE_CC
13418 || GET_MODE_CLASS (mode) == MODE_RANDOM
13419 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13420 return 0;
13421 if (FP_REGNO_P (regno))
13422 return VALID_FP_MODE_P (mode);
13423 if (SSE_REGNO_P (regno))
13424 return VALID_SSE_REG_MODE (mode);
13425 if (MMX_REGNO_P (regno))
13426 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13427 /* We handle both integers and floats in the general purpose registers.
13428 In the future we should be able to handle vector modes as well. */
13429 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13430 return 0;
13431 /* Take care with QImode values: they can live in non-QI regs, but they
13432 then cause partial register stalls. */
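/* In 32-bit mode only hard registers 0-3 (%eax, %ebx, %ecx, %edx) have
   addressable byte parts such as %al, so QImode in other registers is
   allowed only when partial register stalls are not a concern; with REX
   prefixes in 64-bit mode every general register has a low byte.  */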
13433 if (regno < 4 || mode != QImode || TARGET_64BIT)
13434 return 1;
13435 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13436 }
13438 /* Return the cost of moving data of mode M between a
13439 register and memory. A value of 2 is the default; this cost is
13440 relative to those in `REGISTER_MOVE_COST'.
13442 If moving between registers and memory is more expensive than
13443 between two registers, you should define this macro to express the
13444 relative cost.
13446 Also model the increased cost of moving QImode registers in
13447 non-Q_REGS classes.
13448 */
13449 int
13450 ix86_memory_move_cost (mode, class, in)
13451 enum machine_mode mode;
13452 enum reg_class class;
13453 int in;
13454 {
13455 if (FLOAT_CLASS_P (class))
13456 {
13457 int index;
13458 switch (mode)
13459 {
13460 case SFmode:
13461 index = 0;
13462 break;
13463 case DFmode:
13464 index = 1;
13465 break;
13466 case XFmode:
13467 case TFmode:
13468 index = 2;
13469 break;
13470 default:
13471 return 100;
13472 }
13473 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13474 }
13475 if (SSE_CLASS_P (class))
13476 {
13477 int index;
13478 switch (GET_MODE_SIZE (mode))
13479 {
13480 case 4:
13481 index = 0;
13482 break;
13483 case 8:
13484 index = 1;
13485 break;
13486 case 16:
13487 index = 2;
13488 break;
13489 default:
13490 return 100;
13491 }
13492 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13493 }
13494 if (MMX_CLASS_P (class))
13495 {
13496 int index;
13497 switch (GET_MODE_SIZE (mode))
13498 {
13499 case 4:
13500 index = 0;
13501 break;
13502 case 8:
13503 index = 1;
13504 break;
13505 default:
13506 return 100;
13507 }
13508 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13509 }
13510 switch (GET_MODE_SIZE (mode))
13511 {
13512 case 1:
13513 if (in)
13514 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13515 : ix86_cost->movzbl_load);
13516 else
13517 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13518 : ix86_cost->int_store[0] + 4);
13519 break;
13520 case 2:
13521 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13522 default:
13523 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13524 if (mode == TFmode)
13525 mode = XFmode;
13526 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13527 * (int) GET_MODE_SIZE (mode) / 4);
13528 }
13529 }
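/* As a worked example of the default case above: a DImode (8-byte) integer
   in 32-bit code costs int_load[2] * 8 / 4, i.e. twice the SImode load
   cost, and TFmode is first mapped to XFmode before its size is taken.  */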
13531 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13532 static void
13533 ix86_svr3_asm_out_constructor (symbol, priority)
13534 rtx symbol;
13535 int priority ATTRIBUTE_UNUSED;
13536 {
13537 init_section ();
13538 fputs ("\tpushl $", asm_out_file);
13539 assemble_name (asm_out_file, XSTR (symbol, 0));
13540 fputc ('\n', asm_out_file);
13541 }
13542 #endif
13544 /* Order the registers for the register allocator. */
13546 void
13547 x86_order_regs_for_local_alloc ()
13548 {
13549 int pos = 0;
13550 int i;
13552 /* First allocate the local general purpose registers. */
13553 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13554 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13555 reg_alloc_order [pos++] = i;
13557 /* Global general purpose registers. */
13558 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13559 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13560 reg_alloc_order [pos++] = i;
13562 /* x87 registers come first in case we are doing FP math
13563 using them. */
13564 if (!TARGET_SSE_MATH)
13565 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13566 reg_alloc_order [pos++] = i;
13568 /* SSE registers. */
13569 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13570 reg_alloc_order [pos++] = i;
13571 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13572 reg_alloc_order [pos++] = i;
13574 /* x87 registers. */
13575 if (TARGET_SSE_MATH)
13576 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13577 reg_alloc_order [pos++] = i;
13579 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13580 reg_alloc_order [pos++] = i;
13582 /* Initialize the rest of the array; some registers are never
13583 allocated at all. */
13584 while (pos < FIRST_PSEUDO_REGISTER)
13585 reg_alloc_order [pos++] = 0;
13586 }
13588 void
13589 x86_output_mi_thunk (file, delta, function)
13590 FILE *file;
13591 int delta;
13592 tree function;
13593 {
13594 tree parm;
13595 rtx xops[3];
13597 if (ix86_regparm > 0)
13598 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13599 else
13600 parm = NULL_TREE;
13601 for (; parm; parm = TREE_CHAIN (parm))
13602 if (TREE_VALUE (parm) == void_type_node)
13603 break;
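/* If the loop above found a terminating void_type_node, the argument list
   is prototyped (not varargs).  In the 32-bit case below, under regparm the
   `this' pointer of the thunk then arrives in %eax (register 0) and the
   delta is added there; otherwise it is adjusted in its stack slot, at
   8(%esp) when a hidden aggregate-return pointer occupies the first slot
   and at 4(%esp) otherwise.  */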
13605 xops[0] = GEN_INT (delta);
13606 if (TARGET_64BIT)
13607 {
13608 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13609 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13610 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13611 if (flag_pic)
13612 {
13613 fprintf (file, "\tjmp *");
13614 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13615 fprintf (file, "@GOTPCREL(%%rip)\n");
13616 }
13617 else
13618 {
13619 fprintf (file, "\tjmp ");
13620 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13621 fprintf (file, "\n");
13622 }
13623 }
13624 else
13625 {
13626 if (parm)
13627 xops[1] = gen_rtx_REG (SImode, 0);
13628 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13629 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13630 else
13631 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13632 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13634 if (flag_pic)
13635 {
13636 xops[0] = pic_offset_table_rtx;
13637 xops[1] = gen_label_rtx ();
13638 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13640 if (ix86_regparm > 2)
13641 abort ();
13642 output_asm_insn ("push{l}\t%0", xops);
13643 output_asm_insn ("call\t%P1", xops);
13644 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13645 output_asm_insn ("pop{l}\t%0", xops);
13646 output_asm_insn
13647 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13648 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13649 output_asm_insn
13650 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13651 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13652 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13653 }
13654 else
13655 {
13656 fprintf (file, "\tjmp ");
13657 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13658 fprintf (file, "\n");
13659 }
13660 }
13661 }
13663 int
13664 x86_field_alignment (field, computed)
13665 tree field;
13666 int computed;
13667 {
13668 enum machine_mode mode;
13669 if (TARGET_64BIT || DECL_USER_ALIGN (field) || TARGET_ALIGN_DOUBLE)
13670 return computed;
13671 mode = TYPE_MODE (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE
13672 ? get_inner_array_type (field) : TREE_TYPE (field));
13673 if ((mode == DFmode || mode == DCmode
13674 || mode == DImode || mode == CDImode)
13675 && !TARGET_ALIGN_DOUBLE)
13676 return MIN (32, computed);
13677 return computed;
13678 }
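/* Under the default 32-bit ABI (no -malign-double) this means a double or
   long long structure field is aligned to only 4 bytes (32 bits) even
   though its natural alignment is 8, which is what the MIN (32, computed)
   above implements.  */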
13680 /* Implement machine specific optimizations.
13681 At the moment we implement a single transformation: AMD Athlon works faster
13682 when RET is not the destination of a conditional jump or directly preceded
13683 by another jump instruction. We avoid the penalty by inserting a NOP just
13684 before the RET instruction in such cases. */
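/* Illustratively, a hot block ending in

	jne	.L2
	ret

   gets a NOP inserted before the RET, and the same is done when the RET is
   reached through a label that is the target of a taken branch.  */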
13685 void
13686 x86_machine_dependent_reorg (first)
13687 rtx first ATTRIBUTE_UNUSED;
13688 {
13689 edge e;
13691 if (!TARGET_ATHLON || !optimize || optimize_size)
13692 return;
13693 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13694 {
13695 basic_block bb = e->src;
13696 rtx ret = bb->end;
13697 rtx prev;
13698 bool insert = false;
13700 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13701 continue;
13702 prev = prev_nonnote_insn (ret);
13703 if (prev && GET_CODE (prev) == CODE_LABEL)
13704 {
13705 edge e;
13706 for (e = bb->pred; e; e = e->pred_next)
13707 if (EDGE_FREQUENCY (e) && e->src->index > 0
13708 && !(e->flags & EDGE_FALLTHRU))
13709 insert = 1;
13710 }
13711 if (!insert)
13712 {
13713 prev = prev_real_insn (ret);
13714 if (prev && GET_CODE (prev) == JUMP_INSN
13715 && any_condjump_p (prev))
13716 insert = 1;
13717 }
13718 if (insert)
13719 emit_insn_before (gen_nop (), ret);
13720 }
13721 }
13723 #include "gt-i386.h"