1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
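/* For example (just restating the macro above): MODE_INDEX (QImode) is 0,
   MODE_INDEX (SImode) is 2, and any mode not listed maps to 4, so the
   five-element per-mode multiply/divide cost arrays in the tables below
   (commented QI/HI/SI/DI/other) are indexed as array[MODE_INDEX (mode)].  */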
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
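/* A quick worked check of the scale, under the assumption stated above
   (COSTS_N_INSNS (N) expanding to (N)*4): an add is 2 bytes, so
     COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1)
   i.e. the byte-based size costs and the insn-based speed costs agree on the
   baseline add instruction, which lets the size-tuning table (size_cost
   below) share the processor_costs layout with the speed-tuning tables.  */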
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise. */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
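/* A hedged note on reading the stringop descriptors above (the exact field
   names live in the stringop_algs definition in i386.h): the first element of
   each descriptor is the algorithm used when the block size is not known at
   compile time; the {max, alg} pairs pick an algorithm for known sizes, with
   the first pair whose max is at least the block size winning and max == -1
   acting as the "everything larger" terminator. So the first PentiumPro
   descriptor pair above (apparently the memcpy strategies) reads: unknown
   size -> rep movsl; up to 128 bytes -> inline loop; up to 1024 -> unrolled
   loop; up to 8192 -> rep movsl; larger -> rep movsb.  */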
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches entirely unlimited, as their execution also takes some
588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, the libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
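/* Reading the K8 descriptors above in the same way (hedged; the first
   descriptor of each pair appears to be the 32-bit variant and the second the
   64-bit one): for 64-bit memcpy, blocks of up to 16 bytes use an inline loop,
   blocks of up to 8192 bytes use rep movsq, and anything larger goes to the
   libcall; this is exactly the small/medium/large split described in the
   comment above.  */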
607 static const
608 struct processor_costs pentium4_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (3), /* cost of a lea instruction */
611 COSTS_N_INSNS (4), /* variable shift costs */
612 COSTS_N_INSNS (4), /* constant shift costs */
613 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (15), /* HI */
615 COSTS_N_INSNS (15), /* SI */
616 COSTS_N_INSNS (15), /* DI */
617 COSTS_N_INSNS (15)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (56), /* HI */
621 COSTS_N_INSNS (56), /* SI */
622 COSTS_N_INSNS (56), /* DI */
623 COSTS_N_INSNS (56)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 16, /* "large" insn */
627 6, /* MOVE_RATIO */
628 2, /* cost for loading QImode using movzbl */
629 {4, 5, 4}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {2, 3, 2}, /* cost of storing integer registers */
633 2, /* cost of reg,reg fld/fst */
634 {2, 2, 6}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {4, 4, 6}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {2, 2}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {2, 2}, /* cost of storing MMX registers
642 in SImode and DImode */
643 12, /* cost of moving SSE register */
644 {12, 12, 12}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {2, 2, 8}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 10, /* MMX or SSE register to integer */
649 64, /* size of prefetch block */
650 6, /* number of parallel prefetches */
651 2, /* Branch cost */
652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
658 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
659 {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}},
660 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
661 {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}}
664 static const
665 struct processor_costs nocona_cost = {
666 COSTS_N_INSNS (1), /* cost of an add instruction */
667 COSTS_N_INSNS (1), /* cost of a lea instruction */
668 COSTS_N_INSNS (1), /* variable shift costs */
669 COSTS_N_INSNS (1), /* constant shift costs */
670 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
671 COSTS_N_INSNS (10), /* HI */
672 COSTS_N_INSNS (10), /* SI */
673 COSTS_N_INSNS (10), /* DI */
674 COSTS_N_INSNS (10)}, /* other */
675 0, /* cost of multiply per each bit set */
676 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
677 COSTS_N_INSNS (66), /* HI */
678 COSTS_N_INSNS (66), /* SI */
679 COSTS_N_INSNS (66), /* DI */
680 COSTS_N_INSNS (66)}, /* other */
681 COSTS_N_INSNS (1), /* cost of movsx */
682 COSTS_N_INSNS (1), /* cost of movzx */
683 16, /* "large" insn */
684 17, /* MOVE_RATIO */
685 4, /* cost for loading QImode using movzbl */
686 {4, 4, 4}, /* cost of loading integer registers
687 in QImode, HImode and SImode.
688 Relative to reg-reg move (2). */
689 {4, 4, 4}, /* cost of storing integer registers */
690 3, /* cost of reg,reg fld/fst */
691 {12, 12, 12}, /* cost of loading fp registers
692 in SFmode, DFmode and XFmode */
693 {4, 4, 4}, /* cost of storing fp registers
694 in SFmode, DFmode and XFmode */
695 6, /* cost of moving MMX register */
696 {12, 12}, /* cost of loading MMX registers
697 in SImode and DImode */
698 {12, 12}, /* cost of storing MMX registers
699 in SImode and DImode */
700 6, /* cost of moving SSE register */
701 {12, 12, 12}, /* cost of loading SSE registers
702 in SImode, DImode and TImode */
703 {12, 12, 12}, /* cost of storing SSE registers
704 in SImode, DImode and TImode */
705 8, /* MMX or SSE register to integer */
706 128, /* size of prefetch block */
707 8, /* number of parallel prefetches */
708 1, /* Branch cost */
709 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
711 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
714 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
715 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
716 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
717 {100000, unrolled_loop}, {-1, libcall}}}},
718 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
719 {libcall, {{24, loop}, {64, unrolled_loop},
720 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
723 static const
724 struct processor_costs core2_cost = {
725 COSTS_N_INSNS (1), /* cost of an add instruction */
726 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
727 COSTS_N_INSNS (1), /* variable shift costs */
728 COSTS_N_INSNS (1), /* constant shift costs */
729 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
730 COSTS_N_INSNS (3), /* HI */
731 COSTS_N_INSNS (3), /* SI */
732 COSTS_N_INSNS (3), /* DI */
733 COSTS_N_INSNS (3)}, /* other */
734 0, /* cost of multiply per each bit set */
735 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
736 COSTS_N_INSNS (22), /* HI */
737 COSTS_N_INSNS (22), /* SI */
738 COSTS_N_INSNS (22), /* DI */
739 COSTS_N_INSNS (22)}, /* other */
740 COSTS_N_INSNS (1), /* cost of movsx */
741 COSTS_N_INSNS (1), /* cost of movzx */
742 8, /* "large" insn */
743 16, /* MOVE_RATIO */
744 2, /* cost for loading QImode using movzbl */
745 {6, 6, 6}, /* cost of loading integer registers
746 in QImode, HImode and SImode.
747 Relative to reg-reg move (2). */
748 {4, 4, 4}, /* cost of storing integer registers */
749 2, /* cost of reg,reg fld/fst */
750 {6, 6, 6}, /* cost of loading fp registers
751 in SFmode, DFmode and XFmode */
752 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
753 2, /* cost of moving MMX register */
754 {6, 6}, /* cost of loading MMX registers
755 in SImode and DImode */
756 {4, 4}, /* cost of storing MMX registers
757 in SImode and DImode */
758 2, /* cost of moving SSE register */
759 {6, 6, 6}, /* cost of loading SSE registers
760 in SImode, DImode and TImode */
761 {4, 4, 4}, /* cost of storing SSE registers
762 in SImode, DImode and TImode */
763 2, /* MMX or SSE register to integer */
764 128, /* size of prefetch block */
765 8, /* number of parallel prefetches */
766 3, /* Branch cost */
767 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (1), /* cost of FABS instruction. */
771 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
773 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
774 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
775 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
776 {{libcall, {{8, loop}, {15, unrolled_loop},
777 {2048, rep_prefix_4_byte}, {-1, libcall}}},
778 {libcall, {{24, loop}, {32, unrolled_loop},
779 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
782 /* Generic64 should produce code tuned for Nocona and K8. */
783 static const
784 struct processor_costs generic64_cost = {
785 COSTS_N_INSNS (1), /* cost of an add instruction */
786 /* On all chips taken into consideration, lea is 2 cycles or more. With
787 this cost, however, our current implementation of synth_mult results in
788 the use of unnecessary temporary registers, causing regressions on several
789 SPECfp benchmarks. */
790 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
791 COSTS_N_INSNS (1), /* variable shift costs */
792 COSTS_N_INSNS (1), /* constant shift costs */
793 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
794 COSTS_N_INSNS (4), /* HI */
795 COSTS_N_INSNS (3), /* SI */
796 COSTS_N_INSNS (4), /* DI */
797 COSTS_N_INSNS (2)}, /* other */
798 0, /* cost of multiply per each bit set */
799 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
800 COSTS_N_INSNS (26), /* HI */
801 COSTS_N_INSNS (42), /* SI */
802 COSTS_N_INSNS (74), /* DI */
803 COSTS_N_INSNS (74)}, /* other */
804 COSTS_N_INSNS (1), /* cost of movsx */
805 COSTS_N_INSNS (1), /* cost of movzx */
806 8, /* "large" insn */
807 17, /* MOVE_RATIO */
808 4, /* cost for loading QImode using movzbl */
809 {4, 4, 4}, /* cost of loading integer registers
810 in QImode, HImode and SImode.
811 Relative to reg-reg move (2). */
812 {4, 4, 4}, /* cost of storing integer registers */
813 4, /* cost of reg,reg fld/fst */
814 {12, 12, 12}, /* cost of loading fp registers
815 in SFmode, DFmode and XFmode */
816 {6, 6, 8}, /* cost of storing fp registers
817 in SFmode, DFmode and XFmode */
818 2, /* cost of moving MMX register */
819 {8, 8}, /* cost of loading MMX registers
820 in SImode and DImode */
821 {8, 8}, /* cost of storing MMX registers
822 in SImode and DImode */
823 2, /* cost of moving SSE register */
824 {8, 8, 8}, /* cost of loading SSE registers
825 in SImode, DImode and TImode */
826 {8, 8, 8}, /* cost of storing SSE registers
827 in SImode, DImode and TImode */
828 5, /* MMX or SSE register to integer */
829 64, /* size of prefetch block */
830 6, /* number of parallel prefetches */
831 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
832 is increased to the perhaps more appropriate value of 5. */
833 3, /* Branch cost */
834 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
835 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
836 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
837 COSTS_N_INSNS (8), /* cost of FABS instruction. */
838 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
839 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
840 {DUMMY_STRINGOP_ALGS,
841 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
842 {DUMMY_STRINGOP_ALGS,
843 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
846 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
847 static const
848 struct processor_costs generic32_cost = {
849 COSTS_N_INSNS (1), /* cost of an add instruction */
850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
851 COSTS_N_INSNS (1), /* variable shift costs */
852 COSTS_N_INSNS (1), /* constant shift costs */
853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
854 COSTS_N_INSNS (4), /* HI */
855 COSTS_N_INSNS (3), /* SI */
856 COSTS_N_INSNS (4), /* DI */
857 COSTS_N_INSNS (2)}, /* other */
858 0, /* cost of multiply per each bit set */
859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
860 COSTS_N_INSNS (26), /* HI */
861 COSTS_N_INSNS (42), /* SI */
862 COSTS_N_INSNS (74), /* DI */
863 COSTS_N_INSNS (74)}, /* other */
864 COSTS_N_INSNS (1), /* cost of movsx */
865 COSTS_N_INSNS (1), /* cost of movzx */
866 8, /* "large" insn */
867 17, /* MOVE_RATIO */
868 4, /* cost for loading QImode using movzbl */
869 {4, 4, 4}, /* cost of loading integer registers
870 in QImode, HImode and SImode.
871 Relative to reg-reg move (2). */
872 {4, 4, 4}, /* cost of storing integer registers */
873 4, /* cost of reg,reg fld/fst */
874 {12, 12, 12}, /* cost of loading fp registers
875 in SFmode, DFmode and XFmode */
876 {6, 6, 8}, /* cost of storing fp registers
877 in SFmode, DFmode and XFmode */
878 2, /* cost of moving MMX register */
879 {8, 8}, /* cost of loading MMX registers
880 in SImode and DImode */
881 {8, 8}, /* cost of storing MMX registers
882 in SImode and DImode */
883 2, /* cost of moving SSE register */
884 {8, 8, 8}, /* cost of loading SSE registers
885 in SImode, DImode and TImode */
886 {8, 8, 8}, /* cost of storing SSE registers
887 in SImode, DImode and TImode */
888 5, /* MMX or SSE register to integer */
889 64, /* size of prefetch block */
890 6, /* number of parallel prefetches */
891 3, /* Branch cost */
892 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
893 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
894 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
895 COSTS_N_INSNS (8), /* cost of FABS instruction. */
896 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
897 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
898 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
899 DUMMY_STRINGOP_ALGS},
900 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
901 DUMMY_STRINGOP_ALGS},
904 const struct processor_costs *ix86_cost = &pentium_cost;
906 /* Processor feature/optimization bitmasks. */
907 #define m_386 (1<<PROCESSOR_I386)
908 #define m_486 (1<<PROCESSOR_I486)
909 #define m_PENT (1<<PROCESSOR_PENTIUM)
910 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
911 #define m_GEODE (1<<PROCESSOR_GEODE)
912 #define m_K6_GEODE (m_K6 | m_GEODE)
913 #define m_K6 (1<<PROCESSOR_K6)
914 #define m_ATHLON (1<<PROCESSOR_ATHLON)
915 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
916 #define m_K8 (1<<PROCESSOR_K8)
917 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
918 #define m_NOCONA (1<<PROCESSOR_NOCONA)
919 #define m_CORE2 (1<<PROCESSOR_CORE2)
920 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
921 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
922 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
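/* A minimal illustrative sketch (this helper is hypothetical and nothing in
   the compiler calls it): each x86_* tuning variable below is a bitmask over
   PROCESSOR_* values, and i386.h wraps tests of this shape in TARGET_*
   macros for the CPU currently being tuned for, so adding a CPU to a feature
   is just a matter of OR-ing in its m_* mask above.  */
static inline int
ix86_tune_mask_example_p (int feature_mask, enum processor_type cpu)
{
  return (feature_mask & (1 << cpu)) != 0;
}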
924 /* Generic instruction choice should be a common subset of the supported CPUs
925 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
927 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
928 Generic64 seems like a good code-size tradeoff. We can't enable it for 32-bit
929 generic because it does not work well with PPro-based chips. */
930 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
931 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
932 const int x86_zero_extend_with_and = m_486 | m_PENT;
933 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
934 const int x86_double_with_add = ~m_386;
935 const int x86_use_bit_test = m_386;
936 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
937 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
938 const int x86_3dnow_a = m_ATHLON_K8;
939 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
940 /* Branch hints were put in P4 based on simulation results. But
941 after P4 was made, no performance benefit was observed with
942 branch hints. They also increase the code size. As a result,
943 icc never generates branch hints. */
944 const int x86_branch_hints = 0;
945 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
946 /* We probably ought to watch for partial register stalls on the Generic32
947 compilation setting as well. However, in the current implementation the
948 partial register stalls are not eliminated very well - they can
949 be introduced via subregs synthesized by combine and can happen
950 in caller/callee saving sequences.
951 Because this option pays back little on PPro-based chips and is in conflict
952 with the partial register dependencies used by Athlon/P4-based chips, it is
953 better to leave it off for generic32 for now. */
954 const int x86_partial_reg_stall = m_PPRO;
955 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
956 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
957 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
958 const int x86_use_mov0 = m_K6;
959 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
960 const int x86_read_modify_write = ~m_PENT;
961 const int x86_read_modify = ~(m_PENT | m_PPRO);
962 const int x86_split_long_moves = m_PPRO;
963 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
964 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
965 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
966 const int x86_qimode_math = ~(0);
967 const int x86_promote_qi_regs = 0;
968 /* On PPro this flag is meant to avoid partial register stalls. Just like
969 the x86_partial_reg_stall this option might be considered for Generic32
970 if our scheme for avoiding partial stalls was more effective. */
971 const int x86_himode_math = ~(m_PPRO);
972 const int x86_promote_hi_regs = m_PPRO;
973 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
974 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
975 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
976 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
977 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
978 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
980 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
981 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
982 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
983 const int x86_shift1 = ~m_486;
984 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
985 /* In the Generic model we have a conflict here between PPro/Pentium4-based chips
986 that treat 128-bit SSE registers as single units and K8-based chips that
987 split SSE registers into two 64-bit halves.
988 x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
989 to allow register renaming on 128-bit SSE units, but it usually results in one
990 extra microop on 64-bit SSE units. Experimental results show that disabling
991 this option on P4 brings over a 20% SPECfp regression, while enabling it on
992 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
993 of moves. */
994 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
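/* A hedged illustration of the tradeoff described above (the real patterns
   live in i386.md, so take this as a sketch rather than the exact expansion):
   a scalar load such as
     movsd  (%rax), %xmm0
   writes only the low 64 bits and therefore depends on the previous full
   contents of %xmm0; with this option enabled the destination is cleared
   first,
     xorpd  %xmm0, %xmm0
     movsd  (%rax), %xmm0
   so the whole register is freshly written and can be renamed, at the price
   of the extra microop mentioned above.  */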
995 /* Set for machines where the type and dependencies are resolved on SSE
996 register parts instead of whole registers, so we may maintain just the
997 lower part of scalar values in the proper format, leaving the upper part
998 undefined. */
999 const int x86_sse_split_regs = m_ATHLON_K8;
1000 const int x86_sse_typeless_stores = m_ATHLON_K8;
1001 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1002 const int x86_use_ffreep = m_ATHLON_K8;
1003 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1005 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1006 integer data in xmm registers, which results in pretty abysmal code. */
1007 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1009 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1010 /* Some CPU cores are not able to predict more than 4 branch instructions in
1011 the 16 byte window. */
1012 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1013 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
1014 const int x86_use_bt = m_ATHLON_K8;
1015 /* Compare and exchange was added for 80486. */
1016 const int x86_cmpxchg = ~m_386;
1017 /* Compare and exchange 8 bytes was added for pentium. */
1018 const int x86_cmpxchg8b = ~(m_386 | m_486);
1019 /* Compare and exchange 16 bytes was added for nocona. */
1020 const int x86_cmpxchg16b = m_NOCONA;
1021 /* Exchange and add was added for 80486. */
1022 const int x86_xadd = ~m_386;
1023 /* Byteswap was added for 80486. */
1024 const int x86_bswap = ~m_386;
1025 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
1027 static enum stringop_alg stringop_alg = no_stringop;
1029 /* In case the average insn count for single function invocation is
1030 lower than this constant, emit fast (but longer) prologue and
1031 epilogue code. */
1032 #define FAST_PROLOGUE_INSN_COUNT 20
1034 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1035 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1036 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1037 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1039 /* Array of the smallest class containing reg number REGNO, indexed by
1040 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1042 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1044 /* ax, dx, cx, bx */
1045 AREG, DREG, CREG, BREG,
1046 /* si, di, bp, sp */
1047 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1048 /* FP registers */
1049 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1050 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1051 /* arg pointer */
1052 NON_Q_REGS,
1053 /* flags, fpsr, fpcr, dirflag, frame */
1054 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1055 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1056 SSE_REGS, SSE_REGS,
1057 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1058 MMX_REGS, MMX_REGS,
1059 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1060 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1061 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1062 SSE_REGS, SSE_REGS,
1065 /* The "default" register map used in 32bit mode. */
1067 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1069 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1070 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1071 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1072 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1073 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1074 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1075 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1078 static int const x86_64_int_parameter_registers[6] =
1080 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1081 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1084 static int const x86_64_int_return_registers[4] =
1086 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1089 /* The "default" register map used in 64bit mode. */
1090 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1092 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1093 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1094 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1095 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1096 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1097 8,9,10,11,12,13,14,15, /* extended integer registers */
1098 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1101 /* Define the register numbers to be used in Dwarf debugging information.
1102 The SVR4 reference port C compiler uses the following register numbers
1103 in its Dwarf output code:
1104 0 for %eax (gcc regno = 0)
1105 1 for %ecx (gcc regno = 2)
1106 2 for %edx (gcc regno = 1)
1107 3 for %ebx (gcc regno = 3)
1108 4 for %esp (gcc regno = 7)
1109 5 for %ebp (gcc regno = 6)
1110 6 for %esi (gcc regno = 4)
1111 7 for %edi (gcc regno = 5)
1112 The following three DWARF register numbers are never generated by
1113 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1114 believes these numbers have these meanings.
1115 8 for %eip (no gcc equivalent)
1116 9 for %eflags (gcc regno = 17)
1117 10 for %trapno (no gcc equivalent)
1118 It is not at all clear how we should number the FP stack registers
1119 for the x86 architecture. If the version of SDB on x86/svr4 were
1120 a bit less brain dead with respect to floating-point then we would
1121 have a precedent to follow with respect to DWARF register numbers
1122 for x86 FP registers, but the SDB on x86/svr4 is so completely
1123 broken with respect to FP registers that it is hardly worth thinking
1124 of it as something to strive for compatibility with.
1125 The version of x86/svr4 SDB I have at the moment does (partially)
1126 seem to believe that DWARF register number 11 is associated with
1127 the x86 register %st(0), but that's about all. Higher DWARF
1128 register numbers don't seem to be associated with anything in
1129 particular, and even for DWARF regno 11, SDB only seems to under-
1130 stand that it should say that a variable lives in %st(0) (when
1131 asked via an `=' command) if we said it was in DWARF regno 11,
1132 but SDB still prints garbage when asked for the value of the
1133 variable in question (via a `/' command).
1134 (Also note that the labels SDB prints for various FP stack regs
1135 when doing an `x' command are all wrong.)
1136 Note that these problems generally don't affect the native SVR4
1137 C compiler because it doesn't allow the use of -O with -g and
1138 because when it is *not* optimizing, it allocates a memory
1139 location for each floating-point variable, and the memory
1140 location is what gets described in the DWARF AT_location
1141 attribute for the variable in question.
1142 Regardless of the severe mental illness of the x86/svr4 SDB, we
1143 do something sensible here and we use the following DWARF
1144 register numbers. Note that these are all stack-top-relative
1145 numbers.
1146 11 for %st(0) (gcc regno = 8)
1147 12 for %st(1) (gcc regno = 9)
1148 13 for %st(2) (gcc regno = 10)
1149 14 for %st(3) (gcc regno = 11)
1150 15 for %st(4) (gcc regno = 12)
1151 16 for %st(5) (gcc regno = 13)
1152 17 for %st(6) (gcc regno = 14)
1153 18 for %st(7) (gcc regno = 15) */
1155 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1157 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1158 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1159 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1160 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1161 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1162 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1163 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1166 /* Test and compare insns in i386.md store the information needed to
1167 generate branch and scc insns here. */
1169 rtx ix86_compare_op0 = NULL_RTX;
1170 rtx ix86_compare_op1 = NULL_RTX;
1171 rtx ix86_compare_emitted = NULL_RTX;
1173 /* Size of the register save area. */
1174 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
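/* Worked out with the usual 64-bit values (REGPARM_MAX == 6 integer registers
   of UNITS_PER_WORD == 8 bytes each, plus SSE_REGPARM_MAX == 8 SSE registers
   of 16 bytes each; these figures are assumed here rather than restated from
   the headers): 6*8 + 8*16 = 48 + 128 = 176 bytes, which matches the x86-64
   psABI register save area used by varargs functions.  */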
1176 /* Define the structure for the machine field in struct function. */
1178 struct stack_local_entry GTY(())
1180 unsigned short mode;
1181 unsigned short n;
1182 rtx rtl;
1183 struct stack_local_entry *next;
1186 /* Structure describing stack frame layout.
1187 Stack grows downward:
1189 [arguments]
1190                                           <- ARG_POINTER
1191 saved pc
1193 saved frame pointer if frame_pointer_needed
1194                                           <- HARD_FRAME_POINTER
1195 [saved regs]
1197 [padding1]           \
1198                       )
1199 [va_arg registers]   (
1200                       > to_allocate       <- FRAME_POINTER
1201 [frame]              (
1202                       )
1203 [padding2]           /
1204 */
1205 struct ix86_frame
1207 int nregs;
1208 int padding1;
1209 int va_arg_size;
1210 HOST_WIDE_INT frame;
1211 int padding2;
1212 int outgoing_arguments_size;
1213 int red_zone_size;
1215 HOST_WIDE_INT to_allocate;
1216 /* The offsets relative to ARG_POINTER. */
1217 HOST_WIDE_INT frame_pointer_offset;
1218 HOST_WIDE_INT hard_frame_pointer_offset;
1219 HOST_WIDE_INT stack_pointer_offset;
1221 /* When save_regs_using_mov is set, emit prologue using
1222 move instead of push instructions. */
1223 bool save_regs_using_mov;
1226 /* Code model option. */
1227 enum cmodel ix86_cmodel;
1228 /* Asm dialect. */
1229 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1230 /* TLS dialects. */
1231 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1233 /* Which unit we are generating floating point math for. */
1234 enum fpmath_unit ix86_fpmath;
1236 /* Which cpu are we scheduling for. */
1237 enum processor_type ix86_tune;
1238 /* Which instruction set architecture to use. */
1239 enum processor_type ix86_arch;
1241 /* true if sse prefetch instruction is not NOOP. */
1242 int x86_prefetch_sse;
1244 /* ix86_regparm_string as a number */
1245 static int ix86_regparm;
1247 /* -mstackrealign option */
1248 extern int ix86_force_align_arg_pointer;
1249 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1251 /* Preferred alignment for stack boundary in bits. */
1252 unsigned int ix86_preferred_stack_boundary;
1254 /* Values 1-5: see jump.c */
1255 int ix86_branch_cost;
1257 /* Variables which are this size or smaller are put in the data/bss
1258 or ldata/lbss sections. */
1260 int ix86_section_threshold = 65536;
1262 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1263 char internal_label_prefix[16];
1264 int internal_label_prefix_len;
1266 static bool ix86_handle_option (size_t, const char *, int);
1267 static void output_pic_addr_const (FILE *, rtx, int);
1268 static void put_condition_code (enum rtx_code, enum machine_mode,
1269 int, int, FILE *);
1270 static const char *get_some_local_dynamic_name (void);
1271 static int get_some_local_dynamic_name_1 (rtx *, void *);
1272 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1273 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1274 rtx *);
1275 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1276 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1277 enum machine_mode);
1278 static rtx get_thread_pointer (int);
1279 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1280 static void get_pc_thunk_name (char [32], unsigned int);
1281 static rtx gen_push (rtx);
1282 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1283 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1284 static struct machine_function * ix86_init_machine_status (void);
1285 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1286 static int ix86_nsaved_regs (void);
1287 static void ix86_emit_save_regs (void);
1288 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1289 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1290 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1291 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1292 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1293 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1294 static int ix86_issue_rate (void);
1295 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1296 static int ia32_multipass_dfa_lookahead (void);
1297 static void ix86_init_mmx_sse_builtins (void);
1298 static rtx x86_this_parameter (tree);
1299 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1300 HOST_WIDE_INT, tree);
1301 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1302 static void x86_file_start (void);
1303 static void ix86_reorg (void);
1304 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1305 static tree ix86_build_builtin_va_list (void);
1306 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1307 tree, int *, int);
1308 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1309 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1310 static bool ix86_vector_mode_supported_p (enum machine_mode);
1312 static int ix86_address_cost (rtx);
1313 static bool ix86_cannot_force_const_mem (rtx);
1314 static rtx ix86_delegitimize_address (rtx);
1316 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1318 struct builtin_description;
1319 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1320 tree, rtx);
1321 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1322 tree, rtx);
1323 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1324 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1325 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1326 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1327 static rtx safe_vector_operand (rtx, enum machine_mode);
1328 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1329 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1330 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1331 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1332 static int ix86_fp_comparison_cost (enum rtx_code code);
1333 static unsigned int ix86_select_alt_pic_regnum (void);
1334 static int ix86_save_reg (unsigned int, int);
1335 static void ix86_compute_frame_layout (struct ix86_frame *);
1336 static int ix86_comp_type_attributes (tree, tree);
1337 static int ix86_function_regparm (tree, tree);
1338 const struct attribute_spec ix86_attribute_table[];
1339 static bool ix86_function_ok_for_sibcall (tree, tree);
1340 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1341 static int ix86_value_regno (enum machine_mode, tree, tree);
1342 static bool contains_128bit_aligned_vector_p (tree);
1343 static rtx ix86_struct_value_rtx (tree, int);
1344 static bool ix86_ms_bitfield_layout_p (tree);
1345 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1346 static int extended_reg_mentioned_1 (rtx *, void *);
1347 static bool ix86_rtx_costs (rtx, int, int, int *);
1348 static int min_insn_size (rtx);
1349 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1350 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1351 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1352 tree, bool);
1353 static void ix86_init_builtins (void);
1354 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1355 static const char *ix86_mangle_fundamental_type (tree);
1356 static tree ix86_stack_protect_fail (void);
1357 static rtx ix86_internal_arg_pointer (void);
1358 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1360 /* This function is only used on Solaris. */
1361 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1362 ATTRIBUTE_UNUSED;
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; GCC
   just uses an SFmode or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half does contain padding).  */
1372 enum x86_64_reg_class
1374 X86_64_NO_CLASS,
1375 X86_64_INTEGER_CLASS,
1376 X86_64_INTEGERSI_CLASS,
1377 X86_64_SSE_CLASS,
1378 X86_64_SSESF_CLASS,
1379 X86_64_SSEDF_CLASS,
1380 X86_64_SSEUP_CLASS,
1381 X86_64_X87_CLASS,
1382 X86_64_X87UP_CLASS,
1383 X86_64_COMPLEX_X87_CLASS,
1384 X86_64_MEMORY_CLASS
1386 static const char * const x86_64_reg_class_name[] = {
1387 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1388 "sseup", "x87", "x87up", "cplx87", "no"
1391 #define MAX_CLASSES 4
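/* Illustrative example (type name hypothetical, not from this file): a
   small aggregate such as

       struct s { double d; int i; };

   would typically be classified as two eightbytes, the first as SSEDF, so
   it travels in an SSE register using a DFmode move, and the second as
   INTEGERSI, so a cheaper SImode move suffices because the upper half of
   that eightbyte is only padding.  */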
1393 /* Table of constants used by fldpi, fldln2, etc.... */
1394 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1395 static bool ext_80387_constants_init = 0;
1396 static void init_ext_80387_constants (void);
1397 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1398 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1399 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1400 static section *x86_64_elf_select_section (tree decl, int reloc,
1401 unsigned HOST_WIDE_INT align)
1402 ATTRIBUTE_UNUSED;
1404 /* Initialize the GCC target structure. */
1405 #undef TARGET_ATTRIBUTE_TABLE
1406 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1407 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1408 # undef TARGET_MERGE_DECL_ATTRIBUTES
1409 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1410 #endif
1412 #undef TARGET_COMP_TYPE_ATTRIBUTES
1413 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1415 #undef TARGET_INIT_BUILTINS
1416 #define TARGET_INIT_BUILTINS ix86_init_builtins
1417 #undef TARGET_EXPAND_BUILTIN
1418 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1420 #undef TARGET_ASM_FUNCTION_EPILOGUE
1421 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1423 #undef TARGET_ENCODE_SECTION_INFO
1424 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1425 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1426 #else
1427 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1428 #endif
1430 #undef TARGET_ASM_OPEN_PAREN
1431 #define TARGET_ASM_OPEN_PAREN ""
1432 #undef TARGET_ASM_CLOSE_PAREN
1433 #define TARGET_ASM_CLOSE_PAREN ""
1435 #undef TARGET_ASM_ALIGNED_HI_OP
1436 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1437 #undef TARGET_ASM_ALIGNED_SI_OP
1438 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1439 #ifdef ASM_QUAD
1440 #undef TARGET_ASM_ALIGNED_DI_OP
1441 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1442 #endif
1444 #undef TARGET_ASM_UNALIGNED_HI_OP
1445 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1446 #undef TARGET_ASM_UNALIGNED_SI_OP
1447 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1448 #undef TARGET_ASM_UNALIGNED_DI_OP
1449 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1451 #undef TARGET_SCHED_ADJUST_COST
1452 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1453 #undef TARGET_SCHED_ISSUE_RATE
1454 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1455 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1456 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1457 ia32_multipass_dfa_lookahead
1459 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1460 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1462 #ifdef HAVE_AS_TLS
1463 #undef TARGET_HAVE_TLS
1464 #define TARGET_HAVE_TLS true
1465 #endif
1466 #undef TARGET_CANNOT_FORCE_CONST_MEM
1467 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1468 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1469 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1471 #undef TARGET_DELEGITIMIZE_ADDRESS
1472 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1474 #undef TARGET_MS_BITFIELD_LAYOUT_P
1475 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1477 #if TARGET_MACHO
1478 #undef TARGET_BINDS_LOCAL_P
1479 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1480 #endif
1482 #undef TARGET_ASM_OUTPUT_MI_THUNK
1483 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1484 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1485 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1487 #undef TARGET_ASM_FILE_START
1488 #define TARGET_ASM_FILE_START x86_file_start
1490 #undef TARGET_DEFAULT_TARGET_FLAGS
1491 #define TARGET_DEFAULT_TARGET_FLAGS \
1492 (TARGET_DEFAULT \
1493 | TARGET_64BIT_DEFAULT \
1494 | TARGET_SUBTARGET_DEFAULT \
1495 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1497 #undef TARGET_HANDLE_OPTION
1498 #define TARGET_HANDLE_OPTION ix86_handle_option
1500 #undef TARGET_RTX_COSTS
1501 #define TARGET_RTX_COSTS ix86_rtx_costs
1502 #undef TARGET_ADDRESS_COST
1503 #define TARGET_ADDRESS_COST ix86_address_cost
1505 #undef TARGET_FIXED_CONDITION_CODE_REGS
1506 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1507 #undef TARGET_CC_MODES_COMPATIBLE
1508 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1510 #undef TARGET_MACHINE_DEPENDENT_REORG
1511 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1513 #undef TARGET_BUILD_BUILTIN_VA_LIST
1514 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1516 #undef TARGET_MD_ASM_CLOBBERS
1517 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1519 #undef TARGET_PROMOTE_PROTOTYPES
1520 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1521 #undef TARGET_STRUCT_VALUE_RTX
1522 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1523 #undef TARGET_SETUP_INCOMING_VARARGS
1524 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1525 #undef TARGET_MUST_PASS_IN_STACK
1526 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1527 #undef TARGET_PASS_BY_REFERENCE
1528 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1529 #undef TARGET_INTERNAL_ARG_POINTER
1530 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1531 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1532 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1534 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1535 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1537 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1538 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1540 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1541 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1543 #ifdef HAVE_AS_TLS
1544 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1545 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1546 #endif
1548 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1549 #undef TARGET_INSERT_ATTRIBUTES
1550 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1551 #endif
1553 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1554 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1556 #undef TARGET_STACK_PROTECT_FAIL
1557 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1559 #undef TARGET_FUNCTION_VALUE
1560 #define TARGET_FUNCTION_VALUE ix86_function_value
1562 struct gcc_target targetm = TARGET_INITIALIZER;
1565 /* The svr4 ABI for the i386 says that records and unions are returned
1566 in memory. */
1567 #ifndef DEFAULT_PCC_STRUCT_RETURN
1568 #define DEFAULT_PCC_STRUCT_RETURN 1
1569 #endif
1571 /* Implement TARGET_HANDLE_OPTION. */
1573 static bool
1574 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1576 switch (code)
1578 case OPT_m3dnow:
1579 if (!value)
1581 target_flags &= ~MASK_3DNOW_A;
1582 target_flags_explicit |= MASK_3DNOW_A;
1584 return true;
1586 case OPT_mmmx:
1587 if (!value)
1589 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1590 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1592 return true;
1594 case OPT_msse:
1595 if (!value)
1597 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1598 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1600 return true;
1602 case OPT_msse2:
1603 if (!value)
1605 target_flags &= ~MASK_SSE3;
1606 target_flags_explicit |= MASK_SSE3;
1608 return true;
1610 default:
1611 return true;
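/* For illustration: with -mno-sse the OPT_msse case above also clears
   MASK_SSE2 and MASK_SSE3 and records them in target_flags_explicit, so a
   later -march selection will not silently re-enable them.  */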
1615 /* Sometimes certain combinations of command options do not make
1616 sense on a particular target machine. You can define a macro
1617 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1618 defined, is executed once just after all the command options have
1619 been parsed.
1621 Don't use this macro to turn on various extra optimizations for
1622 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1624 void
1625 override_options (void)
1627 int i;
1628 int ix86_tune_defaulted = 0;
1630 /* Comes from final.c -- no real reason to change it. */
1631 #define MAX_CODE_ALIGN 16
1633 static struct ptt
1635 const struct processor_costs *cost; /* Processor costs */
1636 const int target_enable; /* Target flags to enable. */
1637 const int target_disable; /* Target flags to disable. */
1638 const int align_loop; /* Default alignments. */
1639 const int align_loop_max_skip;
1640 const int align_jump;
1641 const int align_jump_max_skip;
1642 const int align_func;
1644 const processor_target_table[PROCESSOR_max] =
1646 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1647 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1648 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1649 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1650 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1651 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1652 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1653 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1654 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1655 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1656 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1657 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1658 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1661 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1662 static struct pta
1664 const char *const name; /* processor name or nickname. */
1665 const enum processor_type processor;
1666 const enum pta_flags
1668 PTA_SSE = 1,
1669 PTA_SSE2 = 2,
1670 PTA_SSE3 = 4,
1671 PTA_MMX = 8,
1672 PTA_PREFETCH_SSE = 16,
1673 PTA_3DNOW = 32,
1674 PTA_3DNOW_A = 64,
1675 PTA_64BIT = 128,
1676 PTA_SSSE3 = 256
1677 } flags;
1679 const processor_alias_table[] =
1681 {"i386", PROCESSOR_I386, 0},
1682 {"i486", PROCESSOR_I486, 0},
1683 {"i586", PROCESSOR_PENTIUM, 0},
1684 {"pentium", PROCESSOR_PENTIUM, 0},
1685 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1686 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1687 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1688 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1689 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1690 {"i686", PROCESSOR_PENTIUMPRO, 0},
1691 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1692 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1693 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1694 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1695 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1696 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1697 | PTA_MMX | PTA_PREFETCH_SSE},
1698 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1699 | PTA_MMX | PTA_PREFETCH_SSE},
1700 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1701 | PTA_MMX | PTA_PREFETCH_SSE},
1702 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1703 | PTA_MMX | PTA_PREFETCH_SSE},
1704 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
1705 | PTA_64BIT | PTA_MMX
1706 | PTA_PREFETCH_SSE},
1707 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1708 | PTA_3DNOW_A},
1709 {"k6", PROCESSOR_K6, PTA_MMX},
1710 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1711 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1712 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1713 | PTA_3DNOW_A},
1714 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1715 | PTA_3DNOW | PTA_3DNOW_A},
1716 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1717 | PTA_3DNOW_A | PTA_SSE},
1718 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1719 | PTA_3DNOW_A | PTA_SSE},
1720 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_3DNOW_A | PTA_SSE},
1722 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1723 | PTA_SSE | PTA_SSE2 },
1724 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1725 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1726 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1727 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1728 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1729 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1730 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1731 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1732 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1733 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1736 int const pta_size = ARRAY_SIZE (processor_alias_table);
1738 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1739 SUBTARGET_OVERRIDE_OPTIONS;
1740 #endif
1742 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1743 SUBSUBTARGET_OVERRIDE_OPTIONS;
1744 #endif
1746 /* -fPIC is the default for x86_64. */
1747 if (TARGET_MACHO && TARGET_64BIT)
1748 flag_pic = 2;
1750 /* Set the default values for switches whose default depends on TARGET_64BIT
1751 in case they weren't overwritten by command line options. */
1752 if (TARGET_64BIT)
1754 /* Mach-O doesn't support omitting the frame pointer for now. */
1755 if (flag_omit_frame_pointer == 2)
1756 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1757 if (flag_asynchronous_unwind_tables == 2)
1758 flag_asynchronous_unwind_tables = 1;
1759 if (flag_pcc_struct_return == 2)
1760 flag_pcc_struct_return = 0;
1762 else
1764 if (flag_omit_frame_pointer == 2)
1765 flag_omit_frame_pointer = 0;
1766 if (flag_asynchronous_unwind_tables == 2)
1767 flag_asynchronous_unwind_tables = 0;
1768 if (flag_pcc_struct_return == 2)
1769 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1772 /* Need to check -mtune=generic first. */
1773 if (ix86_tune_string)
1775 if (!strcmp (ix86_tune_string, "generic")
1776 || !strcmp (ix86_tune_string, "i686")
1777 /* As special support for cross compilers we read -mtune=native
1778 as -mtune=generic. With native compilers we won't see the
1779 -mtune=native, as it was changed by the driver. */
1780 || !strcmp (ix86_tune_string, "native"))
1782 if (TARGET_64BIT)
1783 ix86_tune_string = "generic64";
1784 else
1785 ix86_tune_string = "generic32";
1787 else if (!strncmp (ix86_tune_string, "generic", 7))
1788 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1790 else
1792 if (ix86_arch_string)
1793 ix86_tune_string = ix86_arch_string;
1794 if (!ix86_tune_string)
1796 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1797 ix86_tune_defaulted = 1;
1800 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1801 need to use a sensible tune option. */
1802 if (!strcmp (ix86_tune_string, "generic")
1803 || !strcmp (ix86_tune_string, "x86-64")
1804 || !strcmp (ix86_tune_string, "i686"))
1806 if (TARGET_64BIT)
1807 ix86_tune_string = "generic64";
1808 else
1809 ix86_tune_string = "generic32";
1812 if (ix86_stringop_string)
1814 if (!strcmp (ix86_stringop_string, "rep_byte"))
1815 stringop_alg = rep_prefix_1_byte;
1816 else if (!strcmp (ix86_stringop_string, "libcall"))
1817 stringop_alg = libcall;
1818 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1819 stringop_alg = rep_prefix_4_byte;
1820 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1821 stringop_alg = rep_prefix_8_byte;
1822 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1823 stringop_alg = loop_1_byte;
1824 else if (!strcmp (ix86_stringop_string, "loop"))
1825 stringop_alg = loop;
1826 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1827 stringop_alg = unrolled_loop;
1828 else
1829 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1831 if (!strcmp (ix86_tune_string, "x86-64"))
1832 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1833 "-mtune=generic instead as appropriate.");
1835 if (!ix86_arch_string)
1836 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1837 if (!strcmp (ix86_arch_string, "generic"))
1838 error ("generic CPU can be used only for -mtune= switch");
1839 if (!strncmp (ix86_arch_string, "generic", 7))
1840 error ("bad value (%s) for -march= switch", ix86_arch_string);
1842 if (ix86_cmodel_string != 0)
1844 if (!strcmp (ix86_cmodel_string, "small"))
1845 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1846 else if (!strcmp (ix86_cmodel_string, "medium"))
1847 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1848 else if (flag_pic)
1849 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1850 else if (!strcmp (ix86_cmodel_string, "32"))
1851 ix86_cmodel = CM_32;
1852 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1853 ix86_cmodel = CM_KERNEL;
1854 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1855 ix86_cmodel = CM_LARGE;
1856 else
1857 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1859 else
1861 ix86_cmodel = CM_32;
1862 if (TARGET_64BIT)
1863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
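/* For illustration: a 64-bit compilation with no -mcmodel option defaults
   to CM_SMALL, or CM_SMALL_PIC when -fPIC is in effect; a 32-bit
   compilation always defaults to CM_32.  */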
1865 if (ix86_asm_string != 0)
1867 if (! TARGET_MACHO
1868 && !strcmp (ix86_asm_string, "intel"))
1869 ix86_asm_dialect = ASM_INTEL;
1870 else if (!strcmp (ix86_asm_string, "att"))
1871 ix86_asm_dialect = ASM_ATT;
1872 else
1873 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1875 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1876 error ("code model %qs not supported in the %s bit mode",
1877 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1878 if (ix86_cmodel == CM_LARGE)
1879 sorry ("code model %<large%> not supported yet");
1880 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1881 sorry ("%i-bit mode not compiled in",
1882 (target_flags & MASK_64BIT) ? 64 : 32);
1884 for (i = 0; i < pta_size; i++)
1885 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1887 ix86_arch = processor_alias_table[i].processor;
1888 /* Default cpu tuning to the architecture. */
1889 ix86_tune = ix86_arch;
1890 if (processor_alias_table[i].flags & PTA_MMX
1891 && !(target_flags_explicit & MASK_MMX))
1892 target_flags |= MASK_MMX;
1893 if (processor_alias_table[i].flags & PTA_3DNOW
1894 && !(target_flags_explicit & MASK_3DNOW))
1895 target_flags |= MASK_3DNOW;
1896 if (processor_alias_table[i].flags & PTA_3DNOW_A
1897 && !(target_flags_explicit & MASK_3DNOW_A))
1898 target_flags |= MASK_3DNOW_A;
1899 if (processor_alias_table[i].flags & PTA_SSE
1900 && !(target_flags_explicit & MASK_SSE))
1901 target_flags |= MASK_SSE;
1902 if (processor_alias_table[i].flags & PTA_SSE2
1903 && !(target_flags_explicit & MASK_SSE2))
1904 target_flags |= MASK_SSE2;
1905 if (processor_alias_table[i].flags & PTA_SSE3
1906 && !(target_flags_explicit & MASK_SSE3))
1907 target_flags |= MASK_SSE3;
1908 if (processor_alias_table[i].flags & PTA_SSSE3
1909 && !(target_flags_explicit & MASK_SSSE3))
1910 target_flags |= MASK_SSSE3;
1911 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1912 x86_prefetch_sse = true;
1913 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1914 error ("CPU you selected does not support x86-64 "
1915 "instruction set");
1916 break;
1919 if (i == pta_size)
1920 error ("bad value (%s) for -march= switch", ix86_arch_string);
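/* For illustration: "-march=k8" with no explicit -mno-* options ends up
   setting MASK_MMX, MASK_3DNOW, MASK_3DNOW_A, MASK_SSE and MASK_SSE2,
   marks SSE prefetch as available, and defaults ix86_tune to k8 as
   well.  */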
1922 for (i = 0; i < pta_size; i++)
1923 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1925 ix86_tune = processor_alias_table[i].processor;
1926 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1928 if (ix86_tune_defaulted)
1930 ix86_tune_string = "x86-64";
1931 for (i = 0; i < pta_size; i++)
1932 if (! strcmp (ix86_tune_string,
1933 processor_alias_table[i].name))
1934 break;
1935 ix86_tune = processor_alias_table[i].processor;
1937 else
1938 error ("CPU you selected does not support x86-64 "
1939 "instruction set");
1941 /* Intel CPUs have always interpreted SSE prefetch instructions as
1942 NOPs; so, we can enable SSE prefetch instructions even when
1943 -mtune (rather than -march) points us to a processor that has them.
1944 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1945 higher processors. */
1946 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1947 x86_prefetch_sse = true;
1948 break;
1950 if (i == pta_size)
1951 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1953 if (optimize_size)
1954 ix86_cost = &size_cost;
1955 else
1956 ix86_cost = processor_target_table[ix86_tune].cost;
1957 target_flags |= processor_target_table[ix86_tune].target_enable;
1958 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1960 /* Arrange to set up i386_stack_locals for all functions. */
1961 init_machine_status = ix86_init_machine_status;
1963 /* Validate -mregparm= value. */
1964 if (ix86_regparm_string)
1966 i = atoi (ix86_regparm_string);
1967 if (i < 0 || i > REGPARM_MAX)
1968 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1969 else
1970 ix86_regparm = i;
1972 else
1973 if (TARGET_64BIT)
1974 ix86_regparm = REGPARM_MAX;
1976 /* If the user has provided any of the -malign-* options,
1977 warn and use that value only if -falign-* is not set.
1978 Remove this code in GCC 3.2 or later. */
1979 if (ix86_align_loops_string)
1981 warning (0, "-malign-loops is obsolete, use -falign-loops");
1982 if (align_loops == 0)
1984 i = atoi (ix86_align_loops_string);
1985 if (i < 0 || i > MAX_CODE_ALIGN)
1986 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1987 else
1988 align_loops = 1 << i;
1992 if (ix86_align_jumps_string)
1994 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1995 if (align_jumps == 0)
1997 i = atoi (ix86_align_jumps_string);
1998 if (i < 0 || i > MAX_CODE_ALIGN)
1999 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2000 else
2001 align_jumps = 1 << i;
2005 if (ix86_align_funcs_string)
2007 warning (0, "-malign-functions is obsolete, use -falign-functions");
2008 if (align_functions == 0)
2010 i = atoi (ix86_align_funcs_string);
2011 if (i < 0 || i > MAX_CODE_ALIGN)
2012 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2013 else
2014 align_functions = 1 << i;
2018 /* Default align_* from the processor table. */
2019 if (align_loops == 0)
2021 align_loops = processor_target_table[ix86_tune].align_loop;
2022 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2024 if (align_jumps == 0)
2026 align_jumps = processor_target_table[ix86_tune].align_jump;
2027 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2029 if (align_functions == 0)
2031 align_functions = processor_target_table[ix86_tune].align_func;
2034 /* Validate -mbranch-cost= value, or provide default. */
2035 ix86_branch_cost = ix86_cost->branch_cost;
2036 if (ix86_branch_cost_string)
2038 i = atoi (ix86_branch_cost_string);
2039 if (i < 0 || i > 5)
2040 error ("-mbranch-cost=%d is not between 0 and 5", i);
2041 else
2042 ix86_branch_cost = i;
2044 if (ix86_section_threshold_string)
2046 i = atoi (ix86_section_threshold_string);
2047 if (i < 0)
2048 error ("-mlarge-data-threshold=%d is negative", i);
2049 else
2050 ix86_section_threshold = i;
2053 if (ix86_tls_dialect_string)
2055 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2056 ix86_tls_dialect = TLS_DIALECT_GNU;
2057 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2058 ix86_tls_dialect = TLS_DIALECT_GNU2;
2059 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2060 ix86_tls_dialect = TLS_DIALECT_SUN;
2061 else
2062 error ("bad value (%s) for -mtls-dialect= switch",
2063 ix86_tls_dialect_string);
2066 /* Keep nonleaf frame pointers. */
2067 if (flag_omit_frame_pointer)
2068 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2069 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2070 flag_omit_frame_pointer = 1;
2072 /* If we're doing fast math, we don't care about comparison order
2073 wrt NaNs. This lets us use a shorter comparison sequence. */
2074 if (flag_finite_math_only)
2075 target_flags &= ~MASK_IEEE_FP;
2077 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2078 since the insns won't need emulation. */
2079 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2080 target_flags &= ~MASK_NO_FANCY_MATH_387;
2082 /* Likewise, if the target doesn't have a 387, or we've specified
2083 software floating point, don't use 387 inline intrinsics. */
2084 if (!TARGET_80387)
2085 target_flags |= MASK_NO_FANCY_MATH_387;
2087 /* Turn on SSE3 builtins for -mssse3. */
2088 if (TARGET_SSSE3)
2089 target_flags |= MASK_SSE3;
2091 /* Turn on SSE2 builtins for -msse3. */
2092 if (TARGET_SSE3)
2093 target_flags |= MASK_SSE2;
2095 /* Turn on SSE builtins for -msse2. */
2096 if (TARGET_SSE2)
2097 target_flags |= MASK_SSE;
2099 /* Turn on MMX builtins for -msse. */
2100 if (TARGET_SSE)
2102 target_flags |= MASK_MMX & ~target_flags_explicit;
2103 x86_prefetch_sse = true;
2106 /* Turn on MMX builtins for 3Dnow. */
2107 if (TARGET_3DNOW)
2108 target_flags |= MASK_MMX;
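/* For illustration: because these implications chain, "-mssse3" alone
   ends up enabling SSE3, SSE2, SSE and (unless explicitly disabled)
   MMX.  */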
2110 if (TARGET_64BIT)
2112 if (TARGET_ALIGN_DOUBLE)
2113 error ("-malign-double makes no sense in the 64bit mode");
2114 if (TARGET_RTD)
2115 error ("-mrtd calling convention not supported in the 64bit mode");
2117 /* Enable by default the SSE and MMX builtins. Do allow the user to
2118 explicitly disable any of these. In particular, disabling SSE and
2119 MMX for kernel code is extremely useful. */
2120 target_flags
2121 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2122 & ~target_flags_explicit);
2124 else
/* The i386 ABI does not specify a red zone.  It still makes sense to use
   one when the programmer takes care to keep the stack from being
   destroyed.  */
2128 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2129 target_flags |= MASK_NO_RED_ZONE;
2132 /* Validate -mpreferred-stack-boundary= value, or provide default.
2133 The default of 128 bits is for Pentium III's SSE __m128. We can't
2134 change it because of optimize_size. Otherwise, we can't mix object
2135 files compiled with -Os and -On. */
2136 ix86_preferred_stack_boundary = 128;
2137 if (ix86_preferred_stack_boundary_string)
2139 i = atoi (ix86_preferred_stack_boundary_string);
2140 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2141 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2142 TARGET_64BIT ? 4 : 2);
2143 else
2144 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
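/* For illustration: "-mpreferred-stack-boundary=4" yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment.  */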
2147 /* Accept -mx87regparm only if 80387 support is enabled. */
2148 if (TARGET_X87REGPARM
2149 && ! TARGET_80387)
2150 error ("-mx87regparm used without 80387 enabled");
2152 /* Accept -msseregparm only if at least SSE support is enabled. */
2153 if (TARGET_SSEREGPARM
2154 && ! TARGET_SSE)
2155 error ("-msseregparm used without SSE enabled");
2157 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2159 if (ix86_fpmath_string != 0)
2161 if (! strcmp (ix86_fpmath_string, "387"))
2162 ix86_fpmath = FPMATH_387;
2163 else if (! strcmp (ix86_fpmath_string, "sse"))
2165 if (!TARGET_SSE)
2167 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2168 ix86_fpmath = FPMATH_387;
2170 else
2171 ix86_fpmath = FPMATH_SSE;
2173 else if (! strcmp (ix86_fpmath_string, "387,sse")
2174 || ! strcmp (ix86_fpmath_string, "sse,387"))
2176 if (!TARGET_SSE)
2178 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2179 ix86_fpmath = FPMATH_387;
2181 else if (!TARGET_80387)
2183 warning (0, "387 instruction set disabled, using SSE arithmetics");
2184 ix86_fpmath = FPMATH_SSE;
2186 else
2187 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2189 else
2190 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2193 /* If the i387 is disabled, then do not return values in it. */
2194 if (!TARGET_80387)
2195 target_flags &= ~MASK_FLOAT_RETURNS;
2197 if ((x86_accumulate_outgoing_args & TUNEMASK)
2198 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2199 && !optimize_size)
2200 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2202 /* ??? Unwind info is not correct around the CFG unless either a frame
2203 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2204 unwind info generation to be aware of the CFG and propagating states
2205 around edges. */
2206 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2207 || flag_exceptions || flag_non_call_exceptions)
2208 && flag_omit_frame_pointer
2209 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2211 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2212 warning (0, "unwind tables currently require either a frame pointer "
2213 "or -maccumulate-outgoing-args for correctness");
2214 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2217 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2219 char *p;
2220 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2221 p = strchr (internal_label_prefix, 'X');
2222 internal_label_prefix_len = p - internal_label_prefix;
2223 *p = '\0';
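/* For illustration: if ASM_GENERATE_INTERNAL_LABEL were to produce
   "*.LX0" here, the 'X' is found at offset 3, so internal_label_prefix
   becomes "*.L" and internal_label_prefix_len is 3.  The exact prefix is
   target dependent.  */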
2226 /* When scheduling description is not available, disable scheduler pass
2227 so it won't slow down the compilation and make x87 code slower. */
2228 if (!TARGET_SCHEDULE)
2229 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2231 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2232 set_param_value ("simultaneous-prefetches",
2233 ix86_cost->simultaneous_prefetches);
2234 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2235 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
/* Switch to the appropriate section for output of DECL.
2239 DECL is either a `VAR_DECL' node or a constant of some sort.
2240 RELOC indicates whether forming the initial value of DECL requires
2241 link-time relocations. */
2243 static section *
2244 x86_64_elf_select_section (tree decl, int reloc,
2245 unsigned HOST_WIDE_INT align)
2247 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2248 && ix86_in_large_data_p (decl))
2250 const char *sname = NULL;
2251 unsigned int flags = SECTION_WRITE;
2252 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2254 case SECCAT_DATA:
2255 sname = ".ldata";
2256 break;
2257 case SECCAT_DATA_REL:
2258 sname = ".ldata.rel";
2259 break;
2260 case SECCAT_DATA_REL_LOCAL:
2261 sname = ".ldata.rel.local";
2262 break;
2263 case SECCAT_DATA_REL_RO:
2264 sname = ".ldata.rel.ro";
2265 break;
2266 case SECCAT_DATA_REL_RO_LOCAL:
2267 sname = ".ldata.rel.ro.local";
2268 break;
2269 case SECCAT_BSS:
2270 sname = ".lbss";
2271 flags |= SECTION_BSS;
2272 break;
2273 case SECCAT_RODATA:
2274 case SECCAT_RODATA_MERGE_STR:
2275 case SECCAT_RODATA_MERGE_STR_INIT:
2276 case SECCAT_RODATA_MERGE_CONST:
2277 sname = ".lrodata";
2278 flags = 0;
2279 break;
2280 case SECCAT_SRODATA:
2281 case SECCAT_SDATA:
2282 case SECCAT_SBSS:
2283 gcc_unreachable ();
2284 case SECCAT_TEXT:
2285 case SECCAT_TDATA:
2286 case SECCAT_TBSS:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
2289 break;
2291 if (sname)
2293 /* We might get called with string constants, but get_named_section
2294 doesn't like them as they are not DECLs. Also, we need to set
2295 flags in that case. */
2296 if (!DECL_P (decl))
2297 return get_section (sname, flags, NULL);
2298 return get_named_section (decl, sname, reloc);
2301 return default_elf_select_section (decl, reloc, align);
2304 /* Build up a unique section name, expressed as a
2305 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2306 RELOC indicates whether the initial value of EXP requires
2307 link-time relocations. */
2309 static void
2310 x86_64_elf_unique_section (tree decl, int reloc)
2312 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2313 && ix86_in_large_data_p (decl))
2315 const char *prefix = NULL;
2316 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2317 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2319 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2321 case SECCAT_DATA:
2322 case SECCAT_DATA_REL:
2323 case SECCAT_DATA_REL_LOCAL:
2324 case SECCAT_DATA_REL_RO:
2325 case SECCAT_DATA_REL_RO_LOCAL:
2326 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2327 break;
2328 case SECCAT_BSS:
2329 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2330 break;
2331 case SECCAT_RODATA:
2332 case SECCAT_RODATA_MERGE_STR:
2333 case SECCAT_RODATA_MERGE_STR_INIT:
2334 case SECCAT_RODATA_MERGE_CONST:
2335 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2336 break;
2337 case SECCAT_SRODATA:
2338 case SECCAT_SDATA:
2339 case SECCAT_SBSS:
2340 gcc_unreachable ();
2341 case SECCAT_TEXT:
2342 case SECCAT_TDATA:
2343 case SECCAT_TBSS:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
2346 break;
2348 if (prefix)
2350 const char *name;
2351 size_t nlen, plen;
2352 char *string;
2353 plen = strlen (prefix);
2355 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2356 name = targetm.strip_name_encoding (name);
2357 nlen = strlen (name);
2359 string = alloca (nlen + plen + 1);
2360 memcpy (string, prefix, plen);
2361 memcpy (string + plen, name, nlen + 1);
2363 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2364 return;
2367 default_unique_section (decl, reloc);
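/* For illustration (variable name hypothetical): in the medium code model
   a large-data variable "counter" categorized as SECCAT_DATA would get
   the section name ".ldata.counter", or ".gnu.linkonce.ld.counter" if it
   is one-only and COMDAT groups are unavailable.  */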
2370 #ifdef COMMON_ASM_OP
2371 /* This says how to output assembler code to declare an
2372 uninitialized external linkage data object.
   For the medium model on x86-64 we need to use the .largecomm directive
   for large objects.  */
2376 void
2377 x86_elf_aligned_common (FILE *file,
2378 const char *name, unsigned HOST_WIDE_INT size,
2379 int align)
2381 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2382 && size > (unsigned int)ix86_section_threshold)
2383 fprintf (file, ".largecomm\t");
2384 else
2385 fprintf (file, "%s", COMMON_ASM_OP);
2386 assemble_name (file, name);
2387 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2388 size, align / BITS_PER_UNIT);
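/* For illustration (name and numbers hypothetical): in the medium model a
   128 KB common object "big_table" with 256-bit alignment, being above
   ix86_section_threshold, is emitted as

       .largecomm	big_table,131072,32

   while smaller objects fall back to COMMON_ASM_OP.  */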
2391 /* Utility function for targets to use in implementing
2392 ASM_OUTPUT_ALIGNED_BSS. */
2394 void
2395 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2396 const char *name, unsigned HOST_WIDE_INT size,
2397 int align)
2399 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2400 && size > (unsigned int)ix86_section_threshold)
2401 switch_to_section (get_named_section (decl, ".lbss", 0));
2402 else
2403 switch_to_section (bss_section);
2404 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2405 #ifdef ASM_DECLARE_OBJECT_NAME
2406 last_assemble_variable_decl = decl;
2407 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2408 #else
2409 /* Standard thing is just output label for the object. */
2410 ASM_OUTPUT_LABEL (file, name);
2411 #endif /* ASM_DECLARE_OBJECT_NAME */
2412 ASM_OUTPUT_SKIP (file, size ? size : 1);
2414 #endif
2416 void
2417 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2419 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2420 make the problem with not enough registers even worse. */
2421 #ifdef INSN_SCHEDULING
2422 if (level > 1)
2423 flag_schedule_insns = 0;
2424 #endif
2426 if (TARGET_MACHO)
2427 /* The Darwin libraries never set errno, so we might as well
2428 avoid calling them when that's the only reason we would. */
2429 flag_errno_math = 0;
/* The default values of these switches depend on TARGET_64BIT,
   which is not known at this point.  Mark these values with 2 and
   let the user override them.  If there is no command line option
   specifying them, we will set the defaults in override_options.  */
2435 if (optimize >= 1)
2436 flag_omit_frame_pointer = 2;
2437 flag_pcc_struct_return = 2;
2438 flag_asynchronous_unwind_tables = 2;
2439 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2440 SUBTARGET_OPTIMIZATION_OPTIONS;
2441 #endif
2444 /* Table of valid machine attributes. */
2445 const struct attribute_spec ix86_attribute_table[] =
2447 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2448 /* Stdcall attribute says callee is responsible for popping arguments
2449 if they are not variable. */
2450 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2451 /* Fastcall attribute says callee is responsible for popping arguments
2452 if they are not variable. */
2453 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2454 /* Cdecl attribute says the callee is a normal C declaration */
2455 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2456 /* Regparm attribute specifies how many integer arguments are to be
2457 passed in registers. */
2458 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2459 /* X87regparm attribute says we are passing floating point arguments
2460 in 80387 registers. */
2461 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2462 /* Sseregparm attribute says we are using x86_64 calling conventions
2463 for FP arguments. */
2464 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2465 /* force_align_arg_pointer says this function realigns the stack at entry. */
2466 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2467 false, true, true, ix86_handle_cconv_attribute },
2468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2469 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2470 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2471 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2472 #endif
2473 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2474 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2475 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2476 SUBTARGET_ATTRIBUTE_TABLE,
2477 #endif
2478 { NULL, 0, 0, false, false, false, NULL }
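/* Illustrative usage of the calling-convention attributes above (function
   names hypothetical):

       void __attribute__((fastcall)) f (int a, int b);
       int  __attribute__((regparm(3))) g (int, int, int);

   With fastcall, a and b travel in %ecx and %edx; with regparm(3), up to
   three integer arguments travel in %eax, %edx and %ecx.  */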
2481 /* Decide whether we can make a sibling call to a function. DECL is the
2482 declaration of the function being targeted by the call and EXP is the
2483 CALL_EXPR representing the call. */
2485 static bool
2486 ix86_function_ok_for_sibcall (tree decl, tree exp)
2488 tree func;
2489 rtx a, b;
2491 /* If we are generating position-independent code, we cannot sibcall
2492 optimize any indirect call, or a direct call to a global function,
2493 as the PLT requires %ebx be live. */
2494 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2495 return false;
2497 if (decl)
2498 func = decl;
2499 else
2501 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2502 if (POINTER_TYPE_P (func))
2503 func = TREE_TYPE (func);
2506 /* Check that the return value locations are the same. Like
2507 if we are returning floats on the 80387 register stack, we cannot
2508 make a sibcall from a function that doesn't return a float to a
2509 function that does or, conversely, from a function that does return
2510 a float to a function that doesn't; the necessary stack adjustment
2511 would not be executed. This is also the place we notice
2512 differences in the return value ABI. Note that it is ok for one
2513 of the functions to have void return type as long as the return
2514 value of the other is passed in a register. */
2515 a = ix86_function_value (TREE_TYPE (exp), func, false);
2516 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2517 cfun->decl, false);
2518 if (STACK_REG_P (a) || STACK_REG_P (b))
2520 if (!rtx_equal_p (a, b))
2521 return false;
2523 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2525 else if (!rtx_equal_p (a, b))
2526 return false;
2528 /* If this call is indirect, we'll need to be able to use a call-clobbered
2529 register for the address of the target function. Make sure that all
2530 such registers are not used for passing parameters. */
2531 if (!decl && !TARGET_64BIT)
2533 tree type;
2535 /* We're looking at the CALL_EXPR, we need the type of the function. */
2536 type = TREE_OPERAND (exp, 0); /* pointer expression */
2537 type = TREE_TYPE (type); /* pointer type */
2538 type = TREE_TYPE (type); /* function type */
2540 if (ix86_function_regparm (type, NULL) >= 3)
2542 /* ??? Need to count the actual number of registers to be used,
2543 not the possible number of registers. Fix later. */
2544 return false;
2548 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2549 /* Dllimport'd functions are also called indirectly. */
2550 if (decl && DECL_DLLIMPORT_P (decl)
2551 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2552 return false;
2553 #endif
2555 /* If we forced aligned the stack, then sibcalling would unalign the
2556 stack, which may break the called function. */
2557 if (cfun->machine->force_align_arg_pointer)
2558 return false;
2560 /* Otherwise okay. That also includes certain types of indirect calls. */
2561 return true;
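/* For illustration: a 32-bit -fpic call through a function pointer is
   rejected above because the PLT-based call sequence needs %ebx live,
   whereas the same indirect call in 64-bit code can generally be sibcall
   optimized.  */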
2564 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2565 and "sseregparm" calling convention attributes;
2566 arguments as in struct attribute_spec.handler. */
2568 static tree
2569 ix86_handle_cconv_attribute (tree *node, tree name,
2570 tree args,
2571 int flags ATTRIBUTE_UNUSED,
2572 bool *no_add_attrs)
2574 if (TREE_CODE (*node) != FUNCTION_TYPE
2575 && TREE_CODE (*node) != METHOD_TYPE
2576 && TREE_CODE (*node) != FIELD_DECL
2577 && TREE_CODE (*node) != TYPE_DECL)
2579 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2580 IDENTIFIER_POINTER (name));
2581 *no_add_attrs = true;
2582 return NULL_TREE;
2585 /* Can combine regparm with all attributes but fastcall. */
2586 if (is_attribute_p ("regparm", name))
2588 tree cst;
2590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2592 error ("fastcall and regparm attributes are not compatible");
2595 cst = TREE_VALUE (args);
2596 if (TREE_CODE (cst) != INTEGER_CST)
2598 warning (OPT_Wattributes,
2599 "%qs attribute requires an integer constant argument",
2600 IDENTIFIER_POINTER (name));
2601 *no_add_attrs = true;
2603 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2605 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2606 IDENTIFIER_POINTER (name), REGPARM_MAX);
2607 *no_add_attrs = true;
2610 if (!TARGET_64BIT
2611 && lookup_attribute (ix86_force_align_arg_pointer_string,
2612 TYPE_ATTRIBUTES (*node))
2613 && compare_tree_int (cst, REGPARM_MAX-1))
2615 error ("%s functions limited to %d register parameters",
2616 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2619 return NULL_TREE;
2622 if (TARGET_64BIT)
2624 warning (OPT_Wattributes, "%qs attribute ignored",
2625 IDENTIFIER_POINTER (name));
2626 *no_add_attrs = true;
2627 return NULL_TREE;
2630 /* Can combine fastcall with stdcall (redundant), x87regparm
2631 and sseregparm. */
2632 if (is_attribute_p ("fastcall", name))
2634 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2636 error ("fastcall and cdecl attributes are not compatible");
2638 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2640 error ("fastcall and stdcall attributes are not compatible");
2642 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2644 error ("fastcall and regparm attributes are not compatible");
2648 /* Can combine stdcall with fastcall (redundant), regparm,
2649 x87regparm and sseregparm. */
2650 else if (is_attribute_p ("stdcall", name))
2652 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2654 error ("stdcall and cdecl attributes are not compatible");
2656 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2658 error ("stdcall and fastcall attributes are not compatible");
2662 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2663 else if (is_attribute_p ("cdecl", name))
2665 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2667 error ("stdcall and cdecl attributes are not compatible");
2669 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2671 error ("fastcall and cdecl attributes are not compatible");
2675 /* Can combine x87regparm or sseregparm with all attributes. */
2677 return NULL_TREE;
2680 /* Return 0 if the attributes for two types are incompatible, 1 if they
2681 are compatible, and 2 if they are nearly compatible (which causes a
2682 warning to be generated). */
2684 static int
2685 ix86_comp_type_attributes (tree type1, tree type2)
2687 /* Check for mismatch of non-default calling convention. */
2688 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2690 if (TREE_CODE (type1) != FUNCTION_TYPE)
2691 return 1;
2693 /* Check for mismatched fastcall/regparm types. */
2694 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2695 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2696 || (ix86_function_regparm (type1, NULL)
2697 != ix86_function_regparm (type2, NULL)))
2698 return 0;
2700 /* Check for mismatched x87regparm types. */
2701 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1))
2702 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2)))
2703 return 0;
2705 /* Check for mismatched sseregparm types. */
2706 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2707 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2708 return 0;
2710 /* Check for mismatched return types (cdecl vs stdcall). */
2711 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2712 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2713 return 0;
2715 return 1;
2718 /* Return the regparm value for a function with the indicated TYPE and DECL.
2719 DECL may be NULL when calling function indirectly
2720 or considering a libcall. */
2722 static int
2723 ix86_function_regparm (tree type, tree decl)
2725 tree attr;
2726 int regparm = ix86_regparm;
2727 bool user_convention = false;
2729 if (!TARGET_64BIT)
2731 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2732 if (attr)
2734 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2735 user_convention = true;
2738 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2740 regparm = 2;
2741 user_convention = true;
2744 /* Use register calling convention for local functions when possible. */
2745 if (!TARGET_64BIT && !user_convention && decl
2746 && flag_unit_at_a_time && !profile_flag)
2748 struct cgraph_local_info *i = cgraph_local_info (decl);
2749 if (i && i->local)
2751 int local_regparm, globals = 0, regno;
2753 /* Make sure no regparm register is taken by a global register
2754 variable. */
2755 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2756 if (global_regs[local_regparm])
2757 break;
2758 /* We can't use regparm(3) for nested functions as these use
2759 static chain pointer in third argument. */
2760 if (local_regparm == 3
2761 && decl_function_context (decl)
2762 && !DECL_NO_STATIC_CHAIN (decl))
2763 local_regparm = 2;
/* If the function realigns its stack pointer, the
2765 prologue will clobber %ecx. If we've already
2766 generated code for the callee, the callee
2767 DECL_STRUCT_FUNCTION is gone, so we fall back to
2768 scanning the attributes for the self-realigning
2769 property. */
2770 if ((DECL_STRUCT_FUNCTION (decl)
2771 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2772 || (!DECL_STRUCT_FUNCTION (decl)
2773 && lookup_attribute (ix86_force_align_arg_pointer_string,
2774 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2775 local_regparm = 2;
/* Each global register variable increases register pressure,
   so the more global register variables there are, the less
   useful the regparm optimization becomes, unless it was
   requested by the user explicitly.  */
2779 for (regno = 0; regno < 6; regno++)
2780 if (global_regs[regno])
2781 globals++;
2782 local_regparm
2783 = globals < local_regparm ? local_regparm - globals : 0;
2785 if (local_regparm > regparm)
2786 regparm = local_regparm;
2790 return regparm;
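/* For illustration: a file-local function compiled with -funit-at-a-time
   and no profiling, not nested and with no global register variables
   claiming the argument registers, is typically bumped to regparm 3 here
   even without any regparm attribute.  */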
2793 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2794 arguments in x87 registers for a function with the indicated
2795 TYPE and DECL. DECL may be NULL when calling function indirectly
2796 or considering a libcall. For local functions, return 2.
2797 Otherwise return 0. */
2799 static int
2800 ix86_function_x87regparm (tree type, tree decl)
2802 /* Use x87 registers to pass floating point arguments if requested
2803 by the x87regparm attribute. */
2804 if (TARGET_X87REGPARM
2805 || (type
2806 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type))))
2808 if (!TARGET_80387)
2810 if (decl)
2811 error ("Calling %qD with attribute x87regparm without "
2812 "80387 enabled", decl);
2813 else
2814 error ("Calling %qT with attribute x87regparm without "
2815 "80387 enabled", type);
2816 return 0;
2819 return 1;
2822 /* For local functions, pass up to X87_REGPARM_MAX floating point
2823 arguments in x87 registers. */
2824 if (!TARGET_64BIT && decl
2825 && flag_unit_at_a_time && !profile_flag)
2827 struct cgraph_local_info *i = cgraph_local_info (decl);
2828 if (i && i->local)
2829 return 2;
2832 return 0;
2835 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2836 DFmode (2) arguments in SSE registers for a function with the
2837 indicated TYPE and DECL. DECL may be NULL when calling function
2838 indirectly or considering a libcall. Otherwise return 0. */
2840 static int
2841 ix86_function_sseregparm (tree type, tree decl)
2843 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2844 by the sseregparm attribute. */
2845 if (TARGET_SSEREGPARM
2846 || (type
2847 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2849 if (!TARGET_SSE)
2851 if (decl)
2852 error ("Calling %qD with attribute sseregparm without "
2853 "SSE/SSE2 enabled", decl);
2854 else
2855 error ("Calling %qT with attribute sseregparm without "
2856 "SSE/SSE2 enabled", type);
2857 return 0;
2860 return 2;
2863 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2864 (and DFmode for SSE2) arguments in SSE registers,
2865 even for 32-bit targets. */
2866 if (!TARGET_64BIT && decl
2867 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2869 struct cgraph_local_info *i = cgraph_local_info (decl);
2870 if (i && i->local)
2871 return TARGET_SSE2 ? 2 : 1;
2874 return 0;
2877 /* Return true if EAX is live at the start of the function. Used by
2878 ix86_expand_prologue to determine if we need special help before
2879 calling allocate_stack_worker. */
2881 static bool
2882 ix86_eax_live_at_start_p (void)
2884 /* Cheat. Don't bother working forward from ix86_function_regparm
2885 to the function type to whether an actual argument is located in
2886 eax. Instead just look at cfg info, which is still close enough
2887 to correct at this point. This gives false positives for broken
2888 functions that might use uninitialized data that happens to be
2889 allocated in eax, but who cares? */
2890 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2893 /* Value is the number of bytes of arguments automatically
2894 popped when returning from a subroutine call.
2895 FUNDECL is the declaration node of the function (as a tree),
2896 FUNTYPE is the data type of the function (as a tree),
2897 or for a library call it is an identifier node for the subroutine name.
2898 SIZE is the number of bytes of arguments passed on the stack.
2900 On the 80386, the RTD insn may be used to pop them if the number
2901 of args is fixed, but if the number is variable then the caller
2902 must pop them all. RTD can't be used for library calls now
2903 because the library is compiled with the Unix compiler.
2904 Use of RTD is a selectable option, since it is incompatible with
2905 standard Unix calling sequences. If the option is not selected,
2906 the caller must always pop the args.
2908 The attribute stdcall is equivalent to RTD on a per module basis. */
2911 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2913 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2915 /* Cdecl functions override -mrtd, and never pop the stack. */
2916 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2918 /* Stdcall and fastcall functions will pop the stack if not
2919 variable args. */
2920 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2921 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2922 rtd = 1;
2924 if (rtd
2925 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2926 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2927 == void_type_node)))
2928 return size;
2931 /* Lose any fake structure return argument if it is passed on the stack. */
2932 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2933 && !TARGET_64BIT
2934 && !KEEP_AGGREGATE_RETURN_POINTER)
2936 int nregs = ix86_function_regparm (funtype, fundecl);
2938 if (!nregs)
2939 return GET_MODE_SIZE (Pmode);
2942 return 0;
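/* For illustration (declaration hypothetical): for

       void __attribute__((stdcall)) f (int, int);

   the callee pops its 8 bytes of stack arguments, so this returns 8 and
   the epilogue uses "ret $8"; a cdecl or variadic function returns 0 and
   leaves the popping to the caller.  */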
2945 /* Argument support functions. */
2947 /* Return true when register may be used to pass function parameters. */
2948 bool
2949 ix86_function_arg_regno_p (int regno)
2951 int i;
2952 if (!TARGET_64BIT)
2953 return (regno < REGPARM_MAX
2954 || (TARGET_80387 && FP_REGNO_P (regno)
2955 && (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
2956 || (TARGET_MMX && MMX_REGNO_P (regno)
2957 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2958 || (TARGET_SSE && SSE_REGNO_P (regno)
2959 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2961 if (TARGET_SSE && SSE_REGNO_P (regno)
2962 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2963 return true;
/* RAX is used as a hidden argument when calling variadic functions.  */
2965 if (!regno)
2966 return true;
2967 for (i = 0; i < REGPARM_MAX; i++)
2968 if (regno == x86_64_int_parameter_registers[i])
2969 return true;
2970 return false;
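/* For illustration: in 64-bit mode the checks above accept RAX (the
   hidden SSE-register count for variadic calls), the six integer argument
   registers RDI, RSI, RDX, RCX, R8 and R9, and XMM0-XMM7; everything else
   is rejected.  */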
/* Return true if we do not know how to pass TYPE solely in registers. */
2975 static bool
2976 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2978 if (must_pass_in_stack_var_size_or_pad (mode, type))
2979 return true;
2981 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2982 The layout_type routine is crafty and tries to trick us into passing
2983 currently unsupported vector types on the stack by using TImode. */
2984 return (!TARGET_64BIT && mode == TImode
2985 && type && TREE_CODE (type) != VECTOR_TYPE);
2988 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2989 for a call to a function whose data type is FNTYPE.
2990 For a library call, FNTYPE is 0. */
2992 void
2993 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2994 tree fntype, /* tree ptr for function decl */
2995 rtx libname, /* SYMBOL_REF of library name or 0 */
2996 tree fndecl)
2998 static CUMULATIVE_ARGS zero_cum;
2999 tree param, next_param;
3001 if (TARGET_DEBUG_ARG)
3003 fprintf (stderr, "\ninit_cumulative_args (");
3004 if (fntype)
3005 fprintf (stderr, "fntype code = %s, ret code = %s",
3006 tree_code_name[(int) TREE_CODE (fntype)],
3007 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3008 else
3009 fprintf (stderr, "no fntype");
3011 if (libname)
3012 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3015 *cum = zero_cum;
3017 /* Set up the number of registers to use for passing arguments. */
3018 cum->nregs = ix86_regparm;
3019 if (TARGET_80387)
3020 cum->x87_nregs = X87_REGPARM_MAX;
3021 if (TARGET_SSE)
3022 cum->sse_nregs = SSE_REGPARM_MAX;
3023 if (TARGET_MMX)
3024 cum->mmx_nregs = MMX_REGPARM_MAX;
3025 cum->warn_sse = true;
3026 cum->warn_mmx = true;
3027 cum->maybe_vaarg = false;
3029 /* Use ecx and edx registers if function has fastcall attribute,
3030 else look for regparm information. */
3031 if (fntype && !TARGET_64BIT)
3033 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3035 cum->nregs = 2;
3036 cum->fastcall = 1;
3038 else
3039 cum->nregs = ix86_function_regparm (fntype, fndecl);
3042 /* Set up the number of 80387 registers used for passing
3043 floating point arguments. Warn for mismatching ABI. */
3044 cum->float_in_x87 = ix86_function_x87regparm (fntype, fndecl);
3046 /* Set up the number of SSE registers used for passing SFmode
3047 and DFmode arguments. Warn for mismatching ABI. */
3048 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3050 /* Determine if this function has variable arguments. This is
3051 indicated by the last argument being 'void_type_node' if there
3052 are no variable arguments. If there are variable arguments, then
3053 we won't pass anything in registers in 32-bit mode. */
3055 if (cum->nregs || cum->mmx_nregs
3056 || cum->x87_nregs || cum->sse_nregs)
3058 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3059 param != 0; param = next_param)
3061 next_param = TREE_CHAIN (param);
3062 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3064 if (!TARGET_64BIT)
3066 cum->nregs = 0;
3067 cum->x87_nregs = 0;
3068 cum->sse_nregs = 0;
3069 cum->mmx_nregs = 0;
3070 cum->warn_sse = 0;
3071 cum->warn_mmx = 0;
3072 cum->fastcall = 0;
3073 cum->float_in_x87 = 0;
3074 cum->float_in_sse = 0;
3076 cum->maybe_vaarg = true;
3080 if ((!fntype && !libname)
3081 || (fntype && !TYPE_ARG_TYPES (fntype)))
3082 cum->maybe_vaarg = true;
3084 if (TARGET_DEBUG_ARG)
3085 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3087 return;
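/* A hedged example of the bookkeeping above; the declarations are assumptions
   used only for illustration.  On a 32-bit target,

       void __attribute__ ((fastcall))    f (int a, int b, int c);
       void __attribute__ ((regparm (3))) g (int a, int b, int c);

   init_cumulative_args sets cum->nregs = 2 and cum->fastcall = 1 for f (ECX
   and EDX are handed out), and cum->nregs = 3 for g.  A varargs prototype
   instead zeroes all of the register counts and sets cum->maybe_vaarg.  */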
3090 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3091 But in the case of vector types, it is some vector mode.
3093 When we have only some of our vector isa extensions enabled, then there
3094 are some modes for which vector_mode_supported_p is false. For these
3095 modes, the generic vector support in gcc will choose some non-vector mode
3096 in order to implement the type. By computing the natural mode, we'll
3097 select the proper ABI location for the operand and not depend on whatever
3098 the middle-end decides to do with these vector types. */
3100 static enum machine_mode
3101 type_natural_mode (tree type)
3103 enum machine_mode mode = TYPE_MODE (type);
3105 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3107 HOST_WIDE_INT size = int_size_in_bytes (type);
3108 if ((size == 8 || size == 16)
3109 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3110 && TYPE_VECTOR_SUBPARTS (type) > 1)
3112 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3114 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3115 mode = MIN_MODE_VECTOR_FLOAT;
3116 else
3117 mode = MIN_MODE_VECTOR_INT;
3119 /* Get the mode which has this inner mode and number of units. */
3120 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3121 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3122 && GET_MODE_INNER (mode) == innermode)
3123 return mode;
3125 gcc_unreachable ();
3129 return mode;
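/* Minimal illustration, assuming the GNU C vector_size extension:

       typedef int v4si __attribute__ ((vector_size (16)));

   When SSE is disabled, layout_type may give such a type a non-vector mode,
   but type_natural_mode still walks the vector modes looking for 4 units of
   SImode and returns V4SImode, so the ABI location of a v4si argument does
   not depend on which ISA extensions happen to be enabled.  */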
3132 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3133 this may not agree with the mode that the type system has chosen for the
3134 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3135 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3137 static rtx
3138 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3139 unsigned int regno)
3141 rtx tmp;
3143 if (orig_mode != BLKmode)
3144 tmp = gen_rtx_REG (orig_mode, regno);
3145 else
3147 tmp = gen_rtx_REG (mode, regno);
3148 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3149 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3152 return tmp;
3155 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3156 of this code is to classify each 8bytes of incoming argument by the register
3157 class and assign registers accordingly. */
3159 /* Return the union class of CLASS1 and CLASS2.
3160 See the x86-64 PS ABI for details. */
3162 static enum x86_64_reg_class
3163 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3165 /* Rule #1: If both classes are equal, this is the resulting class. */
3166 if (class1 == class2)
3167 return class1;
3169 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3170 the other class. */
3171 if (class1 == X86_64_NO_CLASS)
3172 return class2;
3173 if (class2 == X86_64_NO_CLASS)
3174 return class1;
3176 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3177 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3178 return X86_64_MEMORY_CLASS;
3180 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3181 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3182 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3183 return X86_64_INTEGERSI_CLASS;
3184 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3185 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3186 return X86_64_INTEGER_CLASS;
3188 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3189 MEMORY is used. */
3190 if (class1 == X86_64_X87_CLASS
3191 || class1 == X86_64_X87UP_CLASS
3192 || class1 == X86_64_COMPLEX_X87_CLASS
3193 || class2 == X86_64_X87_CLASS
3194 || class2 == X86_64_X87UP_CLASS
3195 || class2 == X86_64_COMPLEX_X87_CLASS)
3196 return X86_64_MEMORY_CLASS;
3198 /* Rule #6: Otherwise class SSE is used. */
3199 return X86_64_SSE_CLASS;
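/* A quick sketch of how the rules above combine, shown for a few pairs of
   overlapping field classes (the pairs are illustrative, not exhaustive):

       INTEGERSI + SSESF    -> INTEGERSI   (rule #4)
       INTEGER   + SSE      -> INTEGER     (rule #4)
       SSE       + NO_CLASS -> SSE         (rule #2)
       X87       + SSE      -> MEMORY      (rule #5)  */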
3202 /* Classify the argument of type TYPE and mode MODE.
3203 CLASSES will be filled by the register class used to pass each word
3204 of the operand. The number of words is returned. In case the parameter
3205 should be passed in memory, 0 is returned. As a special case for zero
3206 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3208 BIT_OFFSET is used internally for handling records and specifies the
3209 offset in bits modulo 256 to avoid overflow cases.
3211 See the x86-64 PS ABI for details.
3214 static int
3215 classify_argument (enum machine_mode mode, tree type,
3216 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3218 HOST_WIDE_INT bytes =
3219 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3220 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3222 /* Variable sized entities are always passed/returned in memory. */
3223 if (bytes < 0)
3224 return 0;
3226 if (mode != VOIDmode
3227 && targetm.calls.must_pass_in_stack (mode, type))
3228 return 0;
3230 if (type && AGGREGATE_TYPE_P (type))
3232 int i;
3233 tree field;
3234 enum x86_64_reg_class subclasses[MAX_CLASSES];
3236 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3237 if (bytes > 16)
3238 return 0;
3240 for (i = 0; i < words; i++)
3241 classes[i] = X86_64_NO_CLASS;
3243 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3244 signal the memory class, so handle it as a special case. */
3245 if (!words)
3247 classes[0] = X86_64_NO_CLASS;
3248 return 1;
3251 /* Classify each field of record and merge classes. */
3252 switch (TREE_CODE (type))
3254 case RECORD_TYPE:
3255 /* And now merge the fields of structure. */
3256 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3258 if (TREE_CODE (field) == FIELD_DECL)
3260 int num;
3262 if (TREE_TYPE (field) == error_mark_node)
3263 continue;
3265 /* Bitfields are always classified as integer. Handle them
3266 early, since later code would consider them to be
3267 misaligned integers. */
3268 if (DECL_BIT_FIELD (field))
3270 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3271 i < ((int_bit_position (field) + (bit_offset % 64))
3272 + tree_low_cst (DECL_SIZE (field), 0)
3273 + 63) / 8 / 8; i++)
3274 classes[i] =
3275 merge_classes (X86_64_INTEGER_CLASS,
3276 classes[i]);
3278 else
3280 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3281 TREE_TYPE (field), subclasses,
3282 (int_bit_position (field)
3283 + bit_offset) % 256);
3284 if (!num)
3285 return 0;
3286 for (i = 0; i < num; i++)
3288 int pos =
3289 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3290 classes[i + pos] =
3291 merge_classes (subclasses[i], classes[i + pos]);
3296 break;
3298 case ARRAY_TYPE:
3299 /* Arrays are handled as small records. */
3301 int num;
3302 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3303 TREE_TYPE (type), subclasses, bit_offset);
3304 if (!num)
3305 return 0;
3307 /* The partial classes are now full classes. */
3308 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3309 subclasses[0] = X86_64_SSE_CLASS;
3310 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3311 subclasses[0] = X86_64_INTEGER_CLASS;
3313 for (i = 0; i < words; i++)
3314 classes[i] = subclasses[i % num];
3316 break;
3318 case UNION_TYPE:
3319 case QUAL_UNION_TYPE:
3320 /* Unions are similar to RECORD_TYPE but offset is always 0. */
3322 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3324 if (TREE_CODE (field) == FIELD_DECL)
3326 int num;
3328 if (TREE_TYPE (field) == error_mark_node)
3329 continue;
3331 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3332 TREE_TYPE (field), subclasses,
3333 bit_offset);
3334 if (!num)
3335 return 0;
3336 for (i = 0; i < num; i++)
3337 classes[i] = merge_classes (subclasses[i], classes[i]);
3340 break;
3342 default:
3343 gcc_unreachable ();
3346 /* Final merger cleanup. */
3347 for (i = 0; i < words; i++)
3349 /* If one class is MEMORY, everything should be passed in
3350 memory. */
3351 if (classes[i] == X86_64_MEMORY_CLASS)
3352 return 0;
3354 /* The X86_64_SSEUP_CLASS should be always preceded by
3355 X86_64_SSE_CLASS. */
3356 if (classes[i] == X86_64_SSEUP_CLASS
3357 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3358 classes[i] = X86_64_SSE_CLASS;
3360 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3361 if (classes[i] == X86_64_X87UP_CLASS
3362 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3363 classes[i] = X86_64_SSE_CLASS;
3365 return words;
3368 /* Compute the alignment needed. We align all types to natural boundaries with
3369 the exception of XFmode, which is aligned to 128 bits. */
3370 if (mode != VOIDmode && mode != BLKmode)
3372 int mode_alignment = GET_MODE_BITSIZE (mode);
3374 if (mode == XFmode)
3375 mode_alignment = 128;
3376 else if (mode == XCmode)
3377 mode_alignment = 256;
3378 if (COMPLEX_MODE_P (mode))
3379 mode_alignment /= 2;
3380 /* Misaligned fields are always returned in memory. */
3381 if (bit_offset % mode_alignment)
3382 return 0;
3385 /* For V1xx modes, just use the base mode. */
3386 if (VECTOR_MODE_P (mode)
3387 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3388 mode = GET_MODE_INNER (mode);
3390 /* Classification of atomic types. */
3391 switch (mode)
3393 case SDmode:
3394 case DDmode:
3395 classes[0] = X86_64_SSE_CLASS;
3396 return 1;
3397 case TDmode:
3398 classes[0] = X86_64_SSE_CLASS;
3399 classes[1] = X86_64_SSEUP_CLASS;
3400 return 2;
3401 case DImode:
3402 case SImode:
3403 case HImode:
3404 case QImode:
3405 case CSImode:
3406 case CHImode:
3407 case CQImode:
3408 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3409 classes[0] = X86_64_INTEGERSI_CLASS;
3410 else
3411 classes[0] = X86_64_INTEGER_CLASS;
3412 return 1;
3413 case CDImode:
3414 case TImode:
3415 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3416 return 2;
3417 case CTImode:
3418 return 0;
3419 case SFmode:
3420 if (!(bit_offset % 64))
3421 classes[0] = X86_64_SSESF_CLASS;
3422 else
3423 classes[0] = X86_64_SSE_CLASS;
3424 return 1;
3425 case DFmode:
3426 classes[0] = X86_64_SSEDF_CLASS;
3427 return 1;
3428 case XFmode:
3429 classes[0] = X86_64_X87_CLASS;
3430 classes[1] = X86_64_X87UP_CLASS;
3431 return 2;
3432 case TFmode:
3433 classes[0] = X86_64_SSE_CLASS;
3434 classes[1] = X86_64_SSEUP_CLASS;
3435 return 2;
3436 case SCmode:
3437 classes[0] = X86_64_SSE_CLASS;
3438 return 1;
3439 case DCmode:
3440 classes[0] = X86_64_SSEDF_CLASS;
3441 classes[1] = X86_64_SSEDF_CLASS;
3442 return 2;
3443 case XCmode:
3444 classes[0] = X86_64_COMPLEX_X87_CLASS;
3445 return 1;
3446 case TCmode:
3447 /* This mode is larger than 16 bytes. */
3448 return 0;
3449 case V4SFmode:
3450 case V4SImode:
3451 case V16QImode:
3452 case V8HImode:
3453 case V2DFmode:
3454 case V2DImode:
3455 classes[0] = X86_64_SSE_CLASS;
3456 classes[1] = X86_64_SSEUP_CLASS;
3457 return 2;
3458 case V2SFmode:
3459 case V2SImode:
3460 case V4HImode:
3461 case V8QImode:
3462 classes[0] = X86_64_SSE_CLASS;
3463 return 1;
3464 case BLKmode:
3465 case VOIDmode:
3466 return 0;
3467 default:
3468 gcc_assert (VECTOR_MODE_P (mode));
3470 if (bytes > 16)
3471 return 0;
3473 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3475 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3476 classes[0] = X86_64_INTEGERSI_CLASS;
3477 else
3478 classes[0] = X86_64_INTEGER_CLASS;
3479 classes[1] = X86_64_INTEGER_CLASS;
3480 return 1 + (bytes > 8);
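/* A hedged end-to-end example; the struct is an assumption used only for
   illustration.  On x86-64,

       struct s { double d; long l; };     16 bytes, i.e. two eightbytes

   makes classify_argument return 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so the value travels in one SSE and one
   integer register.  A struct larger than 16 bytes returns 0 and is passed
   in memory instead.  */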
3484 /* Examine the argument and return the number of registers required in each
3485 class. Return 0 iff the parameter should be passed in memory. */
3486 static int
3487 examine_argument (enum machine_mode mode, tree type, int in_return,
3488 int *int_nregs, int *sse_nregs)
3490 enum x86_64_reg_class class[MAX_CLASSES];
3491 int n = classify_argument (mode, type, class, 0);
3493 *int_nregs = 0;
3494 *sse_nregs = 0;
3495 if (!n)
3496 return 0;
3497 for (n--; n >= 0; n--)
3498 switch (class[n])
3500 case X86_64_INTEGER_CLASS:
3501 case X86_64_INTEGERSI_CLASS:
3502 (*int_nregs)++;
3503 break;
3504 case X86_64_SSE_CLASS:
3505 case X86_64_SSESF_CLASS:
3506 case X86_64_SSEDF_CLASS:
3507 (*sse_nregs)++;
3508 break;
3509 case X86_64_NO_CLASS:
3510 case X86_64_SSEUP_CLASS:
3511 break;
3512 case X86_64_X87_CLASS:
3513 case X86_64_X87UP_CLASS:
3514 if (!in_return)
3515 return 0;
3516 break;
3517 case X86_64_COMPLEX_X87_CLASS:
3518 return in_return ? 2 : 0;
3519 case X86_64_MEMORY_CLASS:
3520 gcc_unreachable ();
3522 return 1;
3525 /* Construct the container for the argument used by the GCC interface. See
3526 FUNCTION_ARG for the detailed description. */
3528 static rtx
3529 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3530 tree type, int in_return, int nintregs, int nsseregs,
3531 const int *intreg, int sse_regno)
3533 /* The following variables hold the static issued_error state. */
3534 static bool issued_sse_arg_error;
3535 static bool issued_sse_ret_error;
3536 static bool issued_x87_ret_error;
3538 enum machine_mode tmpmode;
3539 int bytes =
3540 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3541 enum x86_64_reg_class class[MAX_CLASSES];
3542 int n;
3543 int i;
3544 int nexps = 0;
3545 int needed_sseregs, needed_intregs;
3546 rtx exp[MAX_CLASSES];
3547 rtx ret;
3549 n = classify_argument (mode, type, class, 0);
3550 if (TARGET_DEBUG_ARG)
3552 if (!n)
3553 fprintf (stderr, "Memory class\n");
3554 else
3556 fprintf (stderr, "Classes:");
3557 for (i = 0; i < n; i++)
3559 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3561 fprintf (stderr, "\n");
3564 if (!n)
3565 return NULL;
3566 if (!examine_argument (mode, type, in_return, &needed_intregs,
3567 &needed_sseregs))
3568 return NULL;
3569 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3570 return NULL;
3572 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3573 some less clueful developer tries to use floating-point anyway. */
3574 if (needed_sseregs && !TARGET_SSE)
3576 if (in_return)
3578 if (!issued_sse_ret_error)
3580 error ("SSE register return with SSE disabled");
3581 issued_sse_ret_error = true;
3584 else if (!issued_sse_arg_error)
3586 error ("SSE register argument with SSE disabled");
3587 issued_sse_arg_error = true;
3589 return NULL;
3592 /* Likewise, error if the ABI requires us to return values in the
3593 x87 registers and the user specified -mno-80387. */
3594 if (!TARGET_80387 && in_return)
3595 for (i = 0; i < n; i++)
3596 if (class[i] == X86_64_X87_CLASS
3597 || class[i] == X86_64_X87UP_CLASS
3598 || class[i] == X86_64_COMPLEX_X87_CLASS)
3600 if (!issued_x87_ret_error)
3602 error ("x87 register return with x87 disabled");
3603 issued_x87_ret_error = true;
3605 return NULL;
3608 /* First construct simple cases. Avoid SCmode, since we want to use
3609 a single register to pass this type. */
3610 if (n == 1 && mode != SCmode)
3611 switch (class[0])
3613 case X86_64_INTEGER_CLASS:
3614 case X86_64_INTEGERSI_CLASS:
3615 return gen_rtx_REG (mode, intreg[0]);
3616 case X86_64_SSE_CLASS:
3617 case X86_64_SSESF_CLASS:
3618 case X86_64_SSEDF_CLASS:
3619 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3620 case X86_64_X87_CLASS:
3621 case X86_64_COMPLEX_X87_CLASS:
3622 return gen_rtx_REG (mode, FIRST_STACK_REG);
3623 case X86_64_NO_CLASS:
3624 /* Zero sized array, struct or class. */
3625 return NULL;
3626 default:
3627 gcc_unreachable ();
3629 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3630 && mode != BLKmode)
3631 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3632 if (n == 2
3633 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3634 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3635 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3636 && class[1] == X86_64_INTEGER_CLASS
3637 && (mode == CDImode || mode == TImode || mode == TFmode)
3638 && intreg[0] + 1 == intreg[1])
3639 return gen_rtx_REG (mode, intreg[0]);
3641 /* Otherwise figure out the entries of the PARALLEL. */
3642 for (i = 0; i < n; i++)
3644 switch (class[i])
3646 case X86_64_NO_CLASS:
3647 break;
3648 case X86_64_INTEGER_CLASS:
3649 case X86_64_INTEGERSI_CLASS:
3650 /* Merge TImodes on aligned occasions here too. */
3651 if (i * 8 + 8 > bytes)
3652 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3653 else if (class[i] == X86_64_INTEGERSI_CLASS)
3654 tmpmode = SImode;
3655 else
3656 tmpmode = DImode;
3657 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3658 if (tmpmode == BLKmode)
3659 tmpmode = DImode;
3660 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3661 gen_rtx_REG (tmpmode, *intreg),
3662 GEN_INT (i*8));
3663 intreg++;
3664 break;
3665 case X86_64_SSESF_CLASS:
3666 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3667 gen_rtx_REG (SFmode,
3668 SSE_REGNO (sse_regno)),
3669 GEN_INT (i*8));
3670 sse_regno++;
3671 break;
3672 case X86_64_SSEDF_CLASS:
3673 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3674 gen_rtx_REG (DFmode,
3675 SSE_REGNO (sse_regno)),
3676 GEN_INT (i*8));
3677 sse_regno++;
3678 break;
3679 case X86_64_SSE_CLASS:
3680 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3681 tmpmode = TImode;
3682 else
3683 tmpmode = DImode;
3684 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3685 gen_rtx_REG (tmpmode,
3686 SSE_REGNO (sse_regno)),
3687 GEN_INT (i*8));
3688 if (tmpmode == TImode)
3689 i++;
3690 sse_regno++;
3691 break;
3692 default:
3693 gcc_unreachable ();
3697 /* Empty aligned struct, union or class. */
3698 if (nexps == 0)
3699 return NULL;
3701 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3702 for (i = 0; i < nexps; i++)
3703 XVECEXP (ret, 0, i) = exp [i];
3704 return ret;
3707 /* Update the data in CUM to advance over an argument
3708 of mode MODE and data type TYPE.
3709 (TYPE is null for libcalls where that information may not be available.) */
3711 void
3712 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3713 tree type, int named)
3715 int bytes =
3716 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3717 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3719 if (type)
3720 mode = type_natural_mode (type);
3722 if (TARGET_DEBUG_ARG)
3723 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3724 "mode=%s, named=%d)\n\n",
3725 words, cum->words, cum->nregs, cum->sse_nregs,
3726 GET_MODE_NAME (mode), named);
3728 if (TARGET_64BIT)
3730 int int_nregs, sse_nregs;
3731 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3732 cum->words += words;
3733 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3735 cum->nregs -= int_nregs;
3736 cum->sse_nregs -= sse_nregs;
3737 cum->regno += int_nregs;
3738 cum->sse_regno += sse_nregs;
3740 else
3741 cum->words += words;
3743 else
3745 switch (mode)
3747 default:
3748 break;
3750 case BLKmode:
3751 if (bytes < 0)
3752 break;
3753 /* FALLTHRU */
3755 case DImode:
3756 case SImode:
3757 case HImode:
3758 case QImode:
3759 cum->words += words;
3760 cum->nregs -= words;
3761 cum->regno += words;
3763 if (cum->nregs <= 0)
3765 cum->nregs = 0;
3766 cum->regno = 0;
3768 break;
3770 case SFmode:
3771 if (cum->float_in_sse > 0)
3772 goto skip_80387;
3774 case DFmode:
3775 if (cum->float_in_sse > 1)
3776 goto skip_80387;
3778 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3779 rounding takes place when values are passed in x87
3780 registers, pass DFmode and SFmode types to local functions
3781 only when flag_unsafe_math_optimizations is set. */
3782 if (!cum->float_in_x87
3783 || (cum->float_in_x87 == 2
3784 && !flag_unsafe_math_optimizations))
3785 break;
3787 case XFmode:
3788 if (!cum->float_in_x87)
3789 break;
3791 if (!type || !AGGREGATE_TYPE_P (type))
3793 cum->x87_nregs -= 1;
3794 cum->x87_regno += 1;
3795 if (cum->x87_nregs <= 0)
3797 cum->x87_nregs = 0;
3798 cum->x87_regno = 0;
3801 break;
3803 skip_80387:
3805 case TImode:
3806 case V16QImode:
3807 case V8HImode:
3808 case V4SImode:
3809 case V2DImode:
3810 case V4SFmode:
3811 case V2DFmode:
3812 if (!type || !AGGREGATE_TYPE_P (type))
3814 cum->sse_nregs -= 1;
3815 cum->sse_regno += 1;
3816 if (cum->sse_nregs <= 0)
3818 cum->sse_nregs = 0;
3819 cum->sse_regno = 0;
3822 break;
3824 case V8QImode:
3825 case V4HImode:
3826 case V2SImode:
3827 case V2SFmode:
3828 if (!type || !AGGREGATE_TYPE_P (type))
3830 cum->mmx_nregs -= 1;
3831 cum->mmx_regno += 1;
3832 if (cum->mmx_nregs <= 0)
3834 cum->mmx_nregs = 0;
3835 cum->mmx_regno = 0;
3838 break;
3843 /* Define where to put the arguments to a function.
3844 Value is zero to push the argument on the stack,
3845 or a hard register in which to store the argument.
3847 MODE is the argument's machine mode.
3848 TYPE is the data type of the argument (as a tree).
3849 This is null for libcalls where that information may
3850 not be available.
3851 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3852 the preceding args and about the function being called.
3853 NAMED is nonzero if this argument is a named parameter
3854 (otherwise it is an extra parameter matching an ellipsis). */
3857 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3858 tree type, int named)
3860 enum machine_mode mode = orig_mode;
3861 rtx ret = NULL_RTX;
3862 int bytes =
3863 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3864 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3865 static bool warnedsse, warnedmmx;
3867 /* To simplify the code below, represent vector types with a vector mode
3868 even if MMX/SSE are not active. */
3869 if (type && TREE_CODE (type) == VECTOR_TYPE)
3870 mode = type_natural_mode (type);
3872 /* Handle a hidden AL argument containing the number of registers for varargs
3873 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
3874 any AL settings. */
3875 if (mode == VOIDmode)
3877 if (TARGET_64BIT)
3878 return GEN_INT (cum->maybe_vaarg
3879 ? (cum->sse_nregs < 0
3880 ? SSE_REGPARM_MAX
3881 : cum->sse_regno)
3882 : -1);
3883 else
3884 return constm1_rtx;
3886 if (TARGET_64BIT)
3887 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3888 cum->sse_nregs,
3889 &x86_64_int_parameter_registers [cum->regno],
3890 cum->sse_regno);
3891 else
3892 switch (mode)
3894 default:
3895 break;
3897 case BLKmode:
3898 if (bytes < 0)
3899 break;
3900 /* FALLTHRU */
3901 case DImode:
3902 case SImode:
3903 case HImode:
3904 case QImode:
3905 if (words <= cum->nregs)
3907 int regno = cum->regno;
3909 /* Fastcall allocates the first two DWORD (SImode) or
3910 smaller arguments to ECX and EDX. */
3911 if (cum->fastcall)
3913 if (mode == BLKmode || mode == DImode)
3914 break;
3916 /* ECX, not EAX, is the first allocated register. */
3917 if (regno == 0)
3918 regno = 2;
3920 ret = gen_rtx_REG (mode, regno);
3922 break;
3924 case SFmode:
3925 if (cum->float_in_sse > 0)
3926 goto skip_80387;
3928 case DFmode:
3929 if (cum->float_in_sse > 1)
3930 goto skip_80387;
3932 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3933 rounding takes place when values are passed in x87
3934 registers, pass DFmode and SFmode types to local functions
3935 only when flag_unsafe_math_optimizations is set. */
3936 if (!cum->float_in_x87
3937 || (cum->float_in_x87 == 2
3938 && !flag_unsafe_math_optimizations))
3939 break;
3941 case XFmode:
3942 if (!cum->float_in_x87)
3943 break;
3945 if (!type || !AGGREGATE_TYPE_P (type))
3946 if (cum->x87_nregs)
3947 ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
3948 break;
3950 skip_80387:
3952 case TImode:
3953 case V16QImode:
3954 case V8HImode:
3955 case V4SImode:
3956 case V2DImode:
3957 case V4SFmode:
3958 case V2DFmode:
3959 if (!type || !AGGREGATE_TYPE_P (type))
3961 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3963 warnedsse = true;
3964 warning (0, "SSE vector argument without SSE enabled "
3965 "changes the ABI");
3967 if (cum->sse_nregs)
3968 ret = gen_reg_or_parallel (mode, orig_mode,
3969 cum->sse_regno + FIRST_SSE_REG);
3971 break;
3972 case V8QImode:
3973 case V4HImode:
3974 case V2SImode:
3975 case V2SFmode:
3976 if (!type || !AGGREGATE_TYPE_P (type))
3978 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3980 warnedmmx = true;
3981 warning (0, "MMX vector argument without MMX enabled "
3982 "changes the ABI");
3984 if (cum->mmx_nregs)
3985 ret = gen_reg_or_parallel (mode, orig_mode,
3986 cum->mmx_regno + FIRST_MMX_REG);
3988 break;
3991 if (TARGET_DEBUG_ARG)
3993 fprintf (stderr,
3994 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3995 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3997 if (ret)
3998 print_simple_rtl (stderr, ret);
3999 else
4000 fprintf (stderr, ", stack");
4002 fprintf (stderr, " )\n");
4005 return ret;
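/* Hedged sketch of the 32-bit register allocation above, using assumed
   declarations.  For

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   function_arg hands out ECX for a and EDX for b, and returns NULL_RTX for c,
   which therefore goes on the stack; with __attribute__ ((regparm (3))) the
   order is EAX, EDX, ECX instead.  As the check above shows, DImode and
   BLKmode arguments never use the fastcall registers.  */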
4008 /* A C expression that indicates when an argument must be passed by
4009 reference. If nonzero for an argument, a copy of that argument is
4010 made in memory and a pointer to the argument is passed instead of
4011 the argument itself. The pointer is passed in whatever way is
4012 appropriate for passing a pointer to that type. */
4014 static bool
4015 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4016 enum machine_mode mode ATTRIBUTE_UNUSED,
4017 tree type, bool named ATTRIBUTE_UNUSED)
4019 if (!TARGET_64BIT)
4020 return 0;
4022 if (type && int_size_in_bytes (type) == -1)
4024 if (TARGET_DEBUG_ARG)
4025 fprintf (stderr, "function_arg_pass_by_reference\n");
4026 return 1;
4029 return 0;
4032 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4033 ABI. Only called if TARGET_SSE. */
4034 static bool
4035 contains_128bit_aligned_vector_p (tree type)
4037 enum machine_mode mode = TYPE_MODE (type);
4038 if (SSE_REG_MODE_P (mode)
4039 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4040 return true;
4041 if (TYPE_ALIGN (type) < 128)
4042 return false;
4044 if (AGGREGATE_TYPE_P (type))
4046 /* Walk the aggregates recursively. */
4047 switch (TREE_CODE (type))
4049 case RECORD_TYPE:
4050 case UNION_TYPE:
4051 case QUAL_UNION_TYPE:
4053 tree field;
4055 /* Walk all the structure fields. */
4056 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4058 if (TREE_CODE (field) == FIELD_DECL
4059 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4060 return true;
4062 break;
4065 case ARRAY_TYPE:
4066 /* Just for use if some languages pass arrays by value. */
4067 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4068 return true;
4069 break;
4071 default:
4072 gcc_unreachable ();
4075 return false;
4078 /* Gives the alignment boundary, in bits, of an argument with the
4079 specified mode and type. */
4082 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4084 int align;
4085 if (type)
4086 align = TYPE_ALIGN (type);
4087 else
4088 align = GET_MODE_ALIGNMENT (mode);
4089 if (align < PARM_BOUNDARY)
4090 align = PARM_BOUNDARY;
4091 if (!TARGET_64BIT)
4093 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4094 make an exception for SSE modes since these require 128bit
4095 alignment.
4097 The handling here differs from field_alignment. ICC aligns MMX
4098 arguments to 4 byte boundaries, while structure fields are aligned
4099 to 8 byte boundaries. */
4100 if (!TARGET_SSE)
4101 align = PARM_BOUNDARY;
4102 else if (!type)
4104 if (!SSE_REG_MODE_P (mode))
4105 align = PARM_BOUNDARY;
4107 else
4109 if (!contains_128bit_aligned_vector_p (type))
4110 align = PARM_BOUNDARY;
4113 if (align > 128)
4114 align = 128;
4115 return align;
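/* Example, stated as an assumption about user code rather than this file: in
   the 32-bit ABI with SSE enabled, an __m128 argument (or any type containing
   a 128-bit aligned vector) keeps its 128-bit boundary, while a plain double
   falls back to PARM_BOUNDARY (32 bits); the result is capped at 128 bits in
   all cases.  */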
4118 /* Return true if N is a possible register number of function value. */
4119 bool
4120 ix86_function_value_regno_p (int regno)
4122 if (regno == 0
4123 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4124 || (regno == FIRST_SSE_REG && TARGET_SSE))
4125 return true;
4127 if (!TARGET_64BIT
4128 && (regno == FIRST_MMX_REG && TARGET_MMX))
4129 return true;
4131 return false;
4134 /* Define how to find the value returned by a function.
4135 VALTYPE is the data type of the value (as a tree).
4136 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4137 otherwise, FUNC is 0. */
4139 ix86_function_value (tree valtype, tree fntype_or_decl,
4140 bool outgoing ATTRIBUTE_UNUSED)
4142 enum machine_mode natmode = type_natural_mode (valtype);
4144 if (TARGET_64BIT)
4146 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4147 1, REGPARM_MAX, SSE_REGPARM_MAX,
4148 x86_64_int_return_registers, 0);
4149 /* For zero sized structures, construct_container returns NULL, but we
4150 need to keep the rest of the compiler happy by returning a meaningful value. */
4151 if (!ret)
4152 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4153 return ret;
4155 else
4157 tree fn = NULL_TREE, fntype;
4158 if (fntype_or_decl
4159 && DECL_P (fntype_or_decl))
4160 fn = fntype_or_decl;
4161 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4162 return gen_rtx_REG (TYPE_MODE (valtype),
4163 ix86_value_regno (natmode, fn, fntype));
4167 /* Return true iff type is returned in memory. */
4169 ix86_return_in_memory (tree type)
4171 int needed_intregs, needed_sseregs, size;
4172 enum machine_mode mode = type_natural_mode (type);
4174 if (TARGET_64BIT)
4175 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4177 if (mode == BLKmode)
4178 return 1;
4180 size = int_size_in_bytes (type);
4182 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4183 return 0;
4185 if (VECTOR_MODE_P (mode) || mode == TImode)
4187 /* User-created vectors small enough to fit in EAX. */
4188 if (size < 8)
4189 return 0;
4191 /* MMX/3dNow values are returned in MM0,
4192 except when it doesn't exist. */
4193 if (size == 8)
4194 return (TARGET_MMX ? 0 : 1);
4196 /* SSE values are returned in XMM0, except when it doesn't exist. */
4197 if (size == 16)
4198 return (TARGET_SSE ? 0 : 1);
4201 if (mode == XFmode)
4202 return 0;
4204 if (mode == TDmode)
4205 return 1;
4207 if (size > 12)
4208 return 1;
4209 return 0;
4212 /* When returning SSE vector types, we have a choice of either
4213 (1) being ABI incompatible with a -march switch, or
4214 (2) generating an error.
4215 Given no good solution, I think the safest thing is one warning.
4216 The user won't be able to use -Werror, but....
4218 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4219 called in response to actually generating a caller or callee that
4220 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4221 via aggregate_value_p for general type probing from tree-ssa. */
4223 static rtx
4224 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4226 static bool warnedsse, warnedmmx;
4228 if (type)
4230 /* Look at the return type of the function, not the function type. */
4231 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4233 if (!TARGET_SSE && !warnedsse)
4235 if (mode == TImode
4236 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4238 warnedsse = true;
4239 warning (0, "SSE vector return without SSE enabled "
4240 "changes the ABI");
4244 if (!TARGET_MMX && !warnedmmx)
4246 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4248 warnedmmx = true;
4249 warning (0, "MMX vector return without MMX enabled "
4250 "changes the ABI");
4255 return NULL;
4258 /* Define how to find the value returned by a library function
4259 assuming the value has mode MODE. */
4261 ix86_libcall_value (enum machine_mode mode)
4263 if (TARGET_64BIT)
4265 switch (mode)
4267 case SFmode:
4268 case SCmode:
4269 case DFmode:
4270 case DCmode:
4271 case TFmode:
4272 case SDmode:
4273 case DDmode:
4274 case TDmode:
4275 return gen_rtx_REG (mode, FIRST_SSE_REG);
4276 case XFmode:
4277 case XCmode:
4278 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4279 case TCmode:
4280 return NULL;
4281 default:
4282 return gen_rtx_REG (mode, 0);
4285 else
4286 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4289 /* Given a mode, return the register to use for a return value. */
4291 static int
4292 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4294 gcc_assert (!TARGET_64BIT);
4296 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4297 we normally prevent this case when mmx is not available. However
4298 some ABIs may require the result to be returned like DImode. */
4299 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4300 return TARGET_MMX ? FIRST_MMX_REG : 0;
4302 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4303 we prevent this case when sse is not available. However some ABIs
4304 may require the result to be returned like integer TImode. */
4305 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4306 return TARGET_SSE ? FIRST_SSE_REG : 0;
4308 /* Decimal floating point values can go in %eax, unlike other float modes. */
4309 if (DECIMAL_FLOAT_MODE_P (mode))
4310 return 0;
4312 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4313 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4314 return 0;
4316 /* Floating point return values in %st(0), except for local functions when
4317 SSE math is enabled or for functions with sseregparm attribute. */
4318 if ((func || fntype)
4319 && (mode == SFmode || mode == DFmode))
4321 int sse_level = ix86_function_sseregparm (fntype, func);
4322 if ((sse_level >= 1 && mode == SFmode)
4323 || (sse_level == 2 && mode == DFmode))
4324 return FIRST_SSE_REG;
4327 return FIRST_FLOAT_REG;
4330 /* Create the va_list data type. */
4332 static tree
4333 ix86_build_builtin_va_list (void)
4335 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4337 /* For i386 we use a plain pointer to the argument area. */
4338 if (!TARGET_64BIT)
4339 return build_pointer_type (char_type_node);
4341 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4342 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4344 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4345 unsigned_type_node);
4346 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4347 unsigned_type_node);
4348 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4349 ptr_type_node);
4350 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4351 ptr_type_node);
4353 va_list_gpr_counter_field = f_gpr;
4354 va_list_fpr_counter_field = f_fpr;
4356 DECL_FIELD_CONTEXT (f_gpr) = record;
4357 DECL_FIELD_CONTEXT (f_fpr) = record;
4358 DECL_FIELD_CONTEXT (f_ovf) = record;
4359 DECL_FIELD_CONTEXT (f_sav) = record;
4361 TREE_CHAIN (record) = type_decl;
4362 TYPE_NAME (record) = type_decl;
4363 TYPE_FIELDS (record) = f_gpr;
4364 TREE_CHAIN (f_gpr) = f_fpr;
4365 TREE_CHAIN (f_fpr) = f_ovf;
4366 TREE_CHAIN (f_ovf) = f_sav;
4368 layout_type (record);
4370 /* The correct type is an array type of one element. */
4371 return build_array_type (record, build_index_type (size_zero_node));
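/* For reference, the record built above corresponds to the x86-64 psABI
   va_list layout; a hedged C rendering (field names match the identifiers
   used above) is:

       struct __va_list_tag
       {
         unsigned int gp_offset;        byte offset into reg_save_area for GPRs
         unsigned int fp_offset;        byte offset into reg_save_area for SSE regs
         void *overflow_arg_area;       next argument passed on the stack
         void *reg_save_area;           start of the register save area
       };
       typedef struct __va_list_tag va_list[1];   the one-element array built above  */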
4374 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4376 static void
4377 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4378 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4379 int no_rtl)
4381 CUMULATIVE_ARGS next_cum;
4382 rtx save_area = NULL_RTX, mem;
4383 rtx label;
4384 rtx label_ref;
4385 rtx tmp_reg;
4386 rtx nsse_reg;
4387 int set;
4388 tree fntype;
4389 int stdarg_p;
4390 int i;
4392 if (!TARGET_64BIT)
4393 return;
4395 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4396 return;
4398 /* Indicate that space should be allocated on the stack for the varargs save area. */
4399 ix86_save_varrargs_registers = 1;
4401 cfun->stack_alignment_needed = 128;
4403 fntype = TREE_TYPE (current_function_decl);
4404 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4405 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4406 != void_type_node));
4408 /* For varargs, we do not want to skip the dummy va_dcl argument.
4409 For stdargs, we do want to skip the last named argument. */
4410 next_cum = *cum;
4411 if (stdarg_p)
4412 function_arg_advance (&next_cum, mode, type, 1);
4414 if (!no_rtl)
4415 save_area = frame_pointer_rtx;
4417 set = get_varargs_alias_set ();
4419 for (i = next_cum.regno;
4420 i < ix86_regparm
4421 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4422 i++)
4424 mem = gen_rtx_MEM (Pmode,
4425 plus_constant (save_area, i * UNITS_PER_WORD));
4426 MEM_NOTRAP_P (mem) = 1;
4427 set_mem_alias_set (mem, set);
4428 emit_move_insn (mem, gen_rtx_REG (Pmode,
4429 x86_64_int_parameter_registers[i]));
4432 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4434 /* Now emit code to save the SSE registers. The AX parameter contains the
4435 number of SSE parameter registers used to call this function. We use
4436 the sse_prologue_save insn template, which produces a computed jump across
4437 the SSE saves. We need some preparation work to get this working. */
4439 label = gen_label_rtx ();
4440 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4442 /* Compute the address to jump to:
4443 label - 4*eax + nnamed_sse_arguments*4. */
4444 tmp_reg = gen_reg_rtx (Pmode);
4445 nsse_reg = gen_reg_rtx (Pmode);
4446 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4447 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4448 gen_rtx_MULT (Pmode, nsse_reg,
4449 GEN_INT (4))));
4450 if (next_cum.sse_regno)
4451 emit_move_insn
4452 (nsse_reg,
4453 gen_rtx_CONST (DImode,
4454 gen_rtx_PLUS (DImode,
4455 label_ref,
4456 GEN_INT (next_cum.sse_regno * 4))));
4457 else
4458 emit_move_insn (nsse_reg, label_ref);
4459 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4461 /* Compute the address of the memory block we save into. We always use a
4462 pointer pointing 127 bytes after the first byte to store - this is needed
4463 to keep the instruction size limited to 4 bytes. */
4464 tmp_reg = gen_reg_rtx (Pmode);
4465 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4466 plus_constant (save_area,
4467 8 * REGPARM_MAX + 127)));
4468 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4469 MEM_NOTRAP_P (mem) = 1;
4470 set_mem_alias_set (mem, set);
4471 set_mem_align (mem, BITS_PER_WORD);
4473 /* And finally do the dirty job! */
4474 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4475 GEN_INT (next_cum.sse_regno), label));
4480 /* Implement va_start. */
4482 void
4483 ix86_va_start (tree valist, rtx nextarg)
4485 HOST_WIDE_INT words, n_gpr, n_fpr;
4486 tree f_gpr, f_fpr, f_ovf, f_sav;
4487 tree gpr, fpr, ovf, sav, t;
4488 tree type;
4490 /* Only 64bit target needs something special. */
4491 if (!TARGET_64BIT)
4493 std_expand_builtin_va_start (valist, nextarg);
4494 return;
4497 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4498 f_fpr = TREE_CHAIN (f_gpr);
4499 f_ovf = TREE_CHAIN (f_fpr);
4500 f_sav = TREE_CHAIN (f_ovf);
4502 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4503 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4504 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4505 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4506 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4508 /* Count number of gp and fp argument registers used. */
4509 words = current_function_args_info.words;
4510 n_gpr = current_function_args_info.regno;
4511 n_fpr = current_function_args_info.sse_regno;
4513 if (TARGET_DEBUG_ARG)
4514 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4515 (int) words, (int) n_gpr, (int) n_fpr);
4517 if (cfun->va_list_gpr_size)
4519 type = TREE_TYPE (gpr);
4520 t = build2 (MODIFY_EXPR, type, gpr,
4521 build_int_cst (type, n_gpr * 8));
4522 TREE_SIDE_EFFECTS (t) = 1;
4523 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4526 if (cfun->va_list_fpr_size)
4528 type = TREE_TYPE (fpr);
4529 t = build2 (MODIFY_EXPR, type, fpr,
4530 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4531 TREE_SIDE_EFFECTS (t) = 1;
4532 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4535 /* Find the overflow area. */
4536 type = TREE_TYPE (ovf);
4537 t = make_tree (type, virtual_incoming_args_rtx);
4538 if (words != 0)
4539 t = build2 (PLUS_EXPR, type, t,
4540 build_int_cst (type, words * UNITS_PER_WORD));
4541 t = build2 (MODIFY_EXPR, type, ovf, t);
4542 TREE_SIDE_EFFECTS (t) = 1;
4543 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4545 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4547 /* Find the register save area.
4548 The prologue of the function saves it right above the stack frame. */
4549 type = TREE_TYPE (sav);
4550 t = make_tree (type, frame_pointer_rtx);
4551 t = build2 (MODIFY_EXPR, type, sav, t);
4552 TREE_SIDE_EFFECTS (t) = 1;
4553 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
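/* Worked example of the assignments above, under the psABI assumptions.  For
   a function  int f (int n, ...)  entered with one named integer argument and
   no named SSE arguments, va_start sets

       gp_offset         = 1 * 8 = 8          one GPR already consumed
       fp_offset         = 0 * 16 + 48 = 48   8 * REGPARM_MAX skips the GPR area
       overflow_arg_area = incoming argument pointer (plus words * UNITS_PER_WORD)
       reg_save_area     = frame pointer, where the prologue dumped the registers

   so the first va_arg of an integer type reads from reg_save_area + 8.  */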
4557 /* Implement va_arg. */
4559 tree
4560 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4562 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4563 tree f_gpr, f_fpr, f_ovf, f_sav;
4564 tree gpr, fpr, ovf, sav, t;
4565 int size, rsize;
4566 tree lab_false, lab_over = NULL_TREE;
4567 tree addr, t2;
4568 rtx container;
4569 int indirect_p = 0;
4570 tree ptrtype;
4571 enum machine_mode nat_mode;
4573 /* Only 64bit target needs something special. */
4574 if (!TARGET_64BIT)
4575 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4577 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4578 f_fpr = TREE_CHAIN (f_gpr);
4579 f_ovf = TREE_CHAIN (f_fpr);
4580 f_sav = TREE_CHAIN (f_ovf);
4582 valist = build_va_arg_indirect_ref (valist);
4583 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4584 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4585 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4586 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4588 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4589 if (indirect_p)
4590 type = build_pointer_type (type);
4591 size = int_size_in_bytes (type);
4592 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4594 nat_mode = type_natural_mode (type);
4595 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4596 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4598 /* Pull the value out of the saved registers. */
4600 addr = create_tmp_var (ptr_type_node, "addr");
4601 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4603 if (container)
4605 int needed_intregs, needed_sseregs;
4606 bool need_temp;
4607 tree int_addr, sse_addr;
4609 lab_false = create_artificial_label ();
4610 lab_over = create_artificial_label ();
4612 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4614 need_temp = (!REG_P (container)
4615 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4616 || TYPE_ALIGN (type) > 128));
4618 /* In case we are passing a structure, verify that it is a consecutive block
4619 in the register save area. If not, we need to do moves. */
4620 if (!need_temp && !REG_P (container))
4622 /* Verify that all registers are strictly consecutive. */
4623 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4625 int i;
4627 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4629 rtx slot = XVECEXP (container, 0, i);
4630 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4631 || INTVAL (XEXP (slot, 1)) != i * 16)
4632 need_temp = 1;
4635 else
4637 int i;
4639 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4641 rtx slot = XVECEXP (container, 0, i);
4642 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4643 || INTVAL (XEXP (slot, 1)) != i * 8)
4644 need_temp = 1;
4648 if (!need_temp)
4650 int_addr = addr;
4651 sse_addr = addr;
4653 else
4655 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4656 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4657 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4658 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4661 /* First ensure that we fit completely in registers. */
4662 if (needed_intregs)
4664 t = build_int_cst (TREE_TYPE (gpr),
4665 (REGPARM_MAX - needed_intregs + 1) * 8);
4666 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4667 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4668 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4669 gimplify_and_add (t, pre_p);
4671 if (needed_sseregs)
4673 t = build_int_cst (TREE_TYPE (fpr),
4674 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4675 + REGPARM_MAX * 8);
4676 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4677 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4678 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4679 gimplify_and_add (t, pre_p);
4682 /* Compute index to start of area used for integer regs. */
4683 if (needed_intregs)
4685 /* int_addr = gpr + sav; */
4686 t = fold_convert (ptr_type_node, gpr);
4687 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4688 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4689 gimplify_and_add (t, pre_p);
4691 if (needed_sseregs)
4693 /* sse_addr = fpr + sav; */
4694 t = fold_convert (ptr_type_node, fpr);
4695 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4696 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4697 gimplify_and_add (t, pre_p);
4699 if (need_temp)
4701 int i;
4702 tree temp = create_tmp_var (type, "va_arg_tmp");
4704 /* addr = &temp; */
4705 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4706 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4707 gimplify_and_add (t, pre_p);
4709 for (i = 0; i < XVECLEN (container, 0); i++)
4711 rtx slot = XVECEXP (container, 0, i);
4712 rtx reg = XEXP (slot, 0);
4713 enum machine_mode mode = GET_MODE (reg);
4714 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4715 tree addr_type = build_pointer_type (piece_type);
4716 tree src_addr, src;
4717 int src_offset;
4718 tree dest_addr, dest;
4720 if (SSE_REGNO_P (REGNO (reg)))
4722 src_addr = sse_addr;
4723 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4725 else
4727 src_addr = int_addr;
4728 src_offset = REGNO (reg) * 8;
4730 src_addr = fold_convert (addr_type, src_addr);
4731 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4732 size_int (src_offset)));
4733 src = build_va_arg_indirect_ref (src_addr);
4735 dest_addr = fold_convert (addr_type, addr);
4736 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4737 size_int (INTVAL (XEXP (slot, 1)))));
4738 dest = build_va_arg_indirect_ref (dest_addr);
4740 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4741 gimplify_and_add (t, pre_p);
4745 if (needed_intregs)
4747 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4748 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4749 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4750 gimplify_and_add (t, pre_p);
4752 if (needed_sseregs)
4754 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4755 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4756 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4757 gimplify_and_add (t, pre_p);
4760 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4761 gimplify_and_add (t, pre_p);
4763 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4764 append_to_statement_list (t, pre_p);
4767 /* ... otherwise out of the overflow area. */
4769 /* Care for on-stack alignment if needed. */
4770 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4771 || integer_zerop (TYPE_SIZE (type)))
4772 t = ovf;
4773 else
4775 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4776 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4777 build_int_cst (TREE_TYPE (ovf), align - 1));
4778 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4779 build_int_cst (TREE_TYPE (t), -align));
4781 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4783 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4784 gimplify_and_add (t2, pre_p);
4786 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4787 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4788 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4789 gimplify_and_add (t, pre_p);
4791 if (container)
4793 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4794 append_to_statement_list (t, pre_p);
4797 ptrtype = build_pointer_type (type);
4798 addr = fold_convert (ptrtype, addr);
4800 if (indirect_p)
4801 addr = build_va_arg_indirect_ref (addr);
4802 return build_va_arg_indirect_ref (addr);
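/* The generated sequence can be summarized by this hedged C-level sketch for
   a single integer-class argument of type T (the names are illustrative, not
   emitted identifiers):

       if (ap->gp_offset >= 48)                 (6 - 1 + 1) * 8: no GPRs left
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;            aligned first, if over 64 bits
       ap->overflow_arg_area += 8;
     done:
       result = *(T *) addr;  */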
4805 /* Return nonzero if OPNUM's MEM should be matched
4806 in movabs* patterns. */
4809 ix86_check_movabs (rtx insn, int opnum)
4811 rtx set, mem;
4813 set = PATTERN (insn);
4814 if (GET_CODE (set) == PARALLEL)
4815 set = XVECEXP (set, 0, 0);
4816 gcc_assert (GET_CODE (set) == SET);
4817 mem = XEXP (set, opnum);
4818 while (GET_CODE (mem) == SUBREG)
4819 mem = SUBREG_REG (mem);
4820 gcc_assert (GET_CODE (mem) == MEM);
4821 return (volatile_ok || !MEM_VOLATILE_P (mem));
4824 /* Initialize the table of extra 80387 mathematical constants. */
4826 static void
4827 init_ext_80387_constants (void)
4829 static const char * cst[5] =
4831 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4832 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4833 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4834 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4835 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4837 int i;
4839 for (i = 0; i < 5; i++)
4841 real_from_string (&ext_80387_constants_table[i], cst[i]);
4842 /* Ensure each constant is rounded to XFmode precision. */
4843 real_convert (&ext_80387_constants_table[i],
4844 XFmode, &ext_80387_constants_table[i]);
4847 ext_80387_constants_init = 1;
4850 /* Return true if the constant is something that can be loaded with
4851 a special instruction. */
4854 standard_80387_constant_p (rtx x)
4856 REAL_VALUE_TYPE r;
4858 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4859 return -1;
4861 if (x == CONST0_RTX (GET_MODE (x)))
4862 return 1;
4863 if (x == CONST1_RTX (GET_MODE (x)))
4864 return 2;
4866 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4868 /* For XFmode constants, try to find a special 80387 instruction when
4869 optimizing for size or on those CPUs that benefit from them. */
4870 if (GET_MODE (x) == XFmode
4871 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4873 int i;
4875 if (! ext_80387_constants_init)
4876 init_ext_80387_constants ();
4878 for (i = 0; i < 5; i++)
4879 if (real_identical (&r, &ext_80387_constants_table[i]))
4880 return i + 3;
4883 /* Load of the constant -0.0 or -1.0 will be split as
4884 fldz;fchs or fld1;fchs sequence. */
4885 if (real_isnegzero (&r))
4886 return 8;
4887 if (real_identical (&r, &dconstm1))
4888 return 9;
4890 return 0;
4893 /* Return the opcode of the special instruction to be used to load
4894 the constant X. */
4896 const char *
4897 standard_80387_constant_opcode (rtx x)
4899 switch (standard_80387_constant_p (x))
4901 case 1:
4902 return "fldz";
4903 case 2:
4904 return "fld1";
4905 case 3:
4906 return "fldlg2";
4907 case 4:
4908 return "fldln2";
4909 case 5:
4910 return "fldl2e";
4911 case 6:
4912 return "fldl2t";
4913 case 7:
4914 return "fldpi";
4915 case 8:
4916 case 9:
4917 return "#";
4918 default:
4919 gcc_unreachable ();
4923 /* Return the CONST_DOUBLE representing the 80387 constant that is
4924 loaded by the specified special instruction. The argument IDX
4925 matches the return value from standard_80387_constant_p. */
4928 standard_80387_constant_rtx (int idx)
4930 int i;
4932 if (! ext_80387_constants_init)
4933 init_ext_80387_constants ();
4935 switch (idx)
4937 case 3:
4938 case 4:
4939 case 5:
4940 case 6:
4941 case 7:
4942 i = idx - 3;
4943 break;
4945 default:
4946 gcc_unreachable ();
4949 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4950 XFmode);
4953 /* Return 1 if MODE is a valid mode for SSE. */
4954 static int
4955 standard_sse_mode_p (enum machine_mode mode)
4957 switch (mode)
4959 case V16QImode:
4960 case V8HImode:
4961 case V4SImode:
4962 case V2DImode:
4963 case V4SFmode:
4964 case V2DFmode:
4965 return 1;
4967 default:
4968 return 0;
4972 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4975 standard_sse_constant_p (rtx x)
4977 enum machine_mode mode = GET_MODE (x);
4979 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4980 return 1;
4981 if (vector_all_ones_operand (x, mode)
4982 && standard_sse_mode_p (mode))
4983 return TARGET_SSE2 ? 2 : -1;
4985 return 0;
4988 /* Return the opcode of the special instruction to be used to load
4989 the constant X. */
4991 const char *
4992 standard_sse_constant_opcode (rtx insn, rtx x)
4994 switch (standard_sse_constant_p (x))
4996 case 1:
4997 if (get_attr_mode (insn) == MODE_V4SF)
4998 return "xorps\t%0, %0";
4999 else if (get_attr_mode (insn) == MODE_V2DF)
5000 return "xorpd\t%0, %0";
5001 else
5002 return "pxor\t%0, %0";
5003 case 2:
5004 return "pcmpeqd\t%0, %0";
5006 gcc_unreachable ();
5009 /* Returns 1 if OP contains a symbol reference */
5012 symbolic_reference_mentioned_p (rtx op)
5014 const char *fmt;
5015 int i;
5017 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5018 return 1;
5020 fmt = GET_RTX_FORMAT (GET_CODE (op));
5021 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5023 if (fmt[i] == 'E')
5025 int j;
5027 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5028 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5029 return 1;
5032 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5033 return 1;
5036 return 0;
5039 /* Return 1 if it is appropriate to emit `ret' instructions in the
5040 body of a function. Do this only if the epilogue is simple, needing a
5041 couple of insns. Prior to reloading, we can't tell how many registers
5042 must be saved, so return 0 then. Return 0 if there is no frame
5043 marker to de-allocate. */
5046 ix86_can_use_return_insn_p (void)
5048 struct ix86_frame frame;
5050 if (! reload_completed || frame_pointer_needed)
5051 return 0;
5053 /* Don't allow more than 32768 bytes of pop, since that's all we can do
5054 with one instruction. */
5055 if (current_function_pops_args
5056 && current_function_args_size >= 32768)
5057 return 0;
5059 ix86_compute_frame_layout (&frame);
5060 return frame.to_allocate == 0 && frame.nregs == 0;
5063 /* Value should be nonzero if functions must have frame pointers.
5064 Zero means the frame pointer need not be set up (and parms may
5065 be accessed via the stack pointer) in functions that seem suitable. */
5068 ix86_frame_pointer_required (void)
5070 /* If we accessed previous frames, then the generated code expects
5071 to be able to access the saved ebp value in our frame. */
5072 if (cfun->machine->accesses_prev_frame)
5073 return 1;
5075 /* Several x86 OSes need a frame pointer for other reasons,
5076 usually pertaining to setjmp. */
5077 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5078 return 1;
5080 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5081 the frame pointer by default. Turn it back on now if we've not
5082 got a leaf function. */
5083 if (TARGET_OMIT_LEAF_FRAME_POINTER
5084 && (!current_function_is_leaf
5085 || ix86_current_function_calls_tls_descriptor))
5086 return 1;
5088 if (current_function_profile)
5089 return 1;
5091 return 0;
5094 /* Record that the current function accesses previous call frames. */
5096 void
5097 ix86_setup_frame_addresses (void)
5099 cfun->machine->accesses_prev_frame = 1;
5102 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5103 # define USE_HIDDEN_LINKONCE 1
5104 #else
5105 # define USE_HIDDEN_LINKONCE 0
5106 #endif
5108 static int pic_labels_used;
5110 /* Fills in the label name that should be used for a pc thunk for
5111 the given register. */
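/* For %ebx, for example, this yields "__i686.get_pc_thunk.bx" when hidden
   linkonce sections are usable, and an internal "LPR" label otherwise.  */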
5113 static void
5114 get_pc_thunk_name (char name[32], unsigned int regno)
5116 gcc_assert (!TARGET_64BIT);
5118 if (USE_HIDDEN_LINKONCE)
5119 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5120 else
5121 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5125 /* This function generates the pc thunks used for -fpic: each one loads
5126 its register with the return address of the caller and then returns. */
5128 void
5129 ix86_file_end (void)
5131 rtx xops[2];
5132 int regno;
5134 for (regno = 0; regno < 8; ++regno)
5136 char name[32];
5138 if (! ((pic_labels_used >> regno) & 1))
5139 continue;
5141 get_pc_thunk_name (name, regno);
5143 #if TARGET_MACHO
5144 if (TARGET_MACHO)
5146 switch_to_section (darwin_sections[text_coal_section]);
5147 fputs ("\t.weak_definition\t", asm_out_file);
5148 assemble_name (asm_out_file, name);
5149 fputs ("\n\t.private_extern\t", asm_out_file);
5150 assemble_name (asm_out_file, name);
5151 fputs ("\n", asm_out_file);
5152 ASM_OUTPUT_LABEL (asm_out_file, name);
5154 else
5155 #endif
5156 if (USE_HIDDEN_LINKONCE)
5158 tree decl;
5160 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5161 error_mark_node);
5162 TREE_PUBLIC (decl) = 1;
5163 TREE_STATIC (decl) = 1;
5164 DECL_ONE_ONLY (decl) = 1;
5166 (*targetm.asm_out.unique_section) (decl, 0);
5167 switch_to_section (get_named_section (decl, NULL, 0));
5169 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5170 fputs ("\t.hidden\t", asm_out_file);
5171 assemble_name (asm_out_file, name);
5172 fputc ('\n', asm_out_file);
5173 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5175 else
5177 switch_to_section (text_section);
5178 ASM_OUTPUT_LABEL (asm_out_file, name);
5181 xops[0] = gen_rtx_REG (SImode, regno);
5182 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5183 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5184 output_asm_insn ("ret", xops);
5187 if (NEED_INDICATE_EXEC_STACK)
5188 file_end_indicate_exec_stack ();
5191 /* Emit code for the SET_GOT patterns. */
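/* In the common deep-branch-prediction PIC case this expands to a sketch
   like
       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx
   while the other variant uses an inline "call 1f; 1: popl %0" followed by
   the same add with a label-relative adjustment.  */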
5193 const char *
5194 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5196 rtx xops[3];
5198 xops[0] = dest;
5199 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5201 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5203 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5205 if (!flag_pic)
5206 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5207 else
5208 output_asm_insn ("call\t%a2", xops);
5210 #if TARGET_MACHO
5211 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5212 is what will be referenced by the Mach-O PIC subsystem. */
5213 if (!label)
5214 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5215 #endif
5217 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5218 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5220 if (flag_pic)
5221 output_asm_insn ("pop{l}\t%0", xops);
5223 else
5225 char name[32];
5226 get_pc_thunk_name (name, REGNO (dest));
5227 pic_labels_used |= 1 << REGNO (dest);
5229 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5230 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5231 output_asm_insn ("call\t%X2", xops);
5232 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5233 is what will be referenced by the Mach-O PIC subsystem. */
5234 #if TARGET_MACHO
5235 if (!label)
5236 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5237 else
5238 targetm.asm_out.internal_label (asm_out_file, "L",
5239 CODE_LABEL_NUMBER (label));
5240 #endif
5243 if (TARGET_MACHO)
5244 return "";
5246 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5247 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5248 else
5249 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5251 return "";
5254 /* Generate a "push" pattern for input ARG. */
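/* The result is only constructed here, not emitted: a
   (set (mem (pre_dec sp)) arg) pattern, which matches the push
   instruction on this port.  */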
5256 static rtx
5257 gen_push (rtx arg)
5259 return gen_rtx_SET (VOIDmode,
5260 gen_rtx_MEM (Pmode,
5261 gen_rtx_PRE_DEC (Pmode,
5262 stack_pointer_rtx)),
5263 arg);
5266 /* Return >= 0 if there is an unused call-clobbered register available
5267 for the entire function. */
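/* The candidates tried below are the call-clobbered hard registers 0..2
   (%eax, %edx, %ecx), and only for leaf functions that are not profiled
   and do not call the TLS descriptor, since only then can an unused
   register remain free for the whole function.  */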
5269 static unsigned int
5270 ix86_select_alt_pic_regnum (void)
5272 if (current_function_is_leaf && !current_function_profile
5273 && !ix86_current_function_calls_tls_descriptor)
5275 int i;
5276 for (i = 2; i >= 0; --i)
5277 if (!regs_ever_live[i])
5278 return i;
5281 return INVALID_REGNUM;
5284 /* Return 1 if we need to save REGNO. */
5285 static int
5286 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5288 if (pic_offset_table_rtx
5289 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5290 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5291 || current_function_profile
5292 || current_function_calls_eh_return
5293 || current_function_uses_const_pool))
5295 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5296 return 0;
5297 return 1;
5300 if (current_function_calls_eh_return && maybe_eh_return)
5302 unsigned i;
5303 for (i = 0; ; i++)
5305 unsigned test = EH_RETURN_DATA_REGNO (i);
5306 if (test == INVALID_REGNUM)
5307 break;
5308 if (test == regno)
5309 return 1;
5313 if (cfun->machine->force_align_arg_pointer
5314 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5315 return 1;
5317 return (regs_ever_live[regno]
5318 && !call_used_regs[regno]
5319 && !fixed_regs[regno]
5320 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5323 /* Return number of registers to be saved on the stack. */
5325 static int
5326 ix86_nsaved_regs (void)
5328 int nregs = 0;
5329 int regno;
5331 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5332 if (ix86_save_reg (regno, true))
5333 nregs++;
5334 return nregs;
5337 /* Return the offset between two registers, one to be eliminated, and the other
5338 its replacement, at the start of a routine. */
5340 HOST_WIDE_INT
5341 ix86_initial_elimination_offset (int from, int to)
5343 struct ix86_frame frame;
5344 ix86_compute_frame_layout (&frame);
5346 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5347 return frame.hard_frame_pointer_offset;
5348 else if (from == FRAME_POINTER_REGNUM
5349 && to == HARD_FRAME_POINTER_REGNUM)
5350 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5351 else
5353 gcc_assert (to == STACK_POINTER_REGNUM);
5355 if (from == ARG_POINTER_REGNUM)
5356 return frame.stack_pointer_offset;
5358 gcc_assert (from == FRAME_POINTER_REGNUM);
5359 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5363 /* Fill the structure ix86_frame describing the frame of the function being compiled. */
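/* A sketch of the resulting layout, from higher towards lower addresses
   (derived from the offset computations below, which are authoritative):
       return address
       saved frame pointer            (if frame_pointer_needed)
       saved registers                (frame->nregs words)
       va-arg register save area      (if ix86_save_varrargs_registers)
       padding1                       (to stack_alignment_needed)
                                      <- frame_pointer_offset
       local variables
       outgoing argument area         (if ACCUMULATE_OUTGOING_ARGS)
       padding2                       (to preferred_alignment)
                                      <- stack_pointer_offset  */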
5365 static void
5366 ix86_compute_frame_layout (struct ix86_frame *frame)
5368 HOST_WIDE_INT total_size;
5369 unsigned int stack_alignment_needed;
5370 HOST_WIDE_INT offset;
5371 unsigned int preferred_alignment;
5372 HOST_WIDE_INT size = get_frame_size ();
5374 frame->nregs = ix86_nsaved_regs ();
5375 total_size = size;
5377 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5378 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5380 /* During reload iteration the number of registers saved can change.
5381 Recompute the value as needed. Do not recompute when the number of registers
5382 didn't change, as reload does multiple calls to the function and does not
5383 expect the decision to change within a single iteration. */
5384 if (!optimize_size
5385 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5387 int count = frame->nregs;
5389 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5390 /* The fast prologue uses move instead of push to save registers. This
5391 is significantly longer, but also executes faster, as modern hardware
5392 can execute the moves in parallel but can't do that for push/pop.
5394 Be careful about choosing what prologue to emit: when the function takes
5395 many instructions to execute, we may use the slow version, as well as
5396 when the function is known to be outside a hot spot (this is known
5397 only with feedback). Weight the size of the function by the number of
5398 registers to save, as it is cheap to use one or two push instructions
5399 but very slow to use many of them. */
5400 if (count)
5401 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5402 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5403 || (flag_branch_probabilities
5404 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5405 cfun->machine->use_fast_prologue_epilogue = false;
5406 else
5407 cfun->machine->use_fast_prologue_epilogue
5408 = !expensive_function_p (count);
5410 if (TARGET_PROLOGUE_USING_MOVE
5411 && cfun->machine->use_fast_prologue_epilogue)
5412 frame->save_regs_using_mov = true;
5413 else
5414 frame->save_regs_using_mov = false;
5417 /* Skip return address and saved base pointer. */
5418 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5420 frame->hard_frame_pointer_offset = offset;
5422 /* Do some sanity checking of stack_alignment_needed and
5423 preferred_alignment, since the i386 port is the only one using these
5424 features, which may break easily. */
5426 gcc_assert (!size || stack_alignment_needed);
5427 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5428 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5429 gcc_assert (stack_alignment_needed
5430 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5432 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5433 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5435 /* Register save area */
5436 offset += frame->nregs * UNITS_PER_WORD;
5438 /* Va-arg area */
5439 if (ix86_save_varrargs_registers)
5441 offset += X86_64_VARARGS_SIZE;
5442 frame->va_arg_size = X86_64_VARARGS_SIZE;
5444 else
5445 frame->va_arg_size = 0;
5447 /* Align start of frame for local function. */
5448 frame->padding1 = ((offset + stack_alignment_needed - 1)
5449 & -stack_alignment_needed) - offset;
5451 offset += frame->padding1;
5453 /* Frame pointer points here. */
5454 frame->frame_pointer_offset = offset;
5456 offset += size;
5458 /* Add outgoing arguments area. Can be skipped if we eliminated
5459 all the function calls as dead code.
5460 Skipping is, however, impossible when the function calls alloca, since
5461 the alloca expander assumes that the last current_function_outgoing_args_size
5462 bytes of the stack frame are unused. */
5463 if (ACCUMULATE_OUTGOING_ARGS
5464 && (!current_function_is_leaf || current_function_calls_alloca
5465 || ix86_current_function_calls_tls_descriptor))
5467 offset += current_function_outgoing_args_size;
5468 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5470 else
5471 frame->outgoing_arguments_size = 0;
5473 /* Align stack boundary. Only needed if we're calling another function
5474 or using alloca. */
5475 if (!current_function_is_leaf || current_function_calls_alloca
5476 || ix86_current_function_calls_tls_descriptor)
5477 frame->padding2 = ((offset + preferred_alignment - 1)
5478 & -preferred_alignment) - offset;
5479 else
5480 frame->padding2 = 0;
5482 offset += frame->padding2;
5484 /* We've reached end of stack frame. */
5485 frame->stack_pointer_offset = offset;
5487 /* Size prologue needs to allocate. */
5488 frame->to_allocate =
5489 (size + frame->padding1 + frame->padding2
5490 + frame->outgoing_arguments_size + frame->va_arg_size);
5492 if ((!frame->to_allocate && frame->nregs <= 1)
5493 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5494 frame->save_regs_using_mov = false;
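/* The x86-64 ABI reserves a 128-byte "red zone" below the stack pointer
   that leaf functions may use without adjusting %rsp.  When it is usable,
   the code below folds as much of the frame as fits into that zone, so the
   prologue can skip (part of) the explicit stack adjustment.  */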
5496 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5497 && current_function_is_leaf
5498 && !ix86_current_function_calls_tls_descriptor)
5500 frame->red_zone_size = frame->to_allocate;
5501 if (frame->save_regs_using_mov)
5502 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5503 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5504 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5506 else
5507 frame->red_zone_size = 0;
5508 frame->to_allocate -= frame->red_zone_size;
5509 frame->stack_pointer_offset -= frame->red_zone_size;
5510 #if 0
5511 fprintf (stderr, "nregs: %i\n", frame->nregs);
5512 fprintf (stderr, "size: %i\n", size);
5513 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5514 fprintf (stderr, "padding1: %i\n", frame->padding1);
5515 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5516 fprintf (stderr, "padding2: %i\n", frame->padding2);
5517 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5518 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5519 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5520 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5521 frame->hard_frame_pointer_offset);
5522 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5523 #endif
5526 /* Emit code to save registers in the prologue. */
5528 static void
5529 ix86_emit_save_regs (void)
5531 unsigned int regno;
5532 rtx insn;
5534 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5535 if (ix86_save_reg (regno, true))
5537 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5538 RTX_FRAME_RELATED_P (insn) = 1;
5542 /* Emit code to save registers using MOV insns. The first register
5543 is saved at POINTER + OFFSET. */
5544 static void
5545 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5547 unsigned int regno;
5548 rtx insn;
5550 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5551 if (ix86_save_reg (regno, true))
5553 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5554 Pmode, offset),
5555 gen_rtx_REG (Pmode, regno));
5556 RTX_FRAME_RELATED_P (insn) = 1;
5557 offset += UNITS_PER_WORD;
5561 /* Expand prologue or epilogue stack adjustment.
5562 The pattern exists to put a dependency on all ebp-based memory accesses.
5563 STYLE should be negative if instructions should be marked as frame related,
5564 zero if the %r11 register is live and cannot be freely used, and positive
5565 otherwise. */
5567 static void
5568 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5570 rtx insn;
5572 if (! TARGET_64BIT)
5573 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5574 else if (x86_64_immediate_operand (offset, DImode))
5575 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5576 else
5578 rtx r11;
5579 /* r11 is used by indirect sibcall return as well, set before the
5580 epilogue and used after the epilogue. ATM indirect sibcall
5581 shouldn't be used together with huge frame sizes in one
5582 function because of the frame_size check in sibcall.c. */
5583 gcc_assert (style);
5584 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5585 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5586 if (style < 0)
5587 RTX_FRAME_RELATED_P (insn) = 1;
5588 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5589 offset));
5591 if (style < 0)
5592 RTX_FRAME_RELATED_P (insn) = 1;
5595 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5597 static rtx
5598 ix86_internal_arg_pointer (void)
5600 bool has_force_align_arg_pointer =
5601 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5602 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5603 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5604 && DECL_NAME (current_function_decl)
5605 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5606 && DECL_FILE_SCOPE_P (current_function_decl))
5607 || ix86_force_align_arg_pointer
5608 || has_force_align_arg_pointer)
5610 /* Nested functions can't realign the stack due to a register
5611 conflict. */
5612 if (DECL_CONTEXT (current_function_decl)
5613 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5615 if (ix86_force_align_arg_pointer)
5616 warning (0, "-mstackrealign ignored for nested functions");
5617 if (has_force_align_arg_pointer)
5618 error ("%s not supported for nested functions",
5619 ix86_force_align_arg_pointer_string);
5620 return virtual_incoming_args_rtx;
5622 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5623 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5625 else
5626 return virtual_incoming_args_rtx;
5629 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5630 This is called from dwarf2out.c to emit call frame instructions
5631 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5632 static void
5633 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5635 rtx unspec = SET_SRC (pattern);
5636 gcc_assert (GET_CODE (unspec) == UNSPEC);
5638 switch (index)
5640 case UNSPEC_REG_SAVE:
5641 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5642 SET_DEST (pattern));
5643 break;
5644 case UNSPEC_DEF_CFA:
5645 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5646 INTVAL (XVECEXP (unspec, 0, 0)));
5647 break;
5648 default:
5649 gcc_unreachable ();
5653 /* Expand the prologue into a bunch of separate insns. */
5655 void
5656 ix86_expand_prologue (void)
5658 rtx insn;
5659 bool pic_reg_used;
5660 struct ix86_frame frame;
5661 HOST_WIDE_INT allocate;
5663 ix86_compute_frame_layout (&frame);
5665 if (cfun->machine->force_align_arg_pointer)
5667 rtx x, y;
5669 /* Grab the argument pointer. */
5670 x = plus_constant (stack_pointer_rtx, 4);
5671 y = cfun->machine->force_align_arg_pointer;
5672 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5673 RTX_FRAME_RELATED_P (insn) = 1;
5675 /* The unwind info consists of two parts: install the fafp as the cfa,
5676 and record the fafp as the "save register" of the stack pointer.
5677 The latter is there so that the unwinder can see where it
5678 should restore the stack pointer across the `and' insn below. */
5679 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5680 x = gen_rtx_SET (VOIDmode, y, x);
5681 RTX_FRAME_RELATED_P (x) = 1;
5682 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5683 UNSPEC_REG_SAVE);
5684 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5685 RTX_FRAME_RELATED_P (y) = 1;
5686 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5687 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5688 REG_NOTES (insn) = x;
5690 /* Align the stack. */
5691 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5692 GEN_INT (-16)));
5694 /* And here we cheat like madmen with the unwind info. We force the
5695 cfa register back to sp+4, which is exactly what it was at the
5696 start of the function. Re-pushing the return address results in
5697 the return at the same spot relative to the cfa, and thus is
5698 correct wrt the unwind info. */
5699 x = cfun->machine->force_align_arg_pointer;
5700 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5701 insn = emit_insn (gen_push (x));
5702 RTX_FRAME_RELATED_P (insn) = 1;
5704 x = GEN_INT (4);
5705 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5706 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5707 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5708 REG_NOTES (insn) = x;
5711 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5712 slower on all targets. Also sdb doesn't like it. */
5714 if (frame_pointer_needed)
5716 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5717 RTX_FRAME_RELATED_P (insn) = 1;
5719 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5720 RTX_FRAME_RELATED_P (insn) = 1;
5723 allocate = frame.to_allocate;
5725 if (!frame.save_regs_using_mov)
5726 ix86_emit_save_regs ();
5727 else
5728 allocate += frame.nregs * UNITS_PER_WORD;
5730 /* When using the red zone we may start saving registers before allocating
5731 the stack frame, saving one cycle of the prologue. */
5732 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5733 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5734 : stack_pointer_rtx,
5735 -frame.nregs * UNITS_PER_WORD);
5737 if (allocate == 0)
5739 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5740 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5741 GEN_INT (-allocate), -1);
5742 else
5744 /* Only valid for Win32. */
5745 rtx eax = gen_rtx_REG (SImode, 0);
5746 bool eax_live = ix86_eax_live_at_start_p ();
5747 rtx t;
5749 gcc_assert (!TARGET_64BIT);
5751 if (eax_live)
5753 emit_insn (gen_push (eax));
5754 allocate -= 4;
5757 emit_move_insn (eax, GEN_INT (allocate));
5759 insn = emit_insn (gen_allocate_stack_worker (eax));
5760 RTX_FRAME_RELATED_P (insn) = 1;
5761 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5762 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5763 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5764 t, REG_NOTES (insn));
5766 if (eax_live)
5768 if (frame_pointer_needed)
5769 t = plus_constant (hard_frame_pointer_rtx,
5770 allocate
5771 - frame.to_allocate
5772 - frame.nregs * UNITS_PER_WORD);
5773 else
5774 t = plus_constant (stack_pointer_rtx, allocate);
5775 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5779 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5781 if (!frame_pointer_needed || !frame.to_allocate)
5782 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5783 else
5784 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5785 -frame.nregs * UNITS_PER_WORD);
5788 pic_reg_used = false;
5789 if (pic_offset_table_rtx
5790 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5791 || current_function_profile))
5793 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5795 if (alt_pic_reg_used != INVALID_REGNUM)
5796 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5798 pic_reg_used = true;
5801 if (pic_reg_used)
5803 if (TARGET_64BIT)
5804 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5805 else
5806 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5808 /* Even with accurate pre-reload life analysis, we can wind up
5809 deleting all references to the pic register after reload.
5810 Consider if cross-jumping unifies two sides of a branch
5811 controlled by a comparison vs the only read from a global.
5812 In which case, allow the set_got to be deleted, though we're
5813 too late to do anything about the ebx save in the prologue. */
5814 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5817 /* Prevent function calls from being scheduled before the call to mcount.
5818 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
5819 if (current_function_profile)
5820 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5823 /* Emit code to restore saved registers using MOV insns. First register
5824 is restored from POINTER + OFFSET. */
5825 static void
5826 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5827 int maybe_eh_return)
5829 int regno;
5830 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5832 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5833 if (ix86_save_reg (regno, maybe_eh_return))
5835 /* Ensure that adjust_address won't be forced to produce a pointer
5836 outside the range allowed by the x86-64 instruction set. */
5837 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5839 rtx r11;
5841 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5842 emit_move_insn (r11, GEN_INT (offset));
5843 emit_insn (gen_adddi3 (r11, r11, pointer));
5844 base_address = gen_rtx_MEM (Pmode, r11);
5845 offset = 0;
5847 emit_move_insn (gen_rtx_REG (Pmode, regno),
5848 adjust_address (base_address, Pmode, offset));
5849 offset += UNITS_PER_WORD;
5853 /* Restore function stack, frame, and registers. */
5855 void
5856 ix86_expand_epilogue (int style)
5858 int regno;
5859 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5860 struct ix86_frame frame;
5861 HOST_WIDE_INT offset;
5863 ix86_compute_frame_layout (&frame);
5865 /* Calculate start of saved registers relative to ebp. Special care
5866 must be taken for the normal return case of a function using
5867 eh_return: the eax and edx registers are marked as saved, but not
5868 restored along this path. */
5869 offset = frame.nregs;
5870 if (current_function_calls_eh_return && style != 2)
5871 offset -= 2;
5872 offset *= -UNITS_PER_WORD;
5874 /* If we're only restoring one register and sp is not valid, then
5875 use a move instruction to restore the register, since it's
5876 less work than reloading sp and popping the register.
5878 The default code results in a stack adjustment using an add/lea instruction,
5879 while this code results in a LEAVE instruction (or discrete equivalent),
5880 so it is profitable in some other cases as well, especially when there
5881 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5882 and there is exactly one register to pop. This heuristic may need some
5883 tuning in the future. */
5884 if ((!sp_valid && frame.nregs <= 1)
5885 || (TARGET_EPILOGUE_USING_MOVE
5886 && cfun->machine->use_fast_prologue_epilogue
5887 && (frame.nregs > 1 || frame.to_allocate))
5888 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5889 || (frame_pointer_needed && TARGET_USE_LEAVE
5890 && cfun->machine->use_fast_prologue_epilogue
5891 && frame.nregs == 1)
5892 || current_function_calls_eh_return)
5894 /* Restore registers. We can use ebp or esp to address the memory
5895 locations. If both are available, default to ebp, since offsets
5896 are known to be small. The only exception is esp pointing directly to
5897 the end of the block of saved registers, where we may simplify the
5898 addressing mode. */
5900 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5901 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5902 frame.to_allocate, style == 2);
5903 else
5904 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5905 offset, style == 2);
5907 /* eh_return epilogues need %ecx added to the stack pointer. */
5908 if (style == 2)
5910 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5912 if (frame_pointer_needed)
5914 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5915 tmp = plus_constant (tmp, UNITS_PER_WORD);
5916 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5918 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5919 emit_move_insn (hard_frame_pointer_rtx, tmp);
5921 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5922 const0_rtx, style);
5924 else
5926 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5927 tmp = plus_constant (tmp, (frame.to_allocate
5928 + frame.nregs * UNITS_PER_WORD));
5929 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5932 else if (!frame_pointer_needed)
5933 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5934 GEN_INT (frame.to_allocate
5935 + frame.nregs * UNITS_PER_WORD),
5936 style);
5937 /* If not an i386, mov & pop is faster than "leave". */
5938 else if (TARGET_USE_LEAVE || optimize_size
5939 || !cfun->machine->use_fast_prologue_epilogue)
5940 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5941 else
5943 pro_epilogue_adjust_stack (stack_pointer_rtx,
5944 hard_frame_pointer_rtx,
5945 const0_rtx, style);
5946 if (TARGET_64BIT)
5947 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5948 else
5949 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5952 else
5954 /* First step is to deallocate the stack frame so that we can
5955 pop the registers. */
5956 if (!sp_valid)
5958 gcc_assert (frame_pointer_needed);
5959 pro_epilogue_adjust_stack (stack_pointer_rtx,
5960 hard_frame_pointer_rtx,
5961 GEN_INT (offset), style);
5963 else if (frame.to_allocate)
5964 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5965 GEN_INT (frame.to_allocate), style);
5967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5968 if (ix86_save_reg (regno, false))
5970 if (TARGET_64BIT)
5971 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5972 else
5973 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5975 if (frame_pointer_needed)
5977 /* Leave results in shorter dependency chains on CPUs that are
5978 able to grok it fast. */
5979 if (TARGET_USE_LEAVE)
5980 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5981 else if (TARGET_64BIT)
5982 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5983 else
5984 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5988 if (cfun->machine->force_align_arg_pointer)
5990 emit_insn (gen_addsi3 (stack_pointer_rtx,
5991 cfun->machine->force_align_arg_pointer,
5992 GEN_INT (-4)));
5995 /* Sibcall epilogues don't want a return instruction. */
5996 if (style == 0)
5997 return;
5999 if (current_function_pops_args && current_function_args_size)
6001 rtx popc = GEN_INT (current_function_pops_args);
6003 /* The i386 can only pop 64K bytes with one "ret". If asked to pop more,
6004 pop the return address, do an explicit add, and jump indirectly to the
6005 caller. */
6007 if (current_function_pops_args >= 65536)
6009 rtx ecx = gen_rtx_REG (SImode, 2);
6011 /* There is no "pascal" calling convention in the 64bit ABI. */
6012 gcc_assert (!TARGET_64BIT);
6014 emit_insn (gen_popsi1 (ecx));
6015 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6016 emit_jump_insn (gen_return_indirect_internal (ecx));
6018 else
6019 emit_jump_insn (gen_return_pop_internal (popc));
6021 else
6022 emit_jump_insn (gen_return_internal ());
6025 /* Reset from the function's potential modifications. */
6027 static void
6028 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6029 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6031 if (pic_offset_table_rtx)
6032 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6033 #if TARGET_MACHO
6034 /* Mach-O doesn't support labels at the end of objects, so if
6035 it looks like we might want one, insert a NOP. */
6037 rtx insn = get_last_insn ();
6038 while (insn
6039 && NOTE_P (insn)
6040 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6041 insn = PREV_INSN (insn);
6042 if (insn
6043 && (LABEL_P (insn)
6044 || (NOTE_P (insn)
6045 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6046 fputs ("\tnop\n", file);
6048 #endif
6052 /* Extract the parts of an RTL expression that is a valid memory address
6053 for an instruction. Return 0 if the structure of the address is
6054 grossly off. Return -1 if the address contains ASHIFT, so it is not
6055 strictly valid, but is still used for computing the length of a lea instruction. */
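/* For illustration, the address 8(%ebp,%esi,4), i.e.
   (plus (plus (mult (reg esi) (const_int 4)) (reg ebp)) (const_int 8)),
   decomposes into base = %ebp, index = %esi, scale = 4, disp = 8.  */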
6058 ix86_decompose_address (rtx addr, struct ix86_address *out)
6060 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6061 rtx base_reg, index_reg;
6062 HOST_WIDE_INT scale = 1;
6063 rtx scale_rtx = NULL_RTX;
6064 int retval = 1;
6065 enum ix86_address_seg seg = SEG_DEFAULT;
6067 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6068 base = addr;
6069 else if (GET_CODE (addr) == PLUS)
6071 rtx addends[4], op;
6072 int n = 0, i;
6074 op = addr;
6077 if (n >= 4)
6078 return 0;
6079 addends[n++] = XEXP (op, 1);
6080 op = XEXP (op, 0);
6082 while (GET_CODE (op) == PLUS);
6083 if (n >= 4)
6084 return 0;
6085 addends[n] = op;
6087 for (i = n; i >= 0; --i)
6089 op = addends[i];
6090 switch (GET_CODE (op))
6092 case MULT:
6093 if (index)
6094 return 0;
6095 index = XEXP (op, 0);
6096 scale_rtx = XEXP (op, 1);
6097 break;
6099 case UNSPEC:
6100 if (XINT (op, 1) == UNSPEC_TP
6101 && TARGET_TLS_DIRECT_SEG_REFS
6102 && seg == SEG_DEFAULT)
6103 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6104 else
6105 return 0;
6106 break;
6108 case REG:
6109 case SUBREG:
6110 if (!base)
6111 base = op;
6112 else if (!index)
6113 index = op;
6114 else
6115 return 0;
6116 break;
6118 case CONST:
6119 case CONST_INT:
6120 case SYMBOL_REF:
6121 case LABEL_REF:
6122 if (disp)
6123 return 0;
6124 disp = op;
6125 break;
6127 default:
6128 return 0;
6132 else if (GET_CODE (addr) == MULT)
6134 index = XEXP (addr, 0); /* index*scale */
6135 scale_rtx = XEXP (addr, 1);
6137 else if (GET_CODE (addr) == ASHIFT)
6139 rtx tmp;
6141 /* We're called for lea too, which implements ashift on occasion. */
6142 index = XEXP (addr, 0);
6143 tmp = XEXP (addr, 1);
6144 if (GET_CODE (tmp) != CONST_INT)
6145 return 0;
6146 scale = INTVAL (tmp);
6147 if ((unsigned HOST_WIDE_INT) scale > 3)
6148 return 0;
6149 scale = 1 << scale;
6150 retval = -1;
6152 else
6153 disp = addr; /* displacement */
6155 /* Extract the integral value of scale. */
6156 if (scale_rtx)
6158 if (GET_CODE (scale_rtx) != CONST_INT)
6159 return 0;
6160 scale = INTVAL (scale_rtx);
6163 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6164 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6166 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6167 if (base_reg && index_reg && scale == 1
6168 && (index_reg == arg_pointer_rtx
6169 || index_reg == frame_pointer_rtx
6170 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6172 rtx tmp;
6173 tmp = base, base = index, index = tmp;
6174 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6177 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6178 if ((base_reg == hard_frame_pointer_rtx
6179 || base_reg == frame_pointer_rtx
6180 || base_reg == arg_pointer_rtx) && !disp)
6181 disp = const0_rtx;
6183 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6184 Avoid this by transforming to [%esi+0]. */
6185 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6186 && base_reg && !index_reg && !disp
6187 && REG_P (base_reg)
6188 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6189 disp = const0_rtx;
6191 /* Special case: encode reg+reg instead of reg*2. */
6192 if (!base && index && scale && scale == 2)
6193 base = index, base_reg = index_reg, scale = 1;
6195 /* Special case: scaling cannot be encoded without base or displacement. */
6196 if (!base && !disp && index && scale != 1)
6197 disp = const0_rtx;
6199 out->base = base;
6200 out->index = index;
6201 out->disp = disp;
6202 out->scale = scale;
6203 out->seg = seg;
6205 return retval;
6208 /* Return the cost of the memory address X.
6209 For i386, it is better to use a complex address than let gcc copy
6210 the address into a reg and make a new pseudo. But not if the address
6211 requires two regs - that would mean more pseudos with longer
6212 lifetimes. */
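/* As a rough illustration of the heuristic below: a bare (%eax) costs 1,
   a form such as 8(%eax) with a hard register costs 0, and addresses that
   use pseudo registers cost more (still more when base and index are two
   different pseudos), which keeps register pressure down.  */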
6213 static int
6214 ix86_address_cost (rtx x)
6216 struct ix86_address parts;
6217 int cost = 1;
6218 int ok = ix86_decompose_address (x, &parts);
6220 gcc_assert (ok);
6222 if (parts.base && GET_CODE (parts.base) == SUBREG)
6223 parts.base = SUBREG_REG (parts.base);
6224 if (parts.index && GET_CODE (parts.index) == SUBREG)
6225 parts.index = SUBREG_REG (parts.index);
6227 /* More complex memory references are better. */
6228 if (parts.disp && parts.disp != const0_rtx)
6229 cost--;
6230 if (parts.seg != SEG_DEFAULT)
6231 cost--;
6233 /* Attempt to minimize number of registers in the address. */
6234 if ((parts.base
6235 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6236 || (parts.index
6237 && (!REG_P (parts.index)
6238 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6239 cost++;
6241 if (parts.base
6242 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6243 && parts.index
6244 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6245 && parts.base != parts.index)
6246 cost++;
6248 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6249 since its predecode logic can't detect the length of such instructions
6250 and decoding degenerates to vector decoding. Increase the cost of such
6251 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6252 to split such addresses or even refuse them altogether.
6254 The following addressing modes are affected:
6255 [base+scale*index]
6256 [scale*index+disp]
6257 [base+index]
6259 The first and last cases may be avoidable by explicitly coding the zero in
6260 the memory address, but I don't have an AMD-K6 machine handy to check this
6261 theory. */
6263 if (TARGET_K6
6264 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6265 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6266 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6267 cost += 10;
6269 return cost;
6272 /* If X is a machine specific address (i.e. a symbol or label being
6273 referenced as a displacement from the GOT implemented using an
6274 UNSPEC), then return the base term. Otherwise return X. */
6277 ix86_find_base_term (rtx x)
6279 rtx term;
6281 if (TARGET_64BIT)
6283 if (GET_CODE (x) != CONST)
6284 return x;
6285 term = XEXP (x, 0);
6286 if (GET_CODE (term) == PLUS
6287 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6288 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6289 term = XEXP (term, 0);
6290 if (GET_CODE (term) != UNSPEC
6291 || XINT (term, 1) != UNSPEC_GOTPCREL)
6292 return x;
6294 term = XVECEXP (term, 0, 0);
6296 if (GET_CODE (term) != SYMBOL_REF
6297 && GET_CODE (term) != LABEL_REF)
6298 return x;
6300 return term;
6303 term = ix86_delegitimize_address (x);
6305 if (GET_CODE (term) != SYMBOL_REF
6306 && GET_CODE (term) != LABEL_REF)
6307 return x;
6309 return term;
6312 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6313 this is used to form addresses of local data when -fPIC is in
6314 use. */
6316 static bool
6317 darwin_local_data_pic (rtx disp)
6319 if (GET_CODE (disp) == MINUS)
6321 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6322 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6323 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6325 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6326 if (! strcmp (sym_name, "<pic base>"))
6327 return true;
6331 return false;
6334 /* Determine if a given RTX is a valid constant. We already know this
6335 satisfies CONSTANT_P. */
6337 bool
6338 legitimate_constant_p (rtx x)
6340 switch (GET_CODE (x))
6342 case CONST:
6343 x = XEXP (x, 0);
6345 if (GET_CODE (x) == PLUS)
6347 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6348 return false;
6349 x = XEXP (x, 0);
6352 if (TARGET_MACHO && darwin_local_data_pic (x))
6353 return true;
6355 /* Only some unspecs are valid as "constants". */
6356 if (GET_CODE (x) == UNSPEC)
6357 switch (XINT (x, 1))
6359 case UNSPEC_GOTOFF:
6360 return TARGET_64BIT;
6361 case UNSPEC_TPOFF:
6362 case UNSPEC_NTPOFF:
6363 x = XVECEXP (x, 0, 0);
6364 return (GET_CODE (x) == SYMBOL_REF
6365 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6366 case UNSPEC_DTPOFF:
6367 x = XVECEXP (x, 0, 0);
6368 return (GET_CODE (x) == SYMBOL_REF
6369 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6370 default:
6371 return false;
6374 /* We must have drilled down to a symbol. */
6375 if (GET_CODE (x) == LABEL_REF)
6376 return true;
6377 if (GET_CODE (x) != SYMBOL_REF)
6378 return false;
6379 /* FALLTHRU */
6381 case SYMBOL_REF:
6382 /* TLS symbols are never valid. */
6383 if (SYMBOL_REF_TLS_MODEL (x))
6384 return false;
6385 break;
6387 case CONST_DOUBLE:
6388 if (GET_MODE (x) == TImode
6389 && x != CONST0_RTX (TImode)
6390 && !TARGET_64BIT)
6391 return false;
6392 break;
6394 case CONST_VECTOR:
6395 if (x == CONST0_RTX (GET_MODE (x)))
6396 return true;
6397 return false;
6399 default:
6400 break;
6403 /* Otherwise we handle everything else in the move patterns. */
6404 return true;
6407 /* Determine if it's legal to put X into the constant pool. This
6408 is not possible for the address of thread-local symbols, which
6409 is checked above. */
6411 static bool
6412 ix86_cannot_force_const_mem (rtx x)
6414 /* We can always put integral constants and vectors in memory. */
6415 switch (GET_CODE (x))
6417 case CONST_INT:
6418 case CONST_DOUBLE:
6419 case CONST_VECTOR:
6420 return false;
6422 default:
6423 break;
6425 return !legitimate_constant_p (x);
6428 /* Determine if a given RTX is a valid constant address. */
6430 bool
6431 constant_address_p (rtx x)
6433 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6436 /* Nonzero if the constant value X is a legitimate general operand
6437 when generating PIC code. It is given that flag_pic is on and
6438 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6440 bool
6441 legitimate_pic_operand_p (rtx x)
6443 rtx inner;
6445 switch (GET_CODE (x))
6447 case CONST:
6448 inner = XEXP (x, 0);
6449 if (GET_CODE (inner) == PLUS
6450 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6451 inner = XEXP (inner, 0);
6453 /* Only some unspecs are valid as "constants". */
6454 if (GET_CODE (inner) == UNSPEC)
6455 switch (XINT (inner, 1))
6457 case UNSPEC_GOTOFF:
6458 return TARGET_64BIT;
6459 case UNSPEC_TPOFF:
6460 x = XVECEXP (inner, 0, 0);
6461 return (GET_CODE (x) == SYMBOL_REF
6462 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6463 default:
6464 return false;
6466 /* FALLTHRU */
6468 case SYMBOL_REF:
6469 case LABEL_REF:
6470 return legitimate_pic_address_disp_p (x);
6472 default:
6473 return true;
6477 /* Determine if a given CONST RTX is a valid memory displacement
6478 in PIC mode. */
6481 legitimate_pic_address_disp_p (rtx disp)
6483 bool saw_plus;
6485 /* In 64bit mode we can allow direct addresses of symbols and labels
6486 when they are not dynamic symbols. */
6487 if (TARGET_64BIT)
6489 rtx op0 = disp, op1;
6491 switch (GET_CODE (disp))
6493 case LABEL_REF:
6494 return true;
6496 case CONST:
6497 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6498 break;
6499 op0 = XEXP (XEXP (disp, 0), 0);
6500 op1 = XEXP (XEXP (disp, 0), 1);
6501 if (GET_CODE (op1) != CONST_INT
6502 || INTVAL (op1) >= 16*1024*1024
6503 || INTVAL (op1) < -16*1024*1024)
6504 break;
6505 if (GET_CODE (op0) == LABEL_REF)
6506 return true;
6507 if (GET_CODE (op0) != SYMBOL_REF)
6508 break;
6509 /* FALLTHRU */
6511 case SYMBOL_REF:
6512 /* TLS references should always be enclosed in UNSPEC. */
6513 if (SYMBOL_REF_TLS_MODEL (op0))
6514 return false;
6515 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6516 return true;
6517 break;
6519 default:
6520 break;
6523 if (GET_CODE (disp) != CONST)
6524 return 0;
6525 disp = XEXP (disp, 0);
6527 if (TARGET_64BIT)
6529 /* It is unsafe to allow PLUS expressions; this limits the allowed distance
6530 of GOT tables. We should not need these anyway. */
6531 if (GET_CODE (disp) != UNSPEC
6532 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6533 && XINT (disp, 1) != UNSPEC_GOTOFF))
6534 return 0;
6536 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6537 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6538 return 0;
6539 return 1;
6542 saw_plus = false;
6543 if (GET_CODE (disp) == PLUS)
6545 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6546 return 0;
6547 disp = XEXP (disp, 0);
6548 saw_plus = true;
6551 if (TARGET_MACHO && darwin_local_data_pic (disp))
6552 return 1;
6554 if (GET_CODE (disp) != UNSPEC)
6555 return 0;
6557 switch (XINT (disp, 1))
6559 case UNSPEC_GOT:
6560 if (saw_plus)
6561 return false;
6562 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6563 case UNSPEC_GOTOFF:
6564 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6565 While the ABI also specifies a 32bit relocation, we don't produce it in
6566 the small PIC model at all. */
6567 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6568 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6569 && !TARGET_64BIT)
6570 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6571 return false;
6572 case UNSPEC_GOTTPOFF:
6573 case UNSPEC_GOTNTPOFF:
6574 case UNSPEC_INDNTPOFF:
6575 if (saw_plus)
6576 return false;
6577 disp = XVECEXP (disp, 0, 0);
6578 return (GET_CODE (disp) == SYMBOL_REF
6579 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6580 case UNSPEC_NTPOFF:
6581 disp = XVECEXP (disp, 0, 0);
6582 return (GET_CODE (disp) == SYMBOL_REF
6583 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6584 case UNSPEC_DTPOFF:
6585 disp = XVECEXP (disp, 0, 0);
6586 return (GET_CODE (disp) == SYMBOL_REF
6587 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6590 return 0;
6593 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6594 memory address for an instruction. The MODE argument is the machine mode
6595 for the MEM expression that wants to use this address.
6597 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6598 convert common non-canonical forms to canonical form so that they will
6599 be recognized. */
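/* In broad terms the canonical form accepted below is
   base + index*scale + disp, where scale is 1, 2, 4 or 8, base and index
   are registers (or word-sized SUBREGs of registers) valid for those roles,
   and disp is a constant, possibly wrapped in a PIC or TLS unspec.  */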
6602 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6604 struct ix86_address parts;
6605 rtx base, index, disp;
6606 HOST_WIDE_INT scale;
6607 const char *reason = NULL;
6608 rtx reason_rtx = NULL_RTX;
6610 if (TARGET_DEBUG_ADDR)
6612 fprintf (stderr,
6613 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6614 GET_MODE_NAME (mode), strict);
6615 debug_rtx (addr);
6618 if (ix86_decompose_address (addr, &parts) <= 0)
6620 reason = "decomposition failed";
6621 goto report_error;
6624 base = parts.base;
6625 index = parts.index;
6626 disp = parts.disp;
6627 scale = parts.scale;
6629 /* Validate base register.
6631 Don't allow SUBREGs that span more than a word here. They can lead to spill
6632 failures when the base is one word out of a two-word structure, which is
6633 represented internally as a DImode int. */
6635 if (base)
6637 rtx reg;
6638 reason_rtx = base;
6640 if (REG_P (base))
6641 reg = base;
6642 else if (GET_CODE (base) == SUBREG
6643 && REG_P (SUBREG_REG (base))
6644 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6645 <= UNITS_PER_WORD)
6646 reg = SUBREG_REG (base);
6647 else
6649 reason = "base is not a register";
6650 goto report_error;
6653 if (GET_MODE (base) != Pmode)
6655 reason = "base is not in Pmode";
6656 goto report_error;
6659 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6660 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6662 reason = "base is not valid";
6663 goto report_error;
6667 /* Validate index register.
6669 Don't allow SUBREGs that span more than a word here -- same as above. */
6671 if (index)
6673 rtx reg;
6674 reason_rtx = index;
6676 if (REG_P (index))
6677 reg = index;
6678 else if (GET_CODE (index) == SUBREG
6679 && REG_P (SUBREG_REG (index))
6680 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6681 <= UNITS_PER_WORD)
6682 reg = SUBREG_REG (index);
6683 else
6685 reason = "index is not a register";
6686 goto report_error;
6689 if (GET_MODE (index) != Pmode)
6691 reason = "index is not in Pmode";
6692 goto report_error;
6695 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6696 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6698 reason = "index is not valid";
6699 goto report_error;
6703 /* Validate scale factor. */
6704 if (scale != 1)
6706 reason_rtx = GEN_INT (scale);
6707 if (!index)
6709 reason = "scale without index";
6710 goto report_error;
6713 if (scale != 2 && scale != 4 && scale != 8)
6715 reason = "scale is not a valid multiplier";
6716 goto report_error;
6720 /* Validate displacement. */
6721 if (disp)
6723 reason_rtx = disp;
6725 if (GET_CODE (disp) == CONST
6726 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6727 switch (XINT (XEXP (disp, 0), 1))
6729 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6730 used. While the ABI also specifies 32bit relocations, we don't produce
6731 them at all and use IP-relative addressing instead. */
6732 case UNSPEC_GOT:
6733 case UNSPEC_GOTOFF:
6734 gcc_assert (flag_pic);
6735 if (!TARGET_64BIT)
6736 goto is_legitimate_pic;
6737 reason = "64bit address unspec";
6738 goto report_error;
6740 case UNSPEC_GOTPCREL:
6741 gcc_assert (flag_pic);
6742 goto is_legitimate_pic;
6744 case UNSPEC_GOTTPOFF:
6745 case UNSPEC_GOTNTPOFF:
6746 case UNSPEC_INDNTPOFF:
6747 case UNSPEC_NTPOFF:
6748 case UNSPEC_DTPOFF:
6749 break;
6751 default:
6752 reason = "invalid address unspec";
6753 goto report_error;
6756 else if (SYMBOLIC_CONST (disp)
6757 && (flag_pic
6758 || (TARGET_MACHO
6759 #if TARGET_MACHO
6760 && MACHOPIC_INDIRECT
6761 && !machopic_operand_p (disp)
6762 #endif
6766 is_legitimate_pic:
6767 if (TARGET_64BIT && (index || base))
6769 /* foo@dtpoff(%rX) is ok. */
6770 if (GET_CODE (disp) != CONST
6771 || GET_CODE (XEXP (disp, 0)) != PLUS
6772 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6773 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6774 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6775 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6777 reason = "non-constant pic memory reference";
6778 goto report_error;
6781 else if (! legitimate_pic_address_disp_p (disp))
6783 reason = "displacement is an invalid pic construct";
6784 goto report_error;
6787 /* This code used to verify that a symbolic pic displacement
6788 includes the pic_offset_table_rtx register.
6790 While this is a good idea, unfortunately these constructs may
6791 be created by the "adds using lea" optimization for incorrect
6792 code like:
6794 int a;
6795 int foo(int i)
6797 return *(&a+i);
6800 This code is nonsensical, but results in addressing the
6801 GOT table with a pic_offset_table_rtx base. We can't
6802 just refuse it easily, since it gets matched by the
6803 "addsi3" pattern, which later gets split to lea in the
6804 case where the output register differs from the input. While this
6805 could be handled by a separate addsi pattern for this case
6806 that never results in lea, disabling this test seems to be the
6807 easier and correct fix for the crash. */
6809 else if (GET_CODE (disp) != LABEL_REF
6810 && GET_CODE (disp) != CONST_INT
6811 && (GET_CODE (disp) != CONST
6812 || !legitimate_constant_p (disp))
6813 && (GET_CODE (disp) != SYMBOL_REF
6814 || !legitimate_constant_p (disp)))
6816 reason = "displacement is not constant";
6817 goto report_error;
6819 else if (TARGET_64BIT
6820 && !x86_64_immediate_operand (disp, VOIDmode))
6822 reason = "displacement is out of range";
6823 goto report_error;
6827 /* Everything looks valid. */
6828 if (TARGET_DEBUG_ADDR)
6829 fprintf (stderr, "Success.\n");
6830 return TRUE;
6832 report_error:
6833 if (TARGET_DEBUG_ADDR)
6835 fprintf (stderr, "Error: %s\n", reason);
6836 debug_rtx (reason_rtx);
6838 return FALSE;
6841 /* Return a unique alias set for the GOT. */
6843 static HOST_WIDE_INT
6844 ix86_GOT_alias_set (void)
6846 static HOST_WIDE_INT set = -1;
6847 if (set == -1)
6848 set = new_alias_set ();
6849 return set;
6852 /* Return a legitimate reference for ORIG (an address) using the
6853 register REG. If REG is 0, a new pseudo is generated.
6855 There are two types of references that must be handled:
6857 1. Global data references must load the address from the GOT, via
6858 the PIC reg. An insn is emitted to do this load, and the reg is
6859 returned.
6861 2. Static data references, constant pool addresses, and code labels
6862 compute the address as an offset from the GOT, whose base is in
6863 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6864 differentiate them from global data objects. The returned
6865 address is the PIC reg + an unspec constant.
6867 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6868 reg also appears in the address. */
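/* As a sketch for the 32-bit case: a global symbol FOO becomes a load from
     (mem (plus pic_reg (const (unspec [FOO] UNSPEC_GOT))))
   i.e. foo@GOT(%ebx), while local data becomes
     (plus pic_reg (const (unspec [FOO] UNSPEC_GOTOFF)))
   i.e. foo@GOTOFF(%ebx), as the cases below construct.  */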
6870 static rtx
6871 legitimize_pic_address (rtx orig, rtx reg)
6873 rtx addr = orig;
6874 rtx new = orig;
6875 rtx base;
6877 #if TARGET_MACHO
6878 if (TARGET_MACHO && !TARGET_64BIT)
6880 if (reg == 0)
6881 reg = gen_reg_rtx (Pmode);
6882 /* Use the generic Mach-O PIC machinery. */
6883 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6885 #endif
6887 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6888 new = addr;
6889 else if (TARGET_64BIT
6890 && ix86_cmodel != CM_SMALL_PIC
6891 && local_symbolic_operand (addr, Pmode))
6893 rtx tmpreg;
6894 /* This symbol may be referenced via a displacement from the PIC
6895 base address (@GOTOFF). */
6897 if (reload_in_progress)
6898 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6899 if (GET_CODE (addr) == CONST)
6900 addr = XEXP (addr, 0);
6901 if (GET_CODE (addr) == PLUS)
6903 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6904 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6906 else
6907 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6908 new = gen_rtx_CONST (Pmode, new);
6909 if (!reg)
6910 tmpreg = gen_reg_rtx (Pmode);
6911 else
6912 tmpreg = reg;
6913 emit_move_insn (tmpreg, new);
6915 if (reg != 0)
6917 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6918 tmpreg, 1, OPTAB_DIRECT);
6919 new = reg;
6921 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6923 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6925 /* This symbol may be referenced via a displacement from the PIC
6926 base address (@GOTOFF). */
6928 if (reload_in_progress)
6929 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6930 if (GET_CODE (addr) == CONST)
6931 addr = XEXP (addr, 0);
6932 if (GET_CODE (addr) == PLUS)
6934 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6935 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6937 else
6938 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6939 new = gen_rtx_CONST (Pmode, new);
6940 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6942 if (reg != 0)
6944 emit_move_insn (reg, new);
6945 new = reg;
6948 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6950 if (TARGET_64BIT)
6952 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6953 new = gen_rtx_CONST (Pmode, new);
6954 new = gen_const_mem (Pmode, new);
6955 set_mem_alias_set (new, ix86_GOT_alias_set ());
6957 if (reg == 0)
6958 reg = gen_reg_rtx (Pmode);
6959 /* Use gen_movsi directly, otherwise the address is loaded
6960 into a register for CSE. We don't want to CSE these addresses;
6961 instead we CSE addresses from the GOT table, so skip this. */
6962 emit_insn (gen_movsi (reg, new));
6963 new = reg;
6965 else
6967 /* This symbol must be referenced via a load from the
6968 Global Offset Table (@GOT). */
6970 if (reload_in_progress)
6971 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6972 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6973 new = gen_rtx_CONST (Pmode, new);
6974 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6975 new = gen_const_mem (Pmode, new);
6976 set_mem_alias_set (new, ix86_GOT_alias_set ());
6978 if (reg == 0)
6979 reg = gen_reg_rtx (Pmode);
6980 emit_move_insn (reg, new);
6981 new = reg;
6984 else
6986 if (GET_CODE (addr) == CONST_INT
6987 && !x86_64_immediate_operand (addr, VOIDmode))
6989 if (reg)
6991 emit_move_insn (reg, addr);
6992 new = reg;
6994 else
6995 new = force_reg (Pmode, addr);
6997 else if (GET_CODE (addr) == CONST)
6999 addr = XEXP (addr, 0);
7001 /* We must match stuff we generate before. Assume the only
7002 unspecs that can get here are ours. Not that we could do
7003 anything with them anyway.... */
7004 if (GET_CODE (addr) == UNSPEC
7005 || (GET_CODE (addr) == PLUS
7006 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7007 return orig;
7008 gcc_assert (GET_CODE (addr) == PLUS);
7010 if (GET_CODE (addr) == PLUS)
7012 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7014 /* Check first to see if this is a constant offset from a @GOTOFF
7015 symbol reference. */
7016 if (local_symbolic_operand (op0, Pmode)
7017 && GET_CODE (op1) == CONST_INT)
7019 if (!TARGET_64BIT)
7021 if (reload_in_progress)
7022 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7023 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7024 UNSPEC_GOTOFF);
7025 new = gen_rtx_PLUS (Pmode, new, op1);
7026 new = gen_rtx_CONST (Pmode, new);
7027 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7029 if (reg != 0)
7031 emit_move_insn (reg, new);
7032 new = reg;
7035 else
7037 if (INTVAL (op1) < -16*1024*1024
7038 || INTVAL (op1) >= 16*1024*1024)
7040 if (!x86_64_immediate_operand (op1, Pmode))
7041 op1 = force_reg (Pmode, op1);
7042 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7046 else
7048 base = legitimize_pic_address (XEXP (addr, 0), reg);
7049 new = legitimize_pic_address (XEXP (addr, 1),
7050 base == reg ? NULL_RTX : reg);
7052 if (GET_CODE (new) == CONST_INT)
7053 new = plus_constant (base, INTVAL (new));
7054 else
7056 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7058 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7059 new = XEXP (new, 1);
7061 new = gen_rtx_PLUS (Pmode, base, new);
7066 return new;
7069 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7071 static rtx
7072 get_thread_pointer (int to_reg)
7074 rtx tp, reg, insn;
7076 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7077 if (!to_reg)
7078 return tp;
7080 reg = gen_reg_rtx (Pmode);
7081 insn = gen_rtx_SET (VOIDmode, reg, tp);
7082 insn = emit_insn (insn);
7084 return reg;
7087 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7088 false if we expect this to be used for a memory address and true if
7089 we expect to load the address into a register. */
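/* Rough sketch of the access sequences generated below (details vary
   with -m32/-m64 and TARGET_GNU2_TLS):
     global-dynamic: call __tls_get_addr to compute the address at run time;
     local-dynamic:  one __tls_get_addr call yields the module's TLS base,
                     individual variables are then addressed via @DTPOFF;
     initial-exec:   load the variable's TP offset from the GOT
                     (@GOTTPOFF/@GOTNTPOFF) and add the thread pointer;
     local-exec:     add a link-time constant offset (@TPOFF/@NTPOFF) to the
                     thread pointer.  */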
7091 static rtx
7092 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7094 rtx dest, base, off, pic, tp;
7095 int type;
7097 switch (model)
7099 case TLS_MODEL_GLOBAL_DYNAMIC:
7100 dest = gen_reg_rtx (Pmode);
7101 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7103 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7105 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7107 start_sequence ();
7108 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7109 insns = get_insns ();
7110 end_sequence ();
7112 emit_libcall_block (insns, dest, rax, x);
7114 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7115 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7116 else
7117 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7119 if (TARGET_GNU2_TLS)
7121 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7123 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7125 break;
7127 case TLS_MODEL_LOCAL_DYNAMIC:
7128 base = gen_reg_rtx (Pmode);
7129 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7131 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7133 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7135 start_sequence ();
7136 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7137 insns = get_insns ();
7138 end_sequence ();
7140 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7141 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7142 emit_libcall_block (insns, base, rax, note);
7144 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7145 emit_insn (gen_tls_local_dynamic_base_64 (base));
7146 else
7147 emit_insn (gen_tls_local_dynamic_base_32 (base));
7149 if (TARGET_GNU2_TLS)
7151 rtx x = ix86_tls_module_base ();
7153 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7154 gen_rtx_MINUS (Pmode, x, tp));
7157 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7158 off = gen_rtx_CONST (Pmode, off);
7160 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7162 if (TARGET_GNU2_TLS)
7164 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7166 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7169 break;
7171 case TLS_MODEL_INITIAL_EXEC:
7172 if (TARGET_64BIT)
7174 pic = NULL;
7175 type = UNSPEC_GOTNTPOFF;
7177 else if (flag_pic)
7179 if (reload_in_progress)
7180 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7181 pic = pic_offset_table_rtx;
7182 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7184 else if (!TARGET_ANY_GNU_TLS)
7186 pic = gen_reg_rtx (Pmode);
7187 emit_insn (gen_set_got (pic));
7188 type = UNSPEC_GOTTPOFF;
7190 else
7192 pic = NULL;
7193 type = UNSPEC_INDNTPOFF;
7196 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7197 off = gen_rtx_CONST (Pmode, off);
7198 if (pic)
7199 off = gen_rtx_PLUS (Pmode, pic, off);
7200 off = gen_const_mem (Pmode, off);
7201 set_mem_alias_set (off, ix86_GOT_alias_set ());
7203 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7205 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7206 off = force_reg (Pmode, off);
7207 return gen_rtx_PLUS (Pmode, base, off);
7209 else
7211 base = get_thread_pointer (true);
7212 dest = gen_reg_rtx (Pmode);
7213 emit_insn (gen_subsi3 (dest, base, off));
7215 break;
7217 case TLS_MODEL_LOCAL_EXEC:
7218 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7219 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7220 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7221 off = gen_rtx_CONST (Pmode, off);
7223 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7225 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7226 return gen_rtx_PLUS (Pmode, base, off);
7228 else
7230 base = get_thread_pointer (true);
7231 dest = gen_reg_rtx (Pmode);
7232 emit_insn (gen_subsi3 (dest, base, off));
7234 break;
7236 default:
7237 gcc_unreachable ();
7240 return dest;
7243 /* Try machine-dependent ways of modifying an illegitimate address
7244 to be legitimate. If we find one, return the new, valid address.
7245 This macro is used in only one place: `memory_address' in explow.c.
7247 OLDX is the address as it was before break_out_memory_refs was called.
7248 In some cases it is useful to look at this to decide what needs to be done.
7250 MODE and WIN are passed so that this macro can use
7251 GO_IF_LEGITIMATE_ADDRESS.
7253 It is always safe for this macro to do nothing. It exists to recognize
7254 opportunities to optimize the output.
7256 For the 80386, we handle X+REG by loading X into a register R and
7257 using R+REG. R will go in a general reg and indexing will be used.
7258 However, if REG is a broken-out memory address or multiplication,
7259 nothing needs to be done because REG can certainly go in a general reg.
7261 When -fpic is used, special handling is needed for symbolic references.
7262 See comments by legitimize_pic_address in i386.c for details. */
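/* For illustration (one case among several handled below): given
   (plus (reg %eax) (mem:SI ...)), the MEM is not a valid address part, so
   it is loaded into a fresh pseudo R, leaving (plus (reg %eax) (reg R)),
   which base+index addressing accepts.  */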
7265 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7267 int changed = 0;
7268 unsigned log;
7270 if (TARGET_DEBUG_ADDR)
7272 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7273 GET_MODE_NAME (mode));
7274 debug_rtx (x);
7277 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7278 if (log)
7279 return legitimize_tls_address (x, log, false);
7280 if (GET_CODE (x) == CONST
7281 && GET_CODE (XEXP (x, 0)) == PLUS
7282 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7283 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7285 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7286 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7289 if (flag_pic && SYMBOLIC_CONST (x))
7290 return legitimize_pic_address (x, 0);
7292 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
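/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   which the address-recognition code accepts as an index scaled by 4.  */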
7293 if (GET_CODE (x) == ASHIFT
7294 && GET_CODE (XEXP (x, 1)) == CONST_INT
7295 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7297 changed = 1;
7298 log = INTVAL (XEXP (x, 1));
7299 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7300 GEN_INT (1 << log));
7303 if (GET_CODE (x) == PLUS)
7305 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7307 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7308 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7309 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7311 changed = 1;
7312 log = INTVAL (XEXP (XEXP (x, 0), 1));
7313 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7314 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7315 GEN_INT (1 << log));
7318 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7319 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7320 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7322 changed = 1;
7323 log = INTVAL (XEXP (XEXP (x, 1), 1));
7324 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7325 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7326 GEN_INT (1 << log));
7329 /* Put multiply first if it isn't already. */
7330 if (GET_CODE (XEXP (x, 1)) == MULT)
7332 rtx tmp = XEXP (x, 0);
7333 XEXP (x, 0) = XEXP (x, 1);
7334 XEXP (x, 1) = tmp;
7335 changed = 1;
7338 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7339 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7340 created by virtual register instantiation, register elimination, and
7341 similar optimizations. */
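/* Illustrative example:
     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   is rewritten as
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   i.e. the index*scale + base + displacement shape the address
   recognizers expect.  */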
7342 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7344 changed = 1;
7345 x = gen_rtx_PLUS (Pmode,
7346 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7347 XEXP (XEXP (x, 1), 0)),
7348 XEXP (XEXP (x, 1), 1));
7351 /* Canonicalize
7352 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7353 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7354 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7355 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7356 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7357 && CONSTANT_P (XEXP (x, 1)))
7359 rtx constant;
7360 rtx other = NULL_RTX;
7362 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7364 constant = XEXP (x, 1);
7365 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7367 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7369 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7370 other = XEXP (x, 1);
7372 else
7373 constant = 0;
7375 if (constant)
7377 changed = 1;
7378 x = gen_rtx_PLUS (Pmode,
7379 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7380 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7381 plus_constant (other, INTVAL (constant)));
7385 if (changed && legitimate_address_p (mode, x, FALSE))
7386 return x;
7388 if (GET_CODE (XEXP (x, 0)) == MULT)
7390 changed = 1;
7391 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7394 if (GET_CODE (XEXP (x, 1)) == MULT)
7396 changed = 1;
7397 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7400 if (changed
7401 && GET_CODE (XEXP (x, 1)) == REG
7402 && GET_CODE (XEXP (x, 0)) == REG)
7403 return x;
7405 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7407 changed = 1;
7408 x = legitimize_pic_address (x, 0);
7411 if (changed && legitimate_address_p (mode, x, FALSE))
7412 return x;
7414 if (GET_CODE (XEXP (x, 0)) == REG)
7416 rtx temp = gen_reg_rtx (Pmode);
7417 rtx val = force_operand (XEXP (x, 1), temp);
7418 if (val != temp)
7419 emit_move_insn (temp, val);
7421 XEXP (x, 1) = temp;
7422 return x;
7425 else if (GET_CODE (XEXP (x, 1)) == REG)
7427 rtx temp = gen_reg_rtx (Pmode);
7428 rtx val = force_operand (XEXP (x, 0), temp);
7429 if (val != temp)
7430 emit_move_insn (temp, val);
7432 XEXP (x, 0) = temp;
7433 return x;
7437 return x;
7440 /* Print an integer constant expression in assembler syntax. Addition
7441 and subtraction are the only arithmetic that may appear in these
7442 expressions. FILE is the stdio stream to write to, X is the rtx, and
7443 CODE is the operand print code from the output string. */
7445 static void
7446 output_pic_addr_const (FILE *file, rtx x, int code)
7448 char buf[256];
7450 switch (GET_CODE (x))
7452 case PC:
7453 gcc_assert (flag_pic);
7454 putc ('.', file);
7455 break;
7457 case SYMBOL_REF:
7458 output_addr_const (file, x);
7459 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7460 fputs ("@PLT", file);
7461 break;
7463 case LABEL_REF:
7464 x = XEXP (x, 0);
7465 /* FALLTHRU */
7466 case CODE_LABEL:
7467 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7468 assemble_name (asm_out_file, buf);
7469 break;
7471 case CONST_INT:
7472 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7473 break;
7475 case CONST:
7476 /* This used to output parentheses around the expression,
7477 but that does not work on the 386 (either ATT or BSD assembler). */
7478 output_pic_addr_const (file, XEXP (x, 0), code);
7479 break;
7481 case CONST_DOUBLE:
7482 if (GET_MODE (x) == VOIDmode)
7484 /* We can use %d if the number is <32 bits and positive. */
7485 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7486 fprintf (file, "0x%lx%08lx",
7487 (unsigned long) CONST_DOUBLE_HIGH (x),
7488 (unsigned long) CONST_DOUBLE_LOW (x));
7489 else
7490 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7492 else
7493 /* We can't handle floating point constants;
7494 PRINT_OPERAND must handle them. */
7495 output_operand_lossage ("floating constant misused");
7496 break;
7498 case PLUS:
7499 /* Some assemblers need integer constants to appear first. */
7500 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7502 output_pic_addr_const (file, XEXP (x, 0), code);
7503 putc ('+', file);
7504 output_pic_addr_const (file, XEXP (x, 1), code);
7506 else
7508 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7509 output_pic_addr_const (file, XEXP (x, 1), code);
7510 putc ('+', file);
7511 output_pic_addr_const (file, XEXP (x, 0), code);
7513 break;
7515 case MINUS:
7516 if (!TARGET_MACHO)
7517 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7518 output_pic_addr_const (file, XEXP (x, 0), code);
7519 putc ('-', file);
7520 output_pic_addr_const (file, XEXP (x, 1), code);
7521 if (!TARGET_MACHO)
7522 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7523 break;
7525 case UNSPEC:
7526 gcc_assert (XVECLEN (x, 0) == 1);
7527 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7528 switch (XINT (x, 1))
7530 case UNSPEC_GOT:
7531 fputs ("@GOT", file);
7532 break;
7533 case UNSPEC_GOTOFF:
7534 fputs ("@GOTOFF", file);
7535 break;
7536 case UNSPEC_GOTPCREL:
7537 fputs ("@GOTPCREL(%rip)", file);
7538 break;
7539 case UNSPEC_GOTTPOFF:
7540 /* FIXME: This might be @TPOFF in Sun ld too. */
7541 fputs ("@GOTTPOFF", file);
7542 break;
7543 case UNSPEC_TPOFF:
7544 fputs ("@TPOFF", file);
7545 break;
7546 case UNSPEC_NTPOFF:
7547 if (TARGET_64BIT)
7548 fputs ("@TPOFF", file);
7549 else
7550 fputs ("@NTPOFF", file);
7551 break;
7552 case UNSPEC_DTPOFF:
7553 fputs ("@DTPOFF", file);
7554 break;
7555 case UNSPEC_GOTNTPOFF:
7556 if (TARGET_64BIT)
7557 fputs ("@GOTTPOFF(%rip)", file);
7558 else
7559 fputs ("@GOTNTPOFF", file);
7560 break;
7561 case UNSPEC_INDNTPOFF:
7562 fputs ("@INDNTPOFF", file);
7563 break;
7564 default:
7565 output_operand_lossage ("invalid UNSPEC as operand");
7566 break;
7568 break;
7570 default:
7571 output_operand_lossage ("invalid expression as operand");
7575 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7576 We need to emit DTP-relative relocations. */
7578 static void
7579 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7581 fputs (ASM_LONG, file);
7582 output_addr_const (file, x);
7583 fputs ("@DTPOFF", file);
7584 switch (size)
7586 case 4:
7587 break;
7588 case 8:
7589 fputs (", 0", file);
7590 break;
7591 default:
7592 gcc_unreachable ();
7596 /* In the name of slightly smaller debug output, and to cater to
7597 general assembler lossage, recognize PIC+GOTOFF and turn it back
7598 into a direct symbol reference.
7600 On Darwin, this is necessary to avoid a crash, because Darwin
7601 has a different PIC label for each routine but the DWARF debugging
7602 information is not associated with any particular routine, so it's
7603 necessary to remove references to the PIC label from RTL stored by
7604 the DWARF output code. */
7606 static rtx
7607 ix86_delegitimize_address (rtx orig_x)
7609 rtx x = orig_x;
7610 /* reg_addend is NULL or a multiple of some register. */
7611 rtx reg_addend = NULL_RTX;
7612 /* const_addend is NULL or a const_int. */
7613 rtx const_addend = NULL_RTX;
7614 /* This is the result, or NULL. */
7615 rtx result = NULL_RTX;
7617 if (GET_CODE (x) == MEM)
7618 x = XEXP (x, 0);
7620 if (TARGET_64BIT)
7622 if (GET_CODE (x) != CONST
7623 || GET_CODE (XEXP (x, 0)) != UNSPEC
7624 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7625 || GET_CODE (orig_x) != MEM)
7626 return orig_x;
7627 return XVECEXP (XEXP (x, 0), 0, 0);
7630 if (GET_CODE (x) != PLUS
7631 || GET_CODE (XEXP (x, 1)) != CONST)
7632 return orig_x;
7634 if (GET_CODE (XEXP (x, 0)) == REG
7635 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7636 /* %ebx + GOT/GOTOFF */
7638 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7640 /* %ebx + %reg * scale + GOT/GOTOFF */
7641 reg_addend = XEXP (x, 0);
7642 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7643 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7644 reg_addend = XEXP (reg_addend, 1);
7645 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7646 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7647 reg_addend = XEXP (reg_addend, 0);
7648 else
7649 return orig_x;
7650 if (GET_CODE (reg_addend) != REG
7651 && GET_CODE (reg_addend) != MULT
7652 && GET_CODE (reg_addend) != ASHIFT)
7653 return orig_x;
7655 else
7656 return orig_x;
7658 x = XEXP (XEXP (x, 1), 0);
7659 if (GET_CODE (x) == PLUS
7660 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7662 const_addend = XEXP (x, 1);
7663 x = XEXP (x, 0);
7666 if (GET_CODE (x) == UNSPEC
7667 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7668 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7669 result = XVECEXP (x, 0, 0);
7671 if (TARGET_MACHO && darwin_local_data_pic (x)
7672 && GET_CODE (orig_x) != MEM)
7673 result = XEXP (x, 0);
7675 if (! result)
7676 return orig_x;
7678 if (const_addend)
7679 result = gen_rtx_PLUS (Pmode, result, const_addend);
7680 if (reg_addend)
7681 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7682 return result;
7685 static void
7686 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7687 int fp, FILE *file)
7689 const char *suffix;
7691 if (mode == CCFPmode || mode == CCFPUmode)
7693 enum rtx_code second_code, bypass_code;
7694 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7695 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7696 code = ix86_fp_compare_code_to_integer (code);
7697 mode = CCmode;
7699 if (reverse)
7700 code = reverse_condition (code);
7702 switch (code)
7704 case EQ:
7705 suffix = "e";
7706 break;
7707 case NE:
7708 suffix = "ne";
7709 break;
7710 case GT:
7711 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7712 suffix = "g";
7713 break;
7714 case GTU:
7715 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7716 Those same assemblers have the same but opposite lossage on cmov. */
7717 gcc_assert (mode == CCmode);
7718 suffix = fp ? "nbe" : "a";
7719 break;
7720 case LT:
7721 switch (mode)
7723 case CCNOmode:
7724 case CCGOCmode:
7725 suffix = "s";
7726 break;
7728 case CCmode:
7729 case CCGCmode:
7730 suffix = "l";
7731 break;
7733 default:
7734 gcc_unreachable ();
7736 break;
7737 case LTU:
7738 gcc_assert (mode == CCmode);
7739 suffix = "b";
7740 break;
7741 case GE:
7742 switch (mode)
7744 case CCNOmode:
7745 case CCGOCmode:
7746 suffix = "ns";
7747 break;
7749 case CCmode:
7750 case CCGCmode:
7751 suffix = "ge";
7752 break;
7754 default:
7755 gcc_unreachable ();
7757 break;
7758 case GEU:
7759 /* ??? As above. */
7760 gcc_assert (mode == CCmode);
7761 suffix = fp ? "nb" : "ae";
7762 break;
7763 case LE:
7764 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7765 suffix = "le";
7766 break;
7767 case LEU:
7768 gcc_assert (mode == CCmode);
7769 suffix = "be";
7770 break;
7771 case UNORDERED:
7772 suffix = fp ? "u" : "p";
7773 break;
7774 case ORDERED:
7775 suffix = fp ? "nu" : "np";
7776 break;
7777 default:
7778 gcc_unreachable ();
7780 fputs (suffix, file);
7783 /* Print the name of register X to FILE based on its machine mode and number.
7784 If CODE is 'w', pretend the mode is HImode.
7785 If CODE is 'b', pretend the mode is QImode.
7786 If CODE is 'k', pretend the mode is SImode.
7787 If CODE is 'q', pretend the mode is DImode.
7788 If CODE is 'h', pretend the reg is the 'high' byte register.
7789 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
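/* For example, for the register %eax (hard reg 0): code 'b' prints "al",
   'h' prints "ah", 'w' prints "ax", 'k' prints "eax" and 'q' prints "rax"
   (the latter only meaningful on 64-bit targets).  For the REX registers,
   e.g. code 'k' on r8 prints "r8d".  */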
7791 void
7792 print_reg (rtx x, int code, FILE *file)
7794 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7795 && REGNO (x) != FRAME_POINTER_REGNUM
7796 && REGNO (x) != FLAGS_REG
7797 && REGNO (x) != FPSR_REG
7798 && REGNO (x) != FPCR_REG);
7800 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7801 putc ('%', file);
7803 if (code == 'w' || MMX_REG_P (x))
7804 code = 2;
7805 else if (code == 'b')
7806 code = 1;
7807 else if (code == 'k')
7808 code = 4;
7809 else if (code == 'q')
7810 code = 8;
7811 else if (code == 'y')
7812 code = 3;
7813 else if (code == 'h')
7814 code = 0;
7815 else
7816 code = GET_MODE_SIZE (GET_MODE (x));
7818 /* Irritatingly, the AMD extended registers use a different naming
7819 convention from the normal registers. */
7820 if (REX_INT_REG_P (x))
7822 gcc_assert (TARGET_64BIT);
7823 switch (code)
7825 case 0:
7826 error ("extended registers have no high halves");
7827 break;
7828 case 1:
7829 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7830 break;
7831 case 2:
7832 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7833 break;
7834 case 4:
7835 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7836 break;
7837 case 8:
7838 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7839 break;
7840 default:
7841 error ("unsupported operand size for extended register");
7842 break;
7844 return;
7846 switch (code)
7848 case 3:
7849 if (STACK_TOP_P (x))
7851 fputs ("st(0)", file);
7852 break;
7854 /* FALLTHRU */
7855 case 8:
7856 case 4:
7857 case 12:
7858 if (! ANY_FP_REG_P (x))
7859 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7860 /* FALLTHRU */
7861 case 16:
7862 case 2:
7863 normal:
7864 fputs (hi_reg_name[REGNO (x)], file);
7865 break;
7866 case 1:
7867 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7868 goto normal;
7869 fputs (qi_reg_name[REGNO (x)], file);
7870 break;
7871 case 0:
7872 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7873 goto normal;
7874 fputs (qi_high_reg_name[REGNO (x)], file);
7875 break;
7876 default:
7877 gcc_unreachable ();
7881 /* Locate some local-dynamic symbol still in use by this function
7882 so that we can print its name in some tls_local_dynamic_base
7883 pattern. */
7885 static const char *
7886 get_some_local_dynamic_name (void)
7888 rtx insn;
7890 if (cfun->machine->some_ld_name)
7891 return cfun->machine->some_ld_name;
7893 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7894 if (INSN_P (insn)
7895 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7896 return cfun->machine->some_ld_name;
7898 gcc_unreachable ();
7901 static int
7902 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7904 rtx x = *px;
7906 if (GET_CODE (x) == SYMBOL_REF
7907 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7909 cfun->machine->some_ld_name = XSTR (x, 0);
7910 return 1;
7913 return 0;
7916 /* Meaning of CODE:
7917 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7918 C -- print opcode suffix for set/cmov insn.
7919 c -- like C, but print reversed condition
7920 F,f -- likewise, but for floating-point.
7921 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7922 otherwise nothing
7923 R -- print the prefix for register names.
7924 z -- print the opcode suffix for the size of the current operand.
7925 * -- print a star (in certain assembler syntax)
7926 A -- print an absolute memory reference.
7927 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7928 s -- print a shift double count, followed by the assembler's argument
7929 delimiter.
7930 b -- print the QImode name of the register for the indicated operand.
7931 %b0 would print %al if operands[0] is reg 0.
7932 w -- likewise, print the HImode name of the register.
7933 k -- likewise, print the SImode name of the register.
7934 q -- likewise, print the DImode name of the register.
7935 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7936 y -- print "st(0)" instead of "st" as a register.
7937 D -- print condition for SSE cmp instruction.
7938 P -- if PIC, print an @PLT suffix.
7939 X -- don't print any sort of PIC '@' suffix for a symbol.
7940 & -- print some in-use local-dynamic symbol name.
7941 H -- print a memory address offset by 8; used for sse high-parts
7944 void
7945 print_operand (FILE *file, rtx x, int code)
7947 if (code)
7949 switch (code)
7951 case '*':
7952 if (ASSEMBLER_DIALECT == ASM_ATT)
7953 putc ('*', file);
7954 return;
7956 case '&':
7957 assemble_name (file, get_some_local_dynamic_name ());
7958 return;
7960 case 'A':
7961 switch (ASSEMBLER_DIALECT)
7963 case ASM_ATT:
7964 putc ('*', file);
7965 break;
7967 case ASM_INTEL:
7968 /* Intel syntax. For absolute addresses, registers should not
7969 be surrounded by brackets. */
7970 if (GET_CODE (x) != REG)
7972 putc ('[', file);
7973 PRINT_OPERAND (file, x, 0);
7974 putc (']', file);
7975 return;
7977 break;
7979 default:
7980 gcc_unreachable ();
7983 PRINT_OPERAND (file, x, 0);
7984 return;
7987 case 'L':
7988 if (ASSEMBLER_DIALECT == ASM_ATT)
7989 putc ('l', file);
7990 return;
7992 case 'W':
7993 if (ASSEMBLER_DIALECT == ASM_ATT)
7994 putc ('w', file);
7995 return;
7997 case 'B':
7998 if (ASSEMBLER_DIALECT == ASM_ATT)
7999 putc ('b', file);
8000 return;
8002 case 'Q':
8003 if (ASSEMBLER_DIALECT == ASM_ATT)
8004 putc ('l', file);
8005 return;
8007 case 'S':
8008 if (ASSEMBLER_DIALECT == ASM_ATT)
8009 putc ('s', file);
8010 return;
8012 case 'T':
8013 if (ASSEMBLER_DIALECT == ASM_ATT)
8014 putc ('t', file);
8015 return;
8017 case 'z':
8018 /* 387 opcodes don't get size suffixes if the operands are
8019 registers. */
8020 if (STACK_REG_P (x))
8021 return;
8023 /* Likewise if using Intel opcodes. */
8024 if (ASSEMBLER_DIALECT == ASM_INTEL)
8025 return;
8027 /* Derive the opcode size suffix from the size of the operand. */
8028 switch (GET_MODE_SIZE (GET_MODE (x)))
8030 case 2:
8031 #ifdef HAVE_GAS_FILDS_FISTS
8032 putc ('s', file);
8033 #endif
8034 return;
8036 case 4:
8037 if (GET_MODE (x) == SFmode)
8039 putc ('s', file);
8040 return;
8042 else
8043 putc ('l', file);
8044 return;
8046 case 12:
8047 case 16:
8048 putc ('t', file);
8049 return;
8051 case 8:
8052 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8054 #ifdef GAS_MNEMONICS
8055 putc ('q', file);
8056 #else
8057 putc ('l', file);
8058 putc ('l', file);
8059 #endif
8061 else
8062 putc ('l', file);
8063 return;
8065 default:
8066 gcc_unreachable ();
8069 case 'b':
8070 case 'w':
8071 case 'k':
8072 case 'q':
8073 case 'h':
8074 case 'y':
8075 case 'X':
8076 case 'P':
8077 break;
8079 case 's':
8080 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8082 PRINT_OPERAND (file, x, 0);
8083 putc (',', file);
8085 return;
8087 case 'D':
8088 /* Little bit of braindamage here. The SSE compare instructions
8089 use completely different names for the comparisons than the
8090 fp conditional moves do. */
8091 switch (GET_CODE (x))
8093 case EQ:
8094 case UNEQ:
8095 fputs ("eq", file);
8096 break;
8097 case LT:
8098 case UNLT:
8099 fputs ("lt", file);
8100 break;
8101 case LE:
8102 case UNLE:
8103 fputs ("le", file);
8104 break;
8105 case UNORDERED:
8106 fputs ("unord", file);
8107 break;
8108 case NE:
8109 case LTGT:
8110 fputs ("neq", file);
8111 break;
8112 case UNGE:
8113 case GE:
8114 fputs ("nlt", file);
8115 break;
8116 case UNGT:
8117 case GT:
8118 fputs ("nle", file);
8119 break;
8120 case ORDERED:
8121 fputs ("ord", file);
8122 break;
8123 default:
8124 gcc_unreachable ();
8126 return;
8127 case 'O':
8128 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8129 if (ASSEMBLER_DIALECT == ASM_ATT)
8131 switch (GET_MODE (x))
8133 case HImode: putc ('w', file); break;
8134 case SImode:
8135 case SFmode: putc ('l', file); break;
8136 case DImode:
8137 case DFmode: putc ('q', file); break;
8138 default: gcc_unreachable ();
8140 putc ('.', file);
8142 #endif
8143 return;
8144 case 'C':
8145 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8146 return;
8147 case 'F':
8148 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8149 if (ASSEMBLER_DIALECT == ASM_ATT)
8150 putc ('.', file);
8151 #endif
8152 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8153 return;
8155 /* Like above, but reverse condition */
8156 case 'c':
8157 /* Check to see if argument to %c is really a constant
8158 and not a condition code which needs to be reversed. */
8159 if (!COMPARISON_P (x))
8161 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8162 return;
8164 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8165 return;
8166 case 'f':
8167 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8168 if (ASSEMBLER_DIALECT == ASM_ATT)
8169 putc ('.', file);
8170 #endif
8171 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8172 return;
8174 case 'H':
8175 /* It doesn't actually matter what mode we use here, as we're
8176 only going to use this for printing. */
8177 x = adjust_address_nv (x, DImode, 8);
8178 break;
8180 case '+':
8182 rtx x;
8184 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8185 return;
8187 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8188 if (x)
8190 int pred_val = INTVAL (XEXP (x, 0));
8192 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8193 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8195 int taken = pred_val > REG_BR_PROB_BASE / 2;
8196 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8198 /* Emit hints only when the default branch prediction
8199 heuristics would fail. */
8200 if (taken != cputaken)
8202 /* We use 3e (DS) prefix for taken branches and
8203 2e (CS) prefix for not taken branches. */
8204 if (taken)
8205 fputs ("ds ; ", file);
8206 else
8207 fputs ("cs ; ", file);
8211 return;
8213 default:
8214 output_operand_lossage ("invalid operand code '%c'", code);
8218 if (GET_CODE (x) == REG)
8219 print_reg (x, code, file);
8221 else if (GET_CODE (x) == MEM)
8223 /* No `byte ptr' prefix for call instructions. */
8224 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8226 const char * size;
8227 switch (GET_MODE_SIZE (GET_MODE (x)))
8229 case 1: size = "BYTE"; break;
8230 case 2: size = "WORD"; break;
8231 case 4: size = "DWORD"; break;
8232 case 8: size = "QWORD"; break;
8233 case 12: size = "XWORD"; break;
8234 case 16: size = "XMMWORD"; break;
8235 default:
8236 gcc_unreachable ();
8239 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8240 if (code == 'b')
8241 size = "BYTE";
8242 else if (code == 'w')
8243 size = "WORD";
8244 else if (code == 'k')
8245 size = "DWORD";
8247 fputs (size, file);
8248 fputs (" PTR ", file);
8251 x = XEXP (x, 0);
8252 /* Avoid (%rip) for call operands. */
8253 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8254 && GET_CODE (x) != CONST_INT)
8255 output_addr_const (file, x);
8256 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8257 output_operand_lossage ("invalid constraints for operand");
8258 else
8259 output_address (x);
8262 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8264 REAL_VALUE_TYPE r;
8265 long l;
8267 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8268 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8270 if (ASSEMBLER_DIALECT == ASM_ATT)
8271 putc ('$', file);
8272 fprintf (file, "0x%08lx", l);
8275 /* These float cases don't actually occur as immediate operands. */
8276 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8278 char dstr[30];
8280 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8281 fprintf (file, "%s", dstr);
8284 else if (GET_CODE (x) == CONST_DOUBLE
8285 && GET_MODE (x) == XFmode)
8287 char dstr[30];
8289 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8290 fprintf (file, "%s", dstr);
8293 else
8295 /* We have patterns that allow zero sets of memory, for instance.
8296 In 64-bit mode, we should probably support all 8-byte vectors,
8297 since we can in fact encode that into an immediate. */
8298 if (GET_CODE (x) == CONST_VECTOR)
8300 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8301 x = const0_rtx;
8304 if (code != 'P')
8306 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8308 if (ASSEMBLER_DIALECT == ASM_ATT)
8309 putc ('$', file);
8311 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8312 || GET_CODE (x) == LABEL_REF)
8314 if (ASSEMBLER_DIALECT == ASM_ATT)
8315 putc ('$', file);
8316 else
8317 fputs ("OFFSET FLAT:", file);
8320 if (GET_CODE (x) == CONST_INT)
8321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8322 else if (flag_pic)
8323 output_pic_addr_const (file, x, code);
8324 else
8325 output_addr_const (file, x);
8329 /* Print a memory operand whose address is ADDR. */
8331 void
8332 print_operand_address (FILE *file, rtx addr)
8334 struct ix86_address parts;
8335 rtx base, index, disp;
8336 int scale;
8337 int ok = ix86_decompose_address (addr, &parts);
8339 gcc_assert (ok);
8341 base = parts.base;
8342 index = parts.index;
8343 disp = parts.disp;
8344 scale = parts.scale;
8346 switch (parts.seg)
8348 case SEG_DEFAULT:
8349 break;
8350 case SEG_FS:
8351 case SEG_GS:
8352 if (USER_LABEL_PREFIX[0] == 0)
8353 putc ('%', file);
8354 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8355 break;
8356 default:
8357 gcc_unreachable ();
8360 if (!base && !index)
8362 /* Displacement only requires special attention. */
8364 if (GET_CODE (disp) == CONST_INT)
8366 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8368 if (USER_LABEL_PREFIX[0] == 0)
8369 putc ('%', file);
8370 fputs ("ds:", file);
8372 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8374 else if (flag_pic)
8375 output_pic_addr_const (file, disp, 0);
8376 else
8377 output_addr_const (file, disp);
8379 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
8380 if (TARGET_64BIT)
8382 if (GET_CODE (disp) == CONST
8383 && GET_CODE (XEXP (disp, 0)) == PLUS
8384 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8385 disp = XEXP (XEXP (disp, 0), 0);
8386 if (GET_CODE (disp) == LABEL_REF
8387 || (GET_CODE (disp) == SYMBOL_REF
8388 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8389 fputs ("(%rip)", file);
8392 else
8394 if (ASSEMBLER_DIALECT == ASM_ATT)
8396 if (disp)
8398 if (flag_pic)
8399 output_pic_addr_const (file, disp, 0);
8400 else if (GET_CODE (disp) == LABEL_REF)
8401 output_asm_label (disp);
8402 else
8403 output_addr_const (file, disp);
8406 putc ('(', file);
8407 if (base)
8408 print_reg (base, 0, file);
8409 if (index)
8411 putc (',', file);
8412 print_reg (index, 0, file);
8413 if (scale != 1)
8414 fprintf (file, ",%d", scale);
8416 putc (')', file);
8418 else
8420 rtx offset = NULL_RTX;
8422 if (disp)
8424 /* Pull out the offset of a symbol; print any symbol itself. */
8425 if (GET_CODE (disp) == CONST
8426 && GET_CODE (XEXP (disp, 0)) == PLUS
8427 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8429 offset = XEXP (XEXP (disp, 0), 1);
8430 disp = gen_rtx_CONST (VOIDmode,
8431 XEXP (XEXP (disp, 0), 0));
8434 if (flag_pic)
8435 output_pic_addr_const (file, disp, 0);
8436 else if (GET_CODE (disp) == LABEL_REF)
8437 output_asm_label (disp);
8438 else if (GET_CODE (disp) == CONST_INT)
8439 offset = disp;
8440 else
8441 output_addr_const (file, disp);
8444 putc ('[', file);
8445 if (base)
8447 print_reg (base, 0, file);
8448 if (offset)
8450 if (INTVAL (offset) >= 0)
8451 putc ('+', file);
8452 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8455 else if (offset)
8456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8457 else
8458 putc ('0', file);
8460 if (index)
8462 putc ('+', file);
8463 print_reg (index, 0, file);
8464 if (scale != 1)
8465 fprintf (file, "*%d", scale);
8467 putc (']', file);
8472 bool
8473 output_addr_const_extra (FILE *file, rtx x)
8475 rtx op;
8477 if (GET_CODE (x) != UNSPEC)
8478 return false;
8480 op = XVECEXP (x, 0, 0);
8481 switch (XINT (x, 1))
8483 case UNSPEC_GOTTPOFF:
8484 output_addr_const (file, op);
8485 /* FIXME: This might be @TPOFF in Sun ld. */
8486 fputs ("@GOTTPOFF", file);
8487 break;
8488 case UNSPEC_TPOFF:
8489 output_addr_const (file, op);
8490 fputs ("@TPOFF", file);
8491 break;
8492 case UNSPEC_NTPOFF:
8493 output_addr_const (file, op);
8494 if (TARGET_64BIT)
8495 fputs ("@TPOFF", file);
8496 else
8497 fputs ("@NTPOFF", file);
8498 break;
8499 case UNSPEC_DTPOFF:
8500 output_addr_const (file, op);
8501 fputs ("@DTPOFF", file);
8502 break;
8503 case UNSPEC_GOTNTPOFF:
8504 output_addr_const (file, op);
8505 if (TARGET_64BIT)
8506 fputs ("@GOTTPOFF(%rip)", file);
8507 else
8508 fputs ("@GOTNTPOFF", file);
8509 break;
8510 case UNSPEC_INDNTPOFF:
8511 output_addr_const (file, op);
8512 fputs ("@INDNTPOFF", file);
8513 break;
8515 default:
8516 return false;
8519 return true;
8522 /* Split one or more DImode RTL references into pairs of SImode
8523 references. The RTL can be REG, offsettable MEM, integer constant, or
8524 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8525 split and "num" is its length. lo_half and hi_half are output arrays
8526 that parallel "operands". */
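/* E.g. a DImode MEM is split into SImode MEMs at offsets 0 and 4, while a
   DImode REG or constant is split via simplify_gen_subreg into its low and
   high SImode words.  */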
8528 void
8529 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8531 while (num--)
8533 rtx op = operands[num];
8535 /* simplify_subreg refuses to split volatile memory addresses,
8536 but we still have to handle them. */
8537 if (GET_CODE (op) == MEM)
8539 lo_half[num] = adjust_address (op, SImode, 0);
8540 hi_half[num] = adjust_address (op, SImode, 4);
8542 else
8544 lo_half[num] = simplify_gen_subreg (SImode, op,
8545 GET_MODE (op) == VOIDmode
8546 ? DImode : GET_MODE (op), 0);
8547 hi_half[num] = simplify_gen_subreg (SImode, op,
8548 GET_MODE (op) == VOIDmode
8549 ? DImode : GET_MODE (op), 4);
8553 /* Split one or more TImode RTL references into pairs of DImode
8554 references. The RTL can be REG, offsettable MEM, integer constant, or
8555 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8556 split and "num" is its length. lo_half and hi_half are output arrays
8557 that parallel "operands". */
8559 void
8560 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8562 while (num--)
8564 rtx op = operands[num];
8566 /* simplify_subreg refuses to split volatile memory addresses, but we
8567 still have to handle them. */
8568 if (GET_CODE (op) == MEM)
8570 lo_half[num] = adjust_address (op, DImode, 0);
8571 hi_half[num] = adjust_address (op, DImode, 8);
8573 else
8575 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8576 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8581 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8582 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8583 is the expression of the binary operation. The output may either be
8584 emitted here, or returned to the caller, like all output_* functions.
8586 There is no guarantee that the operands are the same mode, as they
8587 might be within FLOAT or FLOAT_EXTEND expressions. */
8589 #ifndef SYSV386_COMPAT
8590 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8591 wants to fix the assemblers because that causes incompatibility
8592 with gcc. No-one wants to fix gcc because that causes
8593 incompatibility with assemblers... You can use the option of
8594 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8595 #define SYSV386_COMPAT 1
8596 #endif
8598 const char *
8599 output_387_binary_op (rtx insn, rtx *operands)
8601 static char buf[30];
8602 const char *p;
8603 const char *ssep;
8604 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8606 #ifdef ENABLE_CHECKING
8607 /* Even if we do not want to check the inputs, this documents the input
8608 constraints, which helps in understanding the following code. */
8609 if (STACK_REG_P (operands[0])
8610 && ((REG_P (operands[1])
8611 && REGNO (operands[0]) == REGNO (operands[1])
8612 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8613 || (REG_P (operands[2])
8614 && REGNO (operands[0]) == REGNO (operands[2])
8615 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8616 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8617 ; /* ok */
8618 else
8619 gcc_assert (is_sse);
8620 #endif
8622 switch (GET_CODE (operands[3]))
8624 case PLUS:
8625 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8626 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8627 p = "fiadd";
8628 else
8629 p = "fadd";
8630 ssep = "add";
8631 break;
8633 case MINUS:
8634 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8635 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8636 p = "fisub";
8637 else
8638 p = "fsub";
8639 ssep = "sub";
8640 break;
8642 case MULT:
8643 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8644 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8645 p = "fimul";
8646 else
8647 p = "fmul";
8648 ssep = "mul";
8649 break;
8651 case DIV:
8652 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8653 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8654 p = "fidiv";
8655 else
8656 p = "fdiv";
8657 ssep = "div";
8658 break;
8660 default:
8661 gcc_unreachable ();
8664 if (is_sse)
8666 strcpy (buf, ssep);
8667 if (GET_MODE (operands[0]) == SFmode)
8668 strcat (buf, "ss\t{%2, %0|%0, %2}");
8669 else
8670 strcat (buf, "sd\t{%2, %0|%0, %2}");
8671 return buf;
8673 strcpy (buf, p);
8675 switch (GET_CODE (operands[3]))
8677 case MULT:
8678 case PLUS:
8679 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8681 rtx temp = operands[2];
8682 operands[2] = operands[1];
8683 operands[1] = temp;
8686 /* We now know operands[0] == operands[1]. */
8688 if (GET_CODE (operands[2]) == MEM)
8690 p = "%z2\t%2";
8691 break;
8694 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8696 if (STACK_TOP_P (operands[0]))
8697 /* How is it that we are storing to a dead operand[2]?
8698 Well, presumably operands[1] is dead too. We can't
8699 store the result to st(0) as st(0) gets popped on this
8700 instruction. Instead store to operands[2] (which I
8701 think has to be st(1)). st(1) will be popped later.
8702 gcc <= 2.8.1 didn't have this check and generated
8703 assembly code that the Unixware assembler rejected. */
8704 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8705 else
8706 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8707 break;
8710 if (STACK_TOP_P (operands[0]))
8711 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8712 else
8713 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8714 break;
8716 case MINUS:
8717 case DIV:
8718 if (GET_CODE (operands[1]) == MEM)
8720 p = "r%z1\t%1";
8721 break;
8724 if (GET_CODE (operands[2]) == MEM)
8726 p = "%z2\t%2";
8727 break;
8730 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8732 #if SYSV386_COMPAT
8733 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8734 derived assemblers, confusingly reverse the direction of
8735 the operation for fsub{r} and fdiv{r} when the
8736 destination register is not st(0). The Intel assembler
8737 doesn't have this brain damage. Read !SYSV386_COMPAT to
8738 figure out what the hardware really does. */
8739 if (STACK_TOP_P (operands[0]))
8740 p = "{p\t%0, %2|rp\t%2, %0}";
8741 else
8742 p = "{rp\t%2, %0|p\t%0, %2}";
8743 #else
8744 if (STACK_TOP_P (operands[0]))
8745 /* As above for fmul/fadd, we can't store to st(0). */
8746 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8747 else
8748 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8749 #endif
8750 break;
8753 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8755 #if SYSV386_COMPAT
8756 if (STACK_TOP_P (operands[0]))
8757 p = "{rp\t%0, %1|p\t%1, %0}";
8758 else
8759 p = "{p\t%1, %0|rp\t%0, %1}";
8760 #else
8761 if (STACK_TOP_P (operands[0]))
8762 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8763 else
8764 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8765 #endif
8766 break;
8769 if (STACK_TOP_P (operands[0]))
8771 if (STACK_TOP_P (operands[1]))
8772 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8773 else
8774 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8775 break;
8777 else if (STACK_TOP_P (operands[1]))
8779 #if SYSV386_COMPAT
8780 p = "{\t%1, %0|r\t%0, %1}";
8781 #else
8782 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8783 #endif
8785 else
8787 #if SYSV386_COMPAT
8788 p = "{r\t%2, %0|\t%0, %2}";
8789 #else
8790 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8791 #endif
8793 break;
8795 default:
8796 gcc_unreachable ();
8799 strcat (buf, p);
8800 return buf;
8803 /* Return needed mode for entity in optimize_mode_switching pass. */
8806 ix86_mode_needed (int entity, rtx insn)
8808 enum attr_i387_cw mode;
8810 /* The mode UNINITIALIZED is used to store the control word after a
8811 function call or ASM pattern. The mode ANY specifies that the function
8812 has no requirements on the control word and makes no changes in the
8813 bits we are interested in. */
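/* For example (illustrative), a fix_trunc insn whose i387_cw attribute is
   "trunc" reports I387_CW_TRUNC here, and the mode-switching pass then
   arranges for the round-toward-zero control word prepared by
   emit_i387_cw_initialization to be loaded (fldcw) before that insn.  */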
8815 if (CALL_P (insn)
8816 || (NONJUMP_INSN_P (insn)
8817 && (asm_noperands (PATTERN (insn)) >= 0
8818 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8819 return I387_CW_UNINITIALIZED;
8821 if (recog_memoized (insn) < 0)
8822 return I387_CW_ANY;
8824 mode = get_attr_i387_cw (insn);
8826 switch (entity)
8828 case I387_TRUNC:
8829 if (mode == I387_CW_TRUNC)
8830 return mode;
8831 break;
8833 case I387_FLOOR:
8834 if (mode == I387_CW_FLOOR)
8835 return mode;
8836 break;
8838 case I387_CEIL:
8839 if (mode == I387_CW_CEIL)
8840 return mode;
8841 break;
8843 case I387_MASK_PM:
8844 if (mode == I387_CW_MASK_PM)
8845 return mode;
8846 break;
8848 default:
8849 gcc_unreachable ();
8852 return I387_CW_ANY;
8855 /* Output code to initialize the control word copies used by the trunc?f?i
8856 and rounding patterns. MODE selects which rounding or exception-masking
8857 variant of the control word to prepare in its stack slot. */
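/* Background on the magic constants below: bits 10-11 of the x87 control
   word are the rounding control (01 = round down = 0x0400, 10 = round up
   = 0x0800, 11 = toward zero = 0x0c00), and bit 5 (0x0020) masks the
   precision exception.  */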
8859 void
8860 emit_i387_cw_initialization (int mode)
8862 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8863 rtx new_mode;
8865 int slot;
8867 rtx reg = gen_reg_rtx (HImode);
8869 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8870 emit_move_insn (reg, copy_rtx (stored_mode));
8872 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8874 switch (mode)
8876 case I387_CW_TRUNC:
8877 /* round toward zero (truncate) */
8878 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8879 slot = SLOT_CW_TRUNC;
8880 break;
8882 case I387_CW_FLOOR:
8883 /* round down toward -oo */
8884 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8885 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8886 slot = SLOT_CW_FLOOR;
8887 break;
8889 case I387_CW_CEIL:
8890 /* round up toward +oo */
8891 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8892 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8893 slot = SLOT_CW_CEIL;
8894 break;
8896 case I387_CW_MASK_PM:
8897 /* mask precision exception for nearbyint() */
8898 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8899 slot = SLOT_CW_MASK_PM;
8900 break;
8902 default:
8903 gcc_unreachable ();
8906 else
8908 switch (mode)
8910 case I387_CW_TRUNC:
8911 /* round toward zero (truncate) */
8912 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8913 slot = SLOT_CW_TRUNC;
8914 break;
8916 case I387_CW_FLOOR:
8917 /* round down toward -oo */
8918 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8919 slot = SLOT_CW_FLOOR;
8920 break;
8922 case I387_CW_CEIL:
8923 /* round up toward +oo */
8924 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8925 slot = SLOT_CW_CEIL;
8926 break;
8928 case I387_CW_MASK_PM:
8929 /* mask precision exception for nearbyint() */
8930 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8931 slot = SLOT_CW_MASK_PM;
8932 break;
8934 default:
8935 gcc_unreachable ();
8939 gcc_assert (slot < MAX_386_STACK_LOCALS);
8941 new_mode = assign_386_stack_local (HImode, slot);
8942 emit_move_insn (new_mode, reg);
8945 /* Output code for INSN to convert a float to a signed int. OPERANDS
8946 are the insn operands. The output may be [HSD]Imode and the input
8947 operand may be [SDX]Fmode. */
8949 const char *
8950 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8952 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8953 int dimode_p = GET_MODE (operands[0]) == DImode;
8954 int round_mode = get_attr_i387_cw (insn);
8956 /* Jump through a hoop or two for DImode, since the hardware has no
8957 non-popping instruction. We used to do this a different way, but
8958 that was somewhat fragile and broke with post-reload splitters. */
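/* When a popping fistp/fisttp must be used but %st(0) is still live, the
   code below first duplicates it ("fld %st(0)") so that the popping store
   consumes the copy and leaves the original value on the stack.  */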
8959 if ((dimode_p || fisttp) && !stack_top_dies)
8960 output_asm_insn ("fld\t%y1", operands);
8962 gcc_assert (STACK_TOP_P (operands[1]));
8963 gcc_assert (GET_CODE (operands[0]) == MEM);
8965 if (fisttp)
8966 output_asm_insn ("fisttp%z0\t%0", operands);
8967 else
8969 if (round_mode != I387_CW_ANY)
8970 output_asm_insn ("fldcw\t%3", operands);
8971 if (stack_top_dies || dimode_p)
8972 output_asm_insn ("fistp%z0\t%0", operands);
8973 else
8974 output_asm_insn ("fist%z0\t%0", operands);
8975 if (round_mode != I387_CW_ANY)
8976 output_asm_insn ("fldcw\t%2", operands);
8979 return "";
8982 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8983 have the values zero or one, indicates the ffreep insn's operand
8984 from the OPERANDS array. */
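/* Note on the fallback below (an explanatory sketch, not new behavior):
   when the assembler does not know the ffreep mnemonic, the raw encoding
   0xdf 0xc0+i of "ffreep %st(i)" is emitted as a little-endian .word,
   hence the "0xc_df" template whose '_' is patched with the register
   number.  */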
8986 static const char *
8987 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8989 if (TARGET_USE_FFREEP)
8990 #if HAVE_AS_IX86_FFREEP
8991 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8992 #else
8994 static char retval[] = ".word\t0xc_df";
8995 int regno = REGNO (operands[opno]);
8997 gcc_assert (FP_REGNO_P (regno));
8999 retval[9] = '0' + (regno - FIRST_STACK_REG);
9000 return retval;
9002 #endif
9004 return opno ? "fstp\t%y1" : "fstp\t%y0";
9008 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9009 should be used. UNORDERED_P is true when fucom should be used. */
9011 const char *
9012 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9014 int stack_top_dies;
9015 rtx cmp_op0, cmp_op1;
9016 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9018 if (eflags_p)
9020 cmp_op0 = operands[0];
9021 cmp_op1 = operands[1];
9023 else
9025 cmp_op0 = operands[1];
9026 cmp_op1 = operands[2];
9029 if (is_sse)
9031 if (GET_MODE (operands[0]) == SFmode)
9032 if (unordered_p)
9033 return "ucomiss\t{%1, %0|%0, %1}";
9034 else
9035 return "comiss\t{%1, %0|%0, %1}";
9036 else
9037 if (unordered_p)
9038 return "ucomisd\t{%1, %0|%0, %1}";
9039 else
9040 return "comisd\t{%1, %0|%0, %1}";
9043 gcc_assert (STACK_TOP_P (cmp_op0));
9045 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9047 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9049 if (stack_top_dies)
9051 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9052 return output_387_ffreep (operands, 1);
9054 else
9055 return "ftst\n\tfnstsw\t%0";
9058 if (STACK_REG_P (cmp_op1)
9059 && stack_top_dies
9060 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9061 && REGNO (cmp_op1) != FIRST_STACK_REG)
9063 /* If the top of the 387 stack dies, and the other operand is
9064 also a stack register that dies, then this must be an
9065 `fcompp' float compare. */
9067 if (eflags_p)
9069 /* There is no double popping fcomi variant. Fortunately,
9070 eflags is immune from the fstp's cc clobbering. */
9071 if (unordered_p)
9072 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9073 else
9074 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9075 return output_387_ffreep (operands, 0);
9077 else
9079 if (unordered_p)
9080 return "fucompp\n\tfnstsw\t%0";
9081 else
9082 return "fcompp\n\tfnstsw\t%0";
9085 else
9087 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
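/* E.g. eflags_p = 1, an FP cmp_op1, an ordered compare, and a dying
   top-of-stack give mask 9, selecting "fcomip" from the table below.  */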
9089 static const char * const alt[16] =
9091 "fcom%z2\t%y2\n\tfnstsw\t%0",
9092 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9093 "fucom%z2\t%y2\n\tfnstsw\t%0",
9094 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9096 "ficom%z2\t%y2\n\tfnstsw\t%0",
9097 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9098 NULL,
9099 NULL,
9101 "fcomi\t{%y1, %0|%0, %y1}",
9102 "fcomip\t{%y1, %0|%0, %y1}",
9103 "fucomi\t{%y1, %0|%0, %y1}",
9104 "fucomip\t{%y1, %0|%0, %y1}",
9106 NULL,
9107 NULL,
9108 NULL,
9109 NULL
9112 int mask;
9113 const char *ret;
9115 mask = eflags_p << 3;
9116 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9117 mask |= unordered_p << 1;
9118 mask |= stack_top_dies;
9120 gcc_assert (mask < 16);
9121 ret = alt[mask];
9122 gcc_assert (ret);
9124 return ret;
9128 void
9129 ix86_output_addr_vec_elt (FILE *file, int value)
9131 const char *directive = ASM_LONG;
9133 #ifdef ASM_QUAD
9134 if (TARGET_64BIT)
9135 directive = ASM_QUAD;
9136 #else
9137 gcc_assert (!TARGET_64BIT);
9138 #endif
9140 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9143 void
9144 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9146 if (TARGET_64BIT)
9147 fprintf (file, "%s%s%d-%s%d\n",
9148 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9149 else if (HAVE_AS_GOTOFF_IN_DATA)
9150 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9151 #if TARGET_MACHO
9152 else if (TARGET_MACHO)
9154 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9155 machopic_output_function_base_name (file);
9156 fprintf(file, "\n");
9158 #endif
9159 else
9160 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9161 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9164 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9165 for the target. */
9167 void
9168 ix86_expand_clear (rtx dest)
9170 rtx tmp;
9172 /* We play register width games, which are only valid after reload. */
9173 gcc_assert (reload_completed);
9175 /* Avoid HImode and its attendant prefix byte. */
9176 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9177 dest = gen_rtx_REG (SImode, REGNO (dest));
9179 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9181 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9182 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9184 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9185 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9188 emit_insn (tmp);
9191 /* X is an unchanging MEM. If it is a constant pool reference, return
9192 the constant pool rtx, else NULL. */
9195 maybe_get_pool_constant (rtx x)
9197 x = ix86_delegitimize_address (XEXP (x, 0));
9199 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9200 return get_pool_constant (x);
9202 return NULL_RTX;
9205 void
9206 ix86_expand_move (enum machine_mode mode, rtx operands[])
9208 int strict = (reload_in_progress || reload_completed);
9209 rtx op0, op1;
9210 enum tls_model model;
9212 op0 = operands[0];
9213 op1 = operands[1];
9215 if (GET_CODE (op1) == SYMBOL_REF)
9217 model = SYMBOL_REF_TLS_MODEL (op1);
9218 if (model)
9220 op1 = legitimize_tls_address (op1, model, true);
9221 op1 = force_operand (op1, op0);
9222 if (op1 == op0)
9223 return;
9226 else if (GET_CODE (op1) == CONST
9227 && GET_CODE (XEXP (op1, 0)) == PLUS
9228 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9230 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9231 if (model)
9233 rtx addend = XEXP (XEXP (op1, 0), 1);
9234 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9235 op1 = force_operand (op1, NULL);
9236 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9237 op0, 1, OPTAB_DIRECT);
9238 if (op1 == op0)
9239 return;
9243 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9245 if (TARGET_MACHO && !TARGET_64BIT)
9247 #if TARGET_MACHO
9248 if (MACHOPIC_PURE)
9250 rtx temp = ((reload_in_progress
9251 || ((op0 && GET_CODE (op0) == REG)
9252 && mode == Pmode))
9253 ? op0 : gen_reg_rtx (Pmode));
9254 op1 = machopic_indirect_data_reference (op1, temp);
9255 op1 = machopic_legitimize_pic_address (op1, mode,
9256 temp == op1 ? 0 : temp);
9258 else if (MACHOPIC_INDIRECT)
9259 op1 = machopic_indirect_data_reference (op1, 0);
9260 if (op0 == op1)
9261 return;
9262 #endif
9264 else
9266 if (GET_CODE (op0) == MEM)
9267 op1 = force_reg (Pmode, op1);
9268 else
9269 op1 = legitimize_address (op1, op1, Pmode);
9272 else
9274 if (GET_CODE (op0) == MEM
9275 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9276 || !push_operand (op0, mode))
9277 && GET_CODE (op1) == MEM)
9278 op1 = force_reg (mode, op1);
9280 if (push_operand (op0, mode)
9281 && ! general_no_elim_operand (op1, mode))
9282 op1 = copy_to_mode_reg (mode, op1);
9284 /* Force large constants in 64-bit compilation into a register
9285 to get them CSEed. */
9286 if (TARGET_64BIT && mode == DImode
9287 && immediate_operand (op1, mode)
9288 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9289 && !register_operand (op0, mode)
9290 && optimize && !reload_completed && !reload_in_progress)
9291 op1 = copy_to_mode_reg (mode, op1);
9293 if (FLOAT_MODE_P (mode))
9295 /* If we are loading a floating point constant to a register,
9296 force the value to memory now, since we'll get better code
9297 out the back end. */
9299 if (strict)
9301 else if (GET_CODE (op1) == CONST_DOUBLE)
9303 op1 = validize_mem (force_const_mem (mode, op1));
9304 if (!register_operand (op0, mode))
9306 rtx temp = gen_reg_rtx (mode);
9307 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9308 emit_move_insn (op0, temp);
9309 return;
9315 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
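/* Expand a vector move.  Nontrivial constants are forced into the
   constant pool, and at least one operand is made a register before
   the SET is emitted.  */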
9318 void
9319 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9321 rtx op0 = operands[0], op1 = operands[1];
9323 /* Force constants other than zero into memory. We do not know how
9324 the instructions used to build constants modify the upper 64 bits
9325 of the register; once we have that information we may be able
9326 to handle some of them more efficiently. */
9327 if ((reload_in_progress | reload_completed) == 0
9328 && register_operand (op0, mode)
9329 && CONSTANT_P (op1)
9330 && standard_sse_constant_p (op1) <= 0)
9331 op1 = validize_mem (force_const_mem (mode, op1));
9333 /* Make operand1 a register if it isn't already. */
9334 if (!no_new_pseudos
9335 && !register_operand (op0, mode)
9336 && !register_operand (op1, mode))
9338 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9339 return;
9342 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9345 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9346 straight to ix86_expand_vector_move. */
9348 void
9349 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9351 rtx op0, op1, m;
9353 op0 = operands[0];
9354 op1 = operands[1];
9356 if (MEM_P (op1))
9358 /* If we're optimizing for size, movups is the smallest. */
9359 if (optimize_size)
9361 op0 = gen_lowpart (V4SFmode, op0);
9362 op1 = gen_lowpart (V4SFmode, op1);
9363 emit_insn (gen_sse_movups (op0, op1));
9364 return;
9367 /* ??? If we have typed data, then it would appear that using
9368 movdqu is the only way to get unaligned data loaded with
9369 integer type. */
9370 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9372 op0 = gen_lowpart (V16QImode, op0);
9373 op1 = gen_lowpart (V16QImode, op1);
9374 emit_insn (gen_sse2_movdqu (op0, op1));
9375 return;
9378 if (TARGET_SSE2 && mode == V2DFmode)
9380 rtx zero;
9382 /* When SSE registers are split into halves, we can avoid
9383 writing to the top half twice. */
9384 if (TARGET_SSE_SPLIT_REGS)
9386 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9387 zero = op0;
9389 else
9391 /* ??? Not sure about the best option for the Intel chips.
9392 The following would seem to satisfy; the register is
9393 entirely cleared, breaking the dependency chain. We
9394 then store to the upper half, with a dependency depth
9395 of one. A rumor has it that Intel recommends two movsd
9396 followed by an unpacklpd, but this is unconfirmed. And
9397 given that the dependency depth of the unpacklpd would
9398 still be one, I'm not sure why this would be better. */
9399 zero = CONST0_RTX (V2DFmode);
9402 m = adjust_address (op1, DFmode, 0);
9403 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9404 m = adjust_address (op1, DFmode, 8);
9405 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9407 else
9409 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9410 emit_move_insn (op0, CONST0_RTX (mode));
9411 else
9412 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9414 if (mode != V4SFmode)
9415 op0 = gen_lowpart (V4SFmode, op0);
9416 m = adjust_address (op1, V2SFmode, 0);
9417 emit_insn (gen_sse_loadlps (op0, op0, m));
9418 m = adjust_address (op1, V2SFmode, 8);
9419 emit_insn (gen_sse_loadhps (op0, op0, m));
9422 else if (MEM_P (op0))
9424 /* If we're optimizing for size, movups is the smallest. */
9425 if (optimize_size)
9427 op0 = gen_lowpart (V4SFmode, op0);
9428 op1 = gen_lowpart (V4SFmode, op1);
9429 emit_insn (gen_sse_movups (op0, op1));
9430 return;
9433 /* ??? Similar to the above, only less clear because of "typeless
9434 stores". */
9435 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9436 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9438 op0 = gen_lowpart (V16QImode, op0);
9439 op1 = gen_lowpart (V16QImode, op1);
9440 emit_insn (gen_sse2_movdqu (op0, op1));
9441 return;
9444 if (TARGET_SSE2 && mode == V2DFmode)
9446 m = adjust_address (op0, DFmode, 0);
9447 emit_insn (gen_sse2_storelpd (m, op1));
9448 m = adjust_address (op0, DFmode, 8);
9449 emit_insn (gen_sse2_storehpd (m, op1));
9451 else
9453 if (mode != V4SFmode)
9454 op1 = gen_lowpart (V4SFmode, op1);
9455 m = adjust_address (op0, V2SFmode, 0);
9456 emit_insn (gen_sse_storelps (m, op1));
9457 m = adjust_address (op0, V2SFmode, 8);
9458 emit_insn (gen_sse_storehps (m, op1));
9461 else
9462 gcc_unreachable ();
9465 /* Expand a push in MODE. This is some mode for which we do not support
9466 proper push instructions, at least from the registers that we expect
9467 the value to live in. */
9469 void
9470 ix86_expand_push (enum machine_mode mode, rtx x)
9472 rtx tmp;
9474 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9475 GEN_INT (-GET_MODE_SIZE (mode)),
9476 stack_pointer_rtx, 1, OPTAB_DIRECT);
9477 if (tmp != stack_pointer_rtx)
9478 emit_move_insn (stack_pointer_rtx, tmp);
9480 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9481 emit_move_insn (tmp, x);
9484 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9485 destination to use for the operation. If different from the true
9486 destination in operands[0], a copy operation will be required. */
9489 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9490 rtx operands[])
9492 int matching_memory;
9493 rtx src1, src2, dst;
9495 dst = operands[0];
9496 src1 = operands[1];
9497 src2 = operands[2];
9499 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9500 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9501 && (rtx_equal_p (dst, src2)
9502 || immediate_operand (src1, mode)))
9504 rtx temp = src1;
9505 src1 = src2;
9506 src2 = temp;
9509 /* If the destination is memory, and we do not have matching source
9510 operands, do things in registers. */
9511 matching_memory = 0;
9512 if (GET_CODE (dst) == MEM)
9514 if (rtx_equal_p (dst, src1))
9515 matching_memory = 1;
9516 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9517 && rtx_equal_p (dst, src2))
9518 matching_memory = 2;
9519 else
9520 dst = gen_reg_rtx (mode);
9523 /* Both source operands cannot be in memory. */
9524 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9526 if (matching_memory != 2)
9527 src2 = force_reg (mode, src2);
9528 else
9529 src1 = force_reg (mode, src1);
9532 /* If the operation is not commutable, source 1 cannot be a constant
9533 or non-matching memory. */
9534 if ((CONSTANT_P (src1)
9535 || (!matching_memory && GET_CODE (src1) == MEM))
9536 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9537 src1 = force_reg (mode, src1);
9539 src1 = operands[1] = src1;
9540 src2 = operands[2] = src2;
9541 return dst;
9544 /* Similarly, but assume that the destination has already been
9545 set up properly. */
9547 void
9548 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9549 enum machine_mode mode, rtx operands[])
9551 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9552 gcc_assert (dst == operands[0]);
9555 /* Attempt to expand a binary operator. Make the expansion closer to the
9556 actual machine than just general_operand, which will allow 3 separate
9557 memory references (one output, two input) in a single insn. */
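/* As a sketch (not the literal i386.md text), a define_expand such as
   addsi3 typically just does
     ix86_expand_binary_operator (PLUS, SImode, operands); DONE;
   and relies on this routine to massage the operands into a form the
   insn patterns accept.  */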
9559 void
9560 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9561 rtx operands[])
9563 rtx src1, src2, dst, op, clob;
9565 dst = ix86_fixup_binary_operands (code, mode, operands);
9566 src1 = operands[1];
9567 src2 = operands[2];
9569 /* Emit the instruction. */
9571 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9572 if (reload_in_progress)
9574 /* Reload doesn't know about the flags register, and doesn't know that
9575 it doesn't want to clobber it. We can only do this with PLUS. */
9576 gcc_assert (code == PLUS);
9577 emit_insn (op);
9579 else
9581 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9582 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9585 /* Fix up the destination if needed. */
9586 if (dst != operands[0])
9587 emit_move_insn (operands[0], dst);
9590 /* Return TRUE or FALSE depending on whether the binary operator meets the
9591 appropriate constraints. */
9594 ix86_binary_operator_ok (enum rtx_code code,
9595 enum machine_mode mode ATTRIBUTE_UNUSED,
9596 rtx operands[3])
9598 /* Both source operands cannot be in memory. */
9599 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9600 return 0;
9601 /* If the operation is not commutable, source 1 cannot be a constant. */
9602 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9603 return 0;
9604 /* If the destination is memory, we must have a matching source operand. */
9605 if (GET_CODE (operands[0]) == MEM
9606 && ! (rtx_equal_p (operands[0], operands[1])
9607 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9608 && rtx_equal_p (operands[0], operands[2]))))
9609 return 0;
9610 /* If the operation is not commutable and the source 1 is memory, we must
9611 have a matching destination. */
9612 if (GET_CODE (operands[1]) == MEM
9613 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9614 && ! rtx_equal_p (operands[0], operands[1]))
9615 return 0;
9616 return 1;
9619 /* Attempt to expand a unary operator. Make the expansion closer to the
9620 actual machine than just general_operand, which will allow 2 separate
9621 memory references (one output, one input) in a single insn. */
9623 void
9624 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9625 rtx operands[])
9627 int matching_memory;
9628 rtx src, dst, op, clob;
9630 dst = operands[0];
9631 src = operands[1];
9633 /* If the destination is memory, and we do not have matching source
9634 operands, do things in registers. */
9635 matching_memory = 0;
9636 if (MEM_P (dst))
9638 if (rtx_equal_p (dst, src))
9639 matching_memory = 1;
9640 else
9641 dst = gen_reg_rtx (mode);
9644 /* When source operand is memory, destination must match. */
9645 if (MEM_P (src) && !matching_memory)
9646 src = force_reg (mode, src);
9648 /* Emit the instruction. */
9650 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9651 if (reload_in_progress || code == NOT)
9653 /* Reload doesn't know about the flags register, and doesn't know that
9654 it doesn't want to clobber it. */
9655 gcc_assert (code == NOT);
9656 emit_insn (op);
9658 else
9660 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9661 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9664 /* Fix up the destination if needed. */
9665 if (dst != operands[0])
9666 emit_move_insn (operands[0], dst);
9669 /* Return TRUE or FALSE depending on whether the unary operator meets the
9670 appropriate constraints. */
9673 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9674 enum machine_mode mode ATTRIBUTE_UNUSED,
9675 rtx operands[2] ATTRIBUTE_UNUSED)
9677 /* If one of operands is memory, source and destination must match. */
9678 if ((GET_CODE (operands[0]) == MEM
9679 || GET_CODE (operands[1]) == MEM)
9680 && ! rtx_equal_p (operands[0], operands[1]))
9681 return FALSE;
9682 return TRUE;
9685 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9686 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9687 true, then replicate the mask for all elements of the vector register.
9688 If INVERT is true, then create a mask excluding the sign bit. */
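/* For DFmode, for example, this produces 0x8000000000000000 (just the
   sign bit), or its complement 0x7fffffffffffffff when INVERT is true;
   the abs/neg and copysign expanders below use these as AND/XOR masks
   on SSE registers.  */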
9691 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9693 enum machine_mode vec_mode;
9694 HOST_WIDE_INT hi, lo;
9695 int shift = 63;
9696 rtvec v;
9697 rtx mask;
9699 /* Find the sign bit, sign extended to 2*HWI. */
9700 if (mode == SFmode)
9701 lo = 0x80000000, hi = lo < 0;
9702 else if (HOST_BITS_PER_WIDE_INT >= 64)
9703 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9704 else
9705 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9707 if (invert)
9708 lo = ~lo, hi = ~hi;
9710 /* Force this value into the low part of a fp vector constant. */
9711 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9712 mask = gen_lowpart (mode, mask);
9714 if (mode == SFmode)
9716 if (vect)
9717 v = gen_rtvec (4, mask, mask, mask, mask);
9718 else
9719 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9720 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9721 vec_mode = V4SFmode;
9723 else
9725 if (vect)
9726 v = gen_rtvec (2, mask, mask);
9727 else
9728 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9729 vec_mode = V2DFmode;
9732 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9735 /* Generate code for floating point ABS or NEG. */
9737 void
9738 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9739 rtx operands[])
9741 rtx mask, set, use, clob, dst, src;
9742 bool matching_memory;
9743 bool use_sse = false;
9744 bool vector_mode = VECTOR_MODE_P (mode);
9745 enum machine_mode elt_mode = mode;
9747 if (vector_mode)
9749 elt_mode = GET_MODE_INNER (mode);
9750 use_sse = true;
9752 else if (TARGET_SSE_MATH)
9753 use_sse = SSE_FLOAT_MODE_P (mode);
9755 /* NEG and ABS performed with SSE use bitwise mask operations.
9756 Create the appropriate mask now. */
9757 if (use_sse)
9758 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9759 else
9760 mask = NULL_RTX;
9762 dst = operands[0];
9763 src = operands[1];
9765 /* If the destination is memory, and we don't have matching source
9766 operands or we're using the x87, do things in registers. */
9767 matching_memory = false;
9768 if (MEM_P (dst))
9770 if (use_sse && rtx_equal_p (dst, src))
9771 matching_memory = true;
9772 else
9773 dst = gen_reg_rtx (mode);
9775 if (MEM_P (src) && !matching_memory)
9776 src = force_reg (mode, src);
9778 if (vector_mode)
9780 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9781 set = gen_rtx_SET (VOIDmode, dst, set);
9782 emit_insn (set);
9784 else
9786 set = gen_rtx_fmt_e (code, mode, src);
9787 set = gen_rtx_SET (VOIDmode, dst, set);
9788 if (mask)
9790 use = gen_rtx_USE (VOIDmode, mask);
9791 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9792 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9793 gen_rtvec (3, set, use, clob)));
9795 else
9796 emit_insn (set);
9799 if (dst != operands[0])
9800 emit_move_insn (operands[0], dst);
9803 /* Expand a copysign operation. Special case operand 0 being a constant. */
9805 void
9806 ix86_expand_copysign (rtx operands[])
9808 enum machine_mode mode, vmode;
9809 rtx dest, op0, op1, mask, nmask;
9811 dest = operands[0];
9812 op0 = operands[1];
9813 op1 = operands[2];
9815 mode = GET_MODE (dest);
9816 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9818 if (GET_CODE (op0) == CONST_DOUBLE)
9820 rtvec v;
9822 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9823 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9825 if (op0 == CONST0_RTX (mode))
9826 op0 = CONST0_RTX (vmode);
9827 else
9829 if (mode == SFmode)
9830 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9831 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9832 else
9833 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9834 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9837 mask = ix86_build_signbit_mask (mode, 0, 0);
9839 if (mode == SFmode)
9840 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9841 else
9842 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9844 else
9846 nmask = ix86_build_signbit_mask (mode, 0, 1);
9847 mask = ix86_build_signbit_mask (mode, 0, 0);
9849 if (mode == SFmode)
9850 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9851 else
9852 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9856 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9857 be a constant, and so has already been expanded into a vector constant. */
9859 void
9860 ix86_split_copysign_const (rtx operands[])
9862 enum machine_mode mode, vmode;
9863 rtx dest, op0, op1, mask, x;
9865 dest = operands[0];
9866 op0 = operands[1];
9867 op1 = operands[2];
9868 mask = operands[3];
9870 mode = GET_MODE (dest);
9871 vmode = GET_MODE (mask);
9873 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9874 x = gen_rtx_AND (vmode, dest, mask);
9875 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9877 if (op0 != CONST0_RTX (vmode))
9879 x = gen_rtx_IOR (vmode, dest, op0);
9880 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9884 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9885 so we have to do two masks. */
9887 void
9888 ix86_split_copysign_var (rtx operands[])
9890 enum machine_mode mode, vmode;
9891 rtx dest, scratch, op0, op1, mask, nmask, x;
9893 dest = operands[0];
9894 scratch = operands[1];
9895 op0 = operands[2];
9896 op1 = operands[3];
9897 nmask = operands[4];
9898 mask = operands[5];
9900 mode = GET_MODE (dest);
9901 vmode = GET_MODE (mask);
9903 if (rtx_equal_p (op0, op1))
9905 /* Shouldn't happen often (it's useless, obviously), but when it does
9906 we'd generate incorrect code if we continue below. */
9907 emit_move_insn (dest, op0);
9908 return;
9911 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9913 gcc_assert (REGNO (op1) == REGNO (scratch));
9915 x = gen_rtx_AND (vmode, scratch, mask);
9916 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9918 dest = mask;
9919 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9920 x = gen_rtx_NOT (vmode, dest);
9921 x = gen_rtx_AND (vmode, x, op0);
9922 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9924 else
9926 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9928 x = gen_rtx_AND (vmode, scratch, mask);
9930 else /* alternative 2,4 */
9932 gcc_assert (REGNO (mask) == REGNO (scratch));
9933 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9934 x = gen_rtx_AND (vmode, scratch, op1);
9936 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9938 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9940 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9941 x = gen_rtx_AND (vmode, dest, nmask);
9943 else /* alternative 3,4 */
9945 gcc_assert (REGNO (nmask) == REGNO (dest));
9946 dest = nmask;
9947 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9948 x = gen_rtx_AND (vmode, dest, op0);
9950 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9953 x = gen_rtx_IOR (vmode, dest, scratch);
9954 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9957 /* Return TRUE or FALSE depending on whether the first SET in INSN
9958 has source and destination with matching CC modes, and whether the
9959 CC mode is at least as constrained as REQ_MODE. */
9962 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9964 rtx set;
9965 enum machine_mode set_mode;
9967 set = PATTERN (insn);
9968 if (GET_CODE (set) == PARALLEL)
9969 set = XVECEXP (set, 0, 0);
9970 gcc_assert (GET_CODE (set) == SET);
9971 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9973 set_mode = GET_MODE (SET_DEST (set));
9974 switch (set_mode)
9976 case CCNOmode:
9977 if (req_mode != CCNOmode
9978 && (req_mode != CCmode
9979 || XEXP (SET_SRC (set), 1) != const0_rtx))
9980 return 0;
9981 break;
9982 case CCmode:
9983 if (req_mode == CCGCmode)
9984 return 0;
9985 /* FALLTHRU */
9986 case CCGCmode:
9987 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9988 return 0;
9989 /* FALLTHRU */
9990 case CCGOCmode:
9991 if (req_mode == CCZmode)
9992 return 0;
9993 /* FALLTHRU */
9994 case CCZmode:
9995 break;
9997 default:
9998 gcc_unreachable ();
10001 return (GET_MODE (SET_SRC (set)) == set_mode);
10004 /* Generate insn patterns to do an integer compare of OPERANDS. */
10006 static rtx
10007 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10009 enum machine_mode cmpmode;
10010 rtx tmp, flags;
10012 cmpmode = SELECT_CC_MODE (code, op0, op1);
10013 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10015 /* This is very simple, but making the interface the same as in the
10016 FP case makes the rest of the code easier. */
10017 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10018 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10020 /* Return the test that should be put into the flags user, i.e.
10021 the bcc, scc, or cmov instruction. */
10022 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10025 /* Figure out whether to use ordered or unordered fp comparisons.
10026 Return the appropriate mode to use. */
10028 enum machine_mode
10029 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10031 /* ??? In order to make all comparisons reversible, we do all comparisons
10032 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10033 between all forms of trapping and nontrapping comparisons, we can make inequality
10034 comparisons trapping again, since it results in better code when using
10035 FCOM based compares. */
10036 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
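/* Return the condition code mode to use for a comparison of code CODE
   between OP0 and OP1; the per-case comments below note which flags
   each mode needs.  */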
10039 enum machine_mode
10040 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10042 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10043 return ix86_fp_compare_mode (code);
10044 switch (code)
10046 /* Only zero flag is needed. */
10047 case EQ: /* ZF=0 */
10048 case NE: /* ZF!=0 */
10049 return CCZmode;
10050 /* Codes needing carry flag. */
10051 case GEU: /* CF=0 */
10052 case GTU: /* CF=0 & ZF=0 */
10053 case LTU: /* CF=1 */
10054 case LEU: /* CF=1 | ZF=1 */
10055 return CCmode;
10056 /* Codes possibly doable only with sign flag when
10057 comparing against zero. */
10058 case GE: /* SF=OF or SF=0 */
10059 case LT: /* SF<>OF or SF=1 */
10060 if (op1 == const0_rtx)
10061 return CCGOCmode;
10062 else
10063 /* For other cases Carry flag is not required. */
10064 return CCGCmode;
10065 /* Codes doable only with the sign flag when comparing
10066 against zero, but we lack a jump instruction for it,
10067 so we need to use relational tests against overflow,
10068 which thus needs to be zero. */
10069 case GT: /* ZF=0 & SF=OF */
10070 case LE: /* ZF=1 | SF<>OF */
10071 if (op1 == const0_rtx)
10072 return CCNOmode;
10073 else
10074 return CCGCmode;
10075 /* The strcmp pattern does a (use flags), and combine may ask us for the
10076 proper mode. */
10077 case USE:
10078 return CCmode;
10079 default:
10080 gcc_unreachable ();
10084 /* Return the fixed registers used for condition codes. */
10086 static bool
10087 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10089 *p1 = FLAGS_REG;
10090 *p2 = FPSR_REG;
10091 return true;
10094 /* If two condition code modes are compatible, return a condition code
10095 mode which is compatible with both. Otherwise, return
10096 VOIDmode. */
10098 static enum machine_mode
10099 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10101 if (m1 == m2)
10102 return m1;
10104 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10105 return VOIDmode;
10107 if ((m1 == CCGCmode && m2 == CCGOCmode)
10108 || (m1 == CCGOCmode && m2 == CCGCmode))
10109 return CCGCmode;
10111 switch (m1)
10113 default:
10114 gcc_unreachable ();
10116 case CCmode:
10117 case CCGCmode:
10118 case CCGOCmode:
10119 case CCNOmode:
10120 case CCZmode:
10121 switch (m2)
10123 default:
10124 return VOIDmode;
10126 case CCmode:
10127 case CCGCmode:
10128 case CCGOCmode:
10129 case CCNOmode:
10130 case CCZmode:
10131 return CCmode;
10134 case CCFPmode:
10135 case CCFPUmode:
10136 /* These are only compatible with themselves, which we already
10137 checked above. */
10138 return VOIDmode;
10142 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10145 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10147 enum rtx_code swapped_code = swap_condition (code);
10148 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10149 || (ix86_fp_comparison_cost (swapped_code)
10150 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10153 /* Swap, force into registers, or otherwise massage the two operands
10154 to a fp comparison. The operands are updated in place; the new
10155 comparison code is returned. */
10157 static enum rtx_code
10158 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10160 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10161 rtx op0 = *pop0, op1 = *pop1;
10162 enum machine_mode op_mode = GET_MODE (op0);
10163 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10165 /* All of the unordered compare instructions only work on registers.
10166 The same is true of the fcomi compare instructions. The XFmode
10167 compare instructions require registers except when comparing
10168 against zero or when converting operand 1 from fixed point to
10169 floating point. */
10171 if (!is_sse
10172 && (fpcmp_mode == CCFPUmode
10173 || (op_mode == XFmode
10174 && ! (standard_80387_constant_p (op0) == 1
10175 || standard_80387_constant_p (op1) == 1)
10176 && GET_CODE (op1) != FLOAT)
10177 || ix86_use_fcomi_compare (code)))
10179 op0 = force_reg (op_mode, op0);
10180 op1 = force_reg (op_mode, op1);
10182 else
10184 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10185 things around if they appear profitable, otherwise force op0
10186 into a register. */
10188 if (standard_80387_constant_p (op0) == 0
10189 || (GET_CODE (op0) == MEM
10190 && ! (standard_80387_constant_p (op1) == 0
10191 || GET_CODE (op1) == MEM)))
10193 rtx tmp;
10194 tmp = op0, op0 = op1, op1 = tmp;
10195 code = swap_condition (code);
10198 if (GET_CODE (op0) != REG)
10199 op0 = force_reg (op_mode, op0);
10201 if (CONSTANT_P (op1))
10203 int tmp = standard_80387_constant_p (op1);
10204 if (tmp == 0)
10205 op1 = validize_mem (force_const_mem (op_mode, op1));
10206 else if (tmp == 1)
10208 if (TARGET_CMOVE)
10209 op1 = force_reg (op_mode, op1);
10211 else
10212 op1 = force_reg (op_mode, op1);
10216 /* Try to rearrange the comparison to make it cheaper. */
10217 if (ix86_fp_comparison_cost (code)
10218 > ix86_fp_comparison_cost (swap_condition (code))
10219 && (GET_CODE (op1) == REG || !no_new_pseudos))
10221 rtx tmp;
10222 tmp = op0, op0 = op1, op1 = tmp;
10223 code = swap_condition (code);
10224 if (GET_CODE (op0) != REG)
10225 op0 = force_reg (op_mode, op0);
10228 *pop0 = op0;
10229 *pop1 = op1;
10230 return code;
10233 /* Convert the comparison codes we use to represent an FP comparison to the
10234 integer code that will result in a proper branch. Return UNKNOWN if no such code
10235 is available. */
10237 enum rtx_code
10238 ix86_fp_compare_code_to_integer (enum rtx_code code)
10240 switch (code)
10242 case GT:
10243 return GTU;
10244 case GE:
10245 return GEU;
10246 case ORDERED:
10247 case UNORDERED:
10248 return code;
10249 break;
10250 case UNEQ:
10251 return EQ;
10252 break;
10253 case UNLT:
10254 return LTU;
10255 break;
10256 case UNLE:
10257 return LEU;
10258 break;
10259 case LTGT:
10260 return NE;
10261 break;
10262 default:
10263 return UNKNOWN;
10267 /* Split comparison code CODE into comparisons we can do using branch
10268 instructions. BYPASS_CODE is the comparison code for the branch that will
10269 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10270 is not required, its value is set to UNKNOWN.
10271 We never require more than two branches. */
10273 void
10274 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10275 enum rtx_code *first_code,
10276 enum rtx_code *second_code)
10278 *first_code = code;
10279 *bypass_code = UNKNOWN;
10280 *second_code = UNKNOWN;
10282 /* The fcomi comparison sets flags as follows:
10284 cmp ZF PF CF
10285 > 0 0 0
10286 < 0 0 1
10287 = 1 0 0
10288 un 1 1 1 */
10290 switch (code)
10292 case GT: /* GTU - CF=0 & ZF=0 */
10293 case GE: /* GEU - CF=0 */
10294 case ORDERED: /* PF=0 */
10295 case UNORDERED: /* PF=1 */
10296 case UNEQ: /* EQ - ZF=1 */
10297 case UNLT: /* LTU - CF=1 */
10298 case UNLE: /* LEU - CF=1 | ZF=1 */
10299 case LTGT: /* EQ - ZF=0 */
10300 break;
10301 case LT: /* LTU - CF=1 - fails on unordered */
10302 *first_code = UNLT;
10303 *bypass_code = UNORDERED;
10304 break;
10305 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10306 *first_code = UNLE;
10307 *bypass_code = UNORDERED;
10308 break;
10309 case EQ: /* EQ - ZF=1 - fails on unordered */
10310 *first_code = UNEQ;
10311 *bypass_code = UNORDERED;
10312 break;
10313 case NE: /* NE - ZF=0 - fails on unordered */
10314 *first_code = LTGT;
10315 *second_code = UNORDERED;
10316 break;
10317 case UNGE: /* GEU - CF=0 - fails on unordered */
10318 *first_code = GE;
10319 *second_code = UNORDERED;
10320 break;
10321 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10322 *first_code = GT;
10323 *second_code = UNORDERED;
10324 break;
10325 default:
10326 gcc_unreachable ();
10328 if (!TARGET_IEEE_FP)
10330 *second_code = UNKNOWN;
10331 *bypass_code = UNKNOWN;
10335 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10336 All the following functions use the number of instructions as the cost metric.
10337 In the future this should be tweaked to compute bytes for optimize_size and
10338 take into account the performance of various instructions on various CPUs. */
10339 static int
10340 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10342 if (!TARGET_IEEE_FP)
10343 return 4;
10344 /* The cost of code output by ix86_expand_fp_compare. */
10345 switch (code)
10347 case UNLE:
10348 case UNLT:
10349 case LTGT:
10350 case GT:
10351 case GE:
10352 case UNORDERED:
10353 case ORDERED:
10354 case UNEQ:
10355 return 4;
10356 break;
10357 case LT:
10358 case NE:
10359 case EQ:
10360 case UNGE:
10361 return 5;
10362 break;
10363 case LE:
10364 case UNGT:
10365 return 6;
10366 break;
10367 default:
10368 gcc_unreachable ();
10372 /* Return cost of comparison done using fcomi operation.
10373 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10374 static int
10375 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10377 enum rtx_code bypass_code, first_code, second_code;
10378 /* Return an arbitrarily high cost when the instruction is not supported -
10379 this prevents gcc from using it. */
10380 if (!TARGET_CMOVE)
10381 return 1024;
10382 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10383 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10386 /* Return cost of comparison done using sahf operation.
10387 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10388 static int
10389 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10391 enum rtx_code bypass_code, first_code, second_code;
10392 /* Return an arbitrarily high cost when the instruction is not preferred -
10393 this prevents gcc from using it. */
10394 if (!TARGET_USE_SAHF && !optimize_size)
10395 return 1024;
10396 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10397 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10400 /* Compute cost of the comparison done using any method.
10401 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10402 static int
10403 ix86_fp_comparison_cost (enum rtx_code code)
10405 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10406 int min;
10408 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10409 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10411 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10412 if (min > sahf_cost)
10413 min = sahf_cost;
10414 if (min > fcomi_cost)
10415 min = fcomi_cost;
10416 return min;
10419 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10421 static rtx
10422 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10423 rtx *second_test, rtx *bypass_test)
10425 enum machine_mode fpcmp_mode, intcmp_mode;
10426 rtx tmp, tmp2;
10427 int cost = ix86_fp_comparison_cost (code);
10428 enum rtx_code bypass_code, first_code, second_code;
10430 fpcmp_mode = ix86_fp_compare_mode (code);
10431 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10433 if (second_test)
10434 *second_test = NULL_RTX;
10435 if (bypass_test)
10436 *bypass_test = NULL_RTX;
10438 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10440 /* Do fcomi/sahf based test when profitable. */
10441 if ((bypass_code == UNKNOWN || bypass_test)
10442 && (second_code == UNKNOWN || second_test)
10443 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10445 if (TARGET_CMOVE)
10447 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10448 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10449 tmp);
10450 emit_insn (tmp);
10452 else
10454 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10455 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10456 if (!scratch)
10457 scratch = gen_reg_rtx (HImode);
10458 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10459 emit_insn (gen_x86_sahf_1 (scratch));
10462 /* The FP codes work out to act like unsigned. */
10463 intcmp_mode = fpcmp_mode;
10464 code = first_code;
10465 if (bypass_code != UNKNOWN)
10466 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10467 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10468 const0_rtx);
10469 if (second_code != UNKNOWN)
10470 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10471 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10472 const0_rtx);
10474 else
10476 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10477 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10478 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10479 if (!scratch)
10480 scratch = gen_reg_rtx (HImode);
10481 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10483 /* In the unordered case, we have to check C2 for NaN's, which
10484 doesn't happen to work out to anything nice combination-wise.
10485 So do some bit twiddling on the value we've got in AH to come
10486 up with an appropriate set of condition codes. */
10488 intcmp_mode = CCNOmode;
10489 switch (code)
10491 case GT:
10492 case UNGT:
10493 if (code == GT || !TARGET_IEEE_FP)
10495 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10496 code = EQ;
10498 else
10500 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10501 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10502 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10503 intcmp_mode = CCmode;
10504 code = GEU;
10506 break;
10507 case LT:
10508 case UNLT:
10509 if (code == LT && TARGET_IEEE_FP)
10511 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10512 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10513 intcmp_mode = CCmode;
10514 code = EQ;
10516 else
10518 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10519 code = NE;
10521 break;
10522 case GE:
10523 case UNGE:
10524 if (code == GE || !TARGET_IEEE_FP)
10526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10527 code = EQ;
10529 else
10531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10532 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10533 GEN_INT (0x01)));
10534 code = NE;
10536 break;
10537 case LE:
10538 case UNLE:
10539 if (code == LE && TARGET_IEEE_FP)
10541 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10542 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10544 intcmp_mode = CCmode;
10545 code = LTU;
10547 else
10549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10550 code = NE;
10552 break;
10553 case EQ:
10554 case UNEQ:
10555 if (code == EQ && TARGET_IEEE_FP)
10557 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10558 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10559 intcmp_mode = CCmode;
10560 code = EQ;
10562 else
10564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10565 code = NE;
10566 break;
10568 break;
10569 case NE:
10570 case LTGT:
10571 if (code == NE && TARGET_IEEE_FP)
10573 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10574 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10575 GEN_INT (0x40)));
10576 code = NE;
10578 else
10580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10581 code = EQ;
10583 break;
10585 case UNORDERED:
10586 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10587 code = NE;
10588 break;
10589 case ORDERED:
10590 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10591 code = EQ;
10592 break;
10594 default:
10595 gcc_unreachable ();
10599 /* Return the test that should be put into the flags user, i.e.
10600 the bcc, scc, or cmov instruction. */
10601 return gen_rtx_fmt_ee (code, VOIDmode,
10602 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10603 const0_rtx);
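/* Expand a comparison with code CODE between ix86_compare_op0 and
   ix86_compare_op1, returning the rtx to be placed in the flags user
   (bcc, setcc or cmov).  For FP comparisons, *SECOND_TEST and
   *BYPASS_TEST may be set to additional tests that the caller must
   emit.  */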
10607 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10609 rtx op0, op1, ret;
10610 op0 = ix86_compare_op0;
10611 op1 = ix86_compare_op1;
10613 if (second_test)
10614 *second_test = NULL_RTX;
10615 if (bypass_test)
10616 *bypass_test = NULL_RTX;
10618 if (ix86_compare_emitted)
10620 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10621 ix86_compare_emitted = NULL_RTX;
10623 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10624 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10625 second_test, bypass_test);
10626 else
10627 ret = ix86_expand_int_compare (code, op0, op1);
10629 return ret;
10632 /* Return true if CODE will result in a nontrivial jump sequence. */
10633 bool
10634 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10636 enum rtx_code bypass_code, first_code, second_code;
10637 if (!TARGET_CMOVE)
10638 return true;
10639 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10640 return bypass_code != UNKNOWN || second_code != UNKNOWN;
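/* Emit a conditional jump to LABEL for comparison code CODE, using the
   operands stashed in ix86_compare_op0/ix86_compare_op1.  Integer modes
   expand to a simple compare and jump; FP modes may need extra
   unordered handling, and double-word (DImode on 32-bit, TImode on
   64-bit) comparisons are split into word-sized compare+branch
   sequences.  */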
10643 void
10644 ix86_expand_branch (enum rtx_code code, rtx label)
10646 rtx tmp;
10648 /* If we have emitted a compare insn, go straight to simple.
10649 ix86_expand_compare won't emit anything if ix86_compare_emitted
10650 is non NULL. */
10651 if (ix86_compare_emitted)
10652 goto simple;
10654 switch (GET_MODE (ix86_compare_op0))
10656 case QImode:
10657 case HImode:
10658 case SImode:
10659 simple:
10660 tmp = ix86_expand_compare (code, NULL, NULL);
10661 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10662 gen_rtx_LABEL_REF (VOIDmode, label),
10663 pc_rtx);
10664 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10665 return;
10667 case SFmode:
10668 case DFmode:
10669 case XFmode:
10671 rtvec vec;
10672 int use_fcomi;
10673 enum rtx_code bypass_code, first_code, second_code;
10675 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10676 &ix86_compare_op1);
10678 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10680 /* Check whether we will use the natural sequence with one jump. If
10681 so, we can expand the jump early. Otherwise delay expansion by
10682 creating a compound insn so as not to confuse the optimizers. */
10683 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10684 && TARGET_CMOVE)
10686 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10687 gen_rtx_LABEL_REF (VOIDmode, label),
10688 pc_rtx, NULL_RTX, NULL_RTX);
10690 else
10692 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10693 ix86_compare_op0, ix86_compare_op1);
10694 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10695 gen_rtx_LABEL_REF (VOIDmode, label),
10696 pc_rtx);
10697 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10699 use_fcomi = ix86_use_fcomi_compare (code);
10700 vec = rtvec_alloc (3 + !use_fcomi);
10701 RTVEC_ELT (vec, 0) = tmp;
10702 RTVEC_ELT (vec, 1)
10703 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10704 RTVEC_ELT (vec, 2)
10705 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10706 if (! use_fcomi)
10707 RTVEC_ELT (vec, 3)
10708 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10710 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10712 return;
10715 case DImode:
10716 if (TARGET_64BIT)
10717 goto simple;
10718 case TImode:
10719 /* Expand a DImode/TImode branch into multiple compare+branch sequences. */
10721 rtx lo[2], hi[2], label2;
10722 enum rtx_code code1, code2, code3;
10723 enum machine_mode submode;
10725 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10727 tmp = ix86_compare_op0;
10728 ix86_compare_op0 = ix86_compare_op1;
10729 ix86_compare_op1 = tmp;
10730 code = swap_condition (code);
10732 if (GET_MODE (ix86_compare_op0) == DImode)
10734 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10735 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10736 submode = SImode;
10738 else
10740 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10741 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10742 submode = DImode;
10745 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10746 avoid two branches. This costs one extra insn, so disable when
10747 optimizing for size. */
10749 if ((code == EQ || code == NE)
10750 && (!optimize_size
10751 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10753 rtx xor0, xor1;
10755 xor1 = hi[0];
10756 if (hi[1] != const0_rtx)
10757 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10758 NULL_RTX, 0, OPTAB_WIDEN);
10760 xor0 = lo[0];
10761 if (lo[1] != const0_rtx)
10762 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10763 NULL_RTX, 0, OPTAB_WIDEN);
10765 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10766 NULL_RTX, 0, OPTAB_WIDEN);
10768 ix86_compare_op0 = tmp;
10769 ix86_compare_op1 = const0_rtx;
10770 ix86_expand_branch (code, label);
10771 return;
10774 /* Otherwise, if we are doing less-than or greater-than-or-equal,
10775 op1 is a constant and the low word is zero, then we can just
10776 examine the high word. */
10778 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10779 switch (code)
10781 case LT: case LTU: case GE: case GEU:
10782 ix86_compare_op0 = hi[0];
10783 ix86_compare_op1 = hi[1];
10784 ix86_expand_branch (code, label);
10785 return;
10786 default:
10787 break;
10790 /* Otherwise, we need two or three jumps. */
10792 label2 = gen_label_rtx ();
10794 code1 = code;
10795 code2 = swap_condition (code);
10796 code3 = unsigned_condition (code);
10798 switch (code)
10800 case LT: case GT: case LTU: case GTU:
10801 break;
10803 case LE: code1 = LT; code2 = GT; break;
10804 case GE: code1 = GT; code2 = LT; break;
10805 case LEU: code1 = LTU; code2 = GTU; break;
10806 case GEU: code1 = GTU; code2 = LTU; break;
10808 case EQ: code1 = UNKNOWN; code2 = NE; break;
10809 case NE: code2 = UNKNOWN; break;
10811 default:
10812 gcc_unreachable ();
10816 * a < b =>
10817 * if (hi(a) < hi(b)) goto true;
10818 * if (hi(a) > hi(b)) goto false;
10819 * if (lo(a) < lo(b)) goto true;
10820 * false:
10823 ix86_compare_op0 = hi[0];
10824 ix86_compare_op1 = hi[1];
10826 if (code1 != UNKNOWN)
10827 ix86_expand_branch (code1, label);
10828 if (code2 != UNKNOWN)
10829 ix86_expand_branch (code2, label2);
10831 ix86_compare_op0 = lo[0];
10832 ix86_compare_op1 = lo[1];
10833 ix86_expand_branch (code3, label);
10835 if (code2 != UNKNOWN)
10836 emit_label (label2);
10837 return;
10840 default:
10841 gcc_unreachable ();
10845 /* Split branch based on floating point condition. */
10846 void
10847 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10848 rtx target1, rtx target2, rtx tmp, rtx pushed)
10850 rtx second, bypass;
10851 rtx label = NULL_RTX;
10852 rtx condition;
10853 int bypass_probability = -1, second_probability = -1, probability = -1;
10854 rtx i;
10856 if (target2 != pc_rtx)
10858 rtx tmp = target2;
10859 code = reverse_condition_maybe_unordered (code);
10860 target2 = target1;
10861 target1 = tmp;
10864 condition = ix86_expand_fp_compare (code, op1, op2,
10865 tmp, &second, &bypass);
10867 /* Remove pushed operand from stack. */
10868 if (pushed)
10869 ix86_free_from_memory (GET_MODE (pushed));
10871 if (split_branch_probability >= 0)
10873 /* Distribute the probabilities across the jumps.
10874 Assume that BYPASS and SECOND always test
10875 for UNORDERED. */
10876 probability = split_branch_probability;
10878 /* A value of 1 is low enough that the probability need not
10879 be updated. Later we may run some experiments and see
10880 if unordered values are more frequent in practice. */
10881 if (bypass)
10882 bypass_probability = 1;
10883 if (second)
10884 second_probability = 1;
10886 if (bypass != NULL_RTX)
10888 label = gen_label_rtx ();
10889 i = emit_jump_insn (gen_rtx_SET
10890 (VOIDmode, pc_rtx,
10891 gen_rtx_IF_THEN_ELSE (VOIDmode,
10892 bypass,
10893 gen_rtx_LABEL_REF (VOIDmode,
10894 label),
10895 pc_rtx)));
10896 if (bypass_probability >= 0)
10897 REG_NOTES (i)
10898 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10899 GEN_INT (bypass_probability),
10900 REG_NOTES (i));
10902 i = emit_jump_insn (gen_rtx_SET
10903 (VOIDmode, pc_rtx,
10904 gen_rtx_IF_THEN_ELSE (VOIDmode,
10905 condition, target1, target2)));
10906 if (probability >= 0)
10907 REG_NOTES (i)
10908 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10909 GEN_INT (probability),
10910 REG_NOTES (i));
10911 if (second != NULL_RTX)
10913 i = emit_jump_insn (gen_rtx_SET
10914 (VOIDmode, pc_rtx,
10915 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10916 target2)));
10917 if (second_probability >= 0)
10918 REG_NOTES (i)
10919 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10920 GEN_INT (second_probability),
10921 REG_NOTES (i));
10923 if (label != NULL_RTX)
10924 emit_label (label);
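/* Store the result of comparison code CODE in the QImode register DEST,
   using ix86_compare_op0/ix86_compare_op1.  Return 1 on success, or 0
   (FAIL) when the comparison is a double-word one that must be handled
   elsewhere.  */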
10928 ix86_expand_setcc (enum rtx_code code, rtx dest)
10930 rtx ret, tmp, tmpreg, equiv;
10931 rtx second_test, bypass_test;
10933 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10934 return 0; /* FAIL */
10936 gcc_assert (GET_MODE (dest) == QImode);
10938 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10939 PUT_MODE (ret, QImode);
10941 tmp = dest;
10942 tmpreg = dest;
10944 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10945 if (bypass_test || second_test)
10947 rtx test = second_test;
10948 int bypass = 0;
10949 rtx tmp2 = gen_reg_rtx (QImode);
10950 if (bypass_test)
10952 gcc_assert (!second_test);
10953 test = bypass_test;
10954 bypass = 1;
10955 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10957 PUT_MODE (test, QImode);
10958 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10960 if (bypass)
10961 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10962 else
10963 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10966 /* Attach a REG_EQUAL note describing the comparison result. */
10967 if (ix86_compare_op0 && ix86_compare_op1)
10969 equiv = simplify_gen_relational (code, QImode,
10970 GET_MODE (ix86_compare_op0),
10971 ix86_compare_op0, ix86_compare_op1);
10972 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10975 return 1; /* DONE */
10978 /* Expand a comparison setting or clearing the carry flag. Return true when
10979 successful, and set *POP to the resulting comparison operation. */
10980 static bool
10981 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10983 enum machine_mode mode =
10984 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10986 /* Do not handle DImode compares that go through the special path. Also we
10987 can't deal with FP compares yet, though it would be possible to add that. */
10988 if (mode == (TARGET_64BIT ? TImode : DImode))
10989 return false;
10990 if (FLOAT_MODE_P (mode))
10992 rtx second_test = NULL, bypass_test = NULL;
10993 rtx compare_op, compare_seq;
10995 /* Shortcut: the following common codes never translate into carry-flag compares. */
10996 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10997 || code == ORDERED || code == UNORDERED)
10998 return false;
11000 /* These comparisons require the zero flag; swap the operands so they don't. */
11001 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11002 && !TARGET_IEEE_FP)
11004 rtx tmp = op0;
11005 op0 = op1;
11006 op1 = tmp;
11007 code = swap_condition (code);
11010 /* Try to expand the comparison and verify that we end up with a carry flag
11011 based comparison. This fails to be true only when we decide to expand the
11012 comparison using arithmetic, which is not a common scenario. */
11013 start_sequence ();
11014 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11015 &second_test, &bypass_test);
11016 compare_seq = get_insns ();
11017 end_sequence ();
11019 if (second_test || bypass_test)
11020 return false;
11021 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11022 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11023 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11024 else
11025 code = GET_CODE (compare_op);
11026 if (code != LTU && code != GEU)
11027 return false;
11028 emit_insn (compare_seq);
11029 *pop = compare_op;
11030 return true;
11032 if (!INTEGRAL_MODE_P (mode))
11033 return false;
11034 switch (code)
11036 case LTU:
11037 case GEU:
11038 break;
11040 /* Convert a==0 into (unsigned)a<1. */
11041 case EQ:
11042 case NE:
11043 if (op1 != const0_rtx)
11044 return false;
11045 op1 = const1_rtx;
11046 code = (code == EQ ? LTU : GEU);
11047 break;
11049 /* Convert a>b into b<a or a>=b-1. */
11050 case GTU:
11051 case LEU:
11052 if (GET_CODE (op1) == CONST_INT)
11054 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11055 /* Bail out on overflow. We could still swap the operands, but that
11056 would force loading the constant into a register. */
11057 if (op1 == const0_rtx
11058 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11059 return false;
11060 code = (code == GTU ? GEU : LTU);
11062 else
11064 rtx tmp = op1;
11065 op1 = op0;
11066 op0 = tmp;
11067 code = (code == GTU ? LTU : GEU);
11069 break;
11071 /* Convert a>=0 into (unsigned)a<0x80000000. */
11072 case LT:
11073 case GE:
11074 if (mode == DImode || op1 != const0_rtx)
11075 return false;
11076 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11077 code = (code == LT ? GEU : LTU);
11078 break;
11079 case LE:
11080 case GT:
11081 if (mode == DImode || op1 != constm1_rtx)
11082 return false;
11083 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11084 code = (code == LE ? GEU : LTU);
11085 break;
11087 default:
11088 return false;
11090 /* Swapping the operands may cause a constant to appear as the first operand. */
11091 if (!nonimmediate_operand (op0, VOIDmode))
11093 if (no_new_pseudos)
11094 return false;
11095 op0 = force_reg (mode, op0);
11097 ix86_compare_op0 = op0;
11098 ix86_compare_op1 = op1;
11099 *pop = ix86_expand_compare (code, NULL, NULL);
11100 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11101 return true;
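/* Expand an integer conditional move, operands[0] = operands[1]
   ? operands[2] : operands[3], where operands[1] is a comparison.
   Return 1 (DONE) when a suitable sbb/setcc/lea/cmov based sequence
   has been emitted, or 0 (FAIL) to fall back to the generic expander.  */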
11105 ix86_expand_int_movcc (rtx operands[])
11107 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11108 rtx compare_seq, compare_op;
11109 rtx second_test, bypass_test;
11110 enum machine_mode mode = GET_MODE (operands[0]);
11111 bool sign_bit_compare_p = false;
11113 start_sequence ();
11114 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11115 compare_seq = get_insns ();
11116 end_sequence ();
11118 compare_code = GET_CODE (compare_op);
11120 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11121 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11122 sign_bit_compare_p = true;
11124 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11125 HImode insns, we'd be swallowed in word prefix ops. */
11127 if ((mode != HImode || TARGET_FAST_PREFIX)
11128 && (mode != (TARGET_64BIT ? TImode : DImode))
11129 && GET_CODE (operands[2]) == CONST_INT
11130 && GET_CODE (operands[3]) == CONST_INT)
11132 rtx out = operands[0];
11133 HOST_WIDE_INT ct = INTVAL (operands[2]);
11134 HOST_WIDE_INT cf = INTVAL (operands[3]);
11135 HOST_WIDE_INT diff;
11137 diff = ct - cf;
11138 /* Sign-bit compares are better done using shifts than by using
11139 sbb. */
11140 if (sign_bit_compare_p
11141 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11142 ix86_compare_op1, &compare_op))
11144 /* Detect overlap between destination and compare sources. */
11145 rtx tmp = out;
11147 if (!sign_bit_compare_p)
11149 bool fpcmp = false;
11151 compare_code = GET_CODE (compare_op);
11153 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11154 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11156 fpcmp = true;
11157 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11160 /* To simplify the rest of the code, restrict to the GEU case. */
11161 if (compare_code == LTU)
11163 HOST_WIDE_INT tmp = ct;
11164 ct = cf;
11165 cf = tmp;
11166 compare_code = reverse_condition (compare_code);
11167 code = reverse_condition (code);
11169 else
11171 if (fpcmp)
11172 PUT_CODE (compare_op,
11173 reverse_condition_maybe_unordered
11174 (GET_CODE (compare_op)));
11175 else
11176 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11178 diff = ct - cf;
11180 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11181 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11182 tmp = gen_reg_rtx (mode);
11184 if (mode == DImode)
11185 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11186 else
11187 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11189 else
11191 if (code == GT || code == GE)
11192 code = reverse_condition (code);
11193 else
11195 HOST_WIDE_INT tmp = ct;
11196 ct = cf;
11197 cf = tmp;
11198 diff = ct - cf;
11200 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11201 ix86_compare_op1, VOIDmode, 0, -1);
11204 if (diff == 1)
11207 * cmpl op0,op1
11208 * sbbl dest,dest
11209 * [addl dest, ct]
11211 * Size 5 - 8.
11213 if (ct)
11214 tmp = expand_simple_binop (mode, PLUS,
11215 tmp, GEN_INT (ct),
11216 copy_rtx (tmp), 1, OPTAB_DIRECT);
11218 else if (cf == -1)
11221 * cmpl op0,op1
11222 * sbbl dest,dest
11223 * orl $ct, dest
11225 * Size 8.
11227 tmp = expand_simple_binop (mode, IOR,
11228 tmp, GEN_INT (ct),
11229 copy_rtx (tmp), 1, OPTAB_DIRECT);
11231 else if (diff == -1 && ct)
11234 * cmpl op0,op1
11235 * sbbl dest,dest
11236 * notl dest
11237 * [addl dest, cf]
11239 * Size 8 - 11.
11241 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11242 if (cf)
11243 tmp = expand_simple_binop (mode, PLUS,
11244 copy_rtx (tmp), GEN_INT (cf),
11245 copy_rtx (tmp), 1, OPTAB_DIRECT);
11247 else
11250 * cmpl op0,op1
11251 * sbbl dest,dest
11252 * [notl dest]
11253 * andl cf - ct, dest
11254 * [addl dest, ct]
11256 * Size 8 - 11.
11259 if (cf == 0)
11261 cf = ct;
11262 ct = 0;
11263 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11266 tmp = expand_simple_binop (mode, AND,
11267 copy_rtx (tmp),
11268 gen_int_mode (cf - ct, mode),
11269 copy_rtx (tmp), 1, OPTAB_DIRECT);
11270 if (ct)
11271 tmp = expand_simple_binop (mode, PLUS,
11272 copy_rtx (tmp), GEN_INT (ct),
11273 copy_rtx (tmp), 1, OPTAB_DIRECT);
11276 if (!rtx_equal_p (tmp, out))
11277 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11279 return 1; /* DONE */
11282 if (diff < 0)
11284 HOST_WIDE_INT tmp;
11285 tmp = ct, ct = cf, cf = tmp;
11286 diff = -diff;
11287 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11289 /* We may be reversing an unordered compare to a normal compare, which
11290 is not valid in general (we may convert a non-trapping condition
11291 to a trapping one); however, on i386 we currently emit all
11292 comparisons unordered. */
11293 compare_code = reverse_condition_maybe_unordered (compare_code);
11294 code = reverse_condition_maybe_unordered (code);
11296 else
11298 compare_code = reverse_condition (compare_code);
11299 code = reverse_condition (code);
11303 compare_code = UNKNOWN;
11304 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11305 && GET_CODE (ix86_compare_op1) == CONST_INT)
11307 if (ix86_compare_op1 == const0_rtx
11308 && (code == LT || code == GE))
11309 compare_code = code;
11310 else if (ix86_compare_op1 == constm1_rtx)
11312 if (code == LE)
11313 compare_code = LT;
11314 else if (code == GT)
11315 compare_code = GE;
11319 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11320 if (compare_code != UNKNOWN
11321 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11322 && (cf == -1 || ct == -1))
11324 /* If lea code below could be used, only optimize
11325 if it results in a 2 insn sequence. */
11327 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11328 || diff == 3 || diff == 5 || diff == 9)
11329 || (compare_code == LT && ct == -1)
11330 || (compare_code == GE && cf == -1))
11333 * notl op1 (if necessary)
11334 * sarl $31, op1
11335 * orl cf, op1
11337 if (ct != -1)
11339 cf = ct;
11340 ct = -1;
11341 code = reverse_condition (code);
11344 out = emit_store_flag (out, code, ix86_compare_op0,
11345 ix86_compare_op1, VOIDmode, 0, -1);
11347 out = expand_simple_binop (mode, IOR,
11348 out, GEN_INT (cf),
11349 out, 1, OPTAB_DIRECT);
11350 if (out != operands[0])
11351 emit_move_insn (operands[0], out);
11353 return 1; /* DONE */
11358 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11359 || diff == 3 || diff == 5 || diff == 9)
11360 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11361 && (mode != DImode
11362 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11365 * xorl dest,dest
11366 * cmpl op1,op2
11367 * setcc dest
11368 * lea cf(dest*(ct-cf)),dest
11370 * Size 14.
11372 * This also catches the degenerate setcc-only case.
11375 rtx tmp;
11376 int nops;
11378 out = emit_store_flag (out, code, ix86_compare_op0,
11379 ix86_compare_op1, VOIDmode, 0, 1);
11381 nops = 0;
11382 /* On x86_64 the lea instruction operates on Pmode, so we need
11383 to get the arithmetic done in the proper mode to match. */
11384 if (diff == 1)
11385 tmp = copy_rtx (out);
11386 else
11388 rtx out1;
11389 out1 = copy_rtx (out);
11390 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11391 nops++;
11392 if (diff & 1)
11394 tmp = gen_rtx_PLUS (mode, tmp, out1);
11395 nops++;
11398 if (cf != 0)
11400 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11401 nops++;
11403 if (!rtx_equal_p (tmp, out))
11405 if (nops == 1)
11406 out = force_operand (tmp, copy_rtx (out));
11407 else
11408 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11410 if (!rtx_equal_p (out, operands[0]))
11411 emit_move_insn (operands[0], copy_rtx (out));
11413 return 1; /* DONE */
11416 /*
11417 * General case: Jumpful:
11418 * xorl dest,dest cmpl op1, op2
11419 * cmpl op1, op2 movl ct, dest
11420 * setcc dest jcc 1f
11421 * decl dest movl cf, dest
11422 * andl (cf-ct),dest 1:
11423 * addl ct,dest
11425 * Size 20. Size 14.
11427 * This is reasonably steep, but branch mispredict costs are
11428 * high on modern cpus, so consider failing only if optimizing
11429 * for space.
11430 */
11432 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11433 && BRANCH_COST >= 2)
11435 if (cf == 0)
11437 cf = ct;
11438 ct = 0;
11439 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11440 /* We may be reversing unordered compare to normal compare,
11441 that is not valid in general (we may convert non-trapping
11442 condition to trapping one), however on i386 we currently
11443 emit all comparisons unordered. */
11444 code = reverse_condition_maybe_unordered (code);
11445 else
11447 code = reverse_condition (code);
11448 if (compare_code != UNKNOWN)
11449 compare_code = reverse_condition (compare_code);
11453 if (compare_code != UNKNOWN)
11455 /* notl op1 (if needed)
11456 sarl $31, op1
11457 andl (cf-ct), op1
11458 addl ct, op1
11460 For x < 0 (resp. x <= -1) there will be no notl,
11461 so if possible swap the constants to get rid of the
11462 complement.
11463 True/false will be -1/0 while code below (store flag
11464 followed by decrement) is 0/-1, so the constants need
11465 to be exchanged once more. */
11467 if (compare_code == GE || !cf)
11469 code = reverse_condition (code);
11470 compare_code = LT;
11472 else
11474 HOST_WIDE_INT tmp = cf;
11475 cf = ct;
11476 ct = tmp;
11479 out = emit_store_flag (out, code, ix86_compare_op0,
11480 ix86_compare_op1, VOIDmode, 0, -1);
11482 else
11484 out = emit_store_flag (out, code, ix86_compare_op0,
11485 ix86_compare_op1, VOIDmode, 0, 1);
11487 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11488 copy_rtx (out), 1, OPTAB_DIRECT);
11491 out = expand_simple_binop (mode, AND, copy_rtx (out),
11492 gen_int_mode (cf - ct, mode),
11493 copy_rtx (out), 1, OPTAB_DIRECT);
11494 if (ct)
11495 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11496 copy_rtx (out), 1, OPTAB_DIRECT);
11497 if (!rtx_equal_p (out, operands[0]))
11498 emit_move_insn (operands[0], copy_rtx (out));
11500 return 1; /* DONE */
11504 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11506 /* Try a few things more with specific constants and a variable. */
11508 optab op;
11509 rtx var, orig_out, out, tmp;
11511 if (BRANCH_COST <= 2)
11512 return 0; /* FAIL */
11514 /* If one of the two operands is an interesting constant, load a
11515 constant with the above and mask it in with a logical operation. */
11517 if (GET_CODE (operands[2]) == CONST_INT)
11519 var = operands[3];
11520 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11521 operands[3] = constm1_rtx, op = and_optab;
11522 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11523 operands[3] = const0_rtx, op = ior_optab;
11524 else
11525 return 0; /* FAIL */
11527 else if (GET_CODE (operands[3]) == CONST_INT)
11529 var = operands[2];
11530 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11531 operands[2] = constm1_rtx, op = and_optab;
11532 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11533 operands[2] = const0_rtx, op = ior_optab;
11534 else
11535 return 0; /* FAIL */
11537 else
11538 return 0; /* FAIL */
11540 orig_out = operands[0];
11541 tmp = gen_reg_rtx (mode);
11542 operands[0] = tmp;
11544 /* Recurse to get the constant loaded. */
11545 if (ix86_expand_int_movcc (operands) == 0)
11546 return 0; /* FAIL */
11548 /* Mask in the interesting variable. */
11549 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11550 OPTAB_WIDEN);
11551 if (!rtx_equal_p (out, orig_out))
11552 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11554 return 1; /* DONE */
11557 /*
11558 * For comparison with above,
11560 * movl cf,dest
11561 * movl ct,tmp
11562 * cmpl op1,op2
11563 * cmovcc tmp,dest
11565 * Size 15.
11566 */
11568 if (! nonimmediate_operand (operands[2], mode))
11569 operands[2] = force_reg (mode, operands[2]);
11570 if (! nonimmediate_operand (operands[3], mode))
11571 operands[3] = force_reg (mode, operands[3]);
11573 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11575 rtx tmp = gen_reg_rtx (mode);
11576 emit_move_insn (tmp, operands[3]);
11577 operands[3] = tmp;
11579 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11581 rtx tmp = gen_reg_rtx (mode);
11582 emit_move_insn (tmp, operands[2]);
11583 operands[2] = tmp;
11586 if (! register_operand (operands[2], VOIDmode)
11587 && (mode == QImode
11588 || ! register_operand (operands[3], VOIDmode)))
11589 operands[2] = force_reg (mode, operands[2]);
11591 if (mode == QImode
11592 && ! register_operand (operands[3], VOIDmode))
11593 operands[3] = force_reg (mode, operands[3]);
11595 emit_insn (compare_seq);
11596 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11597 gen_rtx_IF_THEN_ELSE (mode,
11598 compare_op, operands[2],
11599 operands[3])));
11600 if (bypass_test)
11601 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11602 gen_rtx_IF_THEN_ELSE (mode,
11603 bypass_test,
11604 copy_rtx (operands[3]),
11605 copy_rtx (operands[0]))));
11606 if (second_test)
11607 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11608 gen_rtx_IF_THEN_ELSE (mode,
11609 second_test,
11610 copy_rtx (operands[2]),
11611 copy_rtx (operands[0]))));
11613 return 1; /* DONE */
11616 /* Swap, force into registers, or otherwise massage the two operands
11617 to an sse comparison with a mask result. Thus we differ a bit from
11618 ix86_prepare_fp_compare_args which expects to produce a flags result.
11620 The DEST operand exists to help determine whether to commute commutative
11621 operators. The POP0/POP1 operands are updated in place. The new
11622 comparison code is returned, or UNKNOWN if not implementable. */
11624 static enum rtx_code
11625 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11626 rtx *pop0, rtx *pop1)
11628 rtx tmp;
11630 switch (code)
11632 case LTGT:
11633 case UNEQ:
11634 /* We have no LTGT as an operator. We could implement it with
11635 NE & ORDERED, but this requires an extra temporary. It's
11636 not clear that it's worth it. */
11637 return UNKNOWN;
11639 case LT:
11640 case LE:
11641 case UNGT:
11642 case UNGE:
11643 /* These are supported directly. */
11644 break;
11646 case EQ:
11647 case NE:
11648 case UNORDERED:
11649 case ORDERED:
11650 /* For commutative operators, try to canonicalize the destination
11651 operand to be first in the comparison - this helps reload to
11652 avoid extra moves. */
11653 if (!dest || !rtx_equal_p (dest, *pop1))
11654 break;
11655 /* FALLTHRU */
11657 case GE:
11658 case GT:
11659 case UNLE:
11660 case UNLT:
11661 /* These are not supported directly. Swap the comparison operands
11662 to transform into something that is supported. */
11663 tmp = *pop0;
11664 *pop0 = *pop1;
11665 *pop1 = tmp;
11666 code = swap_condition (code);
11667 break;
11669 default:
11670 gcc_unreachable ();
11673 return code;
11676 /* Detect conditional moves that exactly match min/max operational
11677 semantics. Note that this is IEEE safe, as long as we don't
11678 interchange the operands.
11680 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11681 and TRUE if the operation is successful and instructions are emitted. */
11683 static bool
11684 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11685 rtx cmp_op1, rtx if_true, rtx if_false)
11687 enum machine_mode mode;
11688 bool is_min;
11689 rtx tmp;
11691 if (code == LT)
11693 else if (code == UNGE)
11695 tmp = if_true;
11696 if_true = if_false;
11697 if_false = tmp;
11699 else
11700 return false;
11702 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11703 is_min = true;
11704 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11705 is_min = false;
11706 else
11707 return false;
11709 mode = GET_MODE (dest);
11711 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11712 but MODE may be a vector mode and thus not appropriate. */
11713 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11715 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11716 rtvec v;
11718 if_true = force_reg (mode, if_true);
11719 v = gen_rtvec (2, if_true, if_false);
11720 tmp = gen_rtx_UNSPEC (mode, v, u);
11722 else
11724 code = is_min ? SMIN : SMAX;
11725 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11728 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11729 return true;
11732 /* Expand an sse vector comparison. Return the register with the result. */
11734 static rtx
11735 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11736 rtx op_true, rtx op_false)
11738 enum machine_mode mode = GET_MODE (dest);
11739 rtx x;
11741 cmp_op0 = force_reg (mode, cmp_op0);
11742 if (!nonimmediate_operand (cmp_op1, mode))
11743 cmp_op1 = force_reg (mode, cmp_op1);
11745 if (optimize
11746 || reg_overlap_mentioned_p (dest, op_true)
11747 || reg_overlap_mentioned_p (dest, op_false))
11748 dest = gen_reg_rtx (mode);
11750 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11751 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11753 return dest;
11756 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11757 operations. This is used for both scalar and vector conditional moves. */
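/* The result is built with bitwise logic: DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE);
   when either arm is the zero vector only a single AND (or AND-NOT) is needed.  */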
11759 static void
11760 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11762 enum machine_mode mode = GET_MODE (dest);
11763 rtx t2, t3, x;
11765 if (op_false == CONST0_RTX (mode))
11767 op_true = force_reg (mode, op_true);
11768 x = gen_rtx_AND (mode, cmp, op_true);
11769 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11771 else if (op_true == CONST0_RTX (mode))
11773 op_false = force_reg (mode, op_false);
11774 x = gen_rtx_NOT (mode, cmp);
11775 x = gen_rtx_AND (mode, x, op_false);
11776 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11778 else
11780 op_true = force_reg (mode, op_true);
11781 op_false = force_reg (mode, op_false);
11783 t2 = gen_reg_rtx (mode);
11784 if (optimize)
11785 t3 = gen_reg_rtx (mode);
11786 else
11787 t3 = dest;
11789 x = gen_rtx_AND (mode, op_true, cmp);
11790 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11792 x = gen_rtx_NOT (mode, cmp);
11793 x = gen_rtx_AND (mode, x, op_false);
11794 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11796 x = gen_rtx_IOR (mode, t3, t2);
11797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11801 /* Expand a floating-point conditional move. Return true if successful. */
11803 int
11804 ix86_expand_fp_movcc (rtx operands[])
11806 enum machine_mode mode = GET_MODE (operands[0]);
11807 enum rtx_code code = GET_CODE (operands[1]);
11808 rtx tmp, compare_op, second_test, bypass_test;
11810 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11812 enum machine_mode cmode;
11814 /* Since we've no cmove for sse registers, don't force bad register
11815 allocation just to gain access to it. Deny movcc when the
11816 comparison mode doesn't match the move mode. */
11817 cmode = GET_MODE (ix86_compare_op0);
11818 if (cmode == VOIDmode)
11819 cmode = GET_MODE (ix86_compare_op1);
11820 if (cmode != mode)
11821 return 0;
11823 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11824 &ix86_compare_op0,
11825 &ix86_compare_op1);
11826 if (code == UNKNOWN)
11827 return 0;
11829 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11830 ix86_compare_op1, operands[2],
11831 operands[3]))
11832 return 1;
11834 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11835 ix86_compare_op1, operands[2], operands[3]);
11836 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11837 return 1;
11840 /* The floating point conditional move instructions don't directly
11841 support conditions resulting from a signed integer comparison. */
11843 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11845 /* The floating point conditional move instructions don't directly
11846 support signed integer comparisons. */
11848 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11850 gcc_assert (!second_test && !bypass_test);
11851 tmp = gen_reg_rtx (QImode);
11852 ix86_expand_setcc (code, tmp);
11853 code = NE;
11854 ix86_compare_op0 = tmp;
11855 ix86_compare_op1 = const0_rtx;
11856 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11858 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11860 tmp = gen_reg_rtx (mode);
11861 emit_move_insn (tmp, operands[3]);
11862 operands[3] = tmp;
11864 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11866 tmp = gen_reg_rtx (mode);
11867 emit_move_insn (tmp, operands[2]);
11868 operands[2] = tmp;
11871 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11872 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11873 operands[2], operands[3])));
11874 if (bypass_test)
11875 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11876 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11877 operands[3], operands[0])));
11878 if (second_test)
11879 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11880 gen_rtx_IF_THEN_ELSE (mode, second_test,
11881 operands[2], operands[0])));
11883 return 1;
11886 /* Expand a floating-point vector conditional move; a vcond operation
11887 rather than a movcc operation. */
11889 bool
11890 ix86_expand_fp_vcond (rtx operands[])
11892 enum rtx_code code = GET_CODE (operands[3]);
11893 rtx cmp;
11895 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11896 &operands[4], &operands[5]);
11897 if (code == UNKNOWN)
11898 return false;
11900 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11901 operands[5], operands[1], operands[2]))
11902 return true;
11904 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11905 operands[1], operands[2]);
11906 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11907 return true;
11910 /* Expand a signed integral vector conditional move. */
11912 bool
11913 ix86_expand_int_vcond (rtx operands[])
11915 enum machine_mode mode = GET_MODE (operands[0]);
11916 enum rtx_code code = GET_CODE (operands[3]);
11917 bool negate = false;
11918 rtx x, cop0, cop1;
11920 cop0 = operands[4];
11921 cop1 = operands[5];
11923 /* Canonicalize the comparison to EQ, GT, GTU. */
11924 switch (code)
11926 case EQ:
11927 case GT:
11928 case GTU:
11929 break;
11931 case NE:
11932 case LE:
11933 case LEU:
11934 code = reverse_condition (code);
11935 negate = true;
11936 break;
11938 case GE:
11939 case GEU:
11940 code = reverse_condition (code);
11941 negate = true;
11942 /* FALLTHRU */
11944 case LT:
11945 case LTU:
11946 code = swap_condition (code);
11947 x = cop0, cop0 = cop1, cop1 = x;
11948 break;
11950 default:
11951 gcc_unreachable ();
11954 /* Unsigned parallel compare is not supported by the hardware. Play some
11955 tricks to turn this into a signed comparison against 0. */
11956 if (code == GTU)
11958 cop0 = force_reg (mode, cop0);
11960 switch (mode)
11962 case V4SImode:
11964 rtx t1, t2, mask;
11966 /* Perform a parallel modulo subtraction. */
11967 t1 = gen_reg_rtx (mode);
11968 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11970 /* Extract the original sign bit of op0. */
11971 mask = GEN_INT (-0x80000000);
11972 mask = gen_rtx_CONST_VECTOR (mode,
11973 gen_rtvec (4, mask, mask, mask, mask));
11974 mask = force_reg (mode, mask);
11975 t2 = gen_reg_rtx (mode);
11976 emit_insn (gen_andv4si3 (t2, cop0, mask));
11978 /* XOR it back into the result of the subtraction. This results
11979 in the sign bit set iff we saw unsigned underflow. */
11980 x = gen_reg_rtx (mode);
11981 emit_insn (gen_xorv4si3 (x, t1, t2));
11983 code = GT;
11985 break;
11987 case V16QImode:
11988 case V8HImode:
11989 /* Perform a parallel unsigned saturating subtraction. */
11990 x = gen_reg_rtx (mode);
11991 emit_insn (gen_rtx_SET (VOIDmode, x,
11992 gen_rtx_US_MINUS (mode, cop0, cop1)));
11994 code = EQ;
11995 negate = !negate;
11996 break;
11998 default:
11999 gcc_unreachable ();
12002 cop0 = x;
12003 cop1 = CONST0_RTX (mode);
12006 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12007 operands[1+negate], operands[2-negate]);
12009 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12010 operands[2-negate]);
12011 return true;
12014 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12015 true if we should do zero extension, else sign extension. HIGH_P is
12016 true if we want the N/2 high elements, else the low elements. */
12018 void
12019 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12021 enum machine_mode imode = GET_MODE (operands[1]);
12022 rtx (*unpack)(rtx, rtx, rtx);
12023 rtx se, dest;
12025 switch (imode)
12027 case V16QImode:
12028 if (high_p)
12029 unpack = gen_vec_interleave_highv16qi;
12030 else
12031 unpack = gen_vec_interleave_lowv16qi;
12032 break;
12033 case V8HImode:
12034 if (high_p)
12035 unpack = gen_vec_interleave_highv8hi;
12036 else
12037 unpack = gen_vec_interleave_lowv8hi;
12038 break;
12039 case V4SImode:
12040 if (high_p)
12041 unpack = gen_vec_interleave_highv4si;
12042 else
12043 unpack = gen_vec_interleave_lowv4si;
12044 break;
12045 default:
12046 gcc_unreachable ();
12049 dest = gen_lowpart (imode, operands[0]);
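  /* The widening is done by interleaving OPERANDS[1] with an extension
     vector SE: all zeros for zero extension, or a per-element copy of the
     sign bit (computed below as the comparison 0 > x) for sign extension.  */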
12051 if (unsigned_p)
12052 se = force_reg (imode, CONST0_RTX (imode));
12053 else
12054 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12055 operands[1], pc_rtx, pc_rtx);
12057 emit_insn (unpack (dest, operands[1], se));
12060 /* Expand conditional increment or decrement using adc/sbb instructions.
12061 The default case using setcc followed by the conditional move can be
12062 done by generic code. */
12063 int
12064 ix86_expand_int_addcc (rtx operands[])
12066 enum rtx_code code = GET_CODE (operands[1]);
12067 rtx compare_op;
12068 rtx val = const0_rtx;
12069 bool fpcmp = false;
12070 enum machine_mode mode = GET_MODE (operands[0]);
12072 if (operands[3] != const1_rtx
12073 && operands[3] != constm1_rtx)
12074 return 0;
12075 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12076 ix86_compare_op1, &compare_op))
12077 return 0;
12078 code = GET_CODE (compare_op);
12080 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12081 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12083 fpcmp = true;
12084 code = ix86_fp_compare_code_to_integer (code);
12087 if (code != LTU)
12089 val = constm1_rtx;
12090 if (fpcmp)
12091 PUT_CODE (compare_op,
12092 reverse_condition_maybe_unordered
12093 (GET_CODE (compare_op)));
12094 else
12095 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12097 PUT_MODE (compare_op, mode);
12099 /* Construct either adc or sbb insn. */
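  /* VAL is 0 or -1 at this point and the carry flag encodes the condition,
     so op2 - VAL - CF (sbb) or op2 + VAL + CF (adc) gives the required
     conditional increment or decrement by one.  */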
12100 if ((code == LTU) == (operands[3] == constm1_rtx))
12102 switch (GET_MODE (operands[0]))
12104 case QImode:
12105 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12106 break;
12107 case HImode:
12108 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12109 break;
12110 case SImode:
12111 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12112 break;
12113 case DImode:
12114 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12115 break;
12116 default:
12117 gcc_unreachable ();
12120 else
12122 switch (GET_MODE (operands[0]))
12124 case QImode:
12125 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12126 break;
12127 case HImode:
12128 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12129 break;
12130 case SImode:
12131 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12132 break;
12133 case DImode:
12134 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12135 break;
12136 default:
12137 gcc_unreachable ();
12140 return 1; /* DONE */
12144 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12145 works for floating-point operands and non-offsettable memories.
12146 For pushes, it returns just stack offsets; the values will be saved
12147 in the right order. At most three parts are generated. */
12149 static int
12150 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12152 int size;
12154 if (!TARGET_64BIT)
12155 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12156 else
12157 size = (GET_MODE_SIZE (mode) + 4) / 8;
12159 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12160 gcc_assert (size >= 2 && size <= 3);
12162 /* Optimize constant pool reference to immediates. This is used by fp
12163 moves, that force all constants to memory to allow combining. */
12164 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12166 rtx tmp = maybe_get_pool_constant (operand);
12167 if (tmp)
12168 operand = tmp;
12171 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12173 /* The only non-offsettable memories we handle are pushes. */
12174 int ok = push_operand (operand, VOIDmode);
12176 gcc_assert (ok);
12178 operand = copy_rtx (operand);
12179 PUT_MODE (operand, Pmode);
12180 parts[0] = parts[1] = parts[2] = operand;
12181 return size;
12184 if (GET_CODE (operand) == CONST_VECTOR)
12186 enum machine_mode imode = int_mode_for_mode (mode);
12187 /* Caution: if we looked through a constant pool memory above,
12188 the operand may actually have a different mode now. That's
12189 ok, since we want to pun this all the way back to an integer. */
12190 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12191 gcc_assert (operand != NULL);
12192 mode = imode;
12195 if (!TARGET_64BIT)
12197 if (mode == DImode)
12198 split_di (&operand, 1, &parts[0], &parts[1]);
12199 else
12201 if (REG_P (operand))
12203 gcc_assert (reload_completed);
12204 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12205 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12206 if (size == 3)
12207 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12209 else if (offsettable_memref_p (operand))
12211 operand = adjust_address (operand, SImode, 0);
12212 parts[0] = operand;
12213 parts[1] = adjust_address (operand, SImode, 4);
12214 if (size == 3)
12215 parts[2] = adjust_address (operand, SImode, 8);
12217 else if (GET_CODE (operand) == CONST_DOUBLE)
12219 REAL_VALUE_TYPE r;
12220 long l[4];
12222 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12223 switch (mode)
12225 case XFmode:
12226 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12227 parts[2] = gen_int_mode (l[2], SImode);
12228 break;
12229 case DFmode:
12230 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12231 break;
12232 default:
12233 gcc_unreachable ();
12235 parts[1] = gen_int_mode (l[1], SImode);
12236 parts[0] = gen_int_mode (l[0], SImode);
12238 else
12239 gcc_unreachable ();
12242 else
12244 if (mode == TImode)
12245 split_ti (&operand, 1, &parts[0], &parts[1]);
12246 if (mode == XFmode || mode == TFmode)
12248 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12249 if (REG_P (operand))
12251 gcc_assert (reload_completed);
12252 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12253 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12255 else if (offsettable_memref_p (operand))
12257 operand = adjust_address (operand, DImode, 0);
12258 parts[0] = operand;
12259 parts[1] = adjust_address (operand, upper_mode, 8);
12261 else if (GET_CODE (operand) == CONST_DOUBLE)
12263 REAL_VALUE_TYPE r;
12264 long l[4];
12266 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12267 real_to_target (l, &r, mode);
12269 /* Do not use shift by 32 to avoid warning on 32bit systems. */
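  /* Reassemble the 64-bit low part from the two 32-bit halves l[0] (low)
     and l[1] (high); the shift by 31 followed by a shift by 1 is
     equivalent to a single shift by 32.  */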
12270 if (HOST_BITS_PER_WIDE_INT >= 64)
12271 parts[0]
12272 = gen_int_mode
12273 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12274 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12275 DImode);
12276 else
12277 parts[0] = immed_double_const (l[0], l[1], DImode);
12279 if (upper_mode == SImode)
12280 parts[1] = gen_int_mode (l[2], SImode);
12281 else if (HOST_BITS_PER_WIDE_INT >= 64)
12282 parts[1]
12283 = gen_int_mode
12284 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12285 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12286 DImode);
12287 else
12288 parts[1] = immed_double_const (l[2], l[3], DImode);
12290 else
12291 gcc_unreachable ();
12295 return size;
12298 /* Emit insns to perform a move or push of DI, DF, and XF values.
12299 Operands 2-4 are filled with the destination parts in the correct
12300 order, and operands 5-7 with the corresponding source parts. */
12303 void
12304 ix86_split_long_move (rtx operands[])
12306 rtx part[2][3];
12307 int nparts;
12308 int push = 0;
12309 int collisions = 0;
12310 enum machine_mode mode = GET_MODE (operands[0]);
12312 /* The DFmode expanders may ask us to move a double.
12313 For a 64-bit target this is a single move; by handling it
12314 here we simplify the i386.md splitters. */
12315 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12317 /* Optimize constant pool reference to immediates. This is used by
12318 fp moves, that force all constants to memory to allow combining. */
12320 if (GET_CODE (operands[1]) == MEM
12321 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12322 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12323 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12324 if (push_operand (operands[0], VOIDmode))
12326 operands[0] = copy_rtx (operands[0]);
12327 PUT_MODE (operands[0], Pmode);
12329 else
12330 operands[0] = gen_lowpart (DImode, operands[0]);
12331 operands[1] = gen_lowpart (DImode, operands[1]);
12332 emit_move_insn (operands[0], operands[1]);
12333 return;
12336 /* The only non-offsettable memory we handle is push. */
12337 if (push_operand (operands[0], VOIDmode))
12338 push = 1;
12339 else
12340 gcc_assert (GET_CODE (operands[0]) != MEM
12341 || offsettable_memref_p (operands[0]));
12343 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12344 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12346 /* When emitting push, take care for source operands on the stack. */
12347 if (push && GET_CODE (operands[1]) == MEM
12348 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12350 if (nparts == 3)
12351 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12352 XEXP (part[1][2], 0));
12353 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12354 XEXP (part[1][1], 0));
12357 /* We need to do copy in the right order in case an address register
12358 of the source overlaps the destination. */
12359 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12361 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12362 collisions++;
12363 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12364 collisions++;
12365 if (nparts == 3
12366 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12367 collisions++;
12369 /* Collision in the middle part can be handled by reordering. */
12370 if (collisions == 1 && nparts == 3
12371 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12373 rtx tmp;
12374 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12375 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12378 /* If there are more collisions, we can't handle it by reordering.
12379 Do an lea to the last part and use only one colliding move. */
12380 else if (collisions > 1)
12382 rtx base;
12384 collisions = 1;
12386 base = part[0][nparts - 1];
12388 /* Handle the case when the last part isn't valid for lea.
12389 Happens in 64-bit mode storing the 12-byte XFmode. */
12390 if (GET_MODE (base) != Pmode)
12391 base = gen_rtx_REG (Pmode, REGNO (base));
12393 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12394 part[1][0] = replace_equiv_address (part[1][0], base);
12395 part[1][1] = replace_equiv_address (part[1][1],
12396 plus_constant (base, UNITS_PER_WORD));
12397 if (nparts == 3)
12398 part[1][2] = replace_equiv_address (part[1][2],
12399 plus_constant (base, 8));
12403 if (push)
12405 if (!TARGET_64BIT)
12407 if (nparts == 3)
12409 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12410 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12411 emit_move_insn (part[0][2], part[1][2]);
12414 else
12416 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12417 a register, that is OK - we will just use the larger counterpart. We also
12418 retype the memory - this comes from an attempt to avoid a REX prefix on
12419 the move of the second half of a TFmode value. */
12420 if (GET_MODE (part[1][1]) == SImode)
12422 switch (GET_CODE (part[1][1]))
12424 case MEM:
12425 part[1][1] = adjust_address (part[1][1], DImode, 0);
12426 break;
12428 case REG:
12429 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12430 break;
12432 default:
12433 gcc_unreachable ();
12436 if (GET_MODE (part[1][0]) == SImode)
12437 part[1][0] = part[1][1];
12440 emit_move_insn (part[0][1], part[1][1]);
12441 emit_move_insn (part[0][0], part[1][0]);
12442 return;
12445 /* Choose correct order to not overwrite the source before it is copied. */
12446 if ((REG_P (part[0][0])
12447 && REG_P (part[1][1])
12448 && (REGNO (part[0][0]) == REGNO (part[1][1])
12449 || (nparts == 3
12450 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12451 || (collisions > 0
12452 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12454 if (nparts == 3)
12456 operands[2] = part[0][2];
12457 operands[3] = part[0][1];
12458 operands[4] = part[0][0];
12459 operands[5] = part[1][2];
12460 operands[6] = part[1][1];
12461 operands[7] = part[1][0];
12463 else
12465 operands[2] = part[0][1];
12466 operands[3] = part[0][0];
12467 operands[5] = part[1][1];
12468 operands[6] = part[1][0];
12471 else
12473 if (nparts == 3)
12475 operands[2] = part[0][0];
12476 operands[3] = part[0][1];
12477 operands[4] = part[0][2];
12478 operands[5] = part[1][0];
12479 operands[6] = part[1][1];
12480 operands[7] = part[1][2];
12482 else
12484 operands[2] = part[0][0];
12485 operands[3] = part[0][1];
12486 operands[5] = part[1][0];
12487 operands[6] = part[1][1];
12491 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12492 if (optimize_size)
12494 if (GET_CODE (operands[5]) == CONST_INT
12495 && operands[5] != const0_rtx
12496 && REG_P (operands[2]))
12498 if (GET_CODE (operands[6]) == CONST_INT
12499 && INTVAL (operands[6]) == INTVAL (operands[5]))
12500 operands[6] = operands[2];
12502 if (nparts == 3
12503 && GET_CODE (operands[7]) == CONST_INT
12504 && INTVAL (operands[7]) == INTVAL (operands[5]))
12505 operands[7] = operands[2];
12508 if (nparts == 3
12509 && GET_CODE (operands[6]) == CONST_INT
12510 && operands[6] != const0_rtx
12511 && REG_P (operands[3])
12512 && GET_CODE (operands[7]) == CONST_INT
12513 && INTVAL (operands[7]) == INTVAL (operands[6]))
12514 operands[7] = operands[3];
12517 emit_move_insn (operands[2], operands[5]);
12518 emit_move_insn (operands[3], operands[6]);
12519 if (nparts == 3)
12520 emit_move_insn (operands[4], operands[7]);
12522 return;
12525 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12526 left shift by a constant, either using a single shift or
12527 a sequence of add instructions. */
12529 static void
12530 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12532 if (count == 1)
12534 emit_insn ((mode == DImode
12535 ? gen_addsi3
12536 : gen_adddi3) (operand, operand, operand));
12538 else if (!optimize_size
12539 && count * ix86_cost->add <= ix86_cost->shift_const)
12541 int i;
12542 for (i=0; i<count; i++)
12544 emit_insn ((mode == DImode
12545 ? gen_addsi3
12546 : gen_adddi3) (operand, operand, operand));
12549 else
12550 emit_insn ((mode == DImode
12551 ? gen_ashlsi3
12552 : gen_ashldi3) (operand, operand, GEN_INT (count)));
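/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit targets) into operations on the low and high word-sized halves.
   OPERANDS[2] is the shift count; for a variable count, SCRATCH together
   with CMOV is used to handle counts of a full word or more without a
   branch.  */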
12555 void
12556 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12558 rtx low[2], high[2];
12559 int count;
12560 const int single_width = mode == DImode ? 32 : 64;
12562 if (GET_CODE (operands[2]) == CONST_INT)
12564 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12565 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12567 if (count >= single_width)
12569 emit_move_insn (high[0], low[1]);
12570 emit_move_insn (low[0], const0_rtx);
12572 if (count > single_width)
12573 ix86_expand_ashl_const (high[0], count - single_width, mode);
12575 else
12577 if (!rtx_equal_p (operands[0], operands[1]))
12578 emit_move_insn (operands[0], operands[1]);
12579 emit_insn ((mode == DImode
12580 ? gen_x86_shld_1
12581 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12582 ix86_expand_ashl_const (low[0], count, mode);
12584 return;
12587 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12589 if (operands[1] == const1_rtx)
12591 /* Assuming we've chosen QImode-capable registers, then 1 << N
12592 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12593 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12595 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12597 ix86_expand_clear (low[0]);
12598 ix86_expand_clear (high[0]);
12599 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12601 d = gen_lowpart (QImode, low[0]);
12602 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12603 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12604 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12606 d = gen_lowpart (QImode, high[0]);
12607 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12608 s = gen_rtx_NE (QImode, flags, const0_rtx);
12609 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12612 /* Otherwise, we can get the same results by manually performing
12613 a bit extract operation on bit 5/6, and then performing the two
12614 shifts. The two methods of getting 0/1 into low/high are exactly
12615 the same size. Avoiding the shift in the bit extract case helps
12616 pentium4 a bit; no one else seems to care much either way. */
12617 else
12619 rtx x;
12621 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12622 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12623 else
12624 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12625 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12627 emit_insn ((mode == DImode
12628 ? gen_lshrsi3
12629 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12630 emit_insn ((mode == DImode
12631 ? gen_andsi3
12632 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12633 emit_move_insn (low[0], high[0]);
12634 emit_insn ((mode == DImode
12635 ? gen_xorsi3
12636 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12639 emit_insn ((mode == DImode
12640 ? gen_ashlsi3
12641 : gen_ashldi3) (low[0], low[0], operands[2]));
12642 emit_insn ((mode == DImode
12643 ? gen_ashlsi3
12644 : gen_ashldi3) (high[0], high[0], operands[2]));
12645 return;
12648 if (operands[1] == constm1_rtx)
12650 /* For -1 << N, we can avoid the shld instruction, because we
12651 know that we're shifting 0...31/63 ones into a -1. */
12652 emit_move_insn (low[0], constm1_rtx);
12653 if (optimize_size)
12654 emit_move_insn (high[0], low[0]);
12655 else
12656 emit_move_insn (high[0], constm1_rtx);
12658 else
12660 if (!rtx_equal_p (operands[0], operands[1]))
12661 emit_move_insn (operands[0], operands[1]);
12663 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12664 emit_insn ((mode == DImode
12665 ? gen_x86_shld_1
12666 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12669 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12671 if (TARGET_CMOVE && scratch)
12673 ix86_expand_clear (scratch);
12674 emit_insn ((mode == DImode
12675 ? gen_x86_shift_adj_1
12676 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12678 else
12679 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
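/* Split a double-word arithmetic right shift into operations on the low
   and high word-sized halves.  Constant counts are handled directly; for
   a variable count, SCRATCH (with CMOV) holds the sign word used to fix
   up counts of a full word or more without a branch.  */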
12682 void
12683 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12685 rtx low[2], high[2];
12686 int count;
12687 const int single_width = mode == DImode ? 32 : 64;
12689 if (GET_CODE (operands[2]) == CONST_INT)
12691 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12692 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12694 if (count == single_width * 2 - 1)
12696 emit_move_insn (high[0], high[1]);
12697 emit_insn ((mode == DImode
12698 ? gen_ashrsi3
12699 : gen_ashrdi3) (high[0], high[0],
12700 GEN_INT (single_width - 1)));
12701 emit_move_insn (low[0], high[0]);
12704 else if (count >= single_width)
12706 emit_move_insn (low[0], high[1]);
12707 emit_move_insn (high[0], low[0]);
12708 emit_insn ((mode == DImode
12709 ? gen_ashrsi3
12710 : gen_ashrdi3) (high[0], high[0],
12711 GEN_INT (single_width - 1)));
12712 if (count > single_width)
12713 emit_insn ((mode == DImode
12714 ? gen_ashrsi3
12715 : gen_ashrdi3) (low[0], low[0],
12716 GEN_INT (count - single_width)));
12718 else
12720 if (!rtx_equal_p (operands[0], operands[1]))
12721 emit_move_insn (operands[0], operands[1]);
12722 emit_insn ((mode == DImode
12723 ? gen_x86_shrd_1
12724 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12725 emit_insn ((mode == DImode
12726 ? gen_ashrsi3
12727 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12730 else
12732 if (!rtx_equal_p (operands[0], operands[1]))
12733 emit_move_insn (operands[0], operands[1]);
12735 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12737 emit_insn ((mode == DImode
12738 ? gen_x86_shrd_1
12739 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12740 emit_insn ((mode == DImode
12741 ? gen_ashrsi3
12742 : gen_ashrdi3) (high[0], high[0], operands[2]));
12744 if (TARGET_CMOVE && scratch)
12746 emit_move_insn (scratch, high[0]);
12747 emit_insn ((mode == DImode
12748 ? gen_ashrsi3
12749 : gen_ashrdi3) (scratch, scratch,
12750 GEN_INT (single_width - 1)));
12751 emit_insn ((mode == DImode
12752 ? gen_x86_shift_adj_1
12753 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12754 scratch));
12756 else
12757 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
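/* Split a double-word logical right shift into operations on the low and
   high word-sized halves, clearing the high half once the count reaches a
   full word; SCRATCH with CMOV again avoids a branch for variable
   counts.  */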
12761 void
12762 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12764 rtx low[2], high[2];
12765 int count;
12766 const int single_width = mode == DImode ? 32 : 64;
12768 if (GET_CODE (operands[2]) == CONST_INT)
12770 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12771 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12773 if (count >= single_width)
12775 emit_move_insn (low[0], high[1]);
12776 ix86_expand_clear (high[0]);
12778 if (count > single_width)
12779 emit_insn ((mode == DImode
12780 ? gen_lshrsi3
12781 : gen_lshrdi3) (low[0], low[0],
12782 GEN_INT (count - single_width)));
12784 else
12786 if (!rtx_equal_p (operands[0], operands[1]))
12787 emit_move_insn (operands[0], operands[1]);
12788 emit_insn ((mode == DImode
12789 ? gen_x86_shrd_1
12790 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12791 emit_insn ((mode == DImode
12792 ? gen_lshrsi3
12793 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12796 else
12798 if (!rtx_equal_p (operands[0], operands[1]))
12799 emit_move_insn (operands[0], operands[1]);
12801 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12803 emit_insn ((mode == DImode
12804 ? gen_x86_shrd_1
12805 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12806 emit_insn ((mode == DImode
12807 ? gen_lshrsi3
12808 : gen_lshrdi3) (high[0], high[0], operands[2]));
12810 /* Heh. By reversing the arguments, we can reuse this pattern. */
12811 if (TARGET_CMOVE && scratch)
12813 ix86_expand_clear (scratch);
12814 emit_insn ((mode == DImode
12815 ? gen_x86_shift_adj_1
12816 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12817 scratch));
12819 else
12820 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12824 /* Predict the just emitted jump instruction to be taken with probability PROB (given on the REG_BR_PROB_BASE scale). */
12825 static void
12826 predict_jump (int prob)
12828 rtx insn = get_last_insn ();
12829 gcc_assert (GET_CODE (insn) == JUMP_INSN);
12830 REG_NOTES (insn)
12831 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12832 GEN_INT (prob),
12833 REG_NOTES (insn));
12836 /* Helper function for the string operations below. Test whether the VALUE
12837 bit of VARIABLE is clear; if so, jump to the returned label. */
12838 static rtx
12839 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
12841 rtx label = gen_label_rtx ();
12842 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12843 if (GET_MODE (variable) == DImode)
12844 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12845 else
12846 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12847 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12848 1, label);
12849 if (epilogue)
12850 predict_jump (REG_BR_PROB_BASE * 50 / 100);
12851 else
12852 predict_jump (REG_BR_PROB_BASE * 90 / 100);
12853 return label;
12856 /* Decrease COUNTREG by VALUE. */
12857 static void
12858 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12860 if (GET_MODE (countreg) == DImode)
12861 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12862 else
12863 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12866 /* Zero extend possibly SImode EXP to Pmode register. */
12867 rtx
12868 ix86_zero_extend_to_Pmode (rtx exp)
12870 rtx r;
12871 if (GET_MODE (exp) == VOIDmode)
12872 return force_reg (Pmode, exp);
12873 if (GET_MODE (exp) == Pmode)
12874 return copy_to_mode_reg (Pmode, exp);
12875 r = gen_reg_rtx (Pmode);
12876 emit_insn (gen_zero_extendsidi2 (r, exp));
12877 return r;
12880 /* Divide COUNTREG by SCALE; SCALE is expected to be a power of two (a mode size). */
12881 static rtx
12882 scale_counter (rtx countreg, int scale)
12884 rtx sc;
12885 rtx piece_size_mask;
12887 if (scale == 1)
12888 return countreg;
12889 if (GET_CODE (countreg) == CONST_INT)
12890 return GEN_INT (INTVAL (countreg) / scale);
12891 gcc_assert (REG_P (countreg));
12893 piece_size_mask = GEN_INT (scale - 1);
12894 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
12895 GEN_INT (exact_log2 (scale)),
12896 NULL, 1, OPTAB_DIRECT);
12897 return sc;
12900 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
12901 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
12902 the overall size is COUNT bytes. When SRCPTR is NULL, output the
12903 equivalent loop that sets the memory to VALUE (supposed to be in MODE).
12905 The size is rounded down to a whole number of chunks moved at once.
12906 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
12909 static void
12910 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
12911 rtx destptr, rtx srcptr, rtx value,
12912 rtx count, enum machine_mode mode, int unroll,
12913 int expected_size)
12915 rtx out_label, top_label, iter, tmp;
12916 enum machine_mode iter_mode;
12917 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
12918 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
12919 rtx size;
12920 rtx x_addr;
12921 rtx y_addr;
12922 int i;
12924 iter_mode = GET_MODE (count);
12925 if (iter_mode == VOIDmode)
12926 iter_mode = word_mode;
12928 top_label = gen_label_rtx ();
12929 out_label = gen_label_rtx ();
12930 iter = gen_reg_rtx (iter_mode);
12932 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
12933 NULL, 1, OPTAB_DIRECT);
12934 /* Those two should combine. */
12935 if (piece_size == const1_rtx)
12937 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
12938 true, out_label);
12939 predict_jump (REG_BR_PROB_BASE * 10 / 100);
12941 emit_move_insn (iter, const0_rtx);
12943 emit_label (top_label);
12945 tmp = convert_modes (Pmode, iter_mode, iter, true);
12946 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
12947 destmem = change_address (destmem, mode, x_addr);
12949 if (srcmem)
12951 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
12952 srcmem = change_address (srcmem, mode, y_addr);
12954 /* When unrolling for chips that reorder memory reads and writes,
12955 we can save registers by using a single temporary.
12956 Also, using 4 temporaries is overkill in 32-bit mode. */
12957 if (!TARGET_64BIT && 0)
12959 for (i = 0; i < unroll; i++)
12961 if (i)
12963 destmem =
12964 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12965 srcmem =
12966 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12968 emit_move_insn (destmem, srcmem);
12971 else
12973 rtx tmpreg[4];
12974 gcc_assert (unroll <= 4);
12975 for (i = 0; i < unroll; i++)
12977 tmpreg[i] = gen_reg_rtx (mode);
12978 if (i)
12980 srcmem =
12981 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12983 emit_move_insn (tmpreg[i], srcmem);
12985 for (i = 0; i < unroll; i++)
12987 if (i)
12989 destmem =
12990 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12992 emit_move_insn (destmem, tmpreg[i]);
12996 else
12997 for (i = 0; i < unroll; i++)
12999 if (i)
13000 destmem =
13001 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13002 emit_move_insn (destmem, value);
13005 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13006 true, OPTAB_LIB_WIDEN);
13007 if (tmp != iter)
13008 emit_move_insn (iter, tmp);
13010 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13011 true, top_label);
13012 if (expected_size != -1)
13014 expected_size /= GET_MODE_SIZE (mode) * unroll;
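  /* EXPECTED_SIZE now holds the expected number of loop iterations;
     predict the backward branch taken with probability roughly
     1 - 1/EXPECTED_SIZE.  */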
13015 if (expected_size == 0)
13016 predict_jump (0);
13017 else if (expected_size > REG_BR_PROB_BASE)
13018 predict_jump (REG_BR_PROB_BASE - 1);
13019 else
13020 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13022 else
13023 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13024 iter = ix86_zero_extend_to_Pmode (iter);
13025 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13026 true, OPTAB_LIB_WIDEN);
13027 if (tmp != destptr)
13028 emit_move_insn (destptr, tmp);
13029 if (srcptr)
13031 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13032 true, OPTAB_LIB_WIDEN);
13033 if (tmp != srcptr)
13034 emit_move_insn (srcptr, tmp);
13036 emit_label (out_label);
13039 /* Output "rep; mov" instruction.
13040 Arguments have same meaning as for previous function */
13041 static void
13042 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13043 rtx destptr, rtx srcptr,
13044 rtx count,
13045 enum machine_mode mode)
13047 rtx destexp;
13048 rtx srcexp;
13049 rtx countreg;
13051 /* If the count is known to be a multiple of 4, it is shorter to use 4-byte rep movs. */
13052 if (mode == QImode && GET_CODE (count) == CONST_INT
13053 && !(INTVAL (count) & 3))
13054 mode = SImode;
13056 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13057 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13058 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13059 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13060 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13061 if (mode != QImode)
13063 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13064 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13065 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13066 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13067 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13068 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13070 else
13072 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13073 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13075 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13076 destexp, srcexp));
13079 /* Output "rep; stos" instruction.
13080 Arguments have same meaning as for previous function */
13081 static void
13082 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13083 rtx count,
13084 enum machine_mode mode)
13086 rtx destexp;
13087 rtx countreg;
13089 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13090 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13091 value = force_reg (mode, gen_lowpart (mode, value));
13092 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13093 if (mode != QImode)
13095 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13096 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13097 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13099 else
13100 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13101 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
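/* Emit a single MODE-sized element copy from SRCMEM at OFFSET to DESTMEM
   at OFFSET, using the strmov pattern.  */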
13104 static void
13105 emit_strmov (rtx destmem, rtx srcmem,
13106 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13108 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13109 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13110 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13113 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13114 static void
13115 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13116 rtx destptr, rtx srcptr, rtx count, int max_size)
13118 rtx src, dest;
13119 if (GET_CODE (count) == CONST_INT)
13121 HOST_WIDE_INT countval = INTVAL (count);
13122 int offset = 0;
13124 if ((countval & 0x10) && max_size > 16)
13126 if (TARGET_64BIT)
13128 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13129 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13131 else
13132 gcc_unreachable ();
13133 offset += 16;
13135 if ((countval & 0x08) && max_size > 8)
13137 if (TARGET_64BIT)
13138 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13139 else
13141 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13142 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 4);
13144 offset += 8;
13146 if ((countval & 0x04) && max_size > 4)
13148 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13149 offset += 4;
13151 if ((countval & 0x02) && max_size > 2)
13153 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13154 offset += 2;
13156 if ((countval & 0x01) && max_size > 1)
13158 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13159 offset += 1;
13161 return;
13163 if (max_size > 8)
13165 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13166 count, 1, OPTAB_DIRECT);
13167 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13168 count, QImode, 1, 4);
13169 return;
13172 /* When there are stringops, we can cheaply increase dest and src pointers.
13173 Otherwise we save code size by maintaining offset (zero is readily
13174 available from the preceding rep operation) and using x86 addressing modes.
13175 */
13176 if (TARGET_SINGLE_STRINGOP)
13178 if (max_size > 4)
13180 rtx label = ix86_expand_aligntest (count, 4, true);
13181 src = change_address (srcmem, SImode, srcptr);
13182 dest = change_address (destmem, SImode, destptr);
13183 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13184 emit_label (label);
13185 LABEL_NUSES (label) = 1;
13187 if (max_size > 2)
13189 rtx label = ix86_expand_aligntest (count, 2, true);
13190 src = change_address (srcmem, HImode, srcptr);
13191 dest = change_address (destmem, HImode, destptr);
13192 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13193 emit_label (label);
13194 LABEL_NUSES (label) = 1;
13196 if (max_size > 1)
13198 rtx label = ix86_expand_aligntest (count, 1, true);
13199 src = change_address (srcmem, QImode, srcptr);
13200 dest = change_address (destmem, QImode, destptr);
13201 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13202 emit_label (label);
13203 LABEL_NUSES (label) = 1;
13206 else
13208 rtx offset = force_reg (Pmode, const0_rtx);
13209 rtx tmp;
13211 if (max_size > 4)
13213 rtx label = ix86_expand_aligntest (count, 4, true);
13214 src = change_address (srcmem, SImode, srcptr);
13215 dest = change_address (destmem, SImode, destptr);
13216 emit_move_insn (dest, src);
13217 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13218 true, OPTAB_LIB_WIDEN);
13219 if (tmp != offset)
13220 emit_move_insn (offset, tmp);
13221 emit_label (label);
13222 LABEL_NUSES (label) = 1;
13224 if (max_size > 2)
13226 rtx label = ix86_expand_aligntest (count, 2, true);
13227 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13228 src = change_address (srcmem, HImode, tmp);
13229 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13230 dest = change_address (destmem, HImode, tmp);
13231 emit_move_insn (dest, src);
13232 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13233 true, OPTAB_LIB_WIDEN);
13234 if (tmp != offset)
13235 emit_move_insn (offset, tmp);
13236 emit_label (label);
13237 LABEL_NUSES (label) = 1;
13239 if (max_size > 1)
13241 rtx label = ix86_expand_aligntest (count, 1, true);
13242 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13243 src = change_address (srcmem, QImode, tmp);
13244 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13245 dest = change_address (destmem, QImode, tmp);
13246 emit_move_insn (dest, src);
13247 emit_label (label);
13248 LABEL_NUSES (label) = 1;
13253 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13254 static void
13255 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13256 rtx count, int max_size)
13258 count =
13259 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13260 count, 1, OPTAB_DIRECT);
13261 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13262 gen_lowpart (QImode, value), count, QImode,
13263 1, max_size / 2);
13266 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13267 static void
13268 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13270 rtx dest;
13271 if (GET_CODE (count) == CONST_INT)
13273 HOST_WIDE_INT countval = INTVAL (count);
13274 int offset = 0;
13276 if ((countval & 0x10) && max_size > 16)
13278 if (TARGET_64BIT)
13280 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13281 emit_insn (gen_strset (destptr, dest, value));
13282 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13283 emit_insn (gen_strset (destptr, dest, value));
13285 else
13286 gcc_unreachable ();
13287 offset += 16;
13289 if ((countval & 0x08) && max_size > 8)
13291 if (TARGET_64BIT)
13293 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13294 emit_insn (gen_strset (destptr, dest, value));
13296 else
13298 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13299 emit_insn (gen_strset (destptr, dest, value));
13300 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13301 emit_insn (gen_strset (destptr, dest, value));
13303 offset += 8;
13305 if ((countval & 0x04) && max_size > 4)
13307 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13308 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13309 offset += 4;
13311 if ((countval & 0x02) && max_size > 2)
13313 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13314 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13315 offset += 2;
13317 if ((countval & 0x01) && max_size > 1)
13319 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13320 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13321 offset += 1;
13323 return;
13325 if (max_size > 32)
13327 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13328 return;
13330 if (max_size > 16)
13332 rtx label = ix86_expand_aligntest (count, 16, true);
13333 if (TARGET_64BIT)
13335 dest = change_address (destmem, DImode, destptr);
13336 emit_insn (gen_strset (destptr, dest, value));
13337 emit_insn (gen_strset (destptr, dest, value));
13339 else
13341 dest = change_address (destmem, SImode, destptr);
13342 emit_insn (gen_strset (destptr, dest, value));
13343 emit_insn (gen_strset (destptr, dest, value));
13344 emit_insn (gen_strset (destptr, dest, value));
13345 emit_insn (gen_strset (destptr, dest, value));
13347 emit_label (label);
13348 LABEL_NUSES (label) = 1;
13350 if (max_size > 8)
13352 rtx label = ix86_expand_aligntest (count, 8, true);
13353 if (TARGET_64BIT)
13355 dest = change_address (destmem, DImode, destptr);
13356 emit_insn (gen_strset (destptr, dest, value));
13358 else
13360 dest = change_address (destmem, SImode, destptr);
13361 emit_insn (gen_strset (destptr, dest, value));
13362 emit_insn (gen_strset (destptr, dest, value));
13364 emit_label (label);
13365 LABEL_NUSES (label) = 1;
13367 if (max_size > 4)
13369 rtx label = ix86_expand_aligntest (count, 4, true);
13370 dest = change_address (destmem, SImode, destptr);
13371 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13372 emit_label (label);
13373 LABEL_NUSES (label) = 1;
13375 if (max_size > 2)
13377 rtx label = ix86_expand_aligntest (count, 2, true);
13378 dest = change_address (destmem, HImode, destptr);
13379 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13380 emit_label (label);
13381 LABEL_NUSES (label) = 1;
13383 if (max_size > 1)
13385 rtx label = ix86_expand_aligntest (count, 1, true);
13386 dest = change_address (destmem, QImode, destptr);
13387 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13388 emit_label (label);
13389 LABEL_NUSES (label) = 1;
13393 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
13394 to DESIRED_ALIGNMENT. */
13395 static void
13396 expand_movmem_prologue (rtx destmem, rtx srcmem,
13397 rtx destptr, rtx srcptr, rtx count,
13398 int align, int desired_alignment)
13400 if (align <= 1 && desired_alignment > 1)
13402 rtx label = ix86_expand_aligntest (destptr, 1, false);
13403 srcmem = change_address (srcmem, QImode, srcptr);
13404 destmem = change_address (destmem, QImode, destptr);
13405 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13406 ix86_adjust_counter (count, 1);
13407 emit_label (label);
13408 LABEL_NUSES (label) = 1;
13410 if (align <= 2 && desired_alignment > 2)
13412 rtx label = ix86_expand_aligntest (destptr, 2, false);
13413 srcmem = change_address (srcmem, HImode, srcptr);
13414 destmem = change_address (destmem, HImode, destptr);
13415 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13416 ix86_adjust_counter (count, 2);
13417 emit_label (label);
13418 LABEL_NUSES (label) = 1;
13420 if (align <= 4 && desired_alignment > 4)
13422 rtx label = ix86_expand_aligntest (destptr, 4, false);
13423 srcmem = change_address (srcmem, SImode, srcptr);
13424 destmem = change_address (destmem, SImode, destptr);
13425 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13426 ix86_adjust_counter (count, 4);
13427 emit_label (label);
13428 LABEL_NUSES (label) = 1;
13430 gcc_assert (desired_alignment <= 8);
13433 /* Set enough of DEST to align DEST, known to be aligned by ALIGN,
13434 to DESIRED_ALIGNMENT. */
13435 static void
13436 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13437 int align, int desired_alignment)
13439 if (align <= 1 && desired_alignment > 1)
13441 rtx label = ix86_expand_aligntest (destptr, 1, false);
13442 destmem = change_address (destmem, QImode, destptr);
13443 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13444 ix86_adjust_counter (count, 1);
13445 emit_label (label);
13446 LABEL_NUSES (label) = 1;
13448 if (align <= 2 && desired_alignment > 2)
13450 rtx label = ix86_expand_aligntest (destptr, 2, false);
13451 destmem = change_address (destmem, HImode, destptr);
13452 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13453 ix86_adjust_counter (count, 2);
13454 emit_label (label);
13455 LABEL_NUSES (label) = 1;
13457 if (align <= 4 && desired_alignment > 4)
13459 rtx label = ix86_expand_aligntest (destptr, 4, false);
13460 destmem = change_address (destmem, SImode, destptr);
13461 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13462 ix86_adjust_counter (count, 4);
13463 emit_label (label);
13464 LABEL_NUSES (label) = 1;
13466 gcc_assert (desired_alignment <= 8);
13469 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13470 static enum stringop_alg
13471 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13472 int *dynamic_check)
13474 const struct stringop_algs * algs;
13476 *dynamic_check = -1;
13477 if (memset)
13478 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13479 else
13480 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13481 if (stringop_alg != no_stringop)
13482 return stringop_alg;
13483 /* rep; movq or rep; movl is the smallest variant. */
13484 else if (optimize_size)
13486 if (!count || (count & 3))
13487 return rep_prefix_1_byte;
13488 else
13489 return rep_prefix_4_byte;
13491 /* Very tiny blocks are best handled via the loop, REP is expensive to set up. */
13493 else if (expected_size != -1 && expected_size < 4)
13494 return loop_1_byte;
13495 else if (expected_size != -1)
13497 unsigned int i;
13498 enum stringop_alg alg = libcall;
13499 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13501 gcc_assert (algs->size[i].max);
13502 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13504 if (algs->size[i].alg != libcall)
13505 alg = algs->size[i].alg;
13506 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13507 last non-libcall inline algorithm. */
13508 if (TARGET_INLINE_ALL_STRINGOPS)
13510 gcc_assert (alg != libcall);
13511 return alg;
13513 else
13514 return algs->size[i].alg;
13517 gcc_unreachable ();
13519 /* When asked to inline the call anyway, try to pick a meaningful choice.
13520 We look for the maximal size of block that is faster to copy by hand and
13521 take blocks of at most that size, guessing that the average size will
13522 be roughly half of the block.
13524 If this turns out to be bad, we might simply specify the preferred
13525 choice in ix86_costs. */
13526 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13527 && algs->unknown_size == libcall)
13529 int max = -1;
13530 enum stringop_alg alg;
13531 int i;
13533 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13534 if (algs->size[i].alg != libcall && algs->size[i].alg)
13535 max = algs->size[i].max;
13536 if (max == -1)
13537 max = 4096;
13538 alg = decide_alg (count, max / 2, memset, dynamic_check);
13539 gcc_assert (*dynamic_check == -1);
13540 gcc_assert (alg != libcall);
13541 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13542 *dynamic_check = max;
13543 return alg;
13545 return algs->unknown_size;
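/* Worked example (the table here is hypothetical; the real ones live in
   the processor cost structures): with size entries
   {{24, loop}, {1024, rep_prefix_4_byte}, {-1, libcall}} and a known
   expected_size of 200, the scan above stops at the second entry and
   returns rep_prefix_4_byte; with unknown size the answer is
   unknown_size, i.e. a library call, unless -minline-all-stringops or
   -minline-stringops-dynamically requests inlining instead.  */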
13548 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13549 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13550 static int
13551 decide_alignment (int align,
13552 enum stringop_alg alg,
13553 int expected_size)
13555 int desired_align = 0;
13556 switch (alg)
13558 case no_stringop:
13559 gcc_unreachable ();
13560 case loop:
13561 case unrolled_loop:
13562 desired_align = GET_MODE_SIZE (Pmode);
13563 break;
13564 case rep_prefix_8_byte:
13565 desired_align = 8;
13566 break;
13567 case rep_prefix_4_byte:
13568 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
13569 copying whole cachelines at once. */
13570 if (TARGET_PENTIUMPRO)
13571 desired_align = 8;
13572 else
13573 desired_align = 4;
13574 break;
13575 case rep_prefix_1_byte:
13576 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
13577 copying whole cachelines at once. */
13578 if (TARGET_PENTIUMPRO)
13579 desired_align = 8;
13580 else
13581 desired_align = 1;
13582 break;
13583 case loop_1_byte:
13584 desired_align = 1;
13585 break;
13586 case libcall:
13587 return 0;
13590 if (optimize_size)
13591 desired_align = 1;
13592 if (desired_align < align)
13593 desired_align = align;
13594 if (expected_size != -1 && expected_size < 4)
13595 desired_align = align;
13596 return desired_align;
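/* For instance, the unrolled loop on x86-64 asks for 8-byte (Pmode sized)
   alignment, while -Os drops the request to 1 byte (though never below the
   already-known alignment) because the alignment prologue only costs size.  */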
13599 /* Expand string move (memcpy) operation. Use i386 string operations when
13600 profitable. expand_setmem contains similar code. */
13601 int
13602 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
13603 rtx expected_align_exp, rtx expected_size_exp)
13605 rtx destreg;
13606 rtx srcreg;
13607 rtx label = NULL;
13608 rtx tmp;
13609 rtx jump_around_label = NULL;
13610 HOST_WIDE_INT align = 1;
13611 unsigned HOST_WIDE_INT count = 0;
13612 HOST_WIDE_INT expected_size = -1;
13613 int size_needed = 0;
13614 int desired_align = 0;
13615 enum stringop_alg alg;
13616 int dynamic_check;
13617 /* Precise placement of cld depends on whether stringops will be emitted in the
13618 prologue, main copying body or epilogue. This variable keeps track of
13619 whether cld was already emitted. */
13620 bool cld_done = false;
13622 if (GET_CODE (align_exp) == CONST_INT)
13623 align = INTVAL (align_exp);
13624 /* i386 can do misaligned access at reasonably increased cost. */
13625 if (GET_CODE (expected_align_exp) == CONST_INT
13626 && INTVAL (expected_align_exp) > align)
13627 align = INTVAL (expected_align_exp);
13628 if (GET_CODE (count_exp) == CONST_INT)
13629 count = expected_size = INTVAL (count_exp);
13630 if (GET_CODE (expected_size_exp) == CONST_INT && count == 0)
13632 expected_size = INTVAL (expected_size_exp);
13635 alg = decide_alg (count, expected_size, false, &dynamic_check);
13636 desired_align = decide_alignment (align, alg, expected_size);
13638 if (!TARGET_ALIGN_STRINGOPS)
13639 align = desired_align;
13641 if (alg == libcall)
13642 return 0;
13643 gcc_assert (alg != no_stringop);
13644 if (!count)
13645 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13646 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13647 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
13648 switch (alg)
13650 case libcall:
13651 case no_stringop:
13652 gcc_unreachable ();
13653 case loop:
13654 size_needed = GET_MODE_SIZE (Pmode);
13655 break;
13656 case unrolled_loop:
13657 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
13658 break;
13659 case rep_prefix_8_byte:
13660 size_needed = 8;
13661 break;
13662 case rep_prefix_4_byte:
13663 size_needed = 4;
13664 break;
13665 case rep_prefix_1_byte:
13666 case loop_1_byte:
13667 size_needed = 1;
13668 break;
13671 /* Alignment code needs count to be in register. */
13672 if (GET_CODE (count_exp) == CONST_INT && desired_align > align)
13674 enum machine_mode mode = SImode;
13675 if (TARGET_64BIT && (count & ~0xffffffff))
13676 mode = DImode;
13677 count_exp = force_reg (mode, count_exp);
13679 gcc_assert (desired_align >= 1 && align >= 1);
13680 /* Ensure that alignment prologue won't copy past end of block. */
13681 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13682 && !count)
13684 int size = MAX (size_needed - 1, desired_align - align);
13685 if (TARGET_SINGLE_STRINGOP)
13686 emit_insn (gen_cld ()), cld_done = true;
13687 label = gen_label_rtx ();
13688 emit_cmp_and_jump_insns (count_exp,
13689 GEN_INT (size),
13690 LEU, 0, GET_MODE (count_exp), 1, label);
13691 if (expected_size == -1 || expected_size < size)
13692 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13693 else
13694 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13696 /* Emit code to decide on runtime whether library call or inline should be
13697 used. */
13698 if (dynamic_check != -1)
13700 rtx hot_label = gen_label_rtx ();
13701 jump_around_label = gen_label_rtx ();
13702 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13703 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13704 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13705 emit_block_move_via_libcall (dst, src, count_exp, false);
13706 emit_jump (jump_around_label);
13707 emit_label (hot_label);
13711 /* Alignment prologue. */
13712 if (desired_align > align)
13714 /* Except for the first move in the epilogue, we no longer know
13715 the constant offset in aliasing info. It does not seem worth
13716 the pain to maintain it for the first move, so throw away
13717 the info early. */
13718 src = change_address (src, BLKmode, srcreg);
13719 dst = change_address (dst, BLKmode, destreg);
13720 if (TARGET_SINGLE_STRINGOP && !cld_done)
13721 emit_insn (gen_cld ()), cld_done = true;
13722 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
13723 desired_align);
13725 if (label && size_needed == 1)
13727 emit_label (label);
13728 LABEL_NUSES (label) = 1;
13729 label = NULL;
13732 /* Main body. */
13733 switch (alg)
13735 case libcall:
13736 case no_stringop:
13737 gcc_unreachable ();
13738 case loop_1_byte:
13739 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13740 count_exp, QImode, 1, expected_size);
13741 break;
13742 case loop:
13743 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13744 count_exp, Pmode, 1, expected_size);
13745 break;
13746 case unrolled_loop:
13747 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
13748 registers for 4 temporaries anyway. */
13749 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13750 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
13751 expected_size);
13752 break;
13753 case rep_prefix_8_byte:
13754 if (!cld_done)
13755 emit_insn (gen_cld ()), cld_done = true;
13756 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13757 DImode);
13758 break;
13759 case rep_prefix_4_byte:
13760 if (!cld_done)
13761 emit_insn (gen_cld ()), cld_done = true;
13762 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13763 SImode);
13764 break;
13765 case rep_prefix_1_byte:
13766 if (!cld_done)
13767 emit_insn (gen_cld ()), cld_done = true;
13768 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13769 QImode);
13770 break;
13772 /* Properly adjust the offsets of src and dest memory for aliasing. */
13773 if (GET_CODE (count_exp) == CONST_INT)
13775 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
13776 (count / size_needed) * size_needed);
13777 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
13778 (count / size_needed) * size_needed);
13780 else
13782 src = change_address (src, BLKmode, srcreg);
13783 dst = change_address (dst, BLKmode, destreg);
13786 /* Epilogue to copy the remaining bytes. */
13787 if (label)
13789 if (size_needed < desired_align - align)
13791 tmp =
13792 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
13793 GEN_INT (size_needed - 1), count_exp, 1,
13794 OPTAB_DIRECT);
13795 size_needed = desired_align - align + 1;
13796 if (tmp != count_exp)
13797 emit_move_insn (count_exp, tmp);
13799 emit_label (label);
13800 LABEL_NUSES (label) = 1;
13802 if (count_exp != const0_rtx && size_needed > 1)
13804 if (TARGET_SINGLE_STRINGOP && !cld_done)
13805 emit_insn (gen_cld ()), cld_done = true;
13806 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
13807 size_needed);
13809 if (jump_around_label)
13810 emit_label (jump_around_label);
13811 return 1;
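/* To summarize the expander: an optional run-time size check either
   diverts to a library call (dynamic_check) or jumps past the alignment
   prologue (and, for larger size_needed, the main body) when the block is
   too small to need them; then the alignment prologue runs, then the main
   copy body chosen by decide_alg, and finally an epilogue handles the
   remaining tail bytes.  */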
13814 /* Helper function for memset. For QImode value 0xXY produce
13815 0xXYXYXYXY of the width specified by MODE. This is essentially
13816 a * 0x01010101, but we can do slightly better than
13817 synth_mult by unwinding the sequence by hand on CPUs with
13818 slow multiply. */
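/* The underlying arithmetic: for a byte x, x * 0x01010101 equals
   (x | x << 8 | x << 16 | x << 24), e.g. 0xAB * 0x01010101 == 0xABABABAB,
   and analogously with 0x0101010101010101 for DImode.  The non-constant
   path below builds the same value with shifts and ORs (or insv-style
   moves) when the multiply would be slower.  */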
13819 static rtx
13820 promote_duplicated_reg (enum machine_mode mode, rtx val)
13822 enum machine_mode valmode = GET_MODE (val);
13823 rtx tmp;
13824 int nops = mode == DImode ? 3 : 2;
13826 gcc_assert (mode == SImode || mode == DImode);
13827 if (val == const0_rtx)
13828 return copy_to_mode_reg (mode, const0_rtx);
13829 if (GET_CODE (val) == CONST_INT)
13831 HOST_WIDE_INT v = INTVAL (val) & 255;
13833 v |= v << 8;
13834 v |= v << 16;
13835 if (mode == DImode)
13836 v |= (v << 16) << 16;
13837 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
13840 if (valmode == VOIDmode)
13841 valmode = QImode;
13842 if (valmode != QImode)
13843 val = gen_lowpart (QImode, val);
13844 if (mode == QImode)
13845 return val;
13846 if (!TARGET_PARTIAL_REG_STALL)
13847 nops--;
13848 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
13849 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
13850 <= (ix86_cost->shift_const + ix86_cost->add) * nops
13851 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
13853 rtx reg = convert_modes (mode, QImode, val, true);
13854 tmp = promote_duplicated_reg (mode, const1_rtx);
13855 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
13856 OPTAB_DIRECT);
13858 else
13860 rtx reg = convert_modes (mode, QImode, val, true);
13862 if (!TARGET_PARTIAL_REG_STALL)
13863 if (mode == SImode)
13864 emit_insn (gen_movsi_insv_1 (reg, reg));
13865 else
13866 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
13867 else
13869 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
13870 NULL, 1, OPTAB_DIRECT);
13871 reg =
13872 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13874 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
13875 NULL, 1, OPTAB_DIRECT);
13876 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13877 if (mode == SImode)
13878 return reg;
13879 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
13880 NULL, 1, OPTAB_DIRECT);
13881 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13882 return reg;
13886 /* Expand string set (memset) operation. Use i386 string operations when
13887 profitable. expand_movmem contains similar code. */
13888 int
13889 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
13890 rtx expected_align_exp, rtx expected_size_exp)
13892 rtx destreg;
13893 rtx label = NULL;
13894 rtx tmp;
13895 rtx jump_around_label = NULL;
13896 HOST_WIDE_INT align = 1;
13897 unsigned HOST_WIDE_INT count = 0;
13898 HOST_WIDE_INT expected_size = -1;
13899 int size_needed = 0;
13900 int desired_align = 0;
13901 enum stringop_alg alg;
13902 /* Precise placement of cld depends on whether stringops will be emitted in the
13903 prologue, main copying body or epilogue. This variable keeps track of
13904 whether cld was already emitted. */
13905 bool cld_done = false;
13906 rtx promoted_val = val_exp;
13907 bool force_loopy_epilogue = false;
13908 int dynamic_check;
13910 if (GET_CODE (align_exp) == CONST_INT)
13911 align = INTVAL (align_exp);
13912 /* i386 can do misaligned access at reasonably increased cost. */
13913 if (GET_CODE (expected_align_exp) == CONST_INT
13914 && INTVAL (expected_align_exp) > align)
13915 align = INTVAL (expected_align_exp);
13916 if (GET_CODE (count_exp) == CONST_INT)
13917 count = expected_size = INTVAL (count_exp);
13918 if (GET_CODE (expected_size_exp) == CONST_INT && count == 0)
13919 expected_size = INTVAL (expected_size_exp);
13921 alg = decide_alg (count, expected_size, true, &dynamic_check);
13922 desired_align = decide_alignment (align, alg, expected_size);
13924 if (!TARGET_ALIGN_STRINGOPS)
13925 align = desired_align;
13927 if (alg == libcall)
13928 return 0;
13929 gcc_assert (alg != no_stringop);
13930 if (!count)
13931 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13932 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13933 switch (alg)
13935 case libcall:
13936 case no_stringop:
13937 gcc_unreachable ();
13938 case loop:
13939 size_needed = GET_MODE_SIZE (Pmode);
13940 break;
13941 case unrolled_loop:
13942 size_needed = GET_MODE_SIZE (Pmode) * 4;
13943 break;
13944 case rep_prefix_8_byte:
13945 size_needed = 8;
13946 break;
13947 case rep_prefix_4_byte:
13948 size_needed = 4;
13949 break;
13950 case rep_prefix_1_byte:
13951 case loop_1_byte:
13952 size_needed = 1;
13953 break;
13955 /* Alignment code needs count to be in register. */
13956 if (GET_CODE (count_exp) == CONST_INT && desired_align > align)
13958 enum machine_mode mode = SImode;
13959 if (TARGET_64BIT && (count & ~0xffffffff))
13960 mode = DImode;
13961 count_exp = force_reg (mode, count_exp);
13963 /* Ensure that alignment prologue won't copy past end of block. */
13964 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13965 && !count)
13967 int size = MAX (size_needed - 1, desired_align - align);
13968 /* To improve performance of small blocks, we jump around the promoting
13969 code, so we need to use QImode accesses in epilogue. */
13970 if (GET_CODE (val_exp) != CONST_INT && size_needed > 1)
13971 force_loopy_epilogue = true;
13972 else if (TARGET_SINGLE_STRINGOP)
13973 emit_insn (gen_cld ()), cld_done = true;
13974 label = gen_label_rtx ();
13975 emit_cmp_and_jump_insns (count_exp,
13976 GEN_INT (size),
13977 LEU, 0, GET_MODE (count_exp), 1, label);
13978 if (expected_size == -1 || expected_size <= size)
13979 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13980 else
13981 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13983 if (dynamic_check != -1)
13985 rtx hot_label = gen_label_rtx ();
13986 jump_around_label = gen_label_rtx ();
13987 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13988 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13989 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13990 set_storage_via_libcall (dst, count_exp, val_exp, false);
13991 emit_jump (jump_around_label);
13992 emit_label (hot_label);
13994 if (TARGET_64BIT
13995 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
13996 promoted_val = promote_duplicated_reg (DImode, val_exp);
13997 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
13998 promoted_val = promote_duplicated_reg (SImode, val_exp);
13999 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14000 promoted_val = promote_duplicated_reg (HImode, val_exp);
14001 else
14002 promoted_val = val_exp;
14003 gcc_assert (desired_align >= 1 && align >= 1);
14004 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14005 && !count && !label)
14007 int size = MAX (size_needed - 1, desired_align - align);
14008 if (TARGET_SINGLE_STRINGOP)
14009 emit_insn (gen_cld ()), cld_done = true;
14010 label = gen_label_rtx ();
14011 emit_cmp_and_jump_insns (count_exp,
14012 GEN_INT (size),
14013 LEU, 0, GET_MODE (count_exp), 1, label);
14014 if (expected_size == -1 || expected_size <= size)
14015 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14016 else
14017 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14019 if (desired_align > align)
14021 /* Except for the first move in the epilogue, we no longer know
14022 the constant offset in aliasing info. It does not seem worth
14023 the pain to maintain it for the first move, so throw away
14024 the info early. */
14025 dst = change_address (dst, BLKmode, destreg);
14026 if (TARGET_SINGLE_STRINGOP && !cld_done)
14027 emit_insn (gen_cld ()), cld_done = true;
14028 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14029 desired_align);
14031 if (label && size_needed == 1)
14033 emit_label (label);
14034 LABEL_NUSES (label) = 1;
14035 label = NULL;
14037 switch (alg)
14039 case libcall:
14040 case no_stringop:
14041 gcc_unreachable ();
14042 case loop_1_byte:
14043 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14044 count_exp, QImode, 1, expected_size);
14045 break;
14046 case loop:
14047 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14048 count_exp, Pmode, 1, expected_size);
14049 break;
14050 case unrolled_loop:
14051 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14052 count_exp, Pmode, 4, expected_size);
14053 break;
14054 case rep_prefix_8_byte:
14055 if (!cld_done)
14056 emit_insn (gen_cld ()), cld_done = true;
14057 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14058 DImode);
14059 break;
14060 case rep_prefix_4_byte:
14061 if (!cld_done)
14062 emit_insn (gen_cld ()), cld_done = true;
14063 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14064 SImode);
14065 break;
14066 case rep_prefix_1_byte:
14067 if (!cld_done)
14068 emit_insn (gen_cld ()), cld_done = true;
14069 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14070 QImode);
14071 break;
14073 /* Properly adjust the offset of the dest memory for aliasing. */
14074 if (GET_CODE (count_exp) == CONST_INT)
14075 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14076 (count / size_needed) * size_needed);
14077 else
14078 dst = change_address (dst, BLKmode, destreg);
14080 if (label)
14082 if (size_needed < desired_align - align)
14084 tmp =
14085 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14086 GEN_INT (size_needed - 1), count_exp, 1,
14087 OPTAB_DIRECT);
14088 size_needed = desired_align - align + 1;
14089 if (tmp != count_exp)
14090 emit_move_insn (count_exp, tmp);
14092 emit_label (label);
14093 LABEL_NUSES (label) = 1;
14095 if (count_exp != const0_rtx && size_needed > 1)
14097 if (force_loopy_epilogue)
14098 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14099 size_needed);
14100 else
14102 if (TARGET_SINGLE_STRINGOP && !cld_done)
14103 emit_insn (gen_cld ()), cld_done = true;
14104 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14105 size_needed);
14108 if (jump_around_label)
14109 emit_label (jump_around_label);
14110 return 1;
14113 /* Expand strlen. */
14114 int
14115 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14117 rtx addr, scratch1, scratch2, scratch3, scratch4;
14119 /* The generic case of the strlen expander is long. Avoid expanding it
14120 unless TARGET_INLINE_ALL_STRINGOPS. */
14122 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14123 && !TARGET_INLINE_ALL_STRINGOPS
14124 && !optimize_size
14125 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
14126 return 0;
14128 addr = force_reg (Pmode, XEXP (src, 0));
14129 scratch1 = gen_reg_rtx (Pmode);
14131 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14132 && !optimize_size)
14134 /* Well it seems that some optimizers do not combine a call like
14135 foo(strlen(bar), strlen(bar));
14136 when the move and the subtraction are done here. They do calculate
14137 the length just once when these instructions are done inside of
14138 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14139 often used and I use one fewer register for the lifetime of
14140 output_strlen_unroll() this is better. */
14142 emit_move_insn (out, addr);
14144 ix86_expand_strlensi_unroll_1 (out, src, align);
14146 /* strlensi_unroll_1 returns the address of the zero at the end of
14147 the string, like memchr(), so compute the length by subtracting
14148 the start address. */
14149 if (TARGET_64BIT)
14150 emit_insn (gen_subdi3 (out, out, addr));
14151 else
14152 emit_insn (gen_subsi3 (out, out, addr));
14154 else
14156 rtx unspec;
14157 scratch2 = gen_reg_rtx (Pmode);
14158 scratch3 = gen_reg_rtx (Pmode);
14159 scratch4 = force_reg (Pmode, constm1_rtx);
14161 emit_move_insn (scratch3, addr);
14162 eoschar = force_reg (QImode, eoschar);
14164 emit_insn (gen_cld ());
14165 src = replace_equiv_address_nv (src, scratch3);
14167 /* If .md starts supporting :P, this can be done in .md. */
14168 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14169 scratch4), UNSPEC_SCAS);
14170 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14171 if (TARGET_64BIT)
14173 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14174 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14176 else
14178 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14179 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14182 return 1;
14185 /* Expand the appropriate insns for doing strlen if not just doing
14186 repnz; scasb
14188 out = result, initialized with the start address
14189 align_rtx = alignment of the address.
14190 scratch = scratch register, initialized with the start address when
14191 not aligned, otherwise undefined
14193 This is just the body. It needs the initializations mentioned above and
14194 some address computing at the end. These things are done in i386.md. */
14196 static void
14197 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14199 int align;
14200 rtx tmp;
14201 rtx align_2_label = NULL_RTX;
14202 rtx align_3_label = NULL_RTX;
14203 rtx align_4_label = gen_label_rtx ();
14204 rtx end_0_label = gen_label_rtx ();
14205 rtx mem;
14206 rtx tmpreg = gen_reg_rtx (SImode);
14207 rtx scratch = gen_reg_rtx (SImode);
14208 rtx cmp;
14210 align = 0;
14211 if (GET_CODE (align_rtx) == CONST_INT)
14212 align = INTVAL (align_rtx);
14214 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14216 /* Is there a known alignment and is it less than 4? */
14217 if (align < 4)
14219 rtx scratch1 = gen_reg_rtx (Pmode);
14220 emit_move_insn (scratch1, out);
14221 /* Is there a known alignment and is it not 2? */
14222 if (align != 2)
14224 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14225 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14227 /* Leave just the 3 lower bits. */
14228 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14229 NULL_RTX, 0, OPTAB_WIDEN);
14231 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14232 Pmode, 1, align_4_label);
14233 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14234 Pmode, 1, align_2_label);
14235 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14236 Pmode, 1, align_3_label);
14238 else
14240 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14241 check whether it is aligned to a 4-byte boundary. */
14243 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14244 NULL_RTX, 0, OPTAB_WIDEN);
14246 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14247 Pmode, 1, align_4_label);
14250 mem = change_address (src, QImode, out);
14252 /* Now compare the bytes. */
14254 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
14255 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14256 QImode, 1, end_0_label);
14258 /* Increment the address. */
14259 if (TARGET_64BIT)
14260 emit_insn (gen_adddi3 (out, out, const1_rtx));
14261 else
14262 emit_insn (gen_addsi3 (out, out, const1_rtx));
14264 /* Not needed with an alignment of 2 */
14265 if (align != 2)
14267 emit_label (align_2_label);
14269 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14270 end_0_label);
14272 if (TARGET_64BIT)
14273 emit_insn (gen_adddi3 (out, out, const1_rtx));
14274 else
14275 emit_insn (gen_addsi3 (out, out, const1_rtx));
14277 emit_label (align_3_label);
14280 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14281 end_0_label);
14283 if (TARGET_64BIT)
14284 emit_insn (gen_adddi3 (out, out, const1_rtx));
14285 else
14286 emit_insn (gen_addsi3 (out, out, const1_rtx));
14289 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14290 align this loop; it only makes programs bigger and does not help
14291 them run faster. */
14292 emit_label (align_4_label);
14294 mem = change_address (src, SImode, out);
14295 emit_move_insn (scratch, mem);
14296 if (TARGET_64BIT)
14297 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14298 else
14299 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14301 /* This formula yields a nonzero result iff one of the bytes is zero.
14302 This saves three branches inside the loop and many cycles. */
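/* Concretely, the insns below compute
   (x - 0x01010101) & ~x & 0x80808080,
   which is nonzero exactly when at least one byte of x is zero.  */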
14304 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14305 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14306 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14307 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14308 gen_int_mode (0x80808080, SImode)));
14309 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14310 align_4_label);
14312 if (TARGET_CMOVE)
14314 rtx reg = gen_reg_rtx (SImode);
14315 rtx reg2 = gen_reg_rtx (Pmode);
14316 emit_move_insn (reg, tmpreg);
14317 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14319 /* If zero is not in the first two bytes, move two bytes forward. */
14320 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14321 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14322 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14323 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14324 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14325 reg,
14326 tmpreg)));
14327 /* Emit lea manually to avoid clobbering the flags. */
14328 emit_insn (gen_rtx_SET (SImode, reg2,
14329 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14331 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14332 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14333 emit_insn (gen_rtx_SET (VOIDmode, out,
14334 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14335 reg2,
14336 out)));
14339 else
14341 rtx end_2_label = gen_label_rtx ();
14342 /* Is zero in the first two bytes? */
14344 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14345 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14346 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14347 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14348 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14349 pc_rtx);
14350 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14351 JUMP_LABEL (tmp) = end_2_label;
14353 /* Not in the first two. Move two bytes forward. */
14354 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14355 if (TARGET_64BIT)
14356 emit_insn (gen_adddi3 (out, out, const2_rtx));
14357 else
14358 emit_insn (gen_addsi3 (out, out, const2_rtx));
14360 emit_label (end_2_label);
14364 /* Avoid branch in fixing the byte. */
14365 tmpreg = gen_lowpart (QImode, tmpreg);
14366 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14367 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14368 if (TARGET_64BIT)
14369 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14370 else
14371 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14373 emit_label (end_0_label);
14376 void
14377 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14378 rtx callarg2 ATTRIBUTE_UNUSED,
14379 rtx pop, int sibcall)
14381 rtx use = NULL, call;
14383 if (pop == const0_rtx)
14384 pop = NULL;
14385 gcc_assert (!TARGET_64BIT || !pop);
14387 if (TARGET_MACHO && !TARGET_64BIT)
14389 #if TARGET_MACHO
14390 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14391 fnaddr = machopic_indirect_call_target (fnaddr);
14392 #endif
14394 else
14396 /* Static functions and indirect calls don't need the pic register. */
14397 if (! TARGET_64BIT && flag_pic
14398 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14399 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14400 use_reg (&use, pic_offset_table_rtx);
14403 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14405 rtx al = gen_rtx_REG (QImode, 0);
14406 emit_move_insn (al, callarg2);
14407 use_reg (&use, al);
14410 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14412 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14413 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14415 if (sibcall && TARGET_64BIT
14416 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14418 rtx addr;
14419 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14420 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
14421 emit_move_insn (fnaddr, addr);
14422 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14425 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14426 if (retval)
14427 call = gen_rtx_SET (VOIDmode, retval, call);
14428 if (pop)
14430 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14431 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14432 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14435 call = emit_call_insn (call);
14436 if (use)
14437 CALL_INSN_FUNCTION_USAGE (call) = use;
14441 /* Clear stack slot assignments remembered from previous functions.
14442 This is called from INIT_EXPANDERS once before RTL is emitted for each
14443 function. */
14445 static struct machine_function *
14446 ix86_init_machine_status (void)
14448 struct machine_function *f;
14450 f = ggc_alloc_cleared (sizeof (struct machine_function));
14451 f->use_fast_prologue_epilogue_nregs = -1;
14452 f->tls_descriptor_call_expanded_p = 0;
14454 return f;
14457 /* Return a MEM corresponding to a stack slot with mode MODE.
14458 Allocate a new slot if necessary.
14460 The RTL for a function can have several slots available: N is
14461 which slot to use. */
14463 rtx
14464 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14466 struct stack_local_entry *s;
14468 gcc_assert (n < MAX_386_STACK_LOCALS);
14470 for (s = ix86_stack_locals; s; s = s->next)
14471 if (s->mode == mode && s->n == n)
14472 return copy_rtx (s->rtl);
14474 s = (struct stack_local_entry *)
14475 ggc_alloc (sizeof (struct stack_local_entry));
14476 s->n = n;
14477 s->mode = mode;
14478 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14480 s->next = ix86_stack_locals;
14481 ix86_stack_locals = s;
14482 return s->rtl;
14485 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14487 static GTY(()) rtx ix86_tls_symbol;
14488 static rtx
14489 ix86_tls_get_addr (void)
14492 if (!ix86_tls_symbol)
14494 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14495 (TARGET_ANY_GNU_TLS
14496 && !TARGET_64BIT)
14497 ? "___tls_get_addr"
14498 : "__tls_get_addr");
14501 return ix86_tls_symbol;
14504 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14506 static GTY(()) rtx ix86_tls_module_base_symbol;
14507 static rtx
14508 ix86_tls_module_base (void)
14511 if (!ix86_tls_module_base_symbol)
14513 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14514 "_TLS_MODULE_BASE_");
14515 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14516 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14519 return ix86_tls_module_base_symbol;
14522 /* Calculate the length of the memory address in the instruction
14523 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14525 int
14526 memory_address_length (rtx addr)
14528 struct ix86_address parts;
14529 rtx base, index, disp;
14530 int len;
14531 int ok;
14533 if (GET_CODE (addr) == PRE_DEC
14534 || GET_CODE (addr) == POST_INC
14535 || GET_CODE (addr) == PRE_MODIFY
14536 || GET_CODE (addr) == POST_MODIFY)
14537 return 0;
14539 ok = ix86_decompose_address (addr, &parts);
14540 gcc_assert (ok);
14542 if (parts.base && GET_CODE (parts.base) == SUBREG)
14543 parts.base = SUBREG_REG (parts.base);
14544 if (parts.index && GET_CODE (parts.index) == SUBREG)
14545 parts.index = SUBREG_REG (parts.index);
14547 base = parts.base;
14548 index = parts.index;
14549 disp = parts.disp;
14550 len = 0;
14552 /* Rule of thumb:
14553 - esp as the base always wants an index,
14554 - ebp as the base always wants a displacement. */
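/* In encoding terms: (%esp) needs an SIB byte and (%ebp) needs a zero
   disp8, so each costs one byte beyond the bare modrm counted by the
   caller; a bare disp32 costs four, a displacement fitting the signed
   8-bit 'K' constraint costs one, and anything larger costs four.  */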
14556 /* Register Indirect. */
14557 if (base && !index && !disp)
14559 /* esp (for its index) and ebp (for its displacement) need
14560 the two-byte modrm form. */
14561 if (addr == stack_pointer_rtx
14562 || addr == arg_pointer_rtx
14563 || addr == frame_pointer_rtx
14564 || addr == hard_frame_pointer_rtx)
14565 len = 1;
14568 /* Direct Addressing. */
14569 else if (disp && !base && !index)
14570 len = 4;
14572 else
14574 /* Find the length of the displacement constant. */
14575 if (disp)
14577 if (base && satisfies_constraint_K (disp))
14578 len = 1;
14579 else
14580 len = 4;
14582 /* ebp always wants a displacement. */
14583 else if (base == hard_frame_pointer_rtx)
14584 len = 1;
14586 /* An index requires the two-byte modrm form.... */
14587 if (index
14588 /* ...like esp, which always wants an index. */
14589 || base == stack_pointer_rtx
14590 || base == arg_pointer_rtx
14591 || base == frame_pointer_rtx)
14592 len += 1;
14595 return len;
14598 /* Compute default value for "length_immediate" attribute. When SHORTFORM
14599 is set, expect that the insn has an 8-bit immediate alternative. */
14600 int
14601 ix86_attr_length_immediate_default (rtx insn, int shortform)
14603 int len = 0;
14604 int i;
14605 extract_insn_cached (insn);
14606 for (i = recog_data.n_operands - 1; i >= 0; --i)
14607 if (CONSTANT_P (recog_data.operand[i]))
14609 gcc_assert (!len);
14610 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
14611 len = 1;
14612 else
14614 switch (get_attr_mode (insn))
14616 case MODE_QI:
14617 len+=1;
14618 break;
14619 case MODE_HI:
14620 len+=2;
14621 break;
14622 case MODE_SI:
14623 len+=4;
14624 break;
14625 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
14626 case MODE_DI:
14627 len+=4;
14628 break;
14629 default:
14630 fatal_insn ("unknown insn mode", insn);
14634 return len;
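/* For example, "addl $5, %eax" can use the sign-extended imm8 form, so
   with SHORTFORM the immediate contributes one byte, while
   "addl $500, %eax" contributes four; DImode immediates are likewise
   32-bit sign-extended, hence also four bytes.  */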
14636 /* Compute default value for "length_address" attribute. */
14637 int
14638 ix86_attr_length_address_default (rtx insn)
14640 int i;
14642 if (get_attr_type (insn) == TYPE_LEA)
14644 rtx set = PATTERN (insn);
14646 if (GET_CODE (set) == PARALLEL)
14647 set = XVECEXP (set, 0, 0);
14649 gcc_assert (GET_CODE (set) == SET);
14651 return memory_address_length (SET_SRC (set));
14654 extract_insn_cached (insn);
14655 for (i = recog_data.n_operands - 1; i >= 0; --i)
14656 if (GET_CODE (recog_data.operand[i]) == MEM)
14658 return memory_address_length (XEXP (recog_data.operand[i], 0));
14659 break;
14661 return 0;
14664 /* Return the maximum number of instructions a cpu can issue. */
14666 static int
14667 ix86_issue_rate (void)
14669 switch (ix86_tune)
14671 case PROCESSOR_PENTIUM:
14672 case PROCESSOR_K6:
14673 return 2;
14675 case PROCESSOR_PENTIUMPRO:
14676 case PROCESSOR_PENTIUM4:
14677 case PROCESSOR_ATHLON:
14678 case PROCESSOR_K8:
14679 case PROCESSOR_NOCONA:
14680 case PROCESSOR_GENERIC32:
14681 case PROCESSOR_GENERIC64:
14682 return 3;
14684 case PROCESSOR_CORE2:
14685 return 4;
14687 default:
14688 return 1;
14692 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14693 by DEP_INSN and nothing else set by DEP_INSN. */
14695 static int
14696 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14698 rtx set, set2;
14700 /* Simplify the test for uninteresting insns. */
14701 if (insn_type != TYPE_SETCC
14702 && insn_type != TYPE_ICMOV
14703 && insn_type != TYPE_FCMOV
14704 && insn_type != TYPE_IBR)
14705 return 0;
14707 if ((set = single_set (dep_insn)) != 0)
14709 set = SET_DEST (set);
14710 set2 = NULL_RTX;
14712 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14713 && XVECLEN (PATTERN (dep_insn), 0) == 2
14714 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14715 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14717 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14718 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14720 else
14721 return 0;
14723 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14724 return 0;
14726 /* This test is true if the dependent insn reads the flags but
14727 not any other potentially set register. */
14728 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14729 return 0;
14731 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14732 return 0;
14734 return 1;
14737 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14738 address with operands set by DEP_INSN. */
14740 static int
14741 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14743 rtx addr;
14745 if (insn_type == TYPE_LEA
14746 && TARGET_PENTIUM)
14748 addr = PATTERN (insn);
14750 if (GET_CODE (addr) == PARALLEL)
14751 addr = XVECEXP (addr, 0, 0);
14753 gcc_assert (GET_CODE (addr) == SET);
14755 addr = SET_SRC (addr);
14757 else
14759 int i;
14760 extract_insn_cached (insn);
14761 for (i = recog_data.n_operands - 1; i >= 0; --i)
14762 if (GET_CODE (recog_data.operand[i]) == MEM)
14764 addr = XEXP (recog_data.operand[i], 0);
14765 goto found;
14767 return 0;
14768 found:;
14771 return modified_in_p (addr, dep_insn);
14774 static int
14775 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14777 enum attr_type insn_type, dep_insn_type;
14778 enum attr_memory memory;
14779 rtx set, set2;
14780 int dep_insn_code_number;
14782 /* Anti and output dependencies have zero cost on all CPUs. */
14783 if (REG_NOTE_KIND (link) != 0)
14784 return 0;
14786 dep_insn_code_number = recog_memoized (dep_insn);
14788 /* If we can't recognize the insns, we can't really do anything. */
14789 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14790 return cost;
14792 insn_type = get_attr_type (insn);
14793 dep_insn_type = get_attr_type (dep_insn);
14795 switch (ix86_tune)
14797 case PROCESSOR_PENTIUM:
14798 /* Address Generation Interlock adds a cycle of latency. */
14799 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14800 cost += 1;
14802 /* ??? Compares pair with jump/setcc. */
14803 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14804 cost = 0;
14806 /* Floating point stores require value to be ready one cycle earlier. */
14807 if (insn_type == TYPE_FMOV
14808 && get_attr_memory (insn) == MEMORY_STORE
14809 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14810 cost += 1;
14811 break;
14813 case PROCESSOR_PENTIUMPRO:
14814 memory = get_attr_memory (insn);
14816 /* INT->FP conversion is expensive. */
14817 if (get_attr_fp_int_src (dep_insn))
14818 cost += 5;
14820 /* There is one cycle extra latency between an FP op and a store. */
14821 if (insn_type == TYPE_FMOV
14822 && (set = single_set (dep_insn)) != NULL_RTX
14823 && (set2 = single_set (insn)) != NULL_RTX
14824 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14825 && GET_CODE (SET_DEST (set2)) == MEM)
14826 cost += 1;
14828 /* Show ability of reorder buffer to hide latency of load by executing
14829 in parallel with previous instruction in case
14830 previous instruction is not needed to compute the address. */
14831 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14832 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14834 /* Claim moves to take one cycle, as the core can issue one load
14835 at a time and the next load can start a cycle later. */
14836 if (dep_insn_type == TYPE_IMOV
14837 || dep_insn_type == TYPE_FMOV)
14838 cost = 1;
14839 else if (cost > 1)
14840 cost--;
14842 break;
14844 case PROCESSOR_K6:
14845 memory = get_attr_memory (insn);
14847 /* The esp dependency is resolved before the instruction is really
14848 finished. */
14849 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14850 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14851 return 1;
14853 /* INT->FP conversion is expensive. */
14854 if (get_attr_fp_int_src (dep_insn))
14855 cost += 5;
14857 /* Show ability of reorder buffer to hide latency of load by executing
14858 in parallel with previous instruction in case
14859 previous instruction is not needed to compute the address. */
14860 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14861 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14863 /* Claim moves to take one cycle, as the core can issue one load
14864 at a time and the next load can start a cycle later. */
14865 if (dep_insn_type == TYPE_IMOV
14866 || dep_insn_type == TYPE_FMOV)
14867 cost = 1;
14868 else if (cost > 2)
14869 cost -= 2;
14870 else
14871 cost = 1;
14873 break;
14875 case PROCESSOR_ATHLON:
14876 case PROCESSOR_K8:
14877 case PROCESSOR_GENERIC32:
14878 case PROCESSOR_GENERIC64:
14879 memory = get_attr_memory (insn);
14881 /* Show ability of reorder buffer to hide latency of load by executing
14882 in parallel with previous instruction in case
14883 previous instruction is not needed to compute the address. */
14884 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14885 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14887 enum attr_unit unit = get_attr_unit (insn);
14888 int loadcost = 3;
14890 /* Because of the difference between the length of integer and
14891 floating unit pipeline preparation stages, the memory operands
14892 for floating point are cheaper.
14894 ??? For Athlon the difference is most probably 2. */
14895 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14896 loadcost = 3;
14897 else
14898 loadcost = TARGET_ATHLON ? 2 : 0;
14900 if (cost >= loadcost)
14901 cost -= loadcost;
14902 else
14903 cost = 0;
14906 default:
14907 break;
14910 return cost;
14913 /* How many alternative schedules to try. This should be as wide as the
14914 scheduling freedom in the DFA, but no wider. Making this value too
14915 large results in extra work for the scheduler. */
14917 static int
14918 ia32_multipass_dfa_lookahead (void)
14920 if (ix86_tune == PROCESSOR_PENTIUM)
14921 return 2;
14923 if (ix86_tune == PROCESSOR_PENTIUMPRO
14924 || ix86_tune == PROCESSOR_K6)
14925 return 1;
14927 else
14928 return 0;
14932 /* Compute the alignment given to a constant that is being placed in memory.
14933 EXP is the constant and ALIGN is the alignment that the object would
14934 ordinarily have.
14935 The value of this function is used instead of that alignment to align
14936 the object. */
14938 int
14939 ix86_constant_alignment (tree exp, int align)
14941 if (TREE_CODE (exp) == REAL_CST)
14943 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14944 return 64;
14945 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14946 return 128;
14948 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14949 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14950 return BITS_PER_WORD;
14952 return align;
14955 /* Compute the alignment for a static variable.
14956 TYPE is the data type, and ALIGN is the alignment that
14957 the object would ordinarily have. The value of this function is used
14958 instead of that alignment to align the object. */
14960 int
14961 ix86_data_alignment (tree type, int align)
14963 int max_align = optimize_size ? BITS_PER_WORD : 256;
14965 if (AGGREGATE_TYPE_P (type)
14966 && TYPE_SIZE (type)
14967 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14968 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14969 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14970 && align < max_align)
14971 align = max_align;
14973 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
14974 to a 16-byte boundary. */
14975 if (TARGET_64BIT)
14977 if (AGGREGATE_TYPE_P (type)
14978 && TYPE_SIZE (type)
14979 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14980 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14981 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14982 return 128;
14985 if (TREE_CODE (type) == ARRAY_TYPE)
14987 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14988 return 64;
14989 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14990 return 128;
14992 else if (TREE_CODE (type) == COMPLEX_TYPE)
14995 if (TYPE_MODE (type) == DCmode && align < 64)
14996 return 64;
14997 if (TYPE_MODE (type) == XCmode && align < 128)
14998 return 128;
15000 else if ((TREE_CODE (type) == RECORD_TYPE
15001 || TREE_CODE (type) == UNION_TYPE
15002 || TREE_CODE (type) == QUAL_UNION_TYPE)
15003 && TYPE_FIELDS (type))
15005 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15006 return 64;
15007 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15008 return 128;
15010 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15011 || TREE_CODE (type) == INTEGER_TYPE)
15013 if (TYPE_MODE (type) == DFmode && align < 64)
15014 return 64;
15015 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15016 return 128;
15019 return align;
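/* So, for instance, a 32-byte static array gets 256-bit alignment when
   optimizing for speed, and on x86-64 any aggregate of 16 bytes or more
   ends up aligned to at least 16 bytes, matching the ABI note above.  */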
15022 /* Compute the alignment for a local variable.
15023 TYPE is the data type, and ALIGN is the alignment that
15024 the object would ordinarily have. The value of this macro is used
15025 instead of that alignment to align the object. */
15027 int
15028 ix86_local_alignment (tree type, int align)
15030 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15031 to a 16-byte boundary. */
15032 if (TARGET_64BIT)
15034 if (AGGREGATE_TYPE_P (type)
15035 && TYPE_SIZE (type)
15036 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15037 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15038 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15039 return 128;
15041 if (TREE_CODE (type) == ARRAY_TYPE)
15043 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15044 return 64;
15045 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15046 return 128;
15048 else if (TREE_CODE (type) == COMPLEX_TYPE)
15050 if (TYPE_MODE (type) == DCmode && align < 64)
15051 return 64;
15052 if (TYPE_MODE (type) == XCmode && align < 128)
15053 return 128;
15055 else if ((TREE_CODE (type) == RECORD_TYPE
15056 || TREE_CODE (type) == UNION_TYPE
15057 || TREE_CODE (type) == QUAL_UNION_TYPE)
15058 && TYPE_FIELDS (type))
15060 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15061 return 64;
15062 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15063 return 128;
15065 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15066 || TREE_CODE (type) == INTEGER_TYPE)
15069 if (TYPE_MODE (type) == DFmode && align < 64)
15070 return 64;
15071 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15072 return 128;
15074 return align;
15077 /* Emit RTL insns to initialize the variable parts of a trampoline.
15078 FNADDR is an RTX for the address of the function's pure code.
15079 CXT is an RTX for the static chain value for the function. */
15080 void
15081 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15083 if (!TARGET_64BIT)
15085 /* Compute offset from the end of the jmp to the target function. */
15086 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15087 plus_constant (tramp, 10),
15088 NULL_RTX, 1, OPTAB_DIRECT);
15089 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15090 gen_int_mode (0xb9, QImode));
15091 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15092 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15093 gen_int_mode (0xe9, QImode));
15094 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
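/* The 32-bit trampoline written above is therefore
       b9 <cxt>       movl  $cxt, %ecx
       e9 <disp>      jmp   fnaddr
   ten bytes in total, with the jmp displacement taken relative to the end
   of the trampoline (tramp + 10).  */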
15096 else
15098 int offset = 0;
15099 /* Try to load address using shorter movl instead of movabs.
15100 We may want to support movq for kernel mode, but kernel does not use
15101 trampolines at the moment. */
15102 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15104 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15105 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15106 gen_int_mode (0xbb41, HImode));
15107 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15108 gen_lowpart (SImode, fnaddr));
15109 offset += 6;
15111 else
15113 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15114 gen_int_mode (0xbb49, HImode));
15115 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15116 fnaddr);
15117 offset += 10;
15119 /* Load static chain using movabs to r10. */
15120 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15121 gen_int_mode (0xba49, HImode));
15122 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15123 cxt);
15124 offset += 10;
15125 /* Jump to r11. */
15126 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15127 gen_int_mode (0xff49, HImode));
15128 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15129 gen_int_mode (0xe3, QImode));
15130 offset += 3;
15131 gcc_assert (offset <= TRAMPOLINE_SIZE);
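/* The 64-bit trampoline is thus either
       41 bb <imm32>   movl   $fnaddr, %r11d
   or  49 bb <imm64>   movabs $fnaddr, %r11
   followed by
       49 ba <imm64>   movabs $cxt, %r10
       49 ff e3        jmp    *%r11
   (the REX.W prefix on the jmp is redundant but harmless).  */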
15134 #ifdef ENABLE_EXECUTE_STACK
15135 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15136 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15137 #endif
15140 /* Codes for all the SSE/MMX builtins. */
15141 enum ix86_builtins
15143 IX86_BUILTIN_ADDPS,
15144 IX86_BUILTIN_ADDSS,
15145 IX86_BUILTIN_DIVPS,
15146 IX86_BUILTIN_DIVSS,
15147 IX86_BUILTIN_MULPS,
15148 IX86_BUILTIN_MULSS,
15149 IX86_BUILTIN_SUBPS,
15150 IX86_BUILTIN_SUBSS,
15152 IX86_BUILTIN_CMPEQPS,
15153 IX86_BUILTIN_CMPLTPS,
15154 IX86_BUILTIN_CMPLEPS,
15155 IX86_BUILTIN_CMPGTPS,
15156 IX86_BUILTIN_CMPGEPS,
15157 IX86_BUILTIN_CMPNEQPS,
15158 IX86_BUILTIN_CMPNLTPS,
15159 IX86_BUILTIN_CMPNLEPS,
15160 IX86_BUILTIN_CMPNGTPS,
15161 IX86_BUILTIN_CMPNGEPS,
15162 IX86_BUILTIN_CMPORDPS,
15163 IX86_BUILTIN_CMPUNORDPS,
15164 IX86_BUILTIN_CMPEQSS,
15165 IX86_BUILTIN_CMPLTSS,
15166 IX86_BUILTIN_CMPLESS,
15167 IX86_BUILTIN_CMPNEQSS,
15168 IX86_BUILTIN_CMPNLTSS,
15169 IX86_BUILTIN_CMPNLESS,
15170 IX86_BUILTIN_CMPNGTSS,
15171 IX86_BUILTIN_CMPNGESS,
15172 IX86_BUILTIN_CMPORDSS,
15173 IX86_BUILTIN_CMPUNORDSS,
15175 IX86_BUILTIN_COMIEQSS,
15176 IX86_BUILTIN_COMILTSS,
15177 IX86_BUILTIN_COMILESS,
15178 IX86_BUILTIN_COMIGTSS,
15179 IX86_BUILTIN_COMIGESS,
15180 IX86_BUILTIN_COMINEQSS,
15181 IX86_BUILTIN_UCOMIEQSS,
15182 IX86_BUILTIN_UCOMILTSS,
15183 IX86_BUILTIN_UCOMILESS,
15184 IX86_BUILTIN_UCOMIGTSS,
15185 IX86_BUILTIN_UCOMIGESS,
15186 IX86_BUILTIN_UCOMINEQSS,
15188 IX86_BUILTIN_CVTPI2PS,
15189 IX86_BUILTIN_CVTPS2PI,
15190 IX86_BUILTIN_CVTSI2SS,
15191 IX86_BUILTIN_CVTSI642SS,
15192 IX86_BUILTIN_CVTSS2SI,
15193 IX86_BUILTIN_CVTSS2SI64,
15194 IX86_BUILTIN_CVTTPS2PI,
15195 IX86_BUILTIN_CVTTSS2SI,
15196 IX86_BUILTIN_CVTTSS2SI64,
15198 IX86_BUILTIN_MAXPS,
15199 IX86_BUILTIN_MAXSS,
15200 IX86_BUILTIN_MINPS,
15201 IX86_BUILTIN_MINSS,
15203 IX86_BUILTIN_LOADUPS,
15204 IX86_BUILTIN_STOREUPS,
15205 IX86_BUILTIN_MOVSS,
15207 IX86_BUILTIN_MOVHLPS,
15208 IX86_BUILTIN_MOVLHPS,
15209 IX86_BUILTIN_LOADHPS,
15210 IX86_BUILTIN_LOADLPS,
15211 IX86_BUILTIN_STOREHPS,
15212 IX86_BUILTIN_STORELPS,
15214 IX86_BUILTIN_MASKMOVQ,
15215 IX86_BUILTIN_MOVMSKPS,
15216 IX86_BUILTIN_PMOVMSKB,
15218 IX86_BUILTIN_MOVNTPS,
15219 IX86_BUILTIN_MOVNTQ,
15221 IX86_BUILTIN_LOADDQU,
15222 IX86_BUILTIN_STOREDQU,
15224 IX86_BUILTIN_PACKSSWB,
15225 IX86_BUILTIN_PACKSSDW,
15226 IX86_BUILTIN_PACKUSWB,
15228 IX86_BUILTIN_PADDB,
15229 IX86_BUILTIN_PADDW,
15230 IX86_BUILTIN_PADDD,
15231 IX86_BUILTIN_PADDQ,
15232 IX86_BUILTIN_PADDSB,
15233 IX86_BUILTIN_PADDSW,
15234 IX86_BUILTIN_PADDUSB,
15235 IX86_BUILTIN_PADDUSW,
15236 IX86_BUILTIN_PSUBB,
15237 IX86_BUILTIN_PSUBW,
15238 IX86_BUILTIN_PSUBD,
15239 IX86_BUILTIN_PSUBQ,
15240 IX86_BUILTIN_PSUBSB,
15241 IX86_BUILTIN_PSUBSW,
15242 IX86_BUILTIN_PSUBUSB,
15243 IX86_BUILTIN_PSUBUSW,
15245 IX86_BUILTIN_PAND,
15246 IX86_BUILTIN_PANDN,
15247 IX86_BUILTIN_POR,
15248 IX86_BUILTIN_PXOR,
15250 IX86_BUILTIN_PAVGB,
15251 IX86_BUILTIN_PAVGW,
15253 IX86_BUILTIN_PCMPEQB,
15254 IX86_BUILTIN_PCMPEQW,
15255 IX86_BUILTIN_PCMPEQD,
15256 IX86_BUILTIN_PCMPGTB,
15257 IX86_BUILTIN_PCMPGTW,
15258 IX86_BUILTIN_PCMPGTD,
15260 IX86_BUILTIN_PMADDWD,
15262 IX86_BUILTIN_PMAXSW,
15263 IX86_BUILTIN_PMAXUB,
15264 IX86_BUILTIN_PMINSW,
15265 IX86_BUILTIN_PMINUB,
15267 IX86_BUILTIN_PMULHUW,
15268 IX86_BUILTIN_PMULHW,
15269 IX86_BUILTIN_PMULLW,
15271 IX86_BUILTIN_PSADBW,
15272 IX86_BUILTIN_PSHUFW,
15274 IX86_BUILTIN_PSLLW,
15275 IX86_BUILTIN_PSLLD,
15276 IX86_BUILTIN_PSLLQ,
15277 IX86_BUILTIN_PSRAW,
15278 IX86_BUILTIN_PSRAD,
15279 IX86_BUILTIN_PSRLW,
15280 IX86_BUILTIN_PSRLD,
15281 IX86_BUILTIN_PSRLQ,
15282 IX86_BUILTIN_PSLLWI,
15283 IX86_BUILTIN_PSLLDI,
15284 IX86_BUILTIN_PSLLQI,
15285 IX86_BUILTIN_PSRAWI,
15286 IX86_BUILTIN_PSRADI,
15287 IX86_BUILTIN_PSRLWI,
15288 IX86_BUILTIN_PSRLDI,
15289 IX86_BUILTIN_PSRLQI,
15291 IX86_BUILTIN_PUNPCKHBW,
15292 IX86_BUILTIN_PUNPCKHWD,
15293 IX86_BUILTIN_PUNPCKHDQ,
15294 IX86_BUILTIN_PUNPCKLBW,
15295 IX86_BUILTIN_PUNPCKLWD,
15296 IX86_BUILTIN_PUNPCKLDQ,
15298 IX86_BUILTIN_SHUFPS,
15300 IX86_BUILTIN_RCPPS,
15301 IX86_BUILTIN_RCPSS,
15302 IX86_BUILTIN_RSQRTPS,
15303 IX86_BUILTIN_RSQRTSS,
15304 IX86_BUILTIN_SQRTPS,
15305 IX86_BUILTIN_SQRTSS,
15307 IX86_BUILTIN_UNPCKHPS,
15308 IX86_BUILTIN_UNPCKLPS,
15310 IX86_BUILTIN_ANDPS,
15311 IX86_BUILTIN_ANDNPS,
15312 IX86_BUILTIN_ORPS,
15313 IX86_BUILTIN_XORPS,
15315 IX86_BUILTIN_EMMS,
15316 IX86_BUILTIN_LDMXCSR,
15317 IX86_BUILTIN_STMXCSR,
15318 IX86_BUILTIN_SFENCE,
15320 /* 3DNow! Original */
15321 IX86_BUILTIN_FEMMS,
15322 IX86_BUILTIN_PAVGUSB,
15323 IX86_BUILTIN_PF2ID,
15324 IX86_BUILTIN_PFACC,
15325 IX86_BUILTIN_PFADD,
15326 IX86_BUILTIN_PFCMPEQ,
15327 IX86_BUILTIN_PFCMPGE,
15328 IX86_BUILTIN_PFCMPGT,
15329 IX86_BUILTIN_PFMAX,
15330 IX86_BUILTIN_PFMIN,
15331 IX86_BUILTIN_PFMUL,
15332 IX86_BUILTIN_PFRCP,
15333 IX86_BUILTIN_PFRCPIT1,
15334 IX86_BUILTIN_PFRCPIT2,
15335 IX86_BUILTIN_PFRSQIT1,
15336 IX86_BUILTIN_PFRSQRT,
15337 IX86_BUILTIN_PFSUB,
15338 IX86_BUILTIN_PFSUBR,
15339 IX86_BUILTIN_PI2FD,
15340 IX86_BUILTIN_PMULHRW,
15342 /* 3DNow! Athlon Extensions */
15343 IX86_BUILTIN_PF2IW,
15344 IX86_BUILTIN_PFNACC,
15345 IX86_BUILTIN_PFPNACC,
15346 IX86_BUILTIN_PI2FW,
15347 IX86_BUILTIN_PSWAPDSI,
15348 IX86_BUILTIN_PSWAPDSF,
15350 /* SSE2 */
15351 IX86_BUILTIN_ADDPD,
15352 IX86_BUILTIN_ADDSD,
15353 IX86_BUILTIN_DIVPD,
15354 IX86_BUILTIN_DIVSD,
15355 IX86_BUILTIN_MULPD,
15356 IX86_BUILTIN_MULSD,
15357 IX86_BUILTIN_SUBPD,
15358 IX86_BUILTIN_SUBSD,
15360 IX86_BUILTIN_CMPEQPD,
15361 IX86_BUILTIN_CMPLTPD,
15362 IX86_BUILTIN_CMPLEPD,
15363 IX86_BUILTIN_CMPGTPD,
15364 IX86_BUILTIN_CMPGEPD,
15365 IX86_BUILTIN_CMPNEQPD,
15366 IX86_BUILTIN_CMPNLTPD,
15367 IX86_BUILTIN_CMPNLEPD,
15368 IX86_BUILTIN_CMPNGTPD,
15369 IX86_BUILTIN_CMPNGEPD,
15370 IX86_BUILTIN_CMPORDPD,
15371 IX86_BUILTIN_CMPUNORDPD,
15372 IX86_BUILTIN_CMPNEPD,
15373 IX86_BUILTIN_CMPEQSD,
15374 IX86_BUILTIN_CMPLTSD,
15375 IX86_BUILTIN_CMPLESD,
15376 IX86_BUILTIN_CMPNEQSD,
15377 IX86_BUILTIN_CMPNLTSD,
15378 IX86_BUILTIN_CMPNLESD,
15379 IX86_BUILTIN_CMPORDSD,
15380 IX86_BUILTIN_CMPUNORDSD,
15381 IX86_BUILTIN_CMPNESD,
15383 IX86_BUILTIN_COMIEQSD,
15384 IX86_BUILTIN_COMILTSD,
15385 IX86_BUILTIN_COMILESD,
15386 IX86_BUILTIN_COMIGTSD,
15387 IX86_BUILTIN_COMIGESD,
15388 IX86_BUILTIN_COMINEQSD,
15389 IX86_BUILTIN_UCOMIEQSD,
15390 IX86_BUILTIN_UCOMILTSD,
15391 IX86_BUILTIN_UCOMILESD,
15392 IX86_BUILTIN_UCOMIGTSD,
15393 IX86_BUILTIN_UCOMIGESD,
15394 IX86_BUILTIN_UCOMINEQSD,
15396 IX86_BUILTIN_MAXPD,
15397 IX86_BUILTIN_MAXSD,
15398 IX86_BUILTIN_MINPD,
15399 IX86_BUILTIN_MINSD,
15401 IX86_BUILTIN_ANDPD,
15402 IX86_BUILTIN_ANDNPD,
15403 IX86_BUILTIN_ORPD,
15404 IX86_BUILTIN_XORPD,
15406 IX86_BUILTIN_SQRTPD,
15407 IX86_BUILTIN_SQRTSD,
15409 IX86_BUILTIN_UNPCKHPD,
15410 IX86_BUILTIN_UNPCKLPD,
15412 IX86_BUILTIN_SHUFPD,
15414 IX86_BUILTIN_LOADUPD,
15415 IX86_BUILTIN_STOREUPD,
15416 IX86_BUILTIN_MOVSD,
15418 IX86_BUILTIN_LOADHPD,
15419 IX86_BUILTIN_LOADLPD,
15421 IX86_BUILTIN_CVTDQ2PD,
15422 IX86_BUILTIN_CVTDQ2PS,
15424 IX86_BUILTIN_CVTPD2DQ,
15425 IX86_BUILTIN_CVTPD2PI,
15426 IX86_BUILTIN_CVTPD2PS,
15427 IX86_BUILTIN_CVTTPD2DQ,
15428 IX86_BUILTIN_CVTTPD2PI,
15430 IX86_BUILTIN_CVTPI2PD,
15431 IX86_BUILTIN_CVTSI2SD,
15432 IX86_BUILTIN_CVTSI642SD,
15434 IX86_BUILTIN_CVTSD2SI,
15435 IX86_BUILTIN_CVTSD2SI64,
15436 IX86_BUILTIN_CVTSD2SS,
15437 IX86_BUILTIN_CVTSS2SD,
15438 IX86_BUILTIN_CVTTSD2SI,
15439 IX86_BUILTIN_CVTTSD2SI64,
15441 IX86_BUILTIN_CVTPS2DQ,
15442 IX86_BUILTIN_CVTPS2PD,
15443 IX86_BUILTIN_CVTTPS2DQ,
15445 IX86_BUILTIN_MOVNTI,
15446 IX86_BUILTIN_MOVNTPD,
15447 IX86_BUILTIN_MOVNTDQ,
15449 /* SSE2 MMX */
15450 IX86_BUILTIN_MASKMOVDQU,
15451 IX86_BUILTIN_MOVMSKPD,
15452 IX86_BUILTIN_PMOVMSKB128,
15454 IX86_BUILTIN_PACKSSWB128,
15455 IX86_BUILTIN_PACKSSDW128,
15456 IX86_BUILTIN_PACKUSWB128,
15458 IX86_BUILTIN_PADDB128,
15459 IX86_BUILTIN_PADDW128,
15460 IX86_BUILTIN_PADDD128,
15461 IX86_BUILTIN_PADDQ128,
15462 IX86_BUILTIN_PADDSB128,
15463 IX86_BUILTIN_PADDSW128,
15464 IX86_BUILTIN_PADDUSB128,
15465 IX86_BUILTIN_PADDUSW128,
15466 IX86_BUILTIN_PSUBB128,
15467 IX86_BUILTIN_PSUBW128,
15468 IX86_BUILTIN_PSUBD128,
15469 IX86_BUILTIN_PSUBQ128,
15470 IX86_BUILTIN_PSUBSB128,
15471 IX86_BUILTIN_PSUBSW128,
15472 IX86_BUILTIN_PSUBUSB128,
15473 IX86_BUILTIN_PSUBUSW128,
15475 IX86_BUILTIN_PAND128,
15476 IX86_BUILTIN_PANDN128,
15477 IX86_BUILTIN_POR128,
15478 IX86_BUILTIN_PXOR128,
15480 IX86_BUILTIN_PAVGB128,
15481 IX86_BUILTIN_PAVGW128,
15483 IX86_BUILTIN_PCMPEQB128,
15484 IX86_BUILTIN_PCMPEQW128,
15485 IX86_BUILTIN_PCMPEQD128,
15486 IX86_BUILTIN_PCMPGTB128,
15487 IX86_BUILTIN_PCMPGTW128,
15488 IX86_BUILTIN_PCMPGTD128,
15490 IX86_BUILTIN_PMADDWD128,
15492 IX86_BUILTIN_PMAXSW128,
15493 IX86_BUILTIN_PMAXUB128,
15494 IX86_BUILTIN_PMINSW128,
15495 IX86_BUILTIN_PMINUB128,
15497 IX86_BUILTIN_PMULUDQ,
15498 IX86_BUILTIN_PMULUDQ128,
15499 IX86_BUILTIN_PMULHUW128,
15500 IX86_BUILTIN_PMULHW128,
15501 IX86_BUILTIN_PMULLW128,
15503 IX86_BUILTIN_PSADBW128,
15504 IX86_BUILTIN_PSHUFHW,
15505 IX86_BUILTIN_PSHUFLW,
15506 IX86_BUILTIN_PSHUFD,
15508 IX86_BUILTIN_PSLLW128,
15509 IX86_BUILTIN_PSLLD128,
15510 IX86_BUILTIN_PSLLQ128,
15511 IX86_BUILTIN_PSRAW128,
15512 IX86_BUILTIN_PSRAD128,
15513 IX86_BUILTIN_PSRLW128,
15514 IX86_BUILTIN_PSRLD128,
15515 IX86_BUILTIN_PSRLQ128,
15516 IX86_BUILTIN_PSLLDQI128,
15517 IX86_BUILTIN_PSLLWI128,
15518 IX86_BUILTIN_PSLLDI128,
15519 IX86_BUILTIN_PSLLQI128,
15520 IX86_BUILTIN_PSRAWI128,
15521 IX86_BUILTIN_PSRADI128,
15522 IX86_BUILTIN_PSRLDQI128,
15523 IX86_BUILTIN_PSRLWI128,
15524 IX86_BUILTIN_PSRLDI128,
15525 IX86_BUILTIN_PSRLQI128,
15527 IX86_BUILTIN_PUNPCKHBW128,
15528 IX86_BUILTIN_PUNPCKHWD128,
15529 IX86_BUILTIN_PUNPCKHDQ128,
15530 IX86_BUILTIN_PUNPCKHQDQ128,
15531 IX86_BUILTIN_PUNPCKLBW128,
15532 IX86_BUILTIN_PUNPCKLWD128,
15533 IX86_BUILTIN_PUNPCKLDQ128,
15534 IX86_BUILTIN_PUNPCKLQDQ128,
15536 IX86_BUILTIN_CLFLUSH,
15537 IX86_BUILTIN_MFENCE,
15538 IX86_BUILTIN_LFENCE,
15540 /* Prescott New Instructions. */
15541 IX86_BUILTIN_ADDSUBPS,
15542 IX86_BUILTIN_HADDPS,
15543 IX86_BUILTIN_HSUBPS,
15544 IX86_BUILTIN_MOVSHDUP,
15545 IX86_BUILTIN_MOVSLDUP,
15546 IX86_BUILTIN_ADDSUBPD,
15547 IX86_BUILTIN_HADDPD,
15548 IX86_BUILTIN_HSUBPD,
15549 IX86_BUILTIN_LDDQU,
15551 IX86_BUILTIN_MONITOR,
15552 IX86_BUILTIN_MWAIT,
15554 /* SSSE3. */
15555 IX86_BUILTIN_PHADDW,
15556 IX86_BUILTIN_PHADDD,
15557 IX86_BUILTIN_PHADDSW,
15558 IX86_BUILTIN_PHSUBW,
15559 IX86_BUILTIN_PHSUBD,
15560 IX86_BUILTIN_PHSUBSW,
15561 IX86_BUILTIN_PMADDUBSW,
15562 IX86_BUILTIN_PMULHRSW,
15563 IX86_BUILTIN_PSHUFB,
15564 IX86_BUILTIN_PSIGNB,
15565 IX86_BUILTIN_PSIGNW,
15566 IX86_BUILTIN_PSIGND,
15567 IX86_BUILTIN_PALIGNR,
15568 IX86_BUILTIN_PABSB,
15569 IX86_BUILTIN_PABSW,
15570 IX86_BUILTIN_PABSD,
15572 IX86_BUILTIN_PHADDW128,
15573 IX86_BUILTIN_PHADDD128,
15574 IX86_BUILTIN_PHADDSW128,
15575 IX86_BUILTIN_PHSUBW128,
15576 IX86_BUILTIN_PHSUBD128,
15577 IX86_BUILTIN_PHSUBSW128,
15578 IX86_BUILTIN_PMADDUBSW128,
15579 IX86_BUILTIN_PMULHRSW128,
15580 IX86_BUILTIN_PSHUFB128,
15581 IX86_BUILTIN_PSIGNB128,
15582 IX86_BUILTIN_PSIGNW128,
15583 IX86_BUILTIN_PSIGND128,
15584 IX86_BUILTIN_PALIGNR128,
15585 IX86_BUILTIN_PABSB128,
15586 IX86_BUILTIN_PABSW128,
15587 IX86_BUILTIN_PABSD128,
15589 IX86_BUILTIN_VEC_INIT_V2SI,
15590 IX86_BUILTIN_VEC_INIT_V4HI,
15591 IX86_BUILTIN_VEC_INIT_V8QI,
15592 IX86_BUILTIN_VEC_EXT_V2DF,
15593 IX86_BUILTIN_VEC_EXT_V2DI,
15594 IX86_BUILTIN_VEC_EXT_V4SF,
15595 IX86_BUILTIN_VEC_EXT_V4SI,
15596 IX86_BUILTIN_VEC_EXT_V8HI,
15597 IX86_BUILTIN_VEC_EXT_V2SI,
15598 IX86_BUILTIN_VEC_EXT_V4HI,
15599 IX86_BUILTIN_VEC_SET_V8HI,
15600 IX86_BUILTIN_VEC_SET_V4HI,
15602 IX86_BUILTIN_MAX
15603 };
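/* IX86_BUILTIN_MAX is not a real builtin; it only provides the number of
   entries for the ix86_builtins decl table below.  */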
15605 /* Table for the ix86 builtin decls. */
15606 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
15608 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
15609 * only if target_flags includes one of the bits in MASK. Stores the
15610 * function decl in the ix86_builtins array.
15611 * Returns the function decl, or NULL_TREE if the builtin was not added. */
15613 static inline tree
15614 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
15615 {
15616 tree decl = NULL_TREE;
15618 if (mask & target_flags
15619 && (!(mask & MASK_64BIT) || TARGET_64BIT))
15620 {
15621 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
15622 NULL, NULL_TREE);
15623 ix86_builtins[(int) code] = decl;
15624 }
15626 return decl;
15627 }
15629 /* Like def_builtin, but also marks the function decl "const". */
15631 static inline tree
15632 def_builtin_const (int mask, const char *name, tree type,
15633 enum ix86_builtins code)
15634 {
15635 tree decl = def_builtin (mask, name, type, code);
15636 if (decl)
15637 TREE_READONLY (decl) = 1;
15638 return decl;
15639 }
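/* For example, the call

     def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si",
                        int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);

   made further below registers the builtin only when SSE is enabled and
   marks the resulting decl TREE_READONLY, i.e. free of side effects.  */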
15641 /* Bits for builtin_description.flag. */
15643 /* Set when we don't support the comparison natively, and should
15644 swap the comparison operands in order to support it. */
15645 #define BUILTIN_DESC_SWAP_OPERANDS 1
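/* For instance, there is no native packed "greater than" compare; the
   bdesc_2arg entry for __builtin_ia32_cmpgtps below therefore uses LT
   together with BUILTIN_DESC_SWAP_OPERANDS, so that a > b is emitted as
   b < a.  */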
15647 struct builtin_description
15648 {
15649 const unsigned int mask;
15650 const enum insn_code icode;
15651 const char *const name;
15652 const enum ix86_builtins code;
15653 const enum rtx_code comparison;
15654 const unsigned int flag;
15655 };
15657 static const struct builtin_description bdesc_comi[] =
15658 {
15659 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15660 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15661 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15662 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15663 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15664 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15665 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15666 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15667 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15668 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15669 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15670 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15671 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15672 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15673 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15674 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15675 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15676 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15677 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15678 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15679 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15680 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15681 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15682 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15683 };
15685 static const struct builtin_description bdesc_2arg[] =
15686 {
15687 /* SSE */
15688 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15689 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15690 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15691 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15692 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15693 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15694 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15695 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15697 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15698 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15699 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15700 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15701 BUILTIN_DESC_SWAP_OPERANDS },
15702 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15703 BUILTIN_DESC_SWAP_OPERANDS },
15704 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15705 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15706 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15707 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15708 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15709 BUILTIN_DESC_SWAP_OPERANDS },
15710 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15711 BUILTIN_DESC_SWAP_OPERANDS },
15712 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15713 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15714 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15715 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15716 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15717 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15718 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15719 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15720 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15721 BUILTIN_DESC_SWAP_OPERANDS },
15722 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15723 BUILTIN_DESC_SWAP_OPERANDS },
15724 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
15726 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15727 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15728 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15729 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15731 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15732 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15733 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15734 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15736 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15737 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15738 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15739 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15740 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15742 /* MMX */
15743 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15744 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15745 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15746 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15747 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15748 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15749 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15750 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15752 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15753 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15754 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15755 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15756 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15757 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15758 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15759 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15761 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15762 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15763 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15765 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15766 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15767 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15768 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15770 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15771 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15773 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15774 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15775 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15776 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15777 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15778 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15780 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15781 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15785 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15786 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15787 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15788 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15789 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15790 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15792 /* Special. */
15793 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15794 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15795 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15797 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15798 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15799 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15801 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15802 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15803 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15804 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15805 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15806 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15808 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15809 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15810 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15811 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15812 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15813 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15815 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15816 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15817 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15818 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15820 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15821 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15823 /* SSE2 */
15824 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15825 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15826 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15827 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15828 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15829 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15830 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15831 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15833 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15834 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15835 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15836 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15837 BUILTIN_DESC_SWAP_OPERANDS },
15838 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15839 BUILTIN_DESC_SWAP_OPERANDS },
15840 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15841 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15842 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15843 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15844 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15845 BUILTIN_DESC_SWAP_OPERANDS },
15846 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15847 BUILTIN_DESC_SWAP_OPERANDS },
15848 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15849 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15850 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15851 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15852 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15853 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15854 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15855 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15856 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15858 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15859 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15860 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15861 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15863 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15864 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15865 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15866 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15868 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15869 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15870 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15872 /* SSE2 MMX */
15873 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15874 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15875 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15876 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15877 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15878 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15879 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15880 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15882 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15883 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15884 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15885 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15886 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15887 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15888 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15889 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15891 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15892 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15894 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15895 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15896 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15897 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15899 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15900 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15902 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15903 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15904 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15905 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15906 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15907 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15909 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15910 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15911 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15912 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15914 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15915 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15916 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15917 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15918 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15919 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15920 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15921 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15923 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15924 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15925 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15927 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15928 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15930 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15931 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15933 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15934 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15935 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15937 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15938 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15939 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15941 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15942 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15944 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15946 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15947 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15948 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15949 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15951 /* SSE3 MMX */
15952 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15953 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15954 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15955 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15956 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15957 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15959 /* SSSE3 */
15960 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15961 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15962 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15963 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15964 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15965 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15966 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15967 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15968 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15969 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15970 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15971 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15972 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15973 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15974 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15975 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15976 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15977 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15978 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15979 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15980 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15981 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15982 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15983 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15984 };
15986 static const struct builtin_description bdesc_1arg[] =
15987 {
15988 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15989 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15991 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15992 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15993 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15995 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15996 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15997 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15998 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15999 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16000 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16002 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16003 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16005 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16007 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16008 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16010 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16011 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16012 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16013 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16014 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16016 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16018 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16019 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16020 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16021 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16023 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16024 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16025 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16027 /* SSE3 */
16028 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16029 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16031 /* SSSE3 */
16032 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16033 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16034 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16035 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16036 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16037 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16038 };
16040 static void
16041 ix86_init_builtins (void)
16042 {
16043 if (TARGET_MMX)
16044 ix86_init_mmx_sse_builtins ();
16045 }
16047 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16048 is zero; if TARGET_SSE is not set, only the MMX builtins end up
16049 being defined. */
16050 static void
16051 ix86_init_mmx_sse_builtins (void)
16052 {
16053 const struct builtin_description * d;
16054 size_t i;
16056 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
16057 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16058 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16059 tree V2DI_type_node
16060 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16061 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16062 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16063 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16064 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16065 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16066 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16068 tree pchar_type_node = build_pointer_type (char_type_node);
16069 tree pcchar_type_node = build_pointer_type (
16070 build_type_variant (char_type_node, 1, 0));
16071 tree pfloat_type_node = build_pointer_type (float_type_node);
16072 tree pcfloat_type_node = build_pointer_type (
16073 build_type_variant (float_type_node, 1, 0));
16074 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16075 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16076 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16078 /* Comparisons. */
16079 tree int_ftype_v4sf_v4sf
16080 = build_function_type_list (integer_type_node,
16081 V4SF_type_node, V4SF_type_node, NULL_TREE);
16082 tree v4si_ftype_v4sf_v4sf
16083 = build_function_type_list (V4SI_type_node,
16084 V4SF_type_node, V4SF_type_node, NULL_TREE);
16085 /* MMX/SSE/integer conversions. */
16086 tree int_ftype_v4sf
16087 = build_function_type_list (integer_type_node,
16088 V4SF_type_node, NULL_TREE);
16089 tree int64_ftype_v4sf
16090 = build_function_type_list (long_long_integer_type_node,
16091 V4SF_type_node, NULL_TREE);
16092 tree int_ftype_v8qi
16093 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16094 tree v4sf_ftype_v4sf_int
16095 = build_function_type_list (V4SF_type_node,
16096 V4SF_type_node, integer_type_node, NULL_TREE);
16097 tree v4sf_ftype_v4sf_int64
16098 = build_function_type_list (V4SF_type_node,
16099 V4SF_type_node, long_long_integer_type_node,
16100 NULL_TREE);
16101 tree v4sf_ftype_v4sf_v2si
16102 = build_function_type_list (V4SF_type_node,
16103 V4SF_type_node, V2SI_type_node, NULL_TREE);
16105 /* Miscellaneous. */
16106 tree v8qi_ftype_v4hi_v4hi
16107 = build_function_type_list (V8QI_type_node,
16108 V4HI_type_node, V4HI_type_node, NULL_TREE);
16109 tree v4hi_ftype_v2si_v2si
16110 = build_function_type_list (V4HI_type_node,
16111 V2SI_type_node, V2SI_type_node, NULL_TREE);
16112 tree v4sf_ftype_v4sf_v4sf_int
16113 = build_function_type_list (V4SF_type_node,
16114 V4SF_type_node, V4SF_type_node,
16115 integer_type_node, NULL_TREE);
16116 tree v2si_ftype_v4hi_v4hi
16117 = build_function_type_list (V2SI_type_node,
16118 V4HI_type_node, V4HI_type_node, NULL_TREE);
16119 tree v4hi_ftype_v4hi_int
16120 = build_function_type_list (V4HI_type_node,
16121 V4HI_type_node, integer_type_node, NULL_TREE);
16122 tree v4hi_ftype_v4hi_di
16123 = build_function_type_list (V4HI_type_node,
16124 V4HI_type_node, long_long_unsigned_type_node,
16125 NULL_TREE);
16126 tree v2si_ftype_v2si_di
16127 = build_function_type_list (V2SI_type_node,
16128 V2SI_type_node, long_long_unsigned_type_node,
16129 NULL_TREE);
16130 tree void_ftype_void
16131 = build_function_type (void_type_node, void_list_node);
16132 tree void_ftype_unsigned
16133 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16134 tree void_ftype_unsigned_unsigned
16135 = build_function_type_list (void_type_node, unsigned_type_node,
16136 unsigned_type_node, NULL_TREE);
16137 tree void_ftype_pcvoid_unsigned_unsigned
16138 = build_function_type_list (void_type_node, const_ptr_type_node,
16139 unsigned_type_node, unsigned_type_node,
16140 NULL_TREE);
16141 tree unsigned_ftype_void
16142 = build_function_type (unsigned_type_node, void_list_node);
16143 tree v2si_ftype_v4sf
16144 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16145 /* Loads/stores. */
16146 tree void_ftype_v8qi_v8qi_pchar
16147 = build_function_type_list (void_type_node,
16148 V8QI_type_node, V8QI_type_node,
16149 pchar_type_node, NULL_TREE);
16150 tree v4sf_ftype_pcfloat
16151 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16152 /* @@@ the type is bogus */
16153 tree v4sf_ftype_v4sf_pv2si
16154 = build_function_type_list (V4SF_type_node,
16155 V4SF_type_node, pv2si_type_node, NULL_TREE);
16156 tree void_ftype_pv2si_v4sf
16157 = build_function_type_list (void_type_node,
16158 pv2si_type_node, V4SF_type_node, NULL_TREE);
16159 tree void_ftype_pfloat_v4sf
16160 = build_function_type_list (void_type_node,
16161 pfloat_type_node, V4SF_type_node, NULL_TREE);
16162 tree void_ftype_pdi_di
16163 = build_function_type_list (void_type_node,
16164 pdi_type_node, long_long_unsigned_type_node,
16165 NULL_TREE);
16166 tree void_ftype_pv2di_v2di
16167 = build_function_type_list (void_type_node,
16168 pv2di_type_node, V2DI_type_node, NULL_TREE);
16169 /* Normal vector unops. */
16170 tree v4sf_ftype_v4sf
16171 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16172 tree v16qi_ftype_v16qi
16173 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16174 tree v8hi_ftype_v8hi
16175 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16176 tree v4si_ftype_v4si
16177 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16178 tree v8qi_ftype_v8qi
16179 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16180 tree v4hi_ftype_v4hi
16181 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16183 /* Normal vector binops. */
16184 tree v4sf_ftype_v4sf_v4sf
16185 = build_function_type_list (V4SF_type_node,
16186 V4SF_type_node, V4SF_type_node, NULL_TREE);
16187 tree v8qi_ftype_v8qi_v8qi
16188 = build_function_type_list (V8QI_type_node,
16189 V8QI_type_node, V8QI_type_node, NULL_TREE);
16190 tree v4hi_ftype_v4hi_v4hi
16191 = build_function_type_list (V4HI_type_node,
16192 V4HI_type_node, V4HI_type_node, NULL_TREE);
16193 tree v2si_ftype_v2si_v2si
16194 = build_function_type_list (V2SI_type_node,
16195 V2SI_type_node, V2SI_type_node, NULL_TREE);
16196 tree di_ftype_di_di
16197 = build_function_type_list (long_long_unsigned_type_node,
16198 long_long_unsigned_type_node,
16199 long_long_unsigned_type_node, NULL_TREE);
16201 tree di_ftype_di_di_int
16202 = build_function_type_list (long_long_unsigned_type_node,
16203 long_long_unsigned_type_node,
16204 long_long_unsigned_type_node,
16205 integer_type_node, NULL_TREE);
16207 tree v2si_ftype_v2sf
16208 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16209 tree v2sf_ftype_v2si
16210 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16211 tree v2si_ftype_v2si
16212 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16213 tree v2sf_ftype_v2sf
16214 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16215 tree v2sf_ftype_v2sf_v2sf
16216 = build_function_type_list (V2SF_type_node,
16217 V2SF_type_node, V2SF_type_node, NULL_TREE);
16218 tree v2si_ftype_v2sf_v2sf
16219 = build_function_type_list (V2SI_type_node,
16220 V2SF_type_node, V2SF_type_node, NULL_TREE);
16221 tree pint_type_node = build_pointer_type (integer_type_node);
16222 tree pdouble_type_node = build_pointer_type (double_type_node);
16223 tree pcdouble_type_node = build_pointer_type (
16224 build_type_variant (double_type_node, 1, 0));
16225 tree int_ftype_v2df_v2df
16226 = build_function_type_list (integer_type_node,
16227 V2DF_type_node, V2DF_type_node, NULL_TREE);
16229 tree void_ftype_pcvoid
16230 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16231 tree v4sf_ftype_v4si
16232 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16233 tree v4si_ftype_v4sf
16234 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16235 tree v2df_ftype_v4si
16236 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16237 tree v4si_ftype_v2df
16238 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16239 tree v2si_ftype_v2df
16240 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16241 tree v4sf_ftype_v2df
16242 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16243 tree v2df_ftype_v2si
16244 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16245 tree v2df_ftype_v4sf
16246 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16247 tree int_ftype_v2df
16248 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16249 tree int64_ftype_v2df
16250 = build_function_type_list (long_long_integer_type_node,
16251 V2DF_type_node, NULL_TREE);
16252 tree v2df_ftype_v2df_int
16253 = build_function_type_list (V2DF_type_node,
16254 V2DF_type_node, integer_type_node, NULL_TREE);
16255 tree v2df_ftype_v2df_int64
16256 = build_function_type_list (V2DF_type_node,
16257 V2DF_type_node, long_long_integer_type_node,
16258 NULL_TREE);
16259 tree v4sf_ftype_v4sf_v2df
16260 = build_function_type_list (V4SF_type_node,
16261 V4SF_type_node, V2DF_type_node, NULL_TREE);
16262 tree v2df_ftype_v2df_v4sf
16263 = build_function_type_list (V2DF_type_node,
16264 V2DF_type_node, V4SF_type_node, NULL_TREE);
16265 tree v2df_ftype_v2df_v2df_int
16266 = build_function_type_list (V2DF_type_node,
16267 V2DF_type_node, V2DF_type_node,
16268 integer_type_node,
16269 NULL_TREE);
16270 tree v2df_ftype_v2df_pcdouble
16271 = build_function_type_list (V2DF_type_node,
16272 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16273 tree void_ftype_pdouble_v2df
16274 = build_function_type_list (void_type_node,
16275 pdouble_type_node, V2DF_type_node, NULL_TREE);
16276 tree void_ftype_pint_int
16277 = build_function_type_list (void_type_node,
16278 pint_type_node, integer_type_node, NULL_TREE);
16279 tree void_ftype_v16qi_v16qi_pchar
16280 = build_function_type_list (void_type_node,
16281 V16QI_type_node, V16QI_type_node,
16282 pchar_type_node, NULL_TREE);
16283 tree v2df_ftype_pcdouble
16284 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16285 tree v2df_ftype_v2df_v2df
16286 = build_function_type_list (V2DF_type_node,
16287 V2DF_type_node, V2DF_type_node, NULL_TREE);
16288 tree v16qi_ftype_v16qi_v16qi
16289 = build_function_type_list (V16QI_type_node,
16290 V16QI_type_node, V16QI_type_node, NULL_TREE);
16291 tree v8hi_ftype_v8hi_v8hi
16292 = build_function_type_list (V8HI_type_node,
16293 V8HI_type_node, V8HI_type_node, NULL_TREE);
16294 tree v4si_ftype_v4si_v4si
16295 = build_function_type_list (V4SI_type_node,
16296 V4SI_type_node, V4SI_type_node, NULL_TREE);
16297 tree v2di_ftype_v2di_v2di
16298 = build_function_type_list (V2DI_type_node,
16299 V2DI_type_node, V2DI_type_node, NULL_TREE);
16300 tree v2di_ftype_v2df_v2df
16301 = build_function_type_list (V2DI_type_node,
16302 V2DF_type_node, V2DF_type_node, NULL_TREE);
16303 tree v2df_ftype_v2df
16304 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16305 tree v2di_ftype_v2di_int
16306 = build_function_type_list (V2DI_type_node,
16307 V2DI_type_node, integer_type_node, NULL_TREE);
16308 tree v2di_ftype_v2di_v2di_int
16309 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16310 V2DI_type_node, integer_type_node, NULL_TREE);
16311 tree v4si_ftype_v4si_int
16312 = build_function_type_list (V4SI_type_node,
16313 V4SI_type_node, integer_type_node, NULL_TREE);
16314 tree v8hi_ftype_v8hi_int
16315 = build_function_type_list (V8HI_type_node,
16316 V8HI_type_node, integer_type_node, NULL_TREE);
16317 tree v8hi_ftype_v8hi_v2di
16318 = build_function_type_list (V8HI_type_node,
16319 V8HI_type_node, V2DI_type_node, NULL_TREE);
16320 tree v4si_ftype_v4si_v2di
16321 = build_function_type_list (V4SI_type_node,
16322 V4SI_type_node, V2DI_type_node, NULL_TREE);
16323 tree v4si_ftype_v8hi_v8hi
16324 = build_function_type_list (V4SI_type_node,
16325 V8HI_type_node, V8HI_type_node, NULL_TREE);
16326 tree di_ftype_v8qi_v8qi
16327 = build_function_type_list (long_long_unsigned_type_node,
16328 V8QI_type_node, V8QI_type_node, NULL_TREE);
16329 tree di_ftype_v2si_v2si
16330 = build_function_type_list (long_long_unsigned_type_node,
16331 V2SI_type_node, V2SI_type_node, NULL_TREE);
16332 tree v2di_ftype_v16qi_v16qi
16333 = build_function_type_list (V2DI_type_node,
16334 V16QI_type_node, V16QI_type_node, NULL_TREE);
16335 tree v2di_ftype_v4si_v4si
16336 = build_function_type_list (V2DI_type_node,
16337 V4SI_type_node, V4SI_type_node, NULL_TREE);
16338 tree int_ftype_v16qi
16339 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16340 tree v16qi_ftype_pcchar
16341 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16342 tree void_ftype_pchar_v16qi
16343 = build_function_type_list (void_type_node,
16344 pchar_type_node, V16QI_type_node, NULL_TREE);
16346 tree float80_type;
16347 tree float128_type;
16348 tree ftype;
16350 /* The __float80 type. */
16351 if (TYPE_MODE (long_double_type_node) == XFmode)
16352 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16353 "__float80");
16354 else
16355 {
16356 /* The __float80 type. */
16357 float80_type = make_node (REAL_TYPE);
16358 TYPE_PRECISION (float80_type) = 80;
16359 layout_type (float80_type);
16360 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16361 }
16363 if (TARGET_64BIT)
16364 {
16365 float128_type = make_node (REAL_TYPE);
16366 TYPE_PRECISION (float128_type) = 128;
16367 layout_type (float128_type);
16368 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16369 }
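/* The net effect is that user code can then declare e.g. "__float80 x;"
   and, when compiling for 64-bit targets, "__float128 y;".  */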
16371 /* Add all builtins that are more or less simple operations on two
16372 operands. */
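/* As a concrete example of the mapping below: for CODE_FOR_addv4sf3
   ("__builtin_ia32_addps") operand 1 has mode V4SFmode, so the builtin is
   given the signature v4sf_ftype_v4sf_v4sf, i.e. v4sf (v4sf, v4sf).  */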
16373 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16374 {
16375 /* Use one of the operands; the target can have a different mode for
16376 mask-generating compares. */
16377 enum machine_mode mode;
16378 tree type;
16380 if (d->name == 0)
16381 continue;
16382 mode = insn_data[d->icode].operand[1].mode;
16384 switch (mode)
16385 {
16386 case V16QImode:
16387 type = v16qi_ftype_v16qi_v16qi;
16388 break;
16389 case V8HImode:
16390 type = v8hi_ftype_v8hi_v8hi;
16391 break;
16392 case V4SImode:
16393 type = v4si_ftype_v4si_v4si;
16394 break;
16395 case V2DImode:
16396 type = v2di_ftype_v2di_v2di;
16397 break;
16398 case V2DFmode:
16399 type = v2df_ftype_v2df_v2df;
16400 break;
16401 case V4SFmode:
16402 type = v4sf_ftype_v4sf_v4sf;
16403 break;
16404 case V8QImode:
16405 type = v8qi_ftype_v8qi_v8qi;
16406 break;
16407 case V4HImode:
16408 type = v4hi_ftype_v4hi_v4hi;
16409 break;
16410 case V2SImode:
16411 type = v2si_ftype_v2si_v2si;
16412 break;
16413 case DImode:
16414 type = di_ftype_di_di;
16415 break;
16417 default:
16418 gcc_unreachable ();
16419 }
16421 /* Override for comparisons. */
16422 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16423 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16424 type = v4si_ftype_v4sf_v4sf;
16426 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16427 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16428 type = v2di_ftype_v2df_v2df;
16430 def_builtin (d->mask, d->name, type, d->code);
16431 }
16433 /* Add all builtins that are more or less simple operations on 1 operand. */
16434 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16435 {
16436 enum machine_mode mode;
16437 tree type;
16439 if (d->name == 0)
16440 continue;
16441 mode = insn_data[d->icode].operand[1].mode;
16443 switch (mode)
16444 {
16445 case V16QImode:
16446 type = v16qi_ftype_v16qi;
16447 break;
16448 case V8HImode:
16449 type = v8hi_ftype_v8hi;
16450 break;
16451 case V4SImode:
16452 type = v4si_ftype_v4si;
16453 break;
16454 case V2DFmode:
16455 type = v2df_ftype_v2df;
16456 break;
16457 case V4SFmode:
16458 type = v4sf_ftype_v4sf;
16459 break;
16460 case V8QImode:
16461 type = v8qi_ftype_v8qi;
16462 break;
16463 case V4HImode:
16464 type = v4hi_ftype_v4hi;
16465 break;
16466 case V2SImode:
16467 type = v2si_ftype_v2si;
16468 break;
16470 default:
16471 abort ();
16474 def_builtin (d->mask, d->name, type, d->code);
16477 /* Add the remaining MMX insns with somewhat more complicated types. */
16478 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16479 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16480 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16481 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16483 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16484 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16485 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16487 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16488 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16490 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16491 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16493 /* comi/ucomi insns. */
16494 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16495 if (d->mask == MASK_SSE2)
16496 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16497 else
16498 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16500 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16501 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16502 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16504 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16505 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16506 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16507 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16508 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16509 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16510 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16511 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16512 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16513 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16514 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16516 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16518 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16519 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16521 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16522 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16523 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16524 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
16526 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
16527 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
16528 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
16529 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
16531 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
16533 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
16535 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
16536 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
16537 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
16538 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
16539 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
16540 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
16542 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
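/* Illustrative sketch (not part of the original file; the wrapper name and the
   0x1b mask are arbitrary): user code normally reaches the SSE builtins defined
   above through xmmintrin.h, e.g. _mm_shuffle_ps for __builtin_ia32_shufps.  */
#if 0
#include <xmmintrin.h>

static __m128
example_reverse_ps (__m128 x)
{
  /* Expands to __builtin_ia32_shufps (x, x, 0x1b), reversing the four floats.  */
  return _mm_shuffle_ps (x, x, 0x1b);
}
#endif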
16544 /* Original 3DNow! */
16545 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
16546 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
16547 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
16548 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
16549 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
16550 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
16551 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
16552 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
16553 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
16554 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
16555 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
16556 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
16557 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
16558 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
16559 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
16560 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
16561 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
16562 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
16563 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
16564 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
16566 /* 3DNow! extension as used in the Athlon CPU. */
16567 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
16568 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
16569 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
16570 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
16571 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
16572 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
16574 /* SSE2 */
16575 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
16577 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
16578 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
16580 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
16581 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
16583 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
16584 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
16585 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
16586 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
16587 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
16589 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
16590 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
16591 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
16592 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
16594 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
16595 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
16597 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
16599 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
16600 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
16602 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
16603 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
16604 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
16605 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
16606 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
16608 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
16610 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
16611 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
16612 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
16613 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
16615 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
16616 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
16617 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
16619 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
16620 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
16621 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
16622 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
16624 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
16625 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
16626 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
16628 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
16629 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
16631 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
16632 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
16634 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
16635 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
16636 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
16638 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
16639 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
16640 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16642 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
16643 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
16645 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16646 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16647 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16648 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16650 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16651 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16652 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16653 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16655 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16656 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16658 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16660 /* Prescott New Instructions. */
16661 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16662 void_ftype_pcvoid_unsigned_unsigned,
16663 IX86_BUILTIN_MONITOR);
16664 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16665 void_ftype_unsigned_unsigned,
16666 IX86_BUILTIN_MWAIT);
16667 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16668 v4sf_ftype_v4sf,
16669 IX86_BUILTIN_MOVSHDUP);
16670 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16671 v4sf_ftype_v4sf,
16672 IX86_BUILTIN_MOVSLDUP);
16673 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16674 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
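/* Illustrative sketch (assumes GCC's pmmintrin.h wrappers; the helper name and
   the zero hint/extension arguments are placeholders): the monitor and mwait
   builtins above are normally reached as _mm_monitor and _mm_mwait.  */
#if 0
#include <pmmintrin.h>

static void
example_monitor_then_wait (void const *addr)
{
  _mm_monitor (addr, 0, 0);  /* __builtin_ia32_monitor  */
  _mm_mwait (0, 0);          /* __builtin_ia32_mwait    */
}
#endif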
16676 /* SSSE3. */
16677 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16678 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16679 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16680 IX86_BUILTIN_PALIGNR);
16682 /* Access to the vec_init patterns. */
16683 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16684 integer_type_node, NULL_TREE);
16685 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16686 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16688 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16689 short_integer_type_node,
16690 short_integer_type_node,
16691 short_integer_type_node, NULL_TREE);
16692 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16693 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16695 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16696 char_type_node, char_type_node,
16697 char_type_node, char_type_node,
16698 char_type_node, char_type_node,
16699 char_type_node, NULL_TREE);
16700 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16701 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
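/* Illustrative sketch (assumes the mmintrin.h wrappers; the helper name is
   arbitrary): the vec_init builtins above back the MMX "set" intrinsics, so a
   V2SI value can be built without vec_init patterns for MMX modes in mmx.md.  */
#if 0
#include <mmintrin.h>

static __m64
example_make_v2si (int hi, int lo)
{
  /* Expands to __builtin_ia32_vec_init_v2si.  */
  return _mm_set_pi32 (hi, lo);
}
#endif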
16703 /* Access to the vec_extract patterns. */
16704 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16705 integer_type_node, NULL_TREE);
16706 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
16707 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16709 ftype = build_function_type_list (long_long_integer_type_node,
16710 V2DI_type_node, integer_type_node,
16711 NULL_TREE);
16712 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
16713 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16715 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16716 integer_type_node, NULL_TREE);
16717 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16718 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16720 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16721 integer_type_node, NULL_TREE);
16722 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
16723 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16725 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16726 integer_type_node, NULL_TREE);
16727 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
16728 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16730 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16731 integer_type_node, NULL_TREE);
16732 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16733 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16735 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16736 integer_type_node, NULL_TREE);
16737 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16738 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16740 /* Access to the vec_set patterns. */
16741 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16742 intHI_type_node,
16743 integer_type_node, NULL_TREE);
16744 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
16745 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16747 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16748 intHI_type_node,
16749 integer_type_node, NULL_TREE);
16750 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16751 ftype, IX86_BUILTIN_VEC_SET_V4HI);
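/* Illustrative sketch (assumes the emmintrin.h wrappers; the helper name and
   lane 3 are arbitrary): the vec_ext and vec_set builtins above back the
   element-access intrinsics such as _mm_extract_epi16 and _mm_insert_epi16.  */
#if 0
#include <emmintrin.h>

static __m128i
example_bump_lane (__m128i v)
{
  int e = _mm_extract_epi16 (v, 3);       /* __builtin_ia32_vec_ext_v8hi  */
  return _mm_insert_epi16 (v, e + 1, 3);  /* __builtin_ia32_vec_set_v8hi  */
}
#endif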
16754 /* Errors in the source file can cause expand_expr to return const0_rtx
16755 where we expect a vector. To avoid crashing, use one of the vector
16756 clear instructions. */
16757 static rtx
16758 safe_vector_operand (rtx x, enum machine_mode mode)
16760 if (x == const0_rtx)
16761 x = CONST0_RTX (mode);
16762 return x;
16765 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16767 static rtx
16768 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16770 rtx pat, xops[3];
16771 tree arg0 = TREE_VALUE (arglist);
16772 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16773 rtx op0 = expand_normal (arg0);
16774 rtx op1 = expand_normal (arg1);
16775 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16776 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16777 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16779 if (VECTOR_MODE_P (mode0))
16780 op0 = safe_vector_operand (op0, mode0);
16781 if (VECTOR_MODE_P (mode1))
16782 op1 = safe_vector_operand (op1, mode1);
16784 if (optimize || !target
16785 || GET_MODE (target) != tmode
16786 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16787 target = gen_reg_rtx (tmode);
16789 if (GET_MODE (op1) == SImode && mode1 == TImode)
16791 rtx x = gen_reg_rtx (V4SImode);
16792 emit_insn (gen_sse2_loadd (x, op1));
16793 op1 = gen_lowpart (TImode, x);
16796 /* The insn must want input operands in the same modes as the
16797 result. */
16798 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16799 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16801 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16802 op0 = copy_to_mode_reg (mode0, op0);
16803 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16804 op1 = copy_to_mode_reg (mode1, op1);
16806 /* ??? Using ix86_fixup_binary_operands is problematic when
16807 we've got mismatched modes. Fake it. */
16809 xops[0] = target;
16810 xops[1] = op0;
16811 xops[2] = op1;
16813 if (tmode == mode0 && tmode == mode1)
16815 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16816 op0 = xops[1];
16817 op1 = xops[2];
16819 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16821 op0 = force_reg (mode0, op0);
16822 op1 = force_reg (mode1, op1);
16823 target = gen_reg_rtx (tmode);
16826 pat = GEN_FCN (icode) (target, op0, op1);
16827 if (! pat)
16828 return 0;
16829 emit_insn (pat);
16830 return target;
16833 /* Subroutine of ix86_expand_builtin to take care of stores. */
16835 static rtx
16836 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16838 rtx pat;
16839 tree arg0 = TREE_VALUE (arglist);
16840 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16841 rtx op0 = expand_normal (arg0);
16842 rtx op1 = expand_normal (arg1);
16843 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16844 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16846 if (VECTOR_MODE_P (mode1))
16847 op1 = safe_vector_operand (op1, mode1);
16849 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16850 op1 = copy_to_mode_reg (mode1, op1);
16852 pat = GEN_FCN (icode) (op0, op1);
16853 if (pat)
16854 emit_insn (pat);
16855 return 0;
16858 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16860 static rtx
16861 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16862 rtx target, int do_load)
16864 rtx pat;
16865 tree arg0 = TREE_VALUE (arglist);
16866 rtx op0 = expand_normal (arg0);
16867 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16868 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16870 if (optimize || !target
16871 || GET_MODE (target) != tmode
16872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16873 target = gen_reg_rtx (tmode);
16874 if (do_load)
16875 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16876 else
16878 if (VECTOR_MODE_P (mode0))
16879 op0 = safe_vector_operand (op0, mode0);
16881 if ((optimize && !register_operand (op0, mode0))
16882 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16883 op0 = copy_to_mode_reg (mode0, op0);
16886 pat = GEN_FCN (icode) (target, op0);
16887 if (! pat)
16888 return 0;
16889 emit_insn (pat);
16890 return target;
16893 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16894 sqrtss, rsqrtss, rcpss. */
16896 static rtx
16897 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16899 rtx pat;
16900 tree arg0 = TREE_VALUE (arglist);
16901 rtx op1, op0 = expand_normal (arg0);
16902 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16903 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16905 if (optimize || !target
16906 || GET_MODE (target) != tmode
16907 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16908 target = gen_reg_rtx (tmode);
16910 if (VECTOR_MODE_P (mode0))
16911 op0 = safe_vector_operand (op0, mode0);
16913 if ((optimize && !register_operand (op0, mode0))
16914 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16915 op0 = copy_to_mode_reg (mode0, op0);
16917 op1 = op0;
16918 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16919 op1 = copy_to_mode_reg (mode0, op1);
16921 pat = GEN_FCN (icode) (target, op0, op1);
16922 if (! pat)
16923 return 0;
16924 emit_insn (pat);
16925 return target;
16928 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16930 static rtx
16931 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16932 rtx target)
16934 rtx pat;
16935 tree arg0 = TREE_VALUE (arglist);
16936 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16937 rtx op0 = expand_normal (arg0);
16938 rtx op1 = expand_normal (arg1);
16939 rtx op2;
16940 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16941 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16942 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16943 enum rtx_code comparison = d->comparison;
16945 if (VECTOR_MODE_P (mode0))
16946 op0 = safe_vector_operand (op0, mode0);
16947 if (VECTOR_MODE_P (mode1))
16948 op1 = safe_vector_operand (op1, mode1);
16950 /* Swap operands if we have a comparison that isn't available in
16951 hardware. */
16952 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16954 rtx tmp = gen_reg_rtx (mode1);
16955 emit_move_insn (tmp, op1);
16956 op1 = op0;
16957 op0 = tmp;
16960 if (optimize || !target
16961 || GET_MODE (target) != tmode
16962 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16963 target = gen_reg_rtx (tmode);
16965 if ((optimize && !register_operand (op0, mode0))
16966 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16967 op0 = copy_to_mode_reg (mode0, op0);
16968 if ((optimize && !register_operand (op1, mode1))
16969 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16970 op1 = copy_to_mode_reg (mode1, op1);
16972 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16973 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16974 if (! pat)
16975 return 0;
16976 emit_insn (pat);
16977 return target;
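/* For example (illustrative note): SSE provides only the "less than" family of
   compare instructions, so a builtin such as __builtin_ia32_cmpgtps is marked
   BUILTIN_DESC_SWAP_OPERANDS in its description and the code above emits the
   LT compare with the operands exchanged.  */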
16980 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16982 static rtx
16983 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16984 rtx target)
16986 rtx pat;
16987 tree arg0 = TREE_VALUE (arglist);
16988 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16989 rtx op0 = expand_normal (arg0);
16990 rtx op1 = expand_normal (arg1);
16991 rtx op2;
16992 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16993 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16994 enum rtx_code comparison = d->comparison;
16996 if (VECTOR_MODE_P (mode0))
16997 op0 = safe_vector_operand (op0, mode0);
16998 if (VECTOR_MODE_P (mode1))
16999 op1 = safe_vector_operand (op1, mode1);
17001 /* Swap operands if we have a comparison that isn't available in
17002 hardware. */
17003 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17005 rtx tmp = op1;
17006 op1 = op0;
17007 op0 = tmp;
17010 target = gen_reg_rtx (SImode);
17011 emit_move_insn (target, const0_rtx);
17012 target = gen_rtx_SUBREG (QImode, target, 0);
17014 if ((optimize && !register_operand (op0, mode0))
17015 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17016 op0 = copy_to_mode_reg (mode0, op0);
17017 if ((optimize && !register_operand (op1, mode1))
17018 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17019 op1 = copy_to_mode_reg (mode1, op1);
17021 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17022 pat = GEN_FCN (d->icode) (op0, op1);
17023 if (! pat)
17024 return 0;
17025 emit_insn (pat);
17026 emit_insn (gen_rtx_SET (VOIDmode,
17027 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17028 gen_rtx_fmt_ee (comparison, QImode,
17029 SET_DEST (pat),
17030 const0_rtx)));
17032 return SUBREG_REG (target);
17035 /* Return the integer constant in ARG. Constrain it to be in the range
17036 of the subparts of VEC_TYPE; issue an error if not. */
17038 static int
17039 get_element_number (tree vec_type, tree arg)
17041 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17043 if (!host_integerp (arg, 1)
17044 || (elt = tree_low_cst (arg, 1), elt > max))
17046 error ("selector must be an integer constant in the range 0..%wi", max);
17047 return 0;
17050 return elt;
17053 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17054 ix86_expand_vector_init. We DO have language-level syntax for this, in
17055 the form of (type){ init-list }. Except that since we can't place emms
17056 instructions from inside the compiler, we can't allow the use of MMX
17057 registers unless the user explicitly asks for it. So we do *not* define
17058 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17059 we have builtins invoked by mmintrin.h that give us license to emit
17060 these sorts of instructions. */
17062 static rtx
17063 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
17065 enum machine_mode tmode = TYPE_MODE (type);
17066 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17067 int i, n_elt = GET_MODE_NUNITS (tmode);
17068 rtvec v = rtvec_alloc (n_elt);
17070 gcc_assert (VECTOR_MODE_P (tmode));
17072 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
17074 rtx x = expand_normal (TREE_VALUE (arglist));
17075 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17078 gcc_assert (arglist == NULL);
17080 if (!target || !register_operand (target, tmode))
17081 target = gen_reg_rtx (tmode);
17083 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17084 return target;
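/* For reference (illustrative note, not in the original sources): the
   language-level form mentioned above is GCC's vector compound literal, roughly

       typedef int v4si __attribute__ ((vector_size (16)));
       v4si x = (v4si) { 1, 2, 3, 4 };

   which reaches ix86_expand_vector_init directly for SSE modes.  The builtin
   route exists only because the corresponding patterns are deliberately not
   provided for MMX modes, as explained in the comment above.  */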
17087 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17088 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17089 had a language-level syntax for referencing vector elements. */
17091 static rtx
17092 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
17094 enum machine_mode tmode, mode0;
17095 tree arg0, arg1;
17096 int elt;
17097 rtx op0;
17099 arg0 = TREE_VALUE (arglist);
17100 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17102 op0 = expand_normal (arg0);
17103 elt = get_element_number (TREE_TYPE (arg0), arg1);
17105 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17106 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17107 gcc_assert (VECTOR_MODE_P (mode0));
17109 op0 = force_reg (mode0, op0);
17111 if (optimize || !target || !register_operand (target, tmode))
17112 target = gen_reg_rtx (tmode);
17114 ix86_expand_vector_extract (true, target, op0, elt);
17116 return target;
17119 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17120 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17121 a language-level syntax for referencing vector elements. */
17123 static rtx
17124 ix86_expand_vec_set_builtin (tree arglist)
17126 enum machine_mode tmode, mode1;
17127 tree arg0, arg1, arg2;
17128 int elt;
17129 rtx op0, op1;
17131 arg0 = TREE_VALUE (arglist);
17132 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17133 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17135 tmode = TYPE_MODE (TREE_TYPE (arg0));
17136 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17137 gcc_assert (VECTOR_MODE_P (tmode));
17139 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17140 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17141 elt = get_element_number (TREE_TYPE (arg0), arg2);
17143 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17144 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17146 op0 = force_reg (tmode, op0);
17147 op1 = force_reg (mode1, op1);
17149 ix86_expand_vector_set (true, op0, op1, elt);
17151 return op0;
17154 /* Expand an expression EXP that calls a built-in function,
17155 with result going to TARGET if that's convenient
17156 (and in mode MODE if that's convenient).
17157 SUBTARGET may be used as the target for computing one of EXP's operands.
17158 IGNORE is nonzero if the value is to be ignored. */
17160 static rtx
17161 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17162 enum machine_mode mode ATTRIBUTE_UNUSED,
17163 int ignore ATTRIBUTE_UNUSED)
17165 const struct builtin_description *d;
17166 size_t i;
17167 enum insn_code icode;
17168 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
17169 tree arglist = TREE_OPERAND (exp, 1);
17170 tree arg0, arg1, arg2;
17171 rtx op0, op1, op2, pat;
17172 enum machine_mode tmode, mode0, mode1, mode2, mode3;
17173 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17175 switch (fcode)
17177 case IX86_BUILTIN_EMMS:
17178 emit_insn (gen_mmx_emms ());
17179 return 0;
17181 case IX86_BUILTIN_SFENCE:
17182 emit_insn (gen_sse_sfence ());
17183 return 0;
17185 case IX86_BUILTIN_MASKMOVQ:
17186 case IX86_BUILTIN_MASKMOVDQU:
17187 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17188 ? CODE_FOR_mmx_maskmovq
17189 : CODE_FOR_sse2_maskmovdqu);
17190 /* Note the arg order is different from the operand order. */
17191 arg1 = TREE_VALUE (arglist);
17192 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
17193 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17194 op0 = expand_normal (arg0);
17195 op1 = expand_normal (arg1);
17196 op2 = expand_normal (arg2);
17197 mode0 = insn_data[icode].operand[0].mode;
17198 mode1 = insn_data[icode].operand[1].mode;
17199 mode2 = insn_data[icode].operand[2].mode;
17201 op0 = force_reg (Pmode, op0);
17202 op0 = gen_rtx_MEM (mode1, op0);
17204 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17205 op0 = copy_to_mode_reg (mode0, op0);
17206 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17207 op1 = copy_to_mode_reg (mode1, op1);
17208 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17209 op2 = copy_to_mode_reg (mode2, op2);
17210 pat = GEN_FCN (icode) (op0, op1, op2);
17211 if (! pat)
17212 return 0;
17213 emit_insn (pat);
17214 return 0;
17216 case IX86_BUILTIN_SQRTSS:
17217 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
17218 case IX86_BUILTIN_RSQRTSS:
17219 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
17220 case IX86_BUILTIN_RCPSS:
17221 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
17223 case IX86_BUILTIN_LOADUPS:
17224 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
17226 case IX86_BUILTIN_STOREUPS:
17227 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
17229 case IX86_BUILTIN_LOADHPS:
17230 case IX86_BUILTIN_LOADLPS:
17231 case IX86_BUILTIN_LOADHPD:
17232 case IX86_BUILTIN_LOADLPD:
17233 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17234 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17235 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17236 : CODE_FOR_sse2_loadlpd);
17237 arg0 = TREE_VALUE (arglist);
17238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17239 op0 = expand_normal (arg0);
17240 op1 = expand_normal (arg1);
17241 tmode = insn_data[icode].operand[0].mode;
17242 mode0 = insn_data[icode].operand[1].mode;
17243 mode1 = insn_data[icode].operand[2].mode;
17245 op0 = force_reg (mode0, op0);
17246 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17247 if (optimize || target == 0
17248 || GET_MODE (target) != tmode
17249 || !register_operand (target, tmode))
17250 target = gen_reg_rtx (tmode);
17251 pat = GEN_FCN (icode) (target, op0, op1);
17252 if (! pat)
17253 return 0;
17254 emit_insn (pat);
17255 return target;
17257 case IX86_BUILTIN_STOREHPS:
17258 case IX86_BUILTIN_STORELPS:
17259 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17260 : CODE_FOR_sse_storelps);
17261 arg0 = TREE_VALUE (arglist);
17262 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17263 op0 = expand_normal (arg0);
17264 op1 = expand_normal (arg1);
17265 mode0 = insn_data[icode].operand[0].mode;
17266 mode1 = insn_data[icode].operand[1].mode;
17268 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17269 op1 = force_reg (mode1, op1);
17271 pat = GEN_FCN (icode) (op0, op1);
17272 if (! pat)
17273 return 0;
17274 emit_insn (pat);
17275 return const0_rtx;
17277 case IX86_BUILTIN_MOVNTPS:
17278 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
17279 case IX86_BUILTIN_MOVNTQ:
17280 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
17282 case IX86_BUILTIN_LDMXCSR:
17283 op0 = expand_normal (TREE_VALUE (arglist));
17284 target = assign_386_stack_local (SImode, SLOT_TEMP);
17285 emit_move_insn (target, op0);
17286 emit_insn (gen_sse_ldmxcsr (target));
17287 return 0;
17289 case IX86_BUILTIN_STMXCSR:
17290 target = assign_386_stack_local (SImode, SLOT_TEMP);
17291 emit_insn (gen_sse_stmxcsr (target));
17292 return copy_to_mode_reg (SImode, target);
17294 case IX86_BUILTIN_SHUFPS:
17295 case IX86_BUILTIN_SHUFPD:
17296 icode = (fcode == IX86_BUILTIN_SHUFPS
17297 ? CODE_FOR_sse_shufps
17298 : CODE_FOR_sse2_shufpd);
17299 arg0 = TREE_VALUE (arglist);
17300 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17301 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17302 op0 = expand_normal (arg0);
17303 op1 = expand_normal (arg1);
17304 op2 = expand_normal (arg2);
17305 tmode = insn_data[icode].operand[0].mode;
17306 mode0 = insn_data[icode].operand[1].mode;
17307 mode1 = insn_data[icode].operand[2].mode;
17308 mode2 = insn_data[icode].operand[3].mode;
17310 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17311 op0 = copy_to_mode_reg (mode0, op0);
17312 if ((optimize && !register_operand (op1, mode1))
17313 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17314 op1 = copy_to_mode_reg (mode1, op1);
17315 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17317 /* @@@ better error message */
17318 error ("mask must be an immediate");
17319 return gen_reg_rtx (tmode);
17321 if (optimize || target == 0
17322 || GET_MODE (target) != tmode
17323 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17324 target = gen_reg_rtx (tmode);
17325 pat = GEN_FCN (icode) (target, op0, op1, op2);
17326 if (! pat)
17327 return 0;
17328 emit_insn (pat);
17329 return target;
17331 case IX86_BUILTIN_PSHUFW:
17332 case IX86_BUILTIN_PSHUFD:
17333 case IX86_BUILTIN_PSHUFHW:
17334 case IX86_BUILTIN_PSHUFLW:
17335 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17336 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17337 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17338 : CODE_FOR_mmx_pshufw);
17339 arg0 = TREE_VALUE (arglist);
17340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17341 op0 = expand_normal (arg0);
17342 op1 = expand_normal (arg1);
17343 tmode = insn_data[icode].operand[0].mode;
17344 mode1 = insn_data[icode].operand[1].mode;
17345 mode2 = insn_data[icode].operand[2].mode;
17347 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17348 op0 = copy_to_mode_reg (mode1, op0);
17349 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17351 /* @@@ better error message */
17352 error ("mask must be an immediate");
17353 return const0_rtx;
17355 if (target == 0
17356 || GET_MODE (target) != tmode
17357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17358 target = gen_reg_rtx (tmode);
17359 pat = GEN_FCN (icode) (target, op0, op1);
17360 if (! pat)
17361 return 0;
17362 emit_insn (pat);
17363 return target;
17365 case IX86_BUILTIN_PSLLDQI128:
17366 case IX86_BUILTIN_PSRLDQI128:
17367 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17368 : CODE_FOR_sse2_lshrti3);
17369 arg0 = TREE_VALUE (arglist);
17370 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17371 op0 = expand_normal (arg0);
17372 op1 = expand_normal (arg1);
17373 tmode = insn_data[icode].operand[0].mode;
17374 mode1 = insn_data[icode].operand[1].mode;
17375 mode2 = insn_data[icode].operand[2].mode;
17377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17379 op0 = copy_to_reg (op0);
17380 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17382 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17384 error ("shift must be an immediate");
17385 return const0_rtx;
17387 target = gen_reg_rtx (V2DImode);
17388 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17389 if (! pat)
17390 return 0;
17391 emit_insn (pat);
17392 return target;
17394 case IX86_BUILTIN_FEMMS:
17395 emit_insn (gen_mmx_femms ());
17396 return NULL_RTX;
17398 case IX86_BUILTIN_PAVGUSB:
17399 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
17401 case IX86_BUILTIN_PF2ID:
17402 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
17404 case IX86_BUILTIN_PFACC:
17405 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
17407 case IX86_BUILTIN_PFADD:
17408 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
17410 case IX86_BUILTIN_PFCMPEQ:
17411 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
17413 case IX86_BUILTIN_PFCMPGE:
17414 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
17416 case IX86_BUILTIN_PFCMPGT:
17417 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
17419 case IX86_BUILTIN_PFMAX:
17420 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
17422 case IX86_BUILTIN_PFMIN:
17423 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
17425 case IX86_BUILTIN_PFMUL:
17426 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
17428 case IX86_BUILTIN_PFRCP:
17429 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
17431 case IX86_BUILTIN_PFRCPIT1:
17432 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
17434 case IX86_BUILTIN_PFRCPIT2:
17435 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
17437 case IX86_BUILTIN_PFRSQIT1:
17438 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
17440 case IX86_BUILTIN_PFRSQRT:
17441 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
17443 case IX86_BUILTIN_PFSUB:
17444 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
17446 case IX86_BUILTIN_PFSUBR:
17447 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
17449 case IX86_BUILTIN_PI2FD:
17450 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
17452 case IX86_BUILTIN_PMULHRW:
17453 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
17455 case IX86_BUILTIN_PF2IW:
17456 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
17458 case IX86_BUILTIN_PFNACC:
17459 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
17461 case IX86_BUILTIN_PFPNACC:
17462 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
17464 case IX86_BUILTIN_PI2FW:
17465 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
17467 case IX86_BUILTIN_PSWAPDSI:
17468 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
17470 case IX86_BUILTIN_PSWAPDSF:
17471 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
17473 case IX86_BUILTIN_SQRTSD:
17474 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
17475 case IX86_BUILTIN_LOADUPD:
17476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
17477 case IX86_BUILTIN_STOREUPD:
17478 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
17480 case IX86_BUILTIN_MFENCE:
17481 emit_insn (gen_sse2_mfence ());
17482 return 0;
17483 case IX86_BUILTIN_LFENCE:
17484 emit_insn (gen_sse2_lfence ());
17485 return 0;
17487 case IX86_BUILTIN_CLFLUSH:
17488 arg0 = TREE_VALUE (arglist);
17489 op0 = expand_normal (arg0);
17490 icode = CODE_FOR_sse2_clflush;
17491 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17492 op0 = copy_to_mode_reg (Pmode, op0);
17494 emit_insn (gen_sse2_clflush (op0));
17495 return 0;
17497 case IX86_BUILTIN_MOVNTPD:
17498 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
17499 case IX86_BUILTIN_MOVNTDQ:
17500 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
17501 case IX86_BUILTIN_MOVNTI:
17502 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
17504 case IX86_BUILTIN_LOADDQU:
17505 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
17506 case IX86_BUILTIN_STOREDQU:
17507 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
17509 case IX86_BUILTIN_MONITOR:
17510 arg0 = TREE_VALUE (arglist);
17511 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17512 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17513 op0 = expand_normal (arg0);
17514 op1 = expand_normal (arg1);
17515 op2 = expand_normal (arg2);
17516 if (!REG_P (op0))
17517 op0 = copy_to_mode_reg (Pmode, op0);
17518 if (!REG_P (op1))
17519 op1 = copy_to_mode_reg (SImode, op1);
17520 if (!REG_P (op2))
17521 op2 = copy_to_mode_reg (SImode, op2);
17522 if (!TARGET_64BIT)
17523 emit_insn (gen_sse3_monitor (op0, op1, op2));
17524 else
17525 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17526 return 0;
17528 case IX86_BUILTIN_MWAIT:
17529 arg0 = TREE_VALUE (arglist);
17530 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17531 op0 = expand_normal (arg0);
17532 op1 = expand_normal (arg1);
17533 if (!REG_P (op0))
17534 op0 = copy_to_mode_reg (SImode, op0);
17535 if (!REG_P (op1))
17536 op1 = copy_to_mode_reg (SImode, op1);
17537 emit_insn (gen_sse3_mwait (op0, op1));
17538 return 0;
17540 case IX86_BUILTIN_LDDQU:
17541 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17542 target, 1);
17544 case IX86_BUILTIN_PALIGNR:
17545 case IX86_BUILTIN_PALIGNR128:
17546 if (fcode == IX86_BUILTIN_PALIGNR)
17548 icode = CODE_FOR_ssse3_palignrdi;
17549 mode = DImode;
17551 else
17553 icode = CODE_FOR_ssse3_palignrti;
17554 mode = V2DImode;
17556 arg0 = TREE_VALUE (arglist);
17557 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17558 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17559 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17560 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17561 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17562 tmode = insn_data[icode].operand[0].mode;
17563 mode1 = insn_data[icode].operand[1].mode;
17564 mode2 = insn_data[icode].operand[2].mode;
17565 mode3 = insn_data[icode].operand[3].mode;
17567 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17569 op0 = copy_to_reg (op0);
17570 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17572 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17574 op1 = copy_to_reg (op1);
17575 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17577 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17579 error ("shift must be an immediate");
17580 return const0_rtx;
17582 target = gen_reg_rtx (mode);
17583 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17584 op0, op1, op2);
17585 if (! pat)
17586 return 0;
17587 emit_insn (pat);
17588 return target;
17590 case IX86_BUILTIN_VEC_INIT_V2SI:
17591 case IX86_BUILTIN_VEC_INIT_V4HI:
17592 case IX86_BUILTIN_VEC_INIT_V8QI:
17593 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17595 case IX86_BUILTIN_VEC_EXT_V2DF:
17596 case IX86_BUILTIN_VEC_EXT_V2DI:
17597 case IX86_BUILTIN_VEC_EXT_V4SF:
17598 case IX86_BUILTIN_VEC_EXT_V4SI:
17599 case IX86_BUILTIN_VEC_EXT_V8HI:
17600 case IX86_BUILTIN_VEC_EXT_V2SI:
17601 case IX86_BUILTIN_VEC_EXT_V4HI:
17602 return ix86_expand_vec_ext_builtin (arglist, target);
17604 case IX86_BUILTIN_VEC_SET_V8HI:
17605 case IX86_BUILTIN_VEC_SET_V4HI:
17606 return ix86_expand_vec_set_builtin (arglist);
17608 default:
17609 break;
17612 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17613 if (d->code == fcode)
17615 /* Compares are treated specially. */
17616 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17617 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17618 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17619 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17620 return ix86_expand_sse_compare (d, arglist, target);
17622 return ix86_expand_binop_builtin (d->icode, arglist, target);
17625 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17626 if (d->code == fcode)
17627 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17629 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17630 if (d->code == fcode)
17631 return ix86_expand_sse_comi (d, arglist, target);
17633 gcc_unreachable ();
17636 /* Store OPERAND to memory after reload is completed. This means
17637 that we can't easily use assign_stack_local. */
17638 rtx
17639 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17641 rtx result;
17643 gcc_assert (reload_completed);
17644 if (TARGET_RED_ZONE)
17646 result = gen_rtx_MEM (mode,
17647 gen_rtx_PLUS (Pmode,
17648 stack_pointer_rtx,
17649 GEN_INT (-RED_ZONE_SIZE)));
17650 emit_move_insn (result, operand);
17652 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17654 switch (mode)
17656 case HImode:
17657 case SImode:
17658 operand = gen_lowpart (DImode, operand);
17659 /* FALLTHRU */
17660 case DImode:
17661 emit_insn (
17662 gen_rtx_SET (VOIDmode,
17663 gen_rtx_MEM (DImode,
17664 gen_rtx_PRE_DEC (DImode,
17665 stack_pointer_rtx)),
17666 operand));
17667 break;
17668 default:
17669 gcc_unreachable ();
17671 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17673 else
17675 switch (mode)
17677 case DImode:
17679 rtx operands[2];
17680 split_di (&operand, 1, operands, operands + 1);
17681 emit_insn (
17682 gen_rtx_SET (VOIDmode,
17683 gen_rtx_MEM (SImode,
17684 gen_rtx_PRE_DEC (Pmode,
17685 stack_pointer_rtx)),
17686 operands[1]));
17687 emit_insn (
17688 gen_rtx_SET (VOIDmode,
17689 gen_rtx_MEM (SImode,
17690 gen_rtx_PRE_DEC (Pmode,
17691 stack_pointer_rtx)),
17692 operands[0]));
17694 break;
17695 case HImode:
17696 /* Store HImodes as SImodes. */
17697 operand = gen_lowpart (SImode, operand);
17698 /* FALLTHRU */
17699 case SImode:
17700 emit_insn (
17701 gen_rtx_SET (VOIDmode,
17702 gen_rtx_MEM (GET_MODE (operand),
17703 gen_rtx_PRE_DEC (SImode,
17704 stack_pointer_rtx)),
17705 operand));
17706 break;
17707 default:
17708 gcc_unreachable ();
17710 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17712 return result;
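/* Illustrative summary (not in the original sources): with a red zone the
   operand is simply stored at sp - RED_ZONE_SIZE and the stack pointer is left
   alone; otherwise the code above pushes the value (one DImode push in 64-bit
   mode, one or two SImode pushes in 32-bit mode) and returns a MEM at the new
   stack pointer.  */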
17715 /* Free the operand from memory. */
17716 void
17717 ix86_free_from_memory (enum machine_mode mode)
17719 if (!TARGET_RED_ZONE)
17721 int size;
17723 if (mode == DImode || TARGET_64BIT)
17724 size = 8;
17725 else
17726 size = 4;
17727 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17728 to a pop or add instruction if registers are available. */
17729 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17730 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17731 GEN_INT (size))));
17735 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17736 QImode must go into class Q_REGS.
17737 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
17738 movdf to do mem-to-mem moves through integer regs. */
17739 enum reg_class
17740 ix86_preferred_reload_class (rtx x, enum reg_class class)
17742 enum machine_mode mode = GET_MODE (x);
17744 /* We're only allowed to return a subclass of CLASS. Many of the
17745 following checks fail for NO_REGS, so eliminate that early. */
17746 if (class == NO_REGS)
17747 return NO_REGS;
17749 /* All classes can load zeros. */
17750 if (x == CONST0_RTX (mode))
17751 return class;
17753 /* Force constants into memory if we are loading a (nonzero) constant into
17754 an MMX or SSE register. This is because there are no MMX/SSE instructions
17755 to load from a constant. */
17756 if (CONSTANT_P (x)
17757 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17758 return NO_REGS;
17760 /* Prefer SSE regs only if we can use them for math. */
17761 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17762 return SSE_CLASS_P (class) ? class : NO_REGS;
17764 /* Floating-point constants need more complex checks. */
17765 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17767 /* General regs can load everything. */
17768 if (reg_class_subset_p (class, GENERAL_REGS))
17769 return class;
17771 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17772 zero above. We only want to wind up preferring 80387 registers if
17773 we plan on doing computation with them. */
17774 if (TARGET_80387
17775 && standard_80387_constant_p (x))
17777 /* Limit class to non-sse. */
17778 if (class == FLOAT_SSE_REGS)
17779 return FLOAT_REGS;
17780 if (class == FP_TOP_SSE_REGS)
17781 return FP_TOP_REG;
17782 if (class == FP_SECOND_SSE_REGS)
17783 return FP_SECOND_REG;
17784 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17785 return class;
17788 return NO_REGS;
17791 /* Generally when we see PLUS here, it's the function invariant
17792 (plus soft-fp const_int), which can only be computed into general
17793 regs. */
17794 if (GET_CODE (x) == PLUS)
17795 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17797 /* QImode constants are easy to load, but non-constant QImode data
17798 must go into Q_REGS. */
17799 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17801 if (reg_class_subset_p (class, Q_REGS))
17802 return class;
17803 if (reg_class_subset_p (Q_REGS, class))
17804 return Q_REGS;
17805 return NO_REGS;
17808 return class;
17811 /* Discourage putting floating-point values in SSE registers unless
17812 SSE math is being used, and likewise for the 387 registers. */
17813 enum reg_class
17814 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17816 enum machine_mode mode = GET_MODE (x);
17818 /* Restrict the output reload class to the register bank that we are doing
17819 math on. If we would like not to return a subset of CLASS, reject this
17820 alternative: if reload cannot do this, it will still use its choice. */
17821 mode = GET_MODE (x);
17822 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17823 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17825 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17827 if (class == FP_TOP_SSE_REGS)
17828 return FP_TOP_REG;
17829 else if (class == FP_SECOND_SSE_REGS)
17830 return FP_SECOND_REG;
17831 else
17832 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17835 return class;
17838 /* If we are copying between general and FP registers, we need a memory
17839 location. The same is true for SSE and MMX registers.
17841 The macro can't work reliably when one of the CLASSES is a class containing
17842 registers from multiple units (SSE, MMX, integer). We avoid this by never
17843 combining those units in a single alternative in the machine description.
17844 Ensure that this constraint holds to avoid unexpected surprises.
17846 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17847 enforce these sanity checks. */
17849 int
17850 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17851 enum machine_mode mode, int strict)
17853 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17854 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17855 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17856 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17857 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17858 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17860 gcc_assert (!strict);
17861 return true;
17864 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17865 return true;
17867 /* ??? This is a lie. We do have moves between mmx/general, and for
17868 mmx/sse2. But by saying we need secondary memory we discourage the
17869 register allocator from using the mmx registers unless needed. */
17870 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17871 return true;
17873 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17875 /* SSE1 doesn't have any direct moves from other classes. */
17876 if (!TARGET_SSE2)
17877 return true;
17879 /* If the target says that inter-unit moves are more expensive
17880 than moving through memory, then don't generate them. */
17881 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17882 return true;
17884 /* Between SSE and general, we have moves no larger than word size. */
17885 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17886 return true;
17888 /* ??? For the cost of one register reformat penalty, we could use
17889 the same instructions to move SFmode and DFmode data, but the
17890 relevant move patterns don't support those alternatives. */
17891 if (mode == SFmode || mode == DFmode)
17892 return true;
17895 return false;
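/* Example (illustrative note): copying an SImode value between an SSE register
   and a general register has no direct instruction on SSE1-only targets, so
   this returns true and reload goes through a stack slot; with SSE2 and
   TARGET_INTER_UNIT_MOVES enabled a movd handles the copy directly.  */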
17898 /* Return true if the registers in CLASS cannot represent the change from
17899 modes FROM to TO. */
17901 bool
17902 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17903 enum reg_class class)
17905 if (from == to)
17906 return false;
17908 /* x87 registers can't do subreg at all, as all values are reformatted
17909 to extended precision. */
17910 if (MAYBE_FLOAT_CLASS_P (class))
17911 return true;
17913 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17915 /* Vector registers do not support QI or HImode loads. If we don't
17916 disallow a change to these modes, reload will assume it's ok to
17917 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17918 the vec_dupv4hi pattern. */
17919 if (GET_MODE_SIZE (from) < 4)
17920 return true;
17922 /* Vector registers do not support subreg with nonzero offsets, which
17923 are otherwise valid for integer registers. Since we can't see
17924 whether we have a nonzero offset from here, prohibit all
17925 nonparadoxical subregs changing size. */
17926 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17927 return true;
17930 return false;
17933 /* Return the cost of moving data from a register in class CLASS1 to
17934 one in class CLASS2.
17936 It is not required that the cost always equal 2 when FROM is the same as TO;
17937 on some machines it is expensive to move between registers if they are not
17938 general registers. */
17941 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17942 enum reg_class class2)
17944 /* In case we require secondary memory, compute the cost of the store followed
17945 by the load. In order to avoid bad register allocation choices, we need
17946 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
17948 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17950 int cost = 1;
17952 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17953 MEMORY_MOVE_COST (mode, class1, 1));
17954 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17955 MEMORY_MOVE_COST (mode, class2, 1));
17957 /* In the case of copying from a general purpose register, we may emit multiple
17958 stores followed by a single load, causing a memory size mismatch stall.
17959 Count this as an arbitrarily high cost of 20. */
17960 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17961 cost += 20;
17963 /* In the case of FP/MMX moves, the registers actually overlap, and we
17964 have to switch modes in order to treat them differently. */
17965 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17966 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17967 cost += 20;
17969 return cost;
17972 /* Moves between SSE/MMX and integer unit are expensive. */
17973 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17974 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17975 return ix86_cost->mmxsse_to_integer;
17976 if (MAYBE_FLOAT_CLASS_P (class1))
17977 return ix86_cost->fp_move;
17978 if (MAYBE_SSE_CLASS_P (class1))
17979 return ix86_cost->sse_move;
17980 if (MAYBE_MMX_CLASS_P (class1))
17981 return ix86_cost->mmx_move;
17982 return 2;
17985 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17987 bool
17988 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17990 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
17991 if (CC_REGNO_P (regno))
17992 return GET_MODE_CLASS (mode) == MODE_CC;
17993 if (GET_MODE_CLASS (mode) == MODE_CC
17994 || GET_MODE_CLASS (mode) == MODE_RANDOM
17995 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
17996 return 0;
17997 if (FP_REGNO_P (regno))
17998 return VALID_FP_MODE_P (mode);
17999 if (SSE_REGNO_P (regno))
18001 /* We implement the move patterns for all vector modes into and
18002 out of SSE registers, even when no operation instructions
18003 are available. */
18004 return (VALID_SSE_REG_MODE (mode)
18005 || VALID_SSE2_REG_MODE (mode)
18006 || VALID_MMX_REG_MODE (mode)
18007 || VALID_MMX_REG_MODE_3DNOW (mode));
18009 if (MMX_REGNO_P (regno))
18011 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18012 so if the register is available at all, then we can move data of
18013 the given mode into or out of it. */
18014 return (VALID_MMX_REG_MODE (mode)
18015 || VALID_MMX_REG_MODE_3DNOW (mode));
18018 if (mode == QImode)
18020 /* Take care with QImode values - they can live in non-QI regs,
18021 but then they cause partial register stalls. */
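/* Hard registers 0-3 (%eax, %edx, %ecx, %ebx) have directly addressable
   low-byte subregisters; in 64-bit mode the remaining GPRs gain byte access
   through REX prefixes, which is why the test below is relaxed there. */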
18022 if (regno < 4 || TARGET_64BIT)
18023 return 1;
18024 if (!TARGET_PARTIAL_REG_STALL)
18025 return 1;
18026 return reload_in_progress || reload_completed;
18028 /* We handle both integers and floats in the general purpose registers. */
18029 else if (VALID_INT_MODE_P (mode))
18030 return 1;
18031 else if (VALID_FP_MODE_P (mode))
18032 return 1;
18033 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18034 on to use that value in smaller contexts, this can easily force a
18035 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18036 supporting DImode, allow it. */
18037 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18038 return 1;
18040 return 0;
18043 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18044 tieable integer mode. */
18046 static bool
18047 ix86_tieable_integer_mode_p (enum machine_mode mode)
18049 switch (mode)
18051 case HImode:
18052 case SImode:
18053 return true;
18055 case QImode:
18056 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18058 case DImode:
18059 return TARGET_64BIT;
18061 default:
18062 return false;
18066 /* Return true if MODE1 is accessible in a register that can hold MODE2
18067 without copying. That is, all register classes that can hold MODE2
18068 can also hold MODE1. */
18070 bool
18071 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18073 if (mode1 == mode2)
18074 return true;
18076 if (ix86_tieable_integer_mode_p (mode1)
18077 && ix86_tieable_integer_mode_p (mode2))
18078 return true;
18080 /* MODE2 being XFmode implies fp stack or general regs, which means we
18081 can tie any smaller floating point modes to it. Note that we do not
18082 tie this with TFmode. */
18083 if (mode2 == XFmode)
18084 return mode1 == SFmode || mode1 == DFmode;
18086 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18087 that we can tie it with SFmode. */
18088 if (mode2 == DFmode)
18089 return mode1 == SFmode;
18091 /* If MODE2 is only appropriate for an SSE register, then tie with
18092 any other mode acceptable to SSE registers. */
18093 if (GET_MODE_SIZE (mode2) >= 8
18094 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18095 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18097 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18098 with any other mode acceptable to MMX registers. */
18099 if (GET_MODE_SIZE (mode2) == 8
18100 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18101 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18103 return false;
18106 /* Return the cost of moving data of mode M between a
18107 register and memory. A value of 2 is the default; this cost is
18108 relative to those in `REGISTER_MOVE_COST'.
18110 If moving between registers and memory is more expensive than
18111 between two registers, you should define this macro to express the
18112 relative cost.
18114 Also model the increased cost of moving QImode values in registers
18115 outside the Q_REGS classes.
18118 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18120 if (FLOAT_CLASS_P (class))
18122 int index;
18123 switch (mode)
18125 case SFmode:
18126 index = 0;
18127 break;
18128 case DFmode:
18129 index = 1;
18130 break;
18131 case XFmode:
18132 index = 2;
18133 break;
18134 default:
18135 return 100;
18137 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18139 if (SSE_CLASS_P (class))
18141 int index;
18142 switch (GET_MODE_SIZE (mode))
18144 case 4:
18145 index = 0;
18146 break;
18147 case 8:
18148 index = 1;
18149 break;
18150 case 16:
18151 index = 2;
18152 break;
18153 default:
18154 return 100;
18156 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18158 if (MMX_CLASS_P (class))
18160 int index;
18161 switch (GET_MODE_SIZE (mode))
18163 case 4:
18164 index = 0;
18165 break;
18166 case 8:
18167 index = 1;
18168 break;
18169 default:
18170 return 100;
18172 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18174 switch (GET_MODE_SIZE (mode))
18176 case 1:
18177 if (in)
18178 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18179 : ix86_cost->movzbl_load);
18180 else
18181 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18182 : ix86_cost->int_store[0] + 4);
18183 break;
18184 case 2:
18185 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18186 default:
18187 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
18188 if (mode == TFmode)
18189 mode = XFmode;
18190 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18191 * (((int) GET_MODE_SIZE (mode)
18192 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18196 /* Compute a (partial) cost for rtx X. Return true if the complete
18197 cost has been computed, and false if subexpressions should be
18198 scanned. In either case, *TOTAL contains the cost result. */
18200 static bool
18201 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18203 enum machine_mode mode = GET_MODE (x);
18205 switch (code)
18207 case CONST_INT:
18208 case CONST:
18209 case LABEL_REF:
18210 case SYMBOL_REF:
18211 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18212 *total = 3;
18213 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18214 *total = 2;
18215 else if (flag_pic && SYMBOLIC_CONST (x)
18216 && (!TARGET_64BIT
18217 || (GET_CODE (x) != LABEL_REF
18218 && (GET_CODE (x) != SYMBOL_REF
18219 || !SYMBOL_REF_LOCAL_P (x)))))
18220 *total = 1;
18221 else
18222 *total = 0;
18223 return true;
18225 case CONST_DOUBLE:
18226 if (mode == VOIDmode)
18227 *total = 0;
18228 else
18229 switch (standard_80387_constant_p (x))
18231 case 1: /* 0.0 */
18232 *total = 1;
18233 break;
18234 default: /* Other constants */
18235 *total = 2;
18236 break;
18237 case 0:
18238 case -1:
18239 /* Start with (MEM (SYMBOL_REF)), since that's where
18240 it'll probably end up. Add a penalty for size. */
18241 *total = (COSTS_N_INSNS (1)
18242 + (flag_pic != 0 && !TARGET_64BIT)
18243 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18244 break;
18246 return true;
18248 case ZERO_EXTEND:
18249 /* The zero extension is often completely free on x86_64, so make
18250 it as cheap as possible. */
18251 if (TARGET_64BIT && mode == DImode
18252 && GET_MODE (XEXP (x, 0)) == SImode)
18253 *total = 1;
18254 else if (TARGET_ZERO_EXTEND_WITH_AND)
18255 *total = ix86_cost->add;
18256 else
18257 *total = ix86_cost->movzx;
18258 return false;
18260 case SIGN_EXTEND:
18261 *total = ix86_cost->movsx;
18262 return false;
18264 case ASHIFT:
18265 if (GET_CODE (XEXP (x, 1)) == CONST_INT
18266 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18268 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18269 if (value == 1)
18271 *total = ix86_cost->add;
18272 return false;
18274 if ((value == 2 || value == 3)
18275 && ix86_cost->lea <= ix86_cost->shift_const)
18277 *total = ix86_cost->lea;
18278 return false;
18281 /* FALLTHRU */
18283 case ROTATE:
18284 case ASHIFTRT:
18285 case LSHIFTRT:
18286 case ROTATERT:
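/* On 32-bit targets a DImode shift or rotate is synthesized from two
   word-sized shifts, plus extra fixup instructions when the count is
   variable; the costs below approximate those expansions. */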
18287 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18289 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18291 if (INTVAL (XEXP (x, 1)) > 32)
18292 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18293 else
18294 *total = ix86_cost->shift_const * 2;
18296 else
18298 if (GET_CODE (XEXP (x, 1)) == AND)
18299 *total = ix86_cost->shift_var * 2;
18300 else
18301 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18304 else
18306 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18307 *total = ix86_cost->shift_const;
18308 else
18309 *total = ix86_cost->shift_var;
18311 return false;
18313 case MULT:
18314 if (FLOAT_MODE_P (mode))
18316 *total = ix86_cost->fmul;
18317 return false;
18319 else
18321 rtx op0 = XEXP (x, 0);
18322 rtx op1 = XEXP (x, 1);
18323 int nbits;
18324 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18326 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18327 for (nbits = 0; value != 0; value &= value - 1)
18328 nbits++;
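/* nbits is now the population count of the multiplier; each set bit
   contributes one mult_bit step to the cost computed below. */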
18330 else
18331 /* This is arbitrary. */
18332 nbits = 7;
18334 /* Compute costs correctly for widening multiplication. */
18335 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
18336 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
18337 == GET_MODE_SIZE (mode))
18339 int is_mulwiden = 0;
18340 enum machine_mode inner_mode = GET_MODE (op0);
18342 if (GET_CODE (op0) == GET_CODE (op1))
18343 is_mulwiden = 1, op1 = XEXP (op1, 0);
18344 else if (GET_CODE (op1) == CONST_INT)
18346 if (GET_CODE (op0) == SIGN_EXTEND)
18347 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
18348 == INTVAL (op1);
18349 else
18350 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
18353 if (is_mulwiden)
18354 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
18357 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
18358 + nbits * ix86_cost->mult_bit
18359 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
18361 return true;
18364 case DIV:
18365 case UDIV:
18366 case MOD:
18367 case UMOD:
18368 if (FLOAT_MODE_P (mode))
18369 *total = ix86_cost->fdiv;
18370 else
18371 *total = ix86_cost->divide[MODE_INDEX (mode)];
18372 return false;
18374 case PLUS:
18375 if (FLOAT_MODE_P (mode))
18376 *total = ix86_cost->fadd;
18377 else if (GET_MODE_CLASS (mode) == MODE_INT
18378 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
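/* A sum of the form reg + reg*{2,4,8} + constant, e.g.
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12)),
   matches the lea addressing forms, so cost it as a single lea plus
   the costs of its operands. */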
18380 if (GET_CODE (XEXP (x, 0)) == PLUS
18381 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
18382 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
18383 && CONSTANT_P (XEXP (x, 1)))
18385 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
18386 if (val == 2 || val == 4 || val == 8)
18388 *total = ix86_cost->lea;
18389 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18390 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
18391 outer_code);
18392 *total += rtx_cost (XEXP (x, 1), outer_code);
18393 return true;
18396 else if (GET_CODE (XEXP (x, 0)) == MULT
18397 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
18399 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
18400 if (val == 2 || val == 4 || val == 8)
18402 *total = ix86_cost->lea;
18403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18404 *total += rtx_cost (XEXP (x, 1), outer_code);
18405 return true;
18408 else if (GET_CODE (XEXP (x, 0)) == PLUS)
18410 *total = ix86_cost->lea;
18411 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18412 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18413 *total += rtx_cost (XEXP (x, 1), outer_code);
18414 return true;
18417 /* FALLTHRU */
18419 case MINUS:
18420 if (FLOAT_MODE_P (mode))
18422 *total = ix86_cost->fadd;
18423 return false;
18425 /* FALLTHRU */
18427 case AND:
18428 case IOR:
18429 case XOR:
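/* On 32-bit targets a DImode logical operation is split into two SImode
   operations. Operands that are not themselves DImode (e.g. constants)
   feed both halves, which is presumably why their subexpression cost is
   doubled below. */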
18430 if (!TARGET_64BIT && mode == DImode)
18432 *total = (ix86_cost->add * 2
18433 + (rtx_cost (XEXP (x, 0), outer_code)
18434 << (GET_MODE (XEXP (x, 0)) != DImode))
18435 + (rtx_cost (XEXP (x, 1), outer_code)
18436 << (GET_MODE (XEXP (x, 1)) != DImode)));
18437 return true;
18439 /* FALLTHRU */
18441 case NEG:
18442 if (FLOAT_MODE_P (mode))
18444 *total = ix86_cost->fchs;
18445 return false;
18447 /* FALLTHRU */
18449 case NOT:
18450 if (!TARGET_64BIT && mode == DImode)
18451 *total = ix86_cost->add * 2;
18452 else
18453 *total = ix86_cost->add;
18454 return false;
18456 case COMPARE:
18457 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18458 && XEXP (XEXP (x, 0), 1) == const1_rtx
18459 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18460 && XEXP (x, 1) == const0_rtx)
18462 /* This kind of construct is implemented using test[bwl].
18463 Treat it as if we had an AND. */
18464 *total = (ix86_cost->add
18465 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18466 + rtx_cost (const1_rtx, outer_code));
18467 return true;
18469 return false;
18471 case FLOAT_EXTEND:
18472 if (!TARGET_SSE_MATH
18473 || mode == XFmode
18474 || (mode == DFmode && !TARGET_SSE2))
18475 *total = 0;
18476 return false;
18478 case ABS:
18479 if (FLOAT_MODE_P (mode))
18480 *total = ix86_cost->fabs;
18481 return false;
18483 case SQRT:
18484 if (FLOAT_MODE_P (mode))
18485 *total = ix86_cost->fsqrt;
18486 return false;
18488 case UNSPEC:
18489 if (XINT (x, 1) == UNSPEC_TP)
18490 *total = 0;
18491 return false;
18493 default:
18494 return false;
18498 #if TARGET_MACHO
18500 static int current_machopic_label_num;
18502 /* Given a symbol name and its associated stub, write out the
18503 definition of the stub. */
18505 void
18506 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18508 unsigned int length;
18509 char *binder_name, *symbol_name, lazy_ptr_name[32];
18510 int label = ++current_machopic_label_num;
18512 /* For 64-bit we shouldn't get here. */
18513 gcc_assert (!TARGET_64BIT);
18515 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18516 symb = (*targetm.strip_name_encoding) (symb);
18518 length = strlen (stub);
18519 binder_name = alloca (length + 32);
18520 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18522 length = strlen (symb);
18523 symbol_name = alloca (length + 32);
18524 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18526 sprintf (lazy_ptr_name, "L%d$lz", label);
18528 if (MACHOPIC_PURE)
18529 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18530 else
18531 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18533 fprintf (file, "%s:\n", stub);
18534 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18536 if (MACHOPIC_PURE)
18538 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18539 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18540 fprintf (file, "\tjmp\t*%%edx\n");
18542 else
18543 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18545 fprintf (file, "%s:\n", binder_name);
18547 if (MACHOPIC_PURE)
18549 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18550 fprintf (file, "\tpushl\t%%eax\n");
18552 else
18553 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18555 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18557 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18558 fprintf (file, "%s:\n", lazy_ptr_name);
18559 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18560 fprintf (file, "\t.long %s\n", binder_name);
18563 void
18564 darwin_x86_file_end (void)
18566 darwin_file_end ();
18567 ix86_file_end ();
18569 #endif /* TARGET_MACHO */
18571 /* Order the registers for register allocator. */
18573 void
18574 x86_order_regs_for_local_alloc (void)
18576 int pos = 0;
18577 int i;
18579 /* First allocate the local general purpose registers. */
18580 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18581 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18582 reg_alloc_order [pos++] = i;
18584 /* Global general purpose registers. */
18585 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18586 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18587 reg_alloc_order [pos++] = i;
18589 /* x87 registers come first in case we are doing FP math
18590 using them. */
18591 if (!TARGET_SSE_MATH)
18592 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18593 reg_alloc_order [pos++] = i;
18595 /* SSE registers. */
18596 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18597 reg_alloc_order [pos++] = i;
18598 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18599 reg_alloc_order [pos++] = i;
18601 /* x87 registers. */
18602 if (TARGET_SSE_MATH)
18603 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18604 reg_alloc_order [pos++] = i;
18606 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18607 reg_alloc_order [pos++] = i;
18609 /* Initialize the rest of the array, as we do not allocate some registers
18610 at all. */
18611 while (pos < FIRST_PSEUDO_REGISTER)
18612 reg_alloc_order [pos++] = 0;
18615 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18616 struct attribute_spec.handler. */
18617 static tree
18618 ix86_handle_struct_attribute (tree *node, tree name,
18619 tree args ATTRIBUTE_UNUSED,
18620 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18622 tree *type = NULL;
18623 if (DECL_P (*node))
18625 if (TREE_CODE (*node) == TYPE_DECL)
18626 type = &TREE_TYPE (*node);
18628 else
18629 type = node;
18631 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18632 || TREE_CODE (*type) == UNION_TYPE)))
18634 warning (OPT_Wattributes, "%qs attribute ignored",
18635 IDENTIFIER_POINTER (name));
18636 *no_add_attrs = true;
18639 else if ((is_attribute_p ("ms_struct", name)
18640 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18641 || ((is_attribute_p ("gcc_struct", name)
18642 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18644 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18645 IDENTIFIER_POINTER (name));
18646 *no_add_attrs = true;
18649 return NULL_TREE;
18652 static bool
18653 ix86_ms_bitfield_layout_p (tree record_type)
18655 return (TARGET_MS_BITFIELD_LAYOUT &&
18656 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18657 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18660 /* Returns an expression indicating where the this parameter is
18661 located on entry to the FUNCTION. */
18663 static rtx
18664 x86_this_parameter (tree function)
18666 tree type = TREE_TYPE (function);
18668 if (TARGET_64BIT)
18670 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18671 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18674 if (ix86_function_regparm (type, function) > 0)
18676 tree parm;
18678 parm = TYPE_ARG_TYPES (type);
18679 /* Figure out whether or not the function has a variable number of
18680 arguments. */
18681 for (; parm; parm = TREE_CHAIN (parm))
18682 if (TREE_VALUE (parm) == void_type_node)
18683 break;
18684 /* If not, the this parameter is in the first argument. */
18685 if (parm)
18687 int regno = 0;
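/* With regparm the incoming this pointer arrives in %eax (hard reg 0);
   fastcall functions receive it in %ecx (hard reg 2) instead. */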
18688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18689 regno = 2;
18690 return gen_rtx_REG (SImode, regno);
18694 if (aggregate_value_p (TREE_TYPE (type), type))
18695 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18696 else
18697 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18700 /* Determine whether x86_output_mi_thunk can succeed. */
18702 static bool
18703 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18704 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18705 HOST_WIDE_INT vcall_offset, tree function)
18707 /* 64-bit can handle anything. */
18708 if (TARGET_64BIT)
18709 return true;
18711 /* For 32-bit, everything's fine if we have one free register. */
18712 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18713 return true;
18715 /* Need a free register for vcall_offset. */
18716 if (vcall_offset)
18717 return false;
18719 /* Need a free register for GOT references. */
18720 if (flag_pic && !(*targetm.binds_local_p) (function))
18721 return false;
18723 /* Otherwise ok. */
18724 return true;
18727 /* Output the assembler code for a thunk function. THUNK_DECL is the
18728 declaration for the thunk function itself, FUNCTION is the decl for
18729 the target function. DELTA is an immediate constant offset to be
18730 added to THIS. If VCALL_OFFSET is nonzero, the word at
18731 *(*this + vcall_offset) should be added to THIS. */
18733 static void
18734 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18735 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18736 HOST_WIDE_INT vcall_offset, tree function)
18738 rtx xops[3];
18739 rtx this = x86_this_parameter (function);
18740 rtx this_reg, tmp;
18742 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18743 pull it in now and let DELTA benefit. */
18744 if (REG_P (this))
18745 this_reg = this;
18746 else if (vcall_offset)
18748 /* Put the this parameter into %eax. */
18749 xops[0] = this;
18750 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18751 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18753 else
18754 this_reg = NULL_RTX;
18756 /* Adjust the this parameter by a fixed constant. */
18757 if (delta)
18759 xops[0] = GEN_INT (delta);
18760 xops[1] = this_reg ? this_reg : this;
18761 if (TARGET_64BIT)
18763 if (!x86_64_general_operand (xops[0], DImode))
18765 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18766 xops[1] = tmp;
18767 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18768 xops[0] = tmp;
18769 xops[1] = this;
18771 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18773 else
18774 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18777 /* Adjust the this parameter by a value stored in the vtable. */
18778 if (vcall_offset)
18780 if (TARGET_64BIT)
18781 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
18782 else
18784 int tmp_regno = 2 /* ECX */;
18785 if (lookup_attribute ("fastcall",
18786 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18787 tmp_regno = 0 /* EAX */;
18788 tmp = gen_rtx_REG (SImode, tmp_regno);
18791 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18792 xops[1] = tmp;
18793 if (TARGET_64BIT)
18794 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18795 else
18796 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18798 /* Adjust the this parameter. */
18799 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18800 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18802 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
18803 xops[0] = GEN_INT (vcall_offset);
18804 xops[1] = tmp2;
18805 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18806 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18808 xops[1] = this_reg;
18809 if (TARGET_64BIT)
18810 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18811 else
18812 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18815 /* If necessary, drop THIS back to its stack slot. */
18816 if (this_reg && this_reg != this)
18818 xops[0] = this_reg;
18819 xops[1] = this;
18820 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18823 xops[0] = XEXP (DECL_RTL (function), 0);
18824 if (TARGET_64BIT)
18826 if (!flag_pic || (*targetm.binds_local_p) (function))
18827 output_asm_insn ("jmp\t%P0", xops);
18828 else
18830 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18831 tmp = gen_rtx_CONST (Pmode, tmp);
18832 tmp = gen_rtx_MEM (QImode, tmp);
18833 xops[0] = tmp;
18834 output_asm_insn ("jmp\t%A0", xops);
18837 else
18839 if (!flag_pic || (*targetm.binds_local_p) (function))
18840 output_asm_insn ("jmp\t%P0", xops);
18841 else
18842 #if TARGET_MACHO
18843 if (TARGET_MACHO)
18845 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18846 tmp = (gen_rtx_SYMBOL_REF
18847 (Pmode,
18848 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18849 tmp = gen_rtx_MEM (QImode, tmp);
18850 xops[0] = tmp;
18851 output_asm_insn ("jmp\t%0", xops);
18853 else
18854 #endif /* TARGET_MACHO */
18856 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18857 output_set_got (tmp, NULL_RTX);
18859 xops[1] = tmp;
18860 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18861 output_asm_insn ("jmp\t{*}%1", xops);
18866 static void
18867 x86_file_start (void)
18869 default_file_start ();
18870 #if TARGET_MACHO
18871 darwin_file_start ();
18872 #endif
18873 if (X86_FILE_START_VERSION_DIRECTIVE)
18874 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18875 if (X86_FILE_START_FLTUSED)
18876 fputs ("\t.global\t__fltused\n", asm_out_file);
18877 if (ix86_asm_dialect == ASM_INTEL)
18878 fputs ("\t.intel_syntax\n", asm_out_file);
18882 x86_field_alignment (tree field, int computed)
18884 enum machine_mode mode;
18885 tree type = TREE_TYPE (field);
18887 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18888 return computed;
18889 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18890 ? get_inner_array_type (type) : type);
18891 if (mode == DFmode || mode == DCmode
18892 || GET_MODE_CLASS (mode) == MODE_INT
18893 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18894 return MIN (32, computed);
18895 return computed;
18898 /* Output assembler code to FILE to increment profiler label # LABELNO
18899 for profiling a function entry. */
18900 void
18901 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18903 if (TARGET_64BIT)
18904 if (flag_pic)
18906 #ifndef NO_PROFILE_COUNTERS
18907 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18908 #endif
18909 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18911 else
18913 #ifndef NO_PROFILE_COUNTERS
18914 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18915 #endif
18916 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18918 else if (flag_pic)
18920 #ifndef NO_PROFILE_COUNTERS
18921 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18922 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18923 #endif
18924 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18926 else
18928 #ifndef NO_PROFILE_COUNTERS
18929 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18930 PROFILE_COUNT_REGISTER);
18931 #endif
18932 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18936 /* We don't have exact information about the insn sizes, but we may assume
18937 quite safely that we are informed about all 1 byte insns and memory
18938 address sizes. This is enough to eliminate unnecessary padding in
18939 99% of cases. */
18941 static int
18942 min_insn_size (rtx insn)
18944 int l = 0;
18946 if (!INSN_P (insn) || !active_insn_p (insn))
18947 return 0;
18949 /* Discard alignments we've emitted and jump instructions. */
18950 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18951 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18952 return 0;
18953 if (GET_CODE (insn) == JUMP_INSN
18954 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18955 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18956 return 0;
18958 /* Important case - calls are always 5 bytes (one opcode byte plus a
18959 32-bit displacement). It is common to have many calls in a row. */
18960 if (GET_CODE (insn) == CALL_INSN
18961 && symbolic_reference_mentioned_p (PATTERN (insn))
18962 && !SIBLING_CALL_P (insn))
18963 return 5;
18964 if (get_attr_length (insn) <= 1)
18965 return 1;
18967 /* For normal instructions we may rely on the sizes of addresses
18968 and the presence of a symbol to require 4 bytes of encoding.
18969 This is not the case for jumps, where references are PC relative. */
18970 if (GET_CODE (insn) != JUMP_INSN)
18972 l = get_attr_length_address (insn);
18973 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18974 l = 4;
18976 if (l)
18977 return 1+l;
18978 else
18979 return 2;
18982 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
18983 16-byte window. */
18985 static void
18986 ix86_avoid_jump_misspredicts (void)
18988 rtx insn, start = get_insns ();
18989 int nbytes = 0, njumps = 0;
18990 int isjump = 0;
18992 /* Look for all minimal intervals of instructions containing 4 jumps.
18993 The intervals are bounded by START and INSN. NBYTES is the total
18994 size of the instructions in the interval, including INSN and not including
18995 START. When NBYTES is smaller than 16, it is possible that the end of
18996 START and INSN end up in the same 16-byte window.
18998 The smallest offset at which INSN can start is the case where START
18999 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
19000 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
19002 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19005 nbytes += min_insn_size (insn);
19006 if (dump_file)
19007 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19008 INSN_UID (insn), min_insn_size (insn));
19009 if ((GET_CODE (insn) == JUMP_INSN
19010 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19011 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19012 || GET_CODE (insn) == CALL_INSN)
19013 njumps++;
19014 else
19015 continue;
19017 while (njumps > 3)
19019 start = NEXT_INSN (start);
19020 if ((GET_CODE (start) == JUMP_INSN
19021 && GET_CODE (PATTERN (start)) != ADDR_VEC
19022 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19023 || GET_CODE (start) == CALL_INSN)
19024 njumps--, isjump = 1;
19025 else
19026 isjump = 0;
19027 nbytes -= min_insn_size (start);
19029 gcc_assert (njumps >= 0);
19030 if (dump_file)
19031 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19032 INSN_UID (start), INSN_UID (insn), nbytes);
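/* Together with the jump just shifted out of the interval, INSN would be
   the fourth jump/call in a window that can still fit in 16 bytes, so pad
   in front of INSN to push it into the next window. */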
19034 if (njumps == 3 && isjump && nbytes < 16)
19036 int padsize = 15 - nbytes + min_insn_size (insn);
19038 if (dump_file)
19039 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19040 INSN_UID (insn), padsize);
19041 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19046 /* The AMD Athlon works faster
19047 when RET is not the destination of a conditional jump or directly preceded
19048 by another jump instruction. We avoid the penalty by inserting a NOP just
19049 before the RET instruction in such cases. */
19050 static void
19051 ix86_pad_returns (void)
19053 edge e;
19054 edge_iterator ei;
19056 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19058 basic_block bb = e->src;
19059 rtx ret = BB_END (bb);
19060 rtx prev;
19061 bool replace = false;
19063 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
19064 || !maybe_hot_bb_p (bb))
19065 continue;
19066 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19067 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
19068 break;
19069 if (prev && GET_CODE (prev) == CODE_LABEL)
19071 edge e;
19072 edge_iterator ei;
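/* The return is directly preceded by a label; pad only if some
   predecessor reaches that label by a jump (a non-fallthrough edge),
   i.e. the return really is a branch target. */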
19074 FOR_EACH_EDGE (e, ei, bb->preds)
19075 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19076 && !(e->flags & EDGE_FALLTHRU))
19077 replace = true;
19079 if (!replace)
19081 prev = prev_active_insn (ret);
19082 if (prev
19083 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
19084 || GET_CODE (prev) == CALL_INSN))
19085 replace = true;
19086 /* Empty functions get a branch mispredict even when the jump destination
19087 is not visible to us. */
19088 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19089 replace = true;
19091 if (replace)
19093 emit_insn_before (gen_return_internal_long (), ret);
19094 delete_insn (ret);
19099 /* Implement machine specific optimizations. We implement padding of returns
19100 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19101 static void
19102 ix86_reorg (void)
19104 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19105 ix86_pad_returns ();
19106 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19107 ix86_avoid_jump_misspredicts ();
19110 /* Return nonzero when a QImode register that must be represented via a REX
19111 prefix is used. */
19112 bool
19113 x86_extended_QIreg_mentioned_p (rtx insn)
19115 int i;
19116 extract_insn_cached (insn);
19117 for (i = 0; i < recog_data.n_operands; i++)
19118 if (REG_P (recog_data.operand[i])
19119 && REGNO (recog_data.operand[i]) >= 4)
19120 return true;
19121 return false;
19124 /* Return nonzero when P points to a register encoded via a REX prefix.
19125 Called via for_each_rtx. */
19126 static int
19127 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19129 unsigned int regno;
19130 if (!REG_P (*p))
19131 return 0;
19132 regno = REGNO (*p);
19133 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19136 /* Return true when INSN mentions a register that must be encoded using a
19137 REX prefix. */
19138 bool
19139 x86_extended_reg_mentioned_p (rtx insn)
19141 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19144 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19145 optabs would emit if we didn't have TFmode patterns. */
19147 void
19148 x86_emit_floatuns (rtx operands[2])
19150 rtx neglab, donelab, i0, i1, f0, in, out;
19151 enum machine_mode mode, inmode;
19153 inmode = GET_MODE (operands[1]);
19154 gcc_assert (inmode == SImode || inmode == DImode);
19156 out = operands[0];
19157 in = force_reg (inmode, operands[1]);
19158 mode = GET_MODE (out);
19159 neglab = gen_label_rtx ();
19160 donelab = gen_label_rtx ();
19161 i1 = gen_reg_rtx (Pmode);
19162 f0 = gen_reg_rtx (mode);
19164 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
19166 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
19167 emit_jump_insn (gen_jump (donelab));
19168 emit_barrier ();
19170 emit_label (neglab);
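/* The input has its high bit set, so a signed conversion would give the
   wrong result. Halve the value, OR the discarded low bit back in so
   rounding is preserved, convert, and then double the result. */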
19172 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19173 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19174 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19175 expand_float (f0, i0, 0);
19176 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19178 emit_label (donelab);
19181 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19182 with all elements equal to VAR. Return true if successful. */
19184 static bool
19185 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19186 rtx target, rtx val)
19188 enum machine_mode smode, wsmode, wvmode;
19189 rtx x;
19191 switch (mode)
19193 case V2SImode:
19194 case V2SFmode:
19195 if (!mmx_ok)
19196 return false;
19197 /* FALLTHRU */
19199 case V2DFmode:
19200 case V2DImode:
19201 case V4SFmode:
19202 case V4SImode:
19203 val = force_reg (GET_MODE_INNER (mode), val);
19204 x = gen_rtx_VEC_DUPLICATE (mode, val);
19205 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19206 return true;
19208 case V4HImode:
19209 if (!mmx_ok)
19210 return false;
19211 if (TARGET_SSE || TARGET_3DNOW_A)
19213 val = gen_lowpart (SImode, val);
19214 x = gen_rtx_TRUNCATE (HImode, val);
19215 x = gen_rtx_VEC_DUPLICATE (mode, x);
19216 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19217 return true;
19219 else
19221 smode = HImode;
19222 wsmode = SImode;
19223 wvmode = V2SImode;
19224 goto widen;
19227 case V8QImode:
19228 if (!mmx_ok)
19229 return false;
19230 smode = QImode;
19231 wsmode = HImode;
19232 wvmode = V4HImode;
19233 goto widen;
19234 case V8HImode:
19235 if (TARGET_SSE2)
19237 rtx tmp1, tmp2;
19238 /* Extend HImode to SImode using a paradoxical SUBREG. */
19239 tmp1 = gen_reg_rtx (SImode);
19240 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19241 /* Insert the SImode value as low element of V4SImode vector. */
19242 tmp2 = gen_reg_rtx (V4SImode);
19243 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19244 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19245 CONST0_RTX (V4SImode),
19246 const1_rtx);
19247 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19248 /* Cast the V4SImode vector back to a V8HImode vector. */
19249 tmp1 = gen_reg_rtx (V8HImode);
19250 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19251 /* Duplicate the low short through the whole low SImode word. */
19252 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19253 /* Cast the V8HImode vector back to a V4SImode vector. */
19254 tmp2 = gen_reg_rtx (V4SImode);
19255 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19256 /* Replicate the low element of the V4SImode vector. */
19257 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19258 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19259 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19260 return true;
19262 smode = HImode;
19263 wsmode = SImode;
19264 wvmode = V4SImode;
19265 goto widen;
19266 case V16QImode:
19267 if (TARGET_SSE2)
19269 rtx tmp1, tmp2;
19270 /* Extend QImode to SImode using a paradoxical SUBREG. */
19271 tmp1 = gen_reg_rtx (SImode);
19272 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19273 /* Insert the SImode value as low element of V4SImode vector. */
19274 tmp2 = gen_reg_rtx (V4SImode);
19275 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19276 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19277 CONST0_RTX (V4SImode),
19278 const1_rtx);
19279 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19280 /* Cast the V4SImode vector back to a V16QImode vector. */
19281 tmp1 = gen_reg_rtx (V16QImode);
19282 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19283 /* Duplicate the low byte through the whole low SImode word. */
19284 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19285 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19286 /* Cast the V16QImode vector back to a V4SImode vector. */
19287 tmp2 = gen_reg_rtx (V4SImode);
19288 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19289 /* Replicate the low element of the V4SImode vector. */
19290 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19291 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19292 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19293 return true;
19295 smode = QImode;
19296 wsmode = HImode;
19297 wvmode = V8HImode;
19298 goto widen;
19299 widen:
19300 /* Replicate the value once into the next wider mode and recurse. */
19301 val = convert_modes (wsmode, smode, val, true);
19302 x = expand_simple_binop (wsmode, ASHIFT, val,
19303 GEN_INT (GET_MODE_BITSIZE (smode)),
19304 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19305 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19307 x = gen_reg_rtx (wvmode);
19308 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19309 gcc_unreachable ();
19310 emit_move_insn (target, gen_lowpart (mode, x));
19311 return true;
19313 default:
19314 return false;
19318 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19319 whose ONE_VAR element is VAR, and other elements are zero. Return true
19320 if successful. */
19322 static bool
19323 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
19324 rtx target, rtx var, int one_var)
19326 enum machine_mode vsimode;
19327 rtx new_target;
19328 rtx x, tmp;
19330 switch (mode)
19332 case V2SFmode:
19333 case V2SImode:
19334 if (!mmx_ok)
19335 return false;
19336 /* FALLTHRU */
19338 case V2DFmode:
19339 case V2DImode:
19340 if (one_var != 0)
19341 return false;
19342 var = force_reg (GET_MODE_INNER (mode), var);
19343 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
19344 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19345 return true;
19347 case V4SFmode:
19348 case V4SImode:
19349 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
19350 new_target = gen_reg_rtx (mode);
19351 else
19352 new_target = target;
19353 var = force_reg (GET_MODE_INNER (mode), var);
19354 x = gen_rtx_VEC_DUPLICATE (mode, var);
19355 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
19356 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
19357 if (one_var != 0)
19359 /* We need to shuffle the value to the correct position, so
19360 create a new pseudo to store the intermediate result. */
19362 /* With SSE2, we can use the integer shuffle insns. */
19363 if (mode != V4SFmode && TARGET_SSE2)
19365 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
19366 GEN_INT (1),
19367 GEN_INT (one_var == 1 ? 0 : 1),
19368 GEN_INT (one_var == 2 ? 0 : 1),
19369 GEN_INT (one_var == 3 ? 0 : 1)));
19370 if (target != new_target)
19371 emit_move_insn (target, new_target);
19372 return true;
19375 /* Otherwise convert the intermediate result to V4SFmode and
19376 use the SSE1 shuffle instructions. */
19377 if (mode != V4SFmode)
19379 tmp = gen_reg_rtx (V4SFmode);
19380 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
19382 else
19383 tmp = new_target;
19385 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
19386 GEN_INT (1),
19387 GEN_INT (one_var == 1 ? 0 : 1),
19388 GEN_INT (one_var == 2 ? 0+4 : 1+4),
19389 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
19391 if (mode != V4SFmode)
19392 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
19393 else if (tmp != target)
19394 emit_move_insn (target, tmp);
19396 else if (target != new_target)
19397 emit_move_insn (target, new_target);
19398 return true;
19400 case V8HImode:
19401 case V16QImode:
19402 vsimode = V4SImode;
19403 goto widen;
19404 case V4HImode:
19405 case V8QImode:
19406 if (!mmx_ok)
19407 return false;
19408 vsimode = V2SImode;
19409 goto widen;
19410 widen:
19411 if (one_var != 0)
19412 return false;
19414 /* Zero extend the variable element to SImode and recurse. */
19415 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19417 x = gen_reg_rtx (vsimode);
19418 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19419 var, one_var))
19420 gcc_unreachable ();
19422 emit_move_insn (target, gen_lowpart (mode, x));
19423 return true;
19425 default:
19426 return false;
19430 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19431 consisting of the values in VALS. It is known that all elements
19432 except ONE_VAR are constants. Return true if successful. */
19434 static bool
19435 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19436 rtx target, rtx vals, int one_var)
19438 rtx var = XVECEXP (vals, 0, one_var);
19439 enum machine_mode wmode;
19440 rtx const_vec, x;
19442 const_vec = copy_rtx (vals);
19443 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19444 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19446 switch (mode)
19448 case V2DFmode:
19449 case V2DImode:
19450 case V2SFmode:
19451 case V2SImode:
19452 /* For the two element vectors, it's just as easy to use
19453 the general case. */
19454 return false;
19456 case V4SFmode:
19457 case V4SImode:
19458 case V8HImode:
19459 case V4HImode:
19460 break;
19462 case V16QImode:
19463 wmode = V8HImode;
19464 goto widen;
19465 case V8QImode:
19466 wmode = V4HImode;
19467 goto widen;
19468 widen:
19469 /* There's no way to set one QImode entry easily. Combine
19470 the variable value with its adjacent constant value, and
19471 promote to an HImode set. */
19472 x = XVECEXP (vals, 0, one_var ^ 1);
19473 if (one_var & 1)
19475 var = convert_modes (HImode, QImode, var, true);
19476 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19477 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19478 x = GEN_INT (INTVAL (x) & 0xff);
19480 else
19482 var = convert_modes (HImode, QImode, var, true);
19483 x = gen_int_mode (INTVAL (x) << 8, HImode);
19485 if (x != const0_rtx)
19486 var = expand_simple_binop (HImode, IOR, var, x, var,
19487 1, OPTAB_LIB_WIDEN);
19489 x = gen_reg_rtx (wmode);
19490 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19491 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19493 emit_move_insn (target, gen_lowpart (mode, x));
19494 return true;
19496 default:
19497 return false;
19500 emit_move_insn (target, const_vec);
19501 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19502 return true;
19505 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19506 all values variable, and none identical. */
19508 static void
19509 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19510 rtx target, rtx vals)
19512 enum machine_mode half_mode = GET_MODE_INNER (mode);
19513 rtx op0 = NULL, op1 = NULL;
19514 bool use_vec_concat = false;
19516 switch (mode)
19518 case V2SFmode:
19519 case V2SImode:
19520 if (!mmx_ok && !TARGET_SSE)
19521 break;
19522 /* FALLTHRU */
19524 case V2DFmode:
19525 case V2DImode:
19526 /* For the two element vectors, we always implement VEC_CONCAT. */
19527 op0 = XVECEXP (vals, 0, 0);
19528 op1 = XVECEXP (vals, 0, 1);
19529 use_vec_concat = true;
19530 break;
19532 case V4SFmode:
19533 half_mode = V2SFmode;
19534 goto half;
19535 case V4SImode:
19536 half_mode = V2SImode;
19537 goto half;
19538 half:
19540 rtvec v;
19542 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19543 Recurse to load the two halves. */
19545 op0 = gen_reg_rtx (half_mode);
19546 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19547 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19549 op1 = gen_reg_rtx (half_mode);
19550 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19551 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19553 use_vec_concat = true;
19555 break;
19557 case V8HImode:
19558 case V16QImode:
19559 case V4HImode:
19560 case V8QImode:
19561 break;
19563 default:
19564 gcc_unreachable ();
19567 if (use_vec_concat)
19569 if (!register_operand (op0, half_mode))
19570 op0 = force_reg (half_mode, op0);
19571 if (!register_operand (op1, half_mode))
19572 op1 = force_reg (half_mode, op1);
19574 emit_insn (gen_rtx_SET (VOIDmode, target,
19575 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19577 else
19579 int i, j, n_elts, n_words, n_elt_per_word;
19580 enum machine_mode inner_mode;
19581 rtx words[4], shift;
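/* No direct concat pattern applies, so assemble the vector in integer
   registers: pack n_elt_per_word elements into each word with shifts and
   IORs, then move the resulting word(s) into the vector register. */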
19583 inner_mode = GET_MODE_INNER (mode);
19584 n_elts = GET_MODE_NUNITS (mode);
19585 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19586 n_elt_per_word = n_elts / n_words;
19587 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19589 for (i = 0; i < n_words; ++i)
19591 rtx word = NULL_RTX;
19593 for (j = 0; j < n_elt_per_word; ++j)
19595 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19596 elt = convert_modes (word_mode, inner_mode, elt, true);
19598 if (j == 0)
19599 word = elt;
19600 else
19602 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19603 word, 1, OPTAB_LIB_WIDEN);
19604 word = expand_simple_binop (word_mode, IOR, word, elt,
19605 word, 1, OPTAB_LIB_WIDEN);
19609 words[i] = word;
19612 if (n_words == 1)
19613 emit_move_insn (target, gen_lowpart (mode, words[0]));
19614 else if (n_words == 2)
19616 rtx tmp = gen_reg_rtx (mode);
19617 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19618 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19619 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19620 emit_move_insn (target, tmp);
19622 else if (n_words == 4)
19624 rtx tmp = gen_reg_rtx (V4SImode);
19625 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19626 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19627 emit_move_insn (target, gen_lowpart (mode, tmp));
19629 else
19630 gcc_unreachable ();
19634 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19635 instructions unless MMX_OK is true. */
19637 void
19638 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19640 enum machine_mode mode = GET_MODE (target);
19641 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19642 int n_elts = GET_MODE_NUNITS (mode);
19643 int n_var = 0, one_var = -1;
19644 bool all_same = true, all_const_zero = true;
19645 int i;
19646 rtx x;
19648 for (i = 0; i < n_elts; ++i)
19650 x = XVECEXP (vals, 0, i);
19651 if (!CONSTANT_P (x))
19652 n_var++, one_var = i;
19653 else if (x != CONST0_RTX (inner_mode))
19654 all_const_zero = false;
19655 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19656 all_same = false;
19659 /* Constants are best loaded from the constant pool. */
19660 if (n_var == 0)
19662 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19663 return;
19666 /* If all values are identical, broadcast the value. */
19667 if (all_same
19668 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19669 XVECEXP (vals, 0, 0)))
19670 return;
19672 /* Values where only one field is non-constant are best loaded from
19673 the pool and overwritten via move later. */
19674 if (n_var == 1)
19676 if (all_const_zero
19677 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19678 XVECEXP (vals, 0, one_var),
19679 one_var))
19680 return;
19682 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19683 return;
19686 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19689 void
19690 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19692 enum machine_mode mode = GET_MODE (target);
19693 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19694 bool use_vec_merge = false;
19695 rtx tmp;
19697 switch (mode)
19699 case V2SFmode:
19700 case V2SImode:
19701 if (mmx_ok)
19703 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19704 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19705 if (elt == 0)
19706 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19707 else
19708 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19709 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19710 return;
19712 break;
19714 case V2DFmode:
19715 case V2DImode:
19717 rtx op0, op1;
19719 /* For the two element vectors, we implement a VEC_CONCAT with
19720 the extraction of the other element. */
19722 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19723 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19725 if (elt == 0)
19726 op0 = val, op1 = tmp;
19727 else
19728 op0 = tmp, op1 = val;
19730 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19731 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19733 return;
19735 case V4SFmode:
19736 switch (elt)
19738 case 0:
19739 use_vec_merge = true;
19740 break;
19742 case 1:
19743 /* tmp = target = A B C D */
19744 tmp = copy_to_reg (target);
19745 /* target = A A B B */
19746 emit_insn (gen_sse_unpcklps (target, target, target));
19747 /* target = X A B B */
19748 ix86_expand_vector_set (false, target, val, 0);
19749 /* target = A X C D */
19750 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19751 GEN_INT (1), GEN_INT (0),
19752 GEN_INT (2+4), GEN_INT (3+4)));
19753 return;
19755 case 2:
19756 /* tmp = target = A B C D */
19757 tmp = copy_to_reg (target);
19758 /* tmp = X B C D */
19759 ix86_expand_vector_set (false, tmp, val, 0);
19760 /* target = A B X D */
19761 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19762 GEN_INT (0), GEN_INT (1),
19763 GEN_INT (0+4), GEN_INT (3+4)));
19764 return;
19766 case 3:
19767 /* tmp = target = A B C D */
19768 tmp = copy_to_reg (target);
19769 /* tmp = X B C D */
19770 ix86_expand_vector_set (false, tmp, val, 0);
19771 /* target = A B C X */
19772 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19773 GEN_INT (0), GEN_INT (1),
19774 GEN_INT (2+4), GEN_INT (0+4)));
19775 return;
19777 default:
19778 gcc_unreachable ();
19780 break;
19782 case V4SImode:
19783 /* Element 0 handled by vec_merge below. */
19784 if (elt == 0)
19786 use_vec_merge = true;
19787 break;
19790 if (TARGET_SSE2)
19792 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19793 store into element 0, then shuffle them back. */
19795 rtx order[4];
19797 order[0] = GEN_INT (elt);
19798 order[1] = const1_rtx;
19799 order[2] = const2_rtx;
19800 order[3] = GEN_INT (3);
19801 order[elt] = const0_rtx;
19803 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19804 order[1], order[2], order[3]));
19806 ix86_expand_vector_set (false, target, val, 0);
19808 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19809 order[1], order[2], order[3]));
19811 else
19813 /* For SSE1, we have to reuse the V4SF code. */
19814 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19815 gen_lowpart (SFmode, val), elt);
19817 return;
19819 case V8HImode:
19820 use_vec_merge = TARGET_SSE2;
19821 break;
19822 case V4HImode:
19823 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19824 break;
19826 case V16QImode:
19827 case V8QImode:
19828 default:
19829 break;
19832 if (use_vec_merge)
19834 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19835 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19836 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19838 else
19840 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19842 emit_move_insn (mem, target);
19844 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19845 emit_move_insn (tmp, val);
19847 emit_move_insn (target, mem);
19851 void
19852 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19854 enum machine_mode mode = GET_MODE (vec);
19855 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19856 bool use_vec_extr = false;
19857 rtx tmp;
19859 switch (mode)
19861 case V2SImode:
19862 case V2SFmode:
19863 if (!mmx_ok)
19864 break;
19865 /* FALLTHRU */
19867 case V2DFmode:
19868 case V2DImode:
19869 use_vec_extr = true;
19870 break;
19872 case V4SFmode:
19873 switch (elt)
19875 case 0:
19876 tmp = vec;
19877 break;
19879 case 1:
19880 case 3:
19881 tmp = gen_reg_rtx (mode);
19882 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19883 GEN_INT (elt), GEN_INT (elt),
19884 GEN_INT (elt+4), GEN_INT (elt+4)));
19885 break;
19887 case 2:
19888 tmp = gen_reg_rtx (mode);
19889 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
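/* unpckhps of the vector with itself yields C C D D, so the requested
   element 2 now sits in the low slot and can be extracted as element 0. */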
19890 break;
19892 default:
19893 gcc_unreachable ();
19895 vec = tmp;
19896 use_vec_extr = true;
19897 elt = 0;
19898 break;
19900 case V4SImode:
19901 if (TARGET_SSE2)
19903 switch (elt)
19905 case 0:
19906 tmp = vec;
19907 break;
19909 case 1:
19910 case 3:
19911 tmp = gen_reg_rtx (mode);
19912 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19913 GEN_INT (elt), GEN_INT (elt),
19914 GEN_INT (elt), GEN_INT (elt)));
19915 break;
19917 case 2:
19918 tmp = gen_reg_rtx (mode);
19919 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19920 break;
19922 default:
19923 gcc_unreachable ();
19925 vec = tmp;
19926 use_vec_extr = true;
19927 elt = 0;
19929 else
19931 /* For SSE1, we have to reuse the V4SF code. */
19932 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19933 gen_lowpart (V4SFmode, vec), elt);
19934 return;
19936 break;
19938 case V8HImode:
19939 use_vec_extr = TARGET_SSE2;
19940 break;
19941 case V4HImode:
19942 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19943 break;
19945 case V16QImode:
19946 case V8QImode:
19947 /* ??? Could extract the appropriate HImode element and shift. */
19948 default:
19949 break;
19952 if (use_vec_extr)
19954 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19955 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19957 /* Let the rtl optimizers know about the zero extension performed. */
19958 if (inner_mode == HImode)
19960 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19961 target = gen_lowpart (SImode, target);
19964 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19966 else
19968 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19970 emit_move_insn (mem, vec);
19972 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19973 emit_move_insn (target, tmp);
19977 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19978 pattern to reduce; DEST is the destination; IN is the input vector. */
19980 void
19981 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19983 rtx tmp1, tmp2, tmp3;
19985 tmp1 = gen_reg_rtx (V4SFmode);
19986 tmp2 = gen_reg_rtx (V4SFmode);
19987 tmp3 = gen_reg_rtx (V4SFmode);
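/* movhlps copies lanes 2,3 of IN into lanes 0,1 of TMP1; the first FN then
   gives TMP2[0] = IN[0] op IN[2] and TMP2[1] = IN[1] op IN[3].  The shufps
   moves TMP2[1] into lane 0 of TMP3, so the final FN leaves the complete
   reduction in lane 0 of DEST.  */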
19989 emit_insn (gen_sse_movhlps (tmp1, in, in));
19990 emit_insn (fn (tmp2, tmp1, in));
19992 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19993 GEN_INT (1), GEN_INT (1),
19994 GEN_INT (1+4), GEN_INT (1+4)));
19995 emit_insn (fn (dest, tmp2, tmp3));
19998 /* Target hook for scalar_mode_supported_p. */
19999 static bool
20000 ix86_scalar_mode_supported_p (enum machine_mode mode)
20002 if (DECIMAL_FLOAT_MODE_P (mode))
20003 return true;
20004 else
20005 return default_scalar_mode_supported_p (mode);
20008 /* Implements target hook vector_mode_supported_p. */
20009 static bool
20010 ix86_vector_mode_supported_p (enum machine_mode mode)
20012 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20013 return true;
20014 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20015 return true;
20016 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20017 return true;
20018 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20019 return true;
20020 return false;
20023 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20025 We do this in the new i386 backend to maintain source compatibility
20026 with the old cc0-based compiler. */
20028 static tree
20029 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20030 tree inputs ATTRIBUTE_UNUSED,
20031 tree clobbers)
20033 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20034 clobbers);
20035 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20036 clobbers);
20037 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
20038 clobbers);
20039 return clobbers;
20042 /* Return true if this goes in large data/bss. */
20044 static bool
20045 ix86_in_large_data_p (tree exp)
20047 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20048 return false;
20050 /* Functions are never large data. */
20051 if (TREE_CODE (exp) == FUNCTION_DECL)
20052 return false;
20054 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20056 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20057 if (strcmp (section, ".ldata") == 0
20058 || strcmp (section, ".lbss") == 0)
20059 return true;
20060 return false;
20062 else
20064 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20066 /* If this is an incomplete type with size 0, then we can't put it
20067 in data because it might be too big when completed. */
20068 if (!size || size > ix86_section_threshold)
20069 return true;
20072 return false;
20074 static void
20075 ix86_encode_section_info (tree decl, rtx rtl, int first)
20077 default_encode_section_info (decl, rtl, first);
20079 if (TREE_CODE (decl) == VAR_DECL
20080 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20081 && ix86_in_large_data_p (decl))
20082 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20085 /* Worker function for REVERSE_CONDITION. */
20087 enum rtx_code
20088 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20090 return (mode != CCFPmode && mode != CCFPUmode
20091 ? reverse_condition (code)
20092 : reverse_condition_maybe_unordered (code));
20095 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20096 to OPERANDS[0]. */
20098 const char *
20099 output_387_reg_move (rtx insn, rtx *operands)
20101 if (REG_P (operands[1])
20102 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20104 if (REGNO (operands[0]) == FIRST_STACK_REG)
20105 return output_387_ffreep (operands, 0);
20106 return "fstp\t%y0";
20108 if (STACK_TOP_P (operands[0]))
20109 return "fld%z1\t%y1";
20110 return "fst\t%y0";
20113 /* Output code to perform a conditional jump to LABEL if the C2 flag
20114 in the FP status register is set. */
20116 void
20117 ix86_emit_fp_unordered_jump (rtx label)
20119 rtx reg = gen_reg_rtx (HImode);
20120 rtx temp;
20122 emit_insn (gen_x86_fnstsw_1 (reg));
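/* With SAHF, the x87 condition bits are copied into EFLAGS (C2 maps to PF)
   and we branch on the unordered condition; otherwise the C2 bit is tested
   directly: it is bit 2 of the status word's high byte, hence mask 0x04.  */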
20124 if (TARGET_USE_SAHF)
20126 emit_insn (gen_x86_sahf_1 (reg));
20128 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20129 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20131 else
20133 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20135 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20136 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20139 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20140 gen_rtx_LABEL_REF (VOIDmode, label),
20141 pc_rtx);
20142 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20143 emit_jump_insn (temp);
20146 /* Output code to perform a log1p XFmode calculation. */
20148 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20150 rtx label1 = gen_label_rtx ();
20151 rtx label2 = gen_label_rtx ();
20153 rtx tmp = gen_reg_rtx (XFmode);
20154 rtx tmp2 = gen_reg_rtx (XFmode);
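/* fyl2xp1 is only defined for |op1| < 1 - sqrt(2)/2 (~0.2929), so |op1| is
   compared against that constant; larger magnitudes take the plain fyl2x
   path on 1 + op1 below.  */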
20156 emit_insn (gen_absxf2 (tmp, op1));
20157 emit_insn (gen_cmpxf (tmp,
20158 CONST_DOUBLE_FROM_REAL_VALUE (
20159 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20160 XFmode)));
20161 emit_jump_insn (gen_bge (label1));
20163 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20164 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
20165 emit_jump (label2);
20167 emit_label (label1);
20168 emit_move_insn (tmp, CONST1_RTX (XFmode));
20169 emit_insn (gen_addxf3 (tmp, op1, tmp));
20170 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20171 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
20173 emit_label (label2);
20176 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20178 static void
20179 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20180 tree decl)
20182 /* With Binutils 2.15, the "@unwind" marker must be specified on
20183 every occurrence of the ".eh_frame" section, not just the first
20184 one. */
20185 if (TARGET_64BIT
20186 && strcmp (name, ".eh_frame") == 0)
20188 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20189 flags & SECTION_WRITE ? "aw" : "a");
20190 return;
20192 default_elf_asm_named_section (name, flags, decl);
20195 /* Return the mangling of TYPE if it is an extended fundamental type. */
20197 static const char *
20198 ix86_mangle_fundamental_type (tree type)
20200 switch (TYPE_MODE (type))
20202 case TFmode:
20203 /* __float128 is "g". */
20204 return "g";
20205 case XFmode:
20206 /* "long double" or __float80 is "e". */
20207 return "e";
20208 default:
20209 return NULL;
20213 /* For 32-bit code we can save PIC register setup by using
20214 __stack_chk_fail_local hidden function instead of calling
20215 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20216 register, so it is better to call __stack_chk_fail directly. */
20218 static tree
20219 ix86_stack_protect_fail (void)
20221 return TARGET_64BIT
20222 ? default_external_stack_protect_fail ()
20223 : default_hidden_stack_protect_fail ();
20226 /* Select a format to encode pointers in exception handling data. CODE
20227 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20228 true if the symbol may be affected by dynamic relocations.
20230 ??? All x86 object file formats are capable of representing this.
20231 After all, the relocation needed is the same as for the call insn.
20232 Whether or not a particular assembler allows us to enter such, I
20233 guess we'll have to see. */
20234 int
20235 asm_preferred_eh_data_format (int code, int global)
20237 if (flag_pic)
20239 int type = DW_EH_PE_sdata8;
20240 if (!TARGET_64BIT
20241 || ix86_cmodel == CM_SMALL_PIC
20242 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20243 type = DW_EH_PE_sdata4;
20244 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20246 if (ix86_cmodel == CM_SMALL
20247 || (ix86_cmodel == CM_MEDIUM && code))
20248 return DW_EH_PE_udata4;
20249 return DW_EH_PE_absptr;
20252 /* Expand copysign from SIGN to the positive value ABS_VALUE,
20253 storing the result in RESULT. If MASK is non-null, it shall be a mask
20254 that clears the sign bit. */
20255 static void
20256 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20258 enum machine_mode mode = GET_MODE (sign);
20259 rtx sgn = gen_reg_rtx (mode);
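/* SGN will hold only the sign bit of SIGN; OR-ing it into the non-negative
   ABS_VALUE implements copysign.  */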
20260 if (mask == NULL_RTX)
20262 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20263 if (!VECTOR_MODE_P (mode))
20265 /* We need to generate a scalar mode mask in this case. */
20266 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20267 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20268 mask = gen_reg_rtx (mode);
20269 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20272 else
20273 mask = gen_rtx_NOT (mode, mask);
20274 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20275 gen_rtx_AND (mode, mask, sign)));
20276 emit_insn (gen_rtx_SET (VOIDmode, result,
20277 gen_rtx_IOR (mode, abs_value, sgn)));
20280 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20281 mask for masking out the sign-bit is stored in *SMASK, if that is
20282 non-null. */
20283 static rtx
20284 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20286 enum machine_mode mode = GET_MODE (op0);
20287 rtx xa, mask;
20289 xa = gen_reg_rtx (mode);
20290 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20291 if (!VECTOR_MODE_P (mode))
20293 /* We need to generate a scalar mode mask in this case. */
20294 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20295 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20296 mask = gen_reg_rtx (mode);
20297 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20299 emit_insn (gen_rtx_SET (VOIDmode, xa,
20300 gen_rtx_AND (mode, op0, mask)));
20302 if (smask)
20303 *smask = mask;
20305 return xa;
20308 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20309 swapping the operands if SWAP_OPERANDS is true. The expanded
20310 code is a forward jump to a newly created label in case the
20311 comparison is true. The generated label rtx is returned. */
20312 static rtx
20313 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20314 bool swap_operands)
20316 rtx label, tmp;
20318 if (swap_operands)
20320 tmp = op0;
20321 op0 = op1;
20322 op1 = tmp;
20325 label = gen_label_rtx ();
20326 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
20327 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20328 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
20329 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
20330 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20331 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
20332 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20333 JUMP_LABEL (tmp) = label;
20335 return label;
20338 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20339 using comparison code CODE. Operands are swapped for the comparison if
20340 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20341 static rtx
20342 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
20343 bool swap_operands)
20345 enum machine_mode mode = GET_MODE (op0);
20346 rtx mask = gen_reg_rtx (mode);
20348 if (swap_operands)
20350 rtx tmp = op0;
20351 op0 = op1;
20352 op1 = tmp;
20355 if (mode == DFmode)
20356 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
20357 gen_rtx_fmt_ee (code, mode, op0, op1)));
20358 else
20359 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
20360 gen_rtx_fmt_ee (code, mode, op0, op1)));
20362 return mask;
20365 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20366 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
20367 static rtx
20368 ix86_gen_TWO52 (enum machine_mode mode)
20370 REAL_VALUE_TYPE TWO52r;
20371 rtx TWO52;
20373 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
20374 TWO52 = const_double_from_real_value (TWO52r, mode);
20375 TWO52 = force_reg (mode, TWO52);
20377 return TWO52;
20380 /* Expand SSE sequence for computing lround from OP1 storing
20381 into OP0. */
20382 void
20383 ix86_expand_lround (rtx op0, rtx op1)
20385 /* C code for the stuff we're doing below:
20386 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20387 return (long)tmp;
20389 enum machine_mode mode = GET_MODE (op1);
20390 const struct real_format *fmt;
20391 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20392 rtx adj;
20394 /* load nextafter (0.5, 0.0) */
20395 fmt = REAL_MODE_FORMAT (mode);
20396 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20397 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20399 /* adj = copysign (0.5, op1) */
20400 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
20401 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
20403 /* adj = op1 + adj */
20404 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
20406 /* op0 = (imode)adj */
20407 expand_fix (op0, adj, 0);
20410 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
20411 storing into OPERAND0. */
20412 void
20413 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
20415 /* C code for the stuff we're doing below (for do_floor):
20416 xi = (long)op1;
20417 xi -= (double)xi > op1 ? 1 : 0;
20418 return xi;
20420 enum machine_mode fmode = GET_MODE (op1);
20421 enum machine_mode imode = GET_MODE (op0);
20422 rtx ireg, freg, label, tmp;
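/* When DO_FLOOR is false (lceil), the comparison operands are swapped and 1
   is added instead of subtracted, i.e. xi += (double)xi < op1 ? 1 : 0.  */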
20424 /* reg = (long)op1 */
20425 ireg = gen_reg_rtx (imode);
20426 expand_fix (ireg, op1, 0);
20428 /* freg = (double)reg */
20429 freg = gen_reg_rtx (fmode);
20430 expand_float (freg, ireg, 0);
20432 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20433 label = ix86_expand_sse_compare_and_jump (UNLE,
20434 freg, op1, !do_floor);
20435 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
20436 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
20437 emit_move_insn (ireg, tmp);
20439 emit_label (label);
20440 LABEL_NUSES (label) = 1;
20442 emit_move_insn (op0, ireg);
20445 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20446 result in OPERAND0. */
20447 void
20448 ix86_expand_rint (rtx operand0, rtx operand1)
20450 /* C code for the stuff we're doing below:
20451 xa = fabs (operand1);
20452 if (!isless (xa, 2**52))
20453 return operand1;
20454 xa = xa + 2**52 - 2**52;
20455 return copysign (xa, operand1);
20457 enum machine_mode mode = GET_MODE (operand0);
20458 rtx res, xa, label, TWO52, mask;
20460 res = gen_reg_rtx (mode);
20461 emit_move_insn (res, operand1);
20463 /* xa = abs (operand1) */
20464 xa = ix86_expand_sse_fabs (res, &mask);
20466 /* if (!isless (xa, TWO52)) goto label; */
20467 TWO52 = ix86_gen_TWO52 (mode);
20468 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
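/* Adding and then subtracting 2^52 (2^23 for SFmode) leaves no fractional
   significand bits, so the rounding performed by the addition in the current
   rounding mode does the actual rint.  This is valid only because the branch
   above guarantees xa < TWO52.  */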
20470 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20471 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20473 ix86_sse_copysign_to_positive (res, xa, res, mask);
20475 emit_label (label);
20476 LABEL_NUSES (label) = 1;
20478 emit_move_insn (operand0, res);
20481 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20482 into OPERAND0. */
20483 void
20484 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
20486 /* C code for the stuff we expand below.
20487 double xa = fabs (x), x2;
20488 if (!isless (xa, TWO52))
20489 return x;
20490 xa = xa + TWO52 - TWO52;
20491 x2 = copysign (xa, x);
20492 Compensate. Floor:
20493 if (x2 > x)
20494 x2 -= 1;
20495 Compensate. Ceil:
20496 if (x2 < x)
20497 x2 -= -1;
20498 return x2;
20500 enum machine_mode mode = GET_MODE (operand0);
20501 rtx xa, TWO52, tmp, label, one, res, mask;
20503 TWO52 = ix86_gen_TWO52 (mode);
20505 /* Temporary for holding the result, initialized to the input
20506 operand to ease control flow. */
20507 res = gen_reg_rtx (mode);
20508 emit_move_insn (res, operand1);
20510 /* xa = abs (operand1) */
20511 xa = ix86_expand_sse_fabs (res, &mask);
20513 /* if (!isless (xa, TWO52)) goto label; */
20514 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20516 /* xa = xa + TWO52 - TWO52; */
20517 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20518 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20520 /* xa = copysign (xa, operand1) */
20521 ix86_sse_copysign_to_positive (xa, xa, res, mask);
20523 /* generate 1.0 or -1.0 */
20524 one = force_reg (mode,
20525 const_double_from_real_value (do_floor
20526 ? dconst1 : dconstm1, mode));
20528 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
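/* The SSE comparison yields an all-ones mask where it is true, so ANDing
   the mask with 1.0 produces exactly the amount to subtract.  */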
20529 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20530 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20531 gen_rtx_AND (mode, one, tmp)));
20532 /* We always need to subtract here to preserve signed zero. */
20533 tmp = expand_simple_binop (mode, MINUS,
20534 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20535 emit_move_insn (res, tmp);
20537 emit_label (label);
20538 LABEL_NUSES (label) = 1;
20540 emit_move_insn (operand0, res);
20543 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20544 into OPERAND0. */
20545 void
20546 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
20548 /* C code for the stuff we expand below.
20549 double xa = fabs (x), x2;
20550 if (!isless (xa, TWO52))
20551 return x;
20552 x2 = (double)(long)x;
20553 Compensate. Floor:
20554 if (x2 > x)
20555 x2 -= 1;
20556 Compensate. Ceil:
20557 if (x2 < x)
20558 x2 += 1;
20559 if (HONOR_SIGNED_ZEROS (mode))
20560 return copysign (x2, x);
20561 return x2;
20563 enum machine_mode mode = GET_MODE (operand0);
20564 rtx xa, xi, TWO52, tmp, label, one, res, mask;
20566 TWO52 = ix86_gen_TWO52 (mode);
20568 /* Temporary for holding the result, initialized to the input
20569 operand to ease control flow. */
20570 res = gen_reg_rtx (mode);
20571 emit_move_insn (res, operand1);
20573 /* xa = abs (operand1) */
20574 xa = ix86_expand_sse_fabs (res, &mask);
20576 /* if (!isless (xa, TWO52)) goto label; */
20577 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20579 /* xa = (double)(long)x */
20580 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20581 expand_fix (xi, res, 0);
20582 expand_float (xa, xi, 0);
20584 /* generate 1.0 */
20585 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20587 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20588 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20589 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20590 gen_rtx_AND (mode, one, tmp)));
20591 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
20592 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20593 emit_move_insn (res, tmp);
20595 if (HONOR_SIGNED_ZEROS (mode))
20596 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20598 emit_label (label);
20599 LABEL_NUSES (label) = 1;
20601 emit_move_insn (operand0, res);
20604 /* Expand SSE sequence for computing round from OPERAND1 storing
20605 into OPERAND0. A sequence that works without relying on DImode truncation
20606 via cvttsd2siq, which is only available on 64-bit targets. */
20607 void
20608 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
20610 /* C code for the stuff we expand below.
20611 double xa = fabs (x), xa2, x2;
20612 if (!isless (xa, TWO52))
20613 return x;
20614 Using the absolute value and copying back sign makes
20615 -0.0 -> -0.0 correct.
20616 xa2 = xa + TWO52 - TWO52;
20617 Compensate.
20618 dxa = xa2 - xa;
20619 if (dxa <= -0.5)
20620 xa2 += 1;
20621 else if (dxa > 0.5)
20622 xa2 -= 1;
20623 x2 = copysign (xa2, x);
20624 return x2;
20626 enum machine_mode mode = GET_MODE (operand0);
20627 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
20629 TWO52 = ix86_gen_TWO52 (mode);
20631 /* Temporary for holding the result, initialized to the input
20632 operand to ease control flow. */
20633 res = gen_reg_rtx (mode);
20634 emit_move_insn (res, operand1);
20636 /* xa = abs (operand1) */
20637 xa = ix86_expand_sse_fabs (res, &mask);
20639 /* if (!isless (xa, TWO52)) goto label; */
20640 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20642 /* xa2 = xa + TWO52 - TWO52; */
20643 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20644 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
20646 /* dxa = xa2 - xa; */
20647 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
20649 /* generate 0.5, 1.0 and -0.5 */
20650 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
20651 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
20652 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
20653 0, OPTAB_DIRECT);
20655 /* Compensate. */
20656 tmp = gen_reg_rtx (mode);
20657 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
20658 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
20659 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20660 gen_rtx_AND (mode, one, tmp)));
20661 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20662 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
20663 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
20664 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20665 gen_rtx_AND (mode, one, tmp)));
20666 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20668 /* res = copysign (xa2, operand1) */
20669 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
20671 emit_label (label);
20672 LABEL_NUSES (label) = 1;
20674 emit_move_insn (operand0, res);
20677 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20678 into OPERAND0. */
20679 void
20680 ix86_expand_trunc (rtx operand0, rtx operand1)
20682 /* C code for SSE variant we expand below.
20683 double xa = fabs (x), x2;
20684 if (!isless (xa, TWO52))
20685 return x;
20686 x2 = (double)(long)x;
20687 if (HONOR_SIGNED_ZEROS (mode))
20688 return copysign (x2, x);
20689 return x2;
20691 enum machine_mode mode = GET_MODE (operand0);
20692 rtx xa, xi, TWO52, label, res, mask;
20694 TWO52 = ix86_gen_TWO52 (mode);
20696 /* Temporary for holding the result, initialized to the input
20697 operand to ease control flow. */
20698 res = gen_reg_rtx (mode);
20699 emit_move_insn (res, operand1);
20701 /* xa = abs (operand1) */
20702 xa = ix86_expand_sse_fabs (res, &mask);
20704 /* if (!isless (xa, TWO52)) goto label; */
20705 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20707 /* x = (double)(long)x */
20708 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20709 expand_fix (xi, res, 0);
20710 expand_float (res, xi, 0);
20712 if (HONOR_SIGNED_ZEROS (mode))
20713 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20715 emit_label (label);
20716 LABEL_NUSES (label) = 1;
20718 emit_move_insn (operand0, res);
20721 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
20722 OPERAND0, avoiding the DImode truncation (cvttsd2siq) that is only available on 64-bit targets. */
20723 void
20724 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
20726 enum machine_mode mode = GET_MODE (operand0);
20727 rtx xa, mask, TWO52, label, one, res, smask, tmp;
20729 /* C code for SSE variant we expand below.
20730 double xa = fabs (x), x2;
20731 if (!isless (xa, TWO52))
20732 return x;
20733 xa2 = xa + TWO52 - TWO52;
20734 Compensate:
20735 if (xa2 > xa)
20736 xa2 -= 1.0;
20737 x2 = copysign (xa2, x);
20738 return x2;
20741 TWO52 = ix86_gen_TWO52 (mode);
20743 /* Temporary for holding the result, initialized to the input
20744 operand to ease control flow. */
20745 res = gen_reg_rtx (mode);
20746 emit_move_insn (res, operand1);
20748 /* xa = abs (operand1) */
20749 xa = ix86_expand_sse_fabs (res, &smask);
20751 /* if (!isless (xa, TWO52)) goto label; */
20752 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20754 /* res = xa + TWO52 - TWO52; */
20755 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20756 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
20757 emit_move_insn (res, tmp);
20759 /* generate 1.0 */
20760 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20762 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
20763 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
20764 emit_insn (gen_rtx_SET (VOIDmode, mask,
20765 gen_rtx_AND (mode, mask, one)));
20766 tmp = expand_simple_binop (mode, MINUS,
20767 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
20768 emit_move_insn (res, tmp);
20770 /* res = copysign (res, operand1) */
20771 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
20773 emit_label (label);
20774 LABEL_NUSES (label) = 1;
20776 emit_move_insn (operand0, res);
20779 /* Expand SSE sequence for computing round from OPERAND1 storing
20780 into OPERAND0. */
20781 void
20782 ix86_expand_round (rtx operand0, rtx operand1)
20784 /* C code for the stuff we're doing below:
20785 double xa = fabs (x);
20786 if (!isless (xa, TWO52))
20787 return x;
20788 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20789 return copysign (xa, x);
20791 enum machine_mode mode = GET_MODE (operand0);
20792 rtx res, TWO52, xa, label, xi, half, mask;
20793 const struct real_format *fmt;
20794 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20796 /* Temporary for holding the result, initialized to the input
20797 operand to ease control flow. */
20798 res = gen_reg_rtx (mode);
20799 emit_move_insn (res, operand1);
20801 TWO52 = ix86_gen_TWO52 (mode);
20802 xa = ix86_expand_sse_fabs (res, &mask);
20803 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20805 /* load nextafter (0.5, 0.0) */
20806 fmt = REAL_MODE_FORMAT (mode);
20807 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20808 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
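/* Using the value just below 0.5 keeps inputs slightly below a half-way
   point from being carried up to the next integer by rounding in the
   addition below.  */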
20810 /* xa = xa + 0.5 */
20811 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
20812 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
20814 /* xa = (double)(int64_t)xa */
20815 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20816 expand_fix (xi, xa, 0);
20817 expand_float (xa, xi, 0);
20819 /* res = copysign (xa, operand1) */
20820 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
20822 emit_label (label);
20823 LABEL_NUSES (label) = 1;
20825 emit_move_insn (operand0, res);
20828 #include "gt-i386.h"