gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
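/* Illustrative only - the struct field names here are assumptions about the
   processor_costs layout, but this is roughly how MODE_INDEX is meant to be
   used against the per-mode cost arrays in the tables below:

     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- the "SI" multiply entry
     ix86_cost->divide[MODE_INDEX (DImode)]      -- the "DI" divide/mod entry

   Anything wider than DImode (or a non-integer mode) falls through to the
   final "other" slot at index 4.  */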
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
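/* Worked example under the assumption stated above (COSTS_N_INSNS (N) being
   (N) * 4): COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so in the size_cost
   table below a 2-byte add carries the same weight as a single-insn add does
   in the speed-oriented tables, letting the rtx cost machinery compare size
   costs and speed costs on one scale.  */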
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
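/* How to read a stringop_algs initializer (a sketch; the authoritative layout
   is the struct definition in i386.h): the first member is the algorithm used
   for blocks of unknown size, followed by a table of {max_size, algorithm}
   pairs tried in order, where -1 means "no upper bound".  DUMMY_STRINGOP_ALGS
   therefore reads as "always use a libcall".  Each cost table below ends with
   two such descriptor pairs - block copy and block clear - and within each
   pair the first descriptor appears to be used for 32-bit code and the second
   for 64-bit code, which is why 32-bit-only or 64-bit-only tables plug
   DUMMY_STRINGOP_ALGS into the variant they never use.  */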
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise. */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
661 time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea takes 2 cycles or more. With
863 that cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
980 const struct processor_costs *ix86_cost = &pentium_cost;
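/* Note: pentium_cost is only the initial default; option handling later in
   this file (override_options) repoints ix86_cost at the table matching the
   CPU selected by -mtune/-march.  */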
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
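/* A rough sketch of how these masks are consumed (the exact accessor macros
   live in i386.h; the test below is only illustrative): each entry of the
   ix86_tune_features[] array that follows is a bitmask of PROCESSOR_* values,
   and a tuning is active when the bit of the processor currently being tuned
   for is set, i.e. approximately

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       ... prefer the leave instruction in epilogues ...  */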
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code-size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into the P4 based
1033 on simulation results, but after the P4 was made, no performance benefit
1034 was observed from branch hints. They also increase the code size.
1035 As a result, icc never generates branch hints. */
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
1043 /* | m_GENERIC | m_ATHLON_K8 ? */
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on the Generic32 compilation setting as well. However,
1052 in the current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and conflicts with the partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1058 m_PPRO,
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1097 /* X86_TUNE_QIMODE_MATH */
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1104 ~m_PPRO,
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4 based chips that treat 128bit
1136 SSE registers as single units versus K8 based chips that divide SSE
1137 registers into two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra micro-op on 64bit SSE units. Experimental results
1140 show that disabling this option on P4 brings an over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just the lower part of scalar values in the proper format, leaving the
1151 upper part undefined. */
1152 m_ATHLON_K8,
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1198 /* Feature tests against the various architecture variations. */
1199 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1200 /* X86_ARCH_CMOVE */
1201 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1203 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1204 ~m_386,
1206 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1207 ~(m_386 | m_486),
1209 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1210 ~m_386,
1212 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1213 ~m_386,
1216 static const unsigned int x86_accumulate_outgoing_args
1217 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1219 static const unsigned int x86_arch_always_fancy_math_387
1220 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1221 | m_NOCONA | m_CORE2 | m_GENERIC;
1223 static enum stringop_alg stringop_alg = no_stringop;
1225 /* In case the average insn count for a single function invocation is
1226 lower than this constant, emit fast (but longer) prologue and
1227 epilogue code. */
1228 #define FAST_PROLOGUE_INSN_COUNT 20
1230 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1231 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1232 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1233 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1235 /* Array of the smallest class containing reg number REGNO, indexed by
1236 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1238 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1240 /* ax, dx, cx, bx */
1241 AREG, DREG, CREG, BREG,
1242 /* si, di, bp, sp */
1243 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1244 /* FP registers */
1245 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1246 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1247 /* arg pointer */
1248 NON_Q_REGS,
1249 /* flags, fpsr, fpcr, frame */
1250 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1251 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1252 SSE_REGS, SSE_REGS,
1253 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1254 MMX_REGS, MMX_REGS,
1255 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1256 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1257 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1258 SSE_REGS, SSE_REGS,
1261 /* The "default" register map used in 32bit mode. */
1263 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1274 static int const x86_64_int_parameter_registers[6] =
1276 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1277 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1280 static int const x86_64_int_return_registers[4] =
1282 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1285 /* The "default" register map used in 64bit mode. */
1286 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1288 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1289 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1290 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1291 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1292 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1293 8,9,10,11,12,13,14,15, /* extended integer registers */
1294 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1297 /* Define the register numbers to be used in Dwarf debugging information.
1298 The SVR4 reference port C compiler uses the following register numbers
1299 in its Dwarf output code:
1300 0 for %eax (gcc regno = 0)
1301 1 for %ecx (gcc regno = 2)
1302 2 for %edx (gcc regno = 1)
1303 3 for %ebx (gcc regno = 3)
1304 4 for %esp (gcc regno = 7)
1305 5 for %ebp (gcc regno = 6)
1306 6 for %esi (gcc regno = 4)
1307 7 for %edi (gcc regno = 5)
1308 The following three DWARF register numbers are never generated by
1309 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1310 believes these numbers have these meanings.
1311 8 for %eip (no gcc equivalent)
1312 9 for %eflags (gcc regno = 17)
1313 10 for %trapno (no gcc equivalent)
1314 It is not at all clear how we should number the FP stack registers
1315 for the x86 architecture. If the version of SDB on x86/svr4 were
1316 a bit less brain dead with respect to floating-point then we would
1317 have a precedent to follow with respect to DWARF register numbers
1318 for x86 FP registers, but the SDB on x86/svr4 is so completely
1319 broken with respect to FP registers that it is hardly worth thinking
1320 of it as something to strive for compatibility with.
1321 The version of x86/svr4 SDB I have at the moment does (partially)
1322 seem to believe that DWARF register number 11 is associated with
1323 the x86 register %st(0), but that's about all. Higher DWARF
1324 register numbers don't seem to be associated with anything in
1325 particular, and even for DWARF regno 11, SDB only seems to under-
1326 stand that it should say that a variable lives in %st(0) (when
1327 asked via an `=' command) if we said it was in DWARF regno 11,
1328 but SDB still prints garbage when asked for the value of the
1329 variable in question (via a `/' command).
1330 (Also note that the labels SDB prints for various FP stack regs
1331 when doing an `x' command are all wrong.)
1332 Note that these problems generally don't affect the native SVR4
1333 C compiler because it doesn't allow the use of -O with -g and
1334 because when it is *not* optimizing, it allocates a memory
1335 location for each floating-point variable, and the memory
1336 location is what gets described in the DWARF AT_location
1337 attribute for the variable in question.
1338 Regardless of the severe mental illness of the x86/svr4 SDB, we
1339 do something sensible here and we use the following DWARF
1340 register numbers. Note that these are all stack-top-relative
1341 numbers.
1342 11 for %st(0) (gcc regno = 8)
1343 12 for %st(1) (gcc regno = 9)
1344 13 for %st(2) (gcc regno = 10)
1345 14 for %st(3) (gcc regno = 11)
1346 15 for %st(4) (gcc regno = 12)
1347 16 for %st(5) (gcc regno = 13)
1348 17 for %st(6) (gcc regno = 14)
1349 18 for %st(7) (gcc regno = 15) */
1351 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1353 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1354 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1355 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1362 /* Test and compare insns in i386.md store the information needed to
1363 generate branch and scc insns here. */
1365 rtx ix86_compare_op0 = NULL_RTX;
1366 rtx ix86_compare_op1 = NULL_RTX;
1367 rtx ix86_compare_emitted = NULL_RTX;
1369 /* Size of the register save area. */
1370 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
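/* With the usual 64-bit values of these macros (REGPARM_MAX == 6 integer
   registers of UNITS_PER_WORD == 8 bytes each, SSE_REGPARM_MAX == 8 XMM
   registers of 16 bytes each) this works out to 6*8 + 8*16 = 176 bytes,
   the size of the x86-64 ABI register save area spilled by varargs
   prologues.  */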
1372 /* Define the structure for the machine field in struct function. */
1374 struct stack_local_entry GTY(())
1376 unsigned short mode;
1377 unsigned short n;
1378 rtx rtl;
1379 struct stack_local_entry *next;
1382 /* Structure describing stack frame layout.
1383 Stack grows downward:
1385 [arguments]
1386 <- ARG_POINTER
1387 saved pc
1389 saved frame pointer if frame_pointer_needed
1390 <- HARD_FRAME_POINTER
1391 [saved regs]
1393 [padding1] \
1395 [va_arg registers] (
1396 > to_allocate <- FRAME_POINTER
1397 [frame] (
1399 [padding2] / */
1401 struct ix86_frame
1403 int nregs;
1404 int padding1;
1405 int va_arg_size;
1406 HOST_WIDE_INT frame;
1407 int padding2;
1408 int outgoing_arguments_size;
1409 int red_zone_size;
1411 HOST_WIDE_INT to_allocate;
1412 /* The offsets relative to ARG_POINTER. */
1413 HOST_WIDE_INT frame_pointer_offset;
1414 HOST_WIDE_INT hard_frame_pointer_offset;
1415 HOST_WIDE_INT stack_pointer_offset;
1417 /* When save_regs_using_mov is set, emit prologue using
1418 move instead of push instructions. */
1419 bool save_regs_using_mov;
1422 /* Code model option. */
1423 enum cmodel ix86_cmodel;
1424 /* Asm dialect. */
1425 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1426 /* TLS dialects. */
1427 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1429 /* Which unit we are generating floating point math for. */
1430 enum fpmath_unit ix86_fpmath;
1432 /* Which cpu are we scheduling for. */
1433 enum processor_type ix86_tune;
1435 /* Which instruction set architecture to use. */
1436 enum processor_type ix86_arch;
1438 /* true if sse prefetch instruction is not NOOP. */
1439 int x86_prefetch_sse;
1441 /* true if cmpxchg16b is supported. */
1442 int x86_cmpxchg16b;
1444 /* ix86_regparm_string as a number */
1445 static int ix86_regparm;
1447 /* -mstackrealign option */
1448 extern int ix86_force_align_arg_pointer;
1449 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1451 /* Preferred alignment for stack boundary in bits. */
1452 unsigned int ix86_preferred_stack_boundary;
1454 /* Values 1-5: see jump.c */
1455 int ix86_branch_cost;
1457 /* Variables which are this size or smaller are put in the data/bss
1458 or ldata/lbss sections. */
1460 int ix86_section_threshold = 65536;
1462 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1463 char internal_label_prefix[16];
1464 int internal_label_prefix_len;
1466 static bool ix86_handle_option (size_t, const char *, int);
1467 static void output_pic_addr_const (FILE *, rtx, int);
1468 static void put_condition_code (enum rtx_code, enum machine_mode,
1469 int, int, FILE *);
1470 static const char *get_some_local_dynamic_name (void);
1471 static int get_some_local_dynamic_name_1 (rtx *, void *);
1472 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1473 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1474 rtx *);
1475 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1476 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1477 enum machine_mode);
1478 static rtx get_thread_pointer (int);
1479 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1480 static void get_pc_thunk_name (char [32], unsigned int);
1481 static rtx gen_push (rtx);
1482 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1483 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1484 static struct machine_function * ix86_init_machine_status (void);
1485 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1486 static int ix86_nsaved_regs (void);
1487 static void ix86_emit_save_regs (void);
1488 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1489 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1490 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1491 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1492 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1493 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1494 static int ix86_issue_rate (void);
1495 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1496 static int ia32_multipass_dfa_lookahead (void);
1497 static void ix86_init_mmx_sse_builtins (void);
1498 static rtx x86_this_parameter (tree);
1499 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1500 HOST_WIDE_INT, tree);
1501 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1502 static void x86_file_start (void);
1503 static void ix86_reorg (void);
1504 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1505 static tree ix86_build_builtin_va_list (void);
1506 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1507 tree, int *, int);
1508 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1509 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1510 static bool ix86_vector_mode_supported_p (enum machine_mode);
1512 static int ix86_address_cost (rtx);
1513 static bool ix86_cannot_force_const_mem (rtx);
1514 static rtx ix86_delegitimize_address (rtx);
1516 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1518 struct builtin_description;
1519 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1520 tree, rtx);
1521 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1522 tree, rtx);
1523 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1524 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1525 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1526 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1527 static rtx safe_vector_operand (rtx, enum machine_mode);
1528 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1529 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1530 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1531 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1532 static int ix86_fp_comparison_cost (enum rtx_code code);
1533 static unsigned int ix86_select_alt_pic_regnum (void);
1534 static int ix86_save_reg (unsigned int, int);
1535 static void ix86_compute_frame_layout (struct ix86_frame *);
1536 static int ix86_comp_type_attributes (tree, tree);
1537 static int ix86_function_regparm (tree, tree);
1538 const struct attribute_spec ix86_attribute_table[];
1539 static bool ix86_function_ok_for_sibcall (tree, tree);
1540 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1541 static int ix86_value_regno (enum machine_mode, tree, tree);
1542 static bool contains_128bit_aligned_vector_p (tree);
1543 static rtx ix86_struct_value_rtx (tree, int);
1544 static bool ix86_ms_bitfield_layout_p (tree);
1545 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1546 static int extended_reg_mentioned_1 (rtx *, void *);
1547 static bool ix86_rtx_costs (rtx, int, int, int *);
1548 static int min_insn_size (rtx);
1549 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1550 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1551 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1552 tree, bool);
1553 static void ix86_init_builtins (void);
1554 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1555 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1556 static tree ix86_builtin_conversion (enum tree_code, tree);
1557 static const char *ix86_mangle_fundamental_type (tree);
1558 static tree ix86_stack_protect_fail (void);
1559 static rtx ix86_internal_arg_pointer (void);
1560 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1561 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1562 rtx, rtx, int);
1564 /* This function is only used on Solaris. */
1565 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1566 ATTRIBUTE_UNUSED;
1568 /* Register class used for passing a given 64-bit part of an argument.
1569 These represent the classes documented by the psABI, with the exception
1570 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1571 uses SF or DFmode moves instead of DImode to avoid reformatting penalties.
1573 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1574 whenever possible (i.e. when the upper half is only padding).
1576 enum x86_64_reg_class
1578 X86_64_NO_CLASS,
1579 X86_64_INTEGER_CLASS,
1580 X86_64_INTEGERSI_CLASS,
1581 X86_64_SSE_CLASS,
1582 X86_64_SSESF_CLASS,
1583 X86_64_SSEDF_CLASS,
1584 X86_64_SSEUP_CLASS,
1585 X86_64_X87_CLASS,
1586 X86_64_X87UP_CLASS,
1587 X86_64_COMPLEX_X87_CLASS,
1588 X86_64_MEMORY_CLASS
1590 static const char * const x86_64_reg_class_name[] = {
1591 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1592 "sseup", "x87", "x87up", "cplx87", "no"
1595 #define MAX_CLASSES 4
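/* Illustrative sketch, not part of the original file: under the psABI
   classification these enumerators model, a hypothetical argument such
   as the struct below spans two eightbytes classified as
   X86_64_SSEDF_CLASS followed by X86_64_INTEGER_CLASS, so it is passed
   in one SSE register and one integer register.  */
#if 0
struct example_pair { double d; long l; };  /* -> { sseDF, integer } */
#endif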
1597 /* Table of constants used by fldpi, fldln2, etc.... */
1598 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1599 static bool ext_80387_constants_init = 0;
1600 static void init_ext_80387_constants (void);
1601 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1602 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1603 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1604 static section *x86_64_elf_select_section (tree decl, int reloc,
1605 unsigned HOST_WIDE_INT align)
1606 ATTRIBUTE_UNUSED;
1608 /* Initialize the GCC target structure. */
1609 #undef TARGET_ATTRIBUTE_TABLE
1610 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1611 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1612 # undef TARGET_MERGE_DECL_ATTRIBUTES
1613 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1614 #endif
1616 #undef TARGET_COMP_TYPE_ATTRIBUTES
1617 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1619 #undef TARGET_INIT_BUILTINS
1620 #define TARGET_INIT_BUILTINS ix86_init_builtins
1621 #undef TARGET_EXPAND_BUILTIN
1622 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1626 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1627 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1629 #undef TARGET_ASM_FUNCTION_EPILOGUE
1630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1632 #undef TARGET_ENCODE_SECTION_INFO
1633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1635 #else
1636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1637 #endif
1639 #undef TARGET_ASM_OPEN_PAREN
1640 #define TARGET_ASM_OPEN_PAREN ""
1641 #undef TARGET_ASM_CLOSE_PAREN
1642 #define TARGET_ASM_CLOSE_PAREN ""
1644 #undef TARGET_ASM_ALIGNED_HI_OP
1645 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1646 #undef TARGET_ASM_ALIGNED_SI_OP
1647 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1648 #ifdef ASM_QUAD
1649 #undef TARGET_ASM_ALIGNED_DI_OP
1650 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1651 #endif
1653 #undef TARGET_ASM_UNALIGNED_HI_OP
1654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1655 #undef TARGET_ASM_UNALIGNED_SI_OP
1656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1657 #undef TARGET_ASM_UNALIGNED_DI_OP
1658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1660 #undef TARGET_SCHED_ADJUST_COST
1661 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1662 #undef TARGET_SCHED_ISSUE_RATE
1663 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1666 ia32_multipass_dfa_lookahead
1668 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1669 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1671 #ifdef HAVE_AS_TLS
1672 #undef TARGET_HAVE_TLS
1673 #define TARGET_HAVE_TLS true
1674 #endif
1675 #undef TARGET_CANNOT_FORCE_CONST_MEM
1676 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1677 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1678 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1680 #undef TARGET_DELEGITIMIZE_ADDRESS
1681 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1683 #undef TARGET_MS_BITFIELD_LAYOUT_P
1684 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1686 #if TARGET_MACHO
1687 #undef TARGET_BINDS_LOCAL_P
1688 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1689 #endif
1691 #undef TARGET_ASM_OUTPUT_MI_THUNK
1692 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1693 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1694 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1696 #undef TARGET_ASM_FILE_START
1697 #define TARGET_ASM_FILE_START x86_file_start
1699 #undef TARGET_DEFAULT_TARGET_FLAGS
1700 #define TARGET_DEFAULT_TARGET_FLAGS \
1701 (TARGET_DEFAULT \
1702 | TARGET_64BIT_DEFAULT \
1703 | TARGET_SUBTARGET_DEFAULT \
1704 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1706 #undef TARGET_HANDLE_OPTION
1707 #define TARGET_HANDLE_OPTION ix86_handle_option
1709 #undef TARGET_RTX_COSTS
1710 #define TARGET_RTX_COSTS ix86_rtx_costs
1711 #undef TARGET_ADDRESS_COST
1712 #define TARGET_ADDRESS_COST ix86_address_cost
1714 #undef TARGET_FIXED_CONDITION_CODE_REGS
1715 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1716 #undef TARGET_CC_MODES_COMPATIBLE
1717 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1719 #undef TARGET_MACHINE_DEPENDENT_REORG
1720 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1722 #undef TARGET_BUILD_BUILTIN_VA_LIST
1723 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1725 #undef TARGET_MD_ASM_CLOBBERS
1726 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1728 #undef TARGET_PROMOTE_PROTOTYPES
1729 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1730 #undef TARGET_STRUCT_VALUE_RTX
1731 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1732 #undef TARGET_SETUP_INCOMING_VARARGS
1733 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1734 #undef TARGET_MUST_PASS_IN_STACK
1735 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1736 #undef TARGET_PASS_BY_REFERENCE
1737 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1738 #undef TARGET_INTERNAL_ARG_POINTER
1739 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1740 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1741 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1743 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1744 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1746 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1747 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1749 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1750 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1752 #ifdef HAVE_AS_TLS
1753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1755 #endif
1757 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1758 #undef TARGET_INSERT_ATTRIBUTES
1759 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1760 #endif
1762 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1763 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1765 #undef TARGET_STACK_PROTECT_FAIL
1766 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1768 #undef TARGET_FUNCTION_VALUE
1769 #define TARGET_FUNCTION_VALUE ix86_function_value
1771 struct gcc_target targetm = TARGET_INITIALIZER;
1774 /* The svr4 ABI for the i386 says that records and unions are returned
1775 in memory. */
1776 #ifndef DEFAULT_PCC_STRUCT_RETURN
1777 #define DEFAULT_PCC_STRUCT_RETURN 1
1778 #endif
1780 /* Implement TARGET_HANDLE_OPTION. */
1782 static bool
1783 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1785 switch (code)
1787 case OPT_m3dnow:
1788 if (!value)
1790 target_flags &= ~MASK_3DNOW_A;
1791 target_flags_explicit |= MASK_3DNOW_A;
1793 return true;
1795 case OPT_mmmx:
1796 if (!value)
1798 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1799 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1801 return true;
1803 case OPT_msse:
1804 if (!value)
1806 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1807 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1809 return true;
1811 case OPT_msse2:
1812 if (!value)
1814 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1815 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1817 return true;
1819 case OPT_msse3:
1820 if (!value)
1822 target_flags &= ~MASK_SSE4A;
1823 target_flags_explicit |= MASK_SSE4A;
1825 return true;
1827 default:
1828 return true;
1832 /* Sometimes certain combinations of command options do not make
1833 sense on a particular target machine. You can define a macro
1834 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1835 defined, is executed once just after all the command options have
1836 been parsed.
1838 Don't use this macro to turn on various extra optimizations for
1839 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1841 void
1842 override_options (void)
1844 int i;
1845 int ix86_tune_defaulted = 0;
1846 unsigned int ix86_arch_mask, ix86_tune_mask;
1848 /* Comes from final.c -- no real reason to change it. */
1849 #define MAX_CODE_ALIGN 16
1851 static struct ptt
1853 const struct processor_costs *cost; /* Processor costs */
1854 const int target_enable; /* Target flags to enable. */
1855 const int target_disable; /* Target flags to disable. */
1856 const int align_loop; /* Default alignments. */
1857 const int align_loop_max_skip;
1858 const int align_jump;
1859 const int align_jump_max_skip;
1860 const int align_func;
1862 const processor_target_table[PROCESSOR_max] =
1864 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1865 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1866 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1867 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1868 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1869 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1870 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1871 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1872 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1873 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1874 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1875 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1876 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1877 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1880 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1881 static struct pta
1883 const char *const name; /* processor name or nickname. */
1884 const enum processor_type processor;
1885 const enum pta_flags
1887 PTA_SSE = 1,
1888 PTA_SSE2 = 2,
1889 PTA_SSE3 = 4,
1890 PTA_MMX = 8,
1891 PTA_PREFETCH_SSE = 16,
1892 PTA_3DNOW = 32,
1893 PTA_3DNOW_A = 64,
1894 PTA_64BIT = 128,
1895 PTA_SSSE3 = 256,
1896 PTA_CX16 = 512,
1897 PTA_POPCNT = 1024,
1898 PTA_ABM = 2048,
1899 PTA_SSE4A = 4096
1900 } flags;
1902 const processor_alias_table[] =
1904 {"i386", PROCESSOR_I386, 0},
1905 {"i486", PROCESSOR_I486, 0},
1906 {"i586", PROCESSOR_PENTIUM, 0},
1907 {"pentium", PROCESSOR_PENTIUM, 0},
1908 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1909 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1910 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1911 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1912 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1913 {"i686", PROCESSOR_PENTIUMPRO, 0},
1914 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1915 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1916 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1917 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1918 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1920 | PTA_MMX | PTA_PREFETCH_SSE},
1921 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1922 | PTA_MMX | PTA_PREFETCH_SSE},
1923 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1924 | PTA_MMX | PTA_PREFETCH_SSE},
1925 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1926 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1927 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1928 | PTA_64BIT | PTA_MMX
1929 | PTA_PREFETCH_SSE | PTA_CX16},
1930 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1931 | PTA_3DNOW_A},
1932 {"k6", PROCESSOR_K6, PTA_MMX},
1933 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1934 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1935 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1936 | PTA_3DNOW_A},
1937 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1938 | PTA_3DNOW | PTA_3DNOW_A},
1939 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1940 | PTA_3DNOW_A | PTA_SSE},
1941 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1942 | PTA_3DNOW_A | PTA_SSE},
1943 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1944 | PTA_3DNOW_A | PTA_SSE},
1945 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1946 | PTA_SSE | PTA_SSE2 },
1947 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1948 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1949 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1950 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1951 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1952 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1953 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1954 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1955 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1956 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1957 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1958 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1959 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1960 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1963 int const pta_size = ARRAY_SIZE (processor_alias_table);
1965 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1966 SUBTARGET_OVERRIDE_OPTIONS;
1967 #endif
1969 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1970 SUBSUBTARGET_OVERRIDE_OPTIONS;
1971 #endif
1973 /* -fPIC is the default for x86_64. */
1974 if (TARGET_MACHO && TARGET_64BIT)
1975 flag_pic = 2;
1977 /* Set the default values for switches whose default depends on TARGET_64BIT
1978 in case they weren't overwritten by command line options. */
1979 if (TARGET_64BIT)
1981 /* Mach-O doesn't support omitting the frame pointer for now. */
1982 if (flag_omit_frame_pointer == 2)
1983 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1984 if (flag_asynchronous_unwind_tables == 2)
1985 flag_asynchronous_unwind_tables = 1;
1986 if (flag_pcc_struct_return == 2)
1987 flag_pcc_struct_return = 0;
1989 else
1991 if (flag_omit_frame_pointer == 2)
1992 flag_omit_frame_pointer = 0;
1993 if (flag_asynchronous_unwind_tables == 2)
1994 flag_asynchronous_unwind_tables = 0;
1995 if (flag_pcc_struct_return == 2)
1996 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1999 /* Need to check -mtune=generic first. */
2000 if (ix86_tune_string)
2002 if (!strcmp (ix86_tune_string, "generic")
2003 || !strcmp (ix86_tune_string, "i686")
2004 /* As special support for cross compilers we read -mtune=native
2005 as -mtune=generic. With native compilers we won't see the
2006 -mtune=native, as it was changed by the driver. */
2007 || !strcmp (ix86_tune_string, "native"))
2009 if (TARGET_64BIT)
2010 ix86_tune_string = "generic64";
2011 else
2012 ix86_tune_string = "generic32";
2014 else if (!strncmp (ix86_tune_string, "generic", 7))
2015 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2017 else
2019 if (ix86_arch_string)
2020 ix86_tune_string = ix86_arch_string;
2021 if (!ix86_tune_string)
2023 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2024 ix86_tune_defaulted = 1;
2027 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2028 need to use a sensible tune option. */
2029 if (!strcmp (ix86_tune_string, "generic")
2030 || !strcmp (ix86_tune_string, "x86-64")
2031 || !strcmp (ix86_tune_string, "i686"))
2033 if (TARGET_64BIT)
2034 ix86_tune_string = "generic64";
2035 else
2036 ix86_tune_string = "generic32";
2039 if (ix86_stringop_string)
2041 if (!strcmp (ix86_stringop_string, "rep_byte"))
2042 stringop_alg = rep_prefix_1_byte;
2043 else if (!strcmp (ix86_stringop_string, "libcall"))
2044 stringop_alg = libcall;
2045 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2046 stringop_alg = rep_prefix_4_byte;
2047 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2048 stringop_alg = rep_prefix_8_byte;
2049 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2050 stringop_alg = loop_1_byte;
2051 else if (!strcmp (ix86_stringop_string, "loop"))
2052 stringop_alg = loop;
2053 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2054 stringop_alg = unrolled_loop;
2055 else
2056 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2058 if (!strcmp (ix86_tune_string, "x86-64"))
2059 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2060 "-mtune=generic instead as appropriate.");
2062 if (!ix86_arch_string)
2063 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2064 if (!strcmp (ix86_arch_string, "generic"))
2065 error ("generic CPU can be used only for -mtune= switch");
2066 if (!strncmp (ix86_arch_string, "generic", 7))
2067 error ("bad value (%s) for -march= switch", ix86_arch_string);
2069 if (ix86_cmodel_string != 0)
2071 if (!strcmp (ix86_cmodel_string, "small"))
2072 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2073 else if (!strcmp (ix86_cmodel_string, "medium"))
2074 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2075 else if (!strcmp (ix86_cmodel_string, "large"))
2076 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2077 else if (flag_pic)
2078 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2079 else if (!strcmp (ix86_cmodel_string, "32"))
2080 ix86_cmodel = CM_32;
2081 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2082 ix86_cmodel = CM_KERNEL;
2083 else
2084 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2086 else
2088 ix86_cmodel = CM_32;
2089 if (TARGET_64BIT)
2090 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2092 if (ix86_asm_string != 0)
2094 if (! TARGET_MACHO
2095 && !strcmp (ix86_asm_string, "intel"))
2096 ix86_asm_dialect = ASM_INTEL;
2097 else if (!strcmp (ix86_asm_string, "att"))
2098 ix86_asm_dialect = ASM_ATT;
2099 else
2100 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2102 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2103 error ("code model %qs not supported in the %s bit mode",
2104 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2105 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2106 sorry ("%i-bit mode not compiled in",
2107 (target_flags & MASK_64BIT) ? 64 : 32);
2109 for (i = 0; i < pta_size; i++)
2110 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2112 ix86_arch = processor_alias_table[i].processor;
2113 /* Default cpu tuning to the architecture. */
2114 ix86_tune = ix86_arch;
2115 if (processor_alias_table[i].flags & PTA_MMX
2116 && !(target_flags_explicit & MASK_MMX))
2117 target_flags |= MASK_MMX;
2118 if (processor_alias_table[i].flags & PTA_3DNOW
2119 && !(target_flags_explicit & MASK_3DNOW))
2120 target_flags |= MASK_3DNOW;
2121 if (processor_alias_table[i].flags & PTA_3DNOW_A
2122 && !(target_flags_explicit & MASK_3DNOW_A))
2123 target_flags |= MASK_3DNOW_A;
2124 if (processor_alias_table[i].flags & PTA_SSE
2125 && !(target_flags_explicit & MASK_SSE))
2126 target_flags |= MASK_SSE;
2127 if (processor_alias_table[i].flags & PTA_SSE2
2128 && !(target_flags_explicit & MASK_SSE2))
2129 target_flags |= MASK_SSE2;
2130 if (processor_alias_table[i].flags & PTA_SSE3
2131 && !(target_flags_explicit & MASK_SSE3))
2132 target_flags |= MASK_SSE3;
2133 if (processor_alias_table[i].flags & PTA_SSSE3
2134 && !(target_flags_explicit & MASK_SSSE3))
2135 target_flags |= MASK_SSSE3;
2136 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2137 x86_prefetch_sse = true;
2138 if (processor_alias_table[i].flags & PTA_CX16)
2139 x86_cmpxchg16b = true;
2140 if (processor_alias_table[i].flags & PTA_POPCNT
2141 && !(target_flags_explicit & MASK_POPCNT))
2142 target_flags |= MASK_POPCNT;
2143 if (processor_alias_table[i].flags & PTA_ABM
2144 && !(target_flags_explicit & MASK_ABM))
2145 target_flags |= MASK_ABM;
2146 if (processor_alias_table[i].flags & PTA_SSE4A
2147 && !(target_flags_explicit & MASK_SSE4A))
2148 target_flags |= MASK_SSE4A;
2149 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2150 error ("CPU you selected does not support x86-64 "
2151 "instruction set");
2152 break;
2155 if (i == pta_size)
2156 error ("bad value (%s) for -march= switch", ix86_arch_string);
2158 ix86_arch_mask = 1u << ix86_arch;
2159 for (i = 0; i < X86_ARCH_LAST; ++i)
2160 ix86_arch_features[i] &= ix86_arch_mask;
2162 for (i = 0; i < pta_size; i++)
2163 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2165 ix86_tune = processor_alias_table[i].processor;
2166 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2168 if (ix86_tune_defaulted)
2170 ix86_tune_string = "x86-64";
2171 for (i = 0; i < pta_size; i++)
2172 if (! strcmp (ix86_tune_string,
2173 processor_alias_table[i].name))
2174 break;
2175 ix86_tune = processor_alias_table[i].processor;
2177 else
2178 error ("CPU you selected does not support x86-64 "
2179 "instruction set");
2181 /* Intel CPUs have always interpreted SSE prefetch instructions as
2182 NOPs; so, we can enable SSE prefetch instructions even when
2183 -mtune (rather than -march) points us to a processor that has them.
2184 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2185 higher processors. */
2186 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2187 x86_prefetch_sse = true;
2188 break;
2190 if (i == pta_size)
2191 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2193 ix86_tune_mask = 1u << ix86_tune;
2194 for (i = 0; i < X86_TUNE_LAST; ++i)
2195 ix86_tune_features[i] &= ix86_tune_mask;
2197 if (optimize_size)
2198 ix86_cost = &size_cost;
2199 else
2200 ix86_cost = processor_target_table[ix86_tune].cost;
2201 target_flags |= processor_target_table[ix86_tune].target_enable;
2202 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2204 /* Arrange to set up i386_stack_locals for all functions. */
2205 init_machine_status = ix86_init_machine_status;
2207 /* Validate -mregparm= value. */
2208 if (ix86_regparm_string)
2210 i = atoi (ix86_regparm_string);
2211 if (i < 0 || i > REGPARM_MAX)
2212 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2213 else
2214 ix86_regparm = i;
2216 else
2217 if (TARGET_64BIT)
2218 ix86_regparm = REGPARM_MAX;
2220 /* If the user has provided any of the -malign-* options,
2221 warn and use that value only if -falign-* is not set.
2222 Remove this code in GCC 3.2 or later. */
2223 if (ix86_align_loops_string)
2225 warning (0, "-malign-loops is obsolete, use -falign-loops");
2226 if (align_loops == 0)
2228 i = atoi (ix86_align_loops_string);
2229 if (i < 0 || i > MAX_CODE_ALIGN)
2230 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2231 else
2232 align_loops = 1 << i;
2236 if (ix86_align_jumps_string)
2238 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2239 if (align_jumps == 0)
2241 i = atoi (ix86_align_jumps_string);
2242 if (i < 0 || i > MAX_CODE_ALIGN)
2243 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2244 else
2245 align_jumps = 1 << i;
2249 if (ix86_align_funcs_string)
2251 warning (0, "-malign-functions is obsolete, use -falign-functions");
2252 if (align_functions == 0)
2254 i = atoi (ix86_align_funcs_string);
2255 if (i < 0 || i > MAX_CODE_ALIGN)
2256 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2257 else
2258 align_functions = 1 << i;
2262 /* Default align_* from the processor table. */
2263 if (align_loops == 0)
2265 align_loops = processor_target_table[ix86_tune].align_loop;
2266 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2268 if (align_jumps == 0)
2270 align_jumps = processor_target_table[ix86_tune].align_jump;
2271 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2273 if (align_functions == 0)
2275 align_functions = processor_target_table[ix86_tune].align_func;
2278 /* Validate -mbranch-cost= value, or provide default. */
2279 ix86_branch_cost = ix86_cost->branch_cost;
2280 if (ix86_branch_cost_string)
2282 i = atoi (ix86_branch_cost_string);
2283 if (i < 0 || i > 5)
2284 error ("-mbranch-cost=%d is not between 0 and 5", i);
2285 else
2286 ix86_branch_cost = i;
2288 if (ix86_section_threshold_string)
2290 i = atoi (ix86_section_threshold_string);
2291 if (i < 0)
2292 error ("-mlarge-data-threshold=%d is negative", i);
2293 else
2294 ix86_section_threshold = i;
2297 if (ix86_tls_dialect_string)
2299 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2300 ix86_tls_dialect = TLS_DIALECT_GNU;
2301 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2302 ix86_tls_dialect = TLS_DIALECT_GNU2;
2303 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2304 ix86_tls_dialect = TLS_DIALECT_SUN;
2305 else
2306 error ("bad value (%s) for -mtls-dialect= switch",
2307 ix86_tls_dialect_string);
2310 /* Keep nonleaf frame pointers. */
2311 if (flag_omit_frame_pointer)
2312 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2313 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2314 flag_omit_frame_pointer = 1;
2316 /* If we're doing fast math, we don't care about comparison order
2317 wrt NaNs. This lets us use a shorter comparison sequence. */
2318 if (flag_finite_math_only)
2319 target_flags &= ~MASK_IEEE_FP;
2321 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2322 since the insns won't need emulation. */
2323 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2324 target_flags &= ~MASK_NO_FANCY_MATH_387;
2326 /* Likewise, if the target doesn't have a 387, or we've specified
2327 software floating point, don't use 387 inline intrinsics. */
2328 if (!TARGET_80387)
2329 target_flags |= MASK_NO_FANCY_MATH_387;
2331 /* Turn on SSE3 builtins for -mssse3. */
2332 if (TARGET_SSSE3)
2333 target_flags |= MASK_SSE3;
2335 /* Turn on SSE3 builtins for -msse4a. */
2336 if (TARGET_SSE4A)
2337 target_flags |= MASK_SSE3;
2339 /* Turn on SSE2 builtins for -msse3. */
2340 if (TARGET_SSE3)
2341 target_flags |= MASK_SSE2;
2343 /* Turn on SSE builtins for -msse2. */
2344 if (TARGET_SSE2)
2345 target_flags |= MASK_SSE;
2347 /* Turn on MMX builtins for -msse. */
2348 if (TARGET_SSE)
2350 target_flags |= MASK_MMX & ~target_flags_explicit;
2351 x86_prefetch_sse = true;
2354 /* Turn on MMX builtins for 3Dnow. */
2355 if (TARGET_3DNOW)
2356 target_flags |= MASK_MMX;
2358 /* Turn on POPCNT builtins for -mabm. */
2359 if (TARGET_ABM)
2360 target_flags |= MASK_POPCNT;
2362 if (TARGET_64BIT)
2364 if (TARGET_ALIGN_DOUBLE)
2365 error ("-malign-double makes no sense in the 64bit mode");
2366 if (TARGET_RTD)
2367 error ("-mrtd calling convention not supported in the 64bit mode");
2369 /* Enable by default the SSE and MMX builtins. Do allow the user to
2370 explicitly disable any of these. In particular, disabling SSE and
2371 MMX for kernel code is extremely useful. */
2372 target_flags
2373 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2374 & ~target_flags_explicit);
2376 else
2378 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2379 when the programmer takes care to keep the stack from being destroyed. */
2380 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2381 target_flags |= MASK_NO_RED_ZONE;
2384 /* Validate -mpreferred-stack-boundary= value, or provide default.
2385 The default of 128 bits is for Pentium III's SSE __m128. We can't
2386 change it because of optimize_size. Otherwise, we can't mix object
2387 files compiled with -Os and -On. */
2388 ix86_preferred_stack_boundary = 128;
2389 if (ix86_preferred_stack_boundary_string)
2391 i = atoi (ix86_preferred_stack_boundary_string);
2392 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2393 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2394 TARGET_64BIT ? 4 : 2);
2395 else
2396 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
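/* Illustrative note, not part of the original file: the option value is
   the log2 of the boundary in bytes, so -mpreferred-stack-boundary=4
   yields (1 << 4) * BITS_PER_UNIT == 128 bits, i.e. the 16-byte
   alignment used as the default above.  */
#if 0
enum { example_pref_stack_boundary_bits = (1 << 4) * 8 };  /* == 128 */
#endif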
2399 /* Accept -msseregparm only if at least SSE support is enabled. */
2400 if (TARGET_SSEREGPARM
2401 && ! TARGET_SSE)
2402 error ("-msseregparm used without SSE enabled");
2404 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2405 if (ix86_fpmath_string != 0)
2407 if (! strcmp (ix86_fpmath_string, "387"))
2408 ix86_fpmath = FPMATH_387;
2409 else if (! strcmp (ix86_fpmath_string, "sse"))
2411 if (!TARGET_SSE)
2413 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2414 ix86_fpmath = FPMATH_387;
2416 else
2417 ix86_fpmath = FPMATH_SSE;
2419 else if (! strcmp (ix86_fpmath_string, "387,sse")
2420 || ! strcmp (ix86_fpmath_string, "sse,387"))
2422 if (!TARGET_SSE)
2424 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2425 ix86_fpmath = FPMATH_387;
2427 else if (!TARGET_80387)
2429 warning (0, "387 instruction set disabled, using SSE arithmetics");
2430 ix86_fpmath = FPMATH_SSE;
2432 else
2433 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2435 else
2436 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2439 /* If the i387 is disabled, then do not return values in it. */
2440 if (!TARGET_80387)
2441 target_flags &= ~MASK_FLOAT_RETURNS;
2443 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2444 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2445 && !optimize_size)
2446 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2448 /* ??? Unwind info is not correct around the CFG unless either a frame
2449 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2450 unwind info generation to be aware of the CFG and propagating states
2451 around edges. */
2452 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2453 || flag_exceptions || flag_non_call_exceptions)
2454 && flag_omit_frame_pointer
2455 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2457 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2458 warning (0, "unwind tables currently require either a frame pointer "
2459 "or -maccumulate-outgoing-args for correctness");
2460 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2463 /* For sane SSE instruction set generation we need fcomi instruction.
2464 It is safe to enable all CMOVE instructions. */
2465 if (TARGET_SSE)
2466 TARGET_CMOVE = 1;
2468 /* ??? Any idea why this is unconditionally disabled for 64-bit? */
2469 if (TARGET_64BIT)
2470 TARGET_USE_SAHF = 0;
2472 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2474 char *p;
2475 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2476 p = strchr (internal_label_prefix, 'X');
2477 internal_label_prefix_len = p - internal_label_prefix;
2478 *p = '\0';
2481 /* When the scheduling description is not available, disable the scheduler pass
2482 so it won't slow down compilation and make x87 code slower. */
2483 if (!TARGET_SCHEDULE)
2484 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2486 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2487 set_param_value ("simultaneous-prefetches",
2488 ix86_cost->simultaneous_prefetches);
2489 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2490 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2493 /* Switch to the appropriate section for output of DECL.
2494 DECL is either a `VAR_DECL' node or a constant of some sort.
2495 RELOC indicates whether forming the initial value of DECL requires
2496 link-time relocations. */
2498 static section *
2499 x86_64_elf_select_section (tree decl, int reloc,
2500 unsigned HOST_WIDE_INT align)
2502 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2503 && ix86_in_large_data_p (decl))
2505 const char *sname = NULL;
2506 unsigned int flags = SECTION_WRITE;
2507 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2509 case SECCAT_DATA:
2510 sname = ".ldata";
2511 break;
2512 case SECCAT_DATA_REL:
2513 sname = ".ldata.rel";
2514 break;
2515 case SECCAT_DATA_REL_LOCAL:
2516 sname = ".ldata.rel.local";
2517 break;
2518 case SECCAT_DATA_REL_RO:
2519 sname = ".ldata.rel.ro";
2520 break;
2521 case SECCAT_DATA_REL_RO_LOCAL:
2522 sname = ".ldata.rel.ro.local";
2523 break;
2524 case SECCAT_BSS:
2525 sname = ".lbss";
2526 flags |= SECTION_BSS;
2527 break;
2528 case SECCAT_RODATA:
2529 case SECCAT_RODATA_MERGE_STR:
2530 case SECCAT_RODATA_MERGE_STR_INIT:
2531 case SECCAT_RODATA_MERGE_CONST:
2532 sname = ".lrodata";
2533 flags = 0;
2534 break;
2535 case SECCAT_SRODATA:
2536 case SECCAT_SDATA:
2537 case SECCAT_SBSS:
2538 gcc_unreachable ();
2539 case SECCAT_TEXT:
2540 case SECCAT_TDATA:
2541 case SECCAT_TBSS:
2542 /* We don't split these for the medium model. Place them into
2543 default sections and hope for the best. */
2544 break;
2546 if (sname)
2548 /* We might get called with string constants, but get_named_section
2549 doesn't like them as they are not DECLs. Also, we need to set
2550 flags in that case. */
2551 if (!DECL_P (decl))
2552 return get_section (sname, flags, NULL);
2553 return get_named_section (decl, sname, reloc);
2556 return default_elf_select_section (decl, reloc, align);
2559 /* Build up a unique section name, expressed as a
2560 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2561 RELOC indicates whether the initial value of EXP requires
2562 link-time relocations. */
2564 static void
2565 x86_64_elf_unique_section (tree decl, int reloc)
2567 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2568 && ix86_in_large_data_p (decl))
2570 const char *prefix = NULL;
2571 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2572 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2574 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2576 case SECCAT_DATA:
2577 case SECCAT_DATA_REL:
2578 case SECCAT_DATA_REL_LOCAL:
2579 case SECCAT_DATA_REL_RO:
2580 case SECCAT_DATA_REL_RO_LOCAL:
2581 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2582 break;
2583 case SECCAT_BSS:
2584 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2585 break;
2586 case SECCAT_RODATA:
2587 case SECCAT_RODATA_MERGE_STR:
2588 case SECCAT_RODATA_MERGE_STR_INIT:
2589 case SECCAT_RODATA_MERGE_CONST:
2590 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2591 break;
2592 case SECCAT_SRODATA:
2593 case SECCAT_SDATA:
2594 case SECCAT_SBSS:
2595 gcc_unreachable ();
2596 case SECCAT_TEXT:
2597 case SECCAT_TDATA:
2598 case SECCAT_TBSS:
2599 /* We don't split these for the medium model. Place them into
2600 default sections and hope for the best. */
2601 break;
2603 if (prefix)
2605 const char *name;
2606 size_t nlen, plen;
2607 char *string;
2608 plen = strlen (prefix);
2610 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2611 name = targetm.strip_name_encoding (name);
2612 nlen = strlen (name);
2614 string = alloca (nlen + plen + 1);
2615 memcpy (string, prefix, plen);
2616 memcpy (string + plen, name, nlen + 1);
2618 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2619 return;
2622 default_unique_section (decl, reloc);
2625 #ifdef COMMON_ASM_OP
2626 /* This says how to output assembler code to declare an
2627 uninitialized external linkage data object.
2629 For medium model x86-64 we need to use the .largecomm directive for
2630 large objects. */
2631 void
2632 x86_elf_aligned_common (FILE *file,
2633 const char *name, unsigned HOST_WIDE_INT size,
2634 int align)
2636 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2637 && size > (unsigned int)ix86_section_threshold)
2638 fprintf (file, ".largecomm\t");
2639 else
2640 fprintf (file, "%s", COMMON_ASM_OP);
2641 assemble_name (file, name);
2642 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2643 size, align / BITS_PER_UNIT);
2645 #endif
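/* Illustrative example, not part of the original file: a tentative
   definition like the hypothetical one below, compiled with
   -mcmodel=medium and a size above ix86_section_threshold (64 KiB by
   default), would be announced with the .largecomm directive emitted
   above instead of plain .comm.  */
#if 0
char example_big[1 << 20];  /* ~1 MiB, well above the 64 KiB threshold */
#endif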
2646 /* Utility function for targets to use in implementing
2647 ASM_OUTPUT_ALIGNED_BSS. */
2649 void
2650 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2651 const char *name, unsigned HOST_WIDE_INT size,
2652 int align)
2654 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2655 && size > (unsigned int)ix86_section_threshold)
2656 switch_to_section (get_named_section (decl, ".lbss", 0));
2657 else
2658 switch_to_section (bss_section);
2659 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2660 #ifdef ASM_DECLARE_OBJECT_NAME
2661 last_assemble_variable_decl = decl;
2662 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2663 #else
2664 /* Standard thing is just output label for the object. */
2665 ASM_OUTPUT_LABEL (file, name);
2666 #endif /* ASM_DECLARE_OBJECT_NAME */
2667 ASM_OUTPUT_SKIP (file, size ? size : 1);
2670 void
2671 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2673 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2674 make the problem with not enough registers even worse. */
2675 #ifdef INSN_SCHEDULING
2676 if (level > 1)
2677 flag_schedule_insns = 0;
2678 #endif
2680 if (TARGET_MACHO)
2681 /* The Darwin libraries never set errno, so we might as well
2682 avoid calling them when that's the only reason we would. */
2683 flag_errno_math = 0;
2685 /* The default values of these switches depend on TARGET_64BIT,
2686 which is not known at this moment. Mark these values with 2 and
2687 let the user override them. In case there is no command line option
2688 specifying them, we will set the defaults in override_options. */
2689 if (optimize >= 1)
2690 flag_omit_frame_pointer = 2;
2691 flag_pcc_struct_return = 2;
2692 flag_asynchronous_unwind_tables = 2;
2693 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2694 SUBTARGET_OPTIMIZATION_OPTIONS;
2695 #endif
2698 /* Table of valid machine attributes. */
2699 const struct attribute_spec ix86_attribute_table[] =
2701 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2702 /* Stdcall attribute says callee is responsible for popping arguments
2703 if they are not variable. */
2704 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2705 /* Fastcall attribute says callee is responsible for popping arguments
2706 if they are not variable. */
2707 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2708 /* Cdecl attribute says the callee is a normal C declaration */
2709 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2710 /* Regparm attribute specifies how many integer arguments are to be
2711 passed in registers. */
2712 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2713 /* Sseregparm attribute says we are using x86_64 calling conventions
2714 for FP arguments. */
2715 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2716 /* force_align_arg_pointer says this function realigns the stack at entry. */
2717 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2718 false, true, true, ix86_handle_cconv_attribute },
2719 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2720 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2721 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2722 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2723 #endif
2724 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2725 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2726 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2727 SUBTARGET_ATTRIBUTE_TABLE,
2728 #endif
2729 { NULL, 0, 0, false, false, false, NULL }
2732 /* Decide whether we can make a sibling call to a function. DECL is the
2733 declaration of the function being targeted by the call and EXP is the
2734 CALL_EXPR representing the call. */
2736 static bool
2737 ix86_function_ok_for_sibcall (tree decl, tree exp)
2739 tree func;
2740 rtx a, b;
2742 /* If we are generating position-independent code, we cannot sibcall
2743 optimize any indirect call, or a direct call to a global function,
2744 as the PLT requires %ebx be live. */
2745 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2746 return false;
2748 if (decl)
2749 func = decl;
2750 else
2752 func = TREE_TYPE (CALL_EXPR_FN (exp));
2753 if (POINTER_TYPE_P (func))
2754 func = TREE_TYPE (func);
2757 /* Check that the return value locations are the same. Like
2758 if we are returning floats on the 80387 register stack, we cannot
2759 make a sibcall from a function that doesn't return a float to a
2760 function that does or, conversely, from a function that does return
2761 a float to a function that doesn't; the necessary stack adjustment
2762 would not be executed. This is also the place we notice
2763 differences in the return value ABI. Note that it is ok for one
2764 of the functions to have void return type as long as the return
2765 value of the other is passed in a register. */
2766 a = ix86_function_value (TREE_TYPE (exp), func, false);
2767 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2768 cfun->decl, false);
2769 if (STACK_REG_P (a) || STACK_REG_P (b))
2771 if (!rtx_equal_p (a, b))
2772 return false;
2774 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2776 else if (!rtx_equal_p (a, b))
2777 return false;
2779 /* If this call is indirect, we'll need to be able to use a call-clobbered
2780 register for the address of the target function. Make sure that all
2781 such registers are not used for passing parameters. */
2782 if (!decl && !TARGET_64BIT)
2784 tree type;
2786 /* We're looking at the CALL_EXPR, we need the type of the function. */
2787 type = CALL_EXPR_FN (exp); /* pointer expression */
2788 type = TREE_TYPE (type); /* pointer type */
2789 type = TREE_TYPE (type); /* function type */
2791 if (ix86_function_regparm (type, NULL) >= 3)
2793 /* ??? Need to count the actual number of registers to be used,
2794 not the possible number of registers. Fix later. */
2795 return false;
2799 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2800 /* Dllimport'd functions are also called indirectly. */
2801 if (decl && DECL_DLLIMPORT_P (decl)
2802 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2803 return false;
2804 #endif
2806 /* If we force-aligned the stack, then sibcalling would unalign the
2807 stack, which may break the called function. */
2808 if (cfun->machine->force_align_arg_pointer)
2809 return false;
2811 /* Otherwise okay. That also includes certain types of indirect calls. */
2812 return true;
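/* Illustrative example, not part of the original file: the STACK_REG_P
   check above means that, on ia32 with x87 return values, a tail call
   like the hypothetical one below is refused, because the callee's
   float result would be left on the x87 stack with no stack adjustment
   executed afterwards.  */
#if 0
extern float example_float_callee (void);
static void
example_caller (void)
{
  example_float_callee ();  /* tail position, but not sibcall-able */
}
#endif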
2815 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2816 calling convention attributes;
2817 arguments as in struct attribute_spec.handler. */
2819 static tree
2820 ix86_handle_cconv_attribute (tree *node, tree name,
2821 tree args,
2822 int flags ATTRIBUTE_UNUSED,
2823 bool *no_add_attrs)
2825 if (TREE_CODE (*node) != FUNCTION_TYPE
2826 && TREE_CODE (*node) != METHOD_TYPE
2827 && TREE_CODE (*node) != FIELD_DECL
2828 && TREE_CODE (*node) != TYPE_DECL)
2830 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2831 IDENTIFIER_POINTER (name));
2832 *no_add_attrs = true;
2833 return NULL_TREE;
2836 /* Can combine regparm with all attributes but fastcall. */
2837 if (is_attribute_p ("regparm", name))
2839 tree cst;
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2843 error ("fastcall and regparm attributes are not compatible");
2846 cst = TREE_VALUE (args);
2847 if (TREE_CODE (cst) != INTEGER_CST)
2849 warning (OPT_Wattributes,
2850 "%qs attribute requires an integer constant argument",
2851 IDENTIFIER_POINTER (name));
2852 *no_add_attrs = true;
2854 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2856 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2857 IDENTIFIER_POINTER (name), REGPARM_MAX);
2858 *no_add_attrs = true;
2861 if (!TARGET_64BIT
2862 && lookup_attribute (ix86_force_align_arg_pointer_string,
2863 TYPE_ATTRIBUTES (*node))
2864 && compare_tree_int (cst, REGPARM_MAX-1))
2866 error ("%s functions limited to %d register parameters",
2867 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2870 return NULL_TREE;
2873 if (TARGET_64BIT)
2875 warning (OPT_Wattributes, "%qs attribute ignored",
2876 IDENTIFIER_POINTER (name));
2877 *no_add_attrs = true;
2878 return NULL_TREE;
2881 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2882 if (is_attribute_p ("fastcall", name))
2884 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2886 error ("fastcall and cdecl attributes are not compatible");
2888 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2890 error ("fastcall and stdcall attributes are not compatible");
2892 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2894 error ("fastcall and regparm attributes are not compatible");
2898 /* Can combine stdcall with fastcall (redundant), regparm and
2899 sseregparm. */
2900 else if (is_attribute_p ("stdcall", name))
2902 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2904 error ("stdcall and cdecl attributes are not compatible");
2906 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2908 error ("stdcall and fastcall attributes are not compatible");
2912 /* Can combine cdecl with regparm and sseregparm. */
2913 else if (is_attribute_p ("cdecl", name))
2915 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2917 error ("stdcall and cdecl attributes are not compatible");
2919 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2921 error ("fastcall and cdecl attributes are not compatible");
2925 /* Can combine sseregparm with all attributes. */
2927 return NULL_TREE;
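/* Illustrative example, not part of the original file: a declaration
   combining two of the mutually exclusive conventions, such as the
   hypothetical one below, is rejected by the handler above with
   "fastcall and cdecl attributes are not compatible".  */
#if 0
void __attribute__ ((fastcall, cdecl)) example_bad_combination (void);
#endif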
2930 /* Return 0 if the attributes for two types are incompatible, 1 if they
2931 are compatible, and 2 if they are nearly compatible (which causes a
2932 warning to be generated). */
2934 static int
2935 ix86_comp_type_attributes (tree type1, tree type2)
2937 /* Check for mismatch of non-default calling convention. */
2938 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2940 if (TREE_CODE (type1) != FUNCTION_TYPE)
2941 return 1;
2943 /* Check for mismatched fastcall/regparm types. */
2944 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2945 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2946 || (ix86_function_regparm (type1, NULL)
2947 != ix86_function_regparm (type2, NULL)))
2948 return 0;
2950 /* Check for mismatched sseregparm types. */
2951 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2952 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2953 return 0;
2955 /* Check for mismatched return types (cdecl vs stdcall). */
2956 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2957 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2958 return 0;
2960 return 1;
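/* Illustrative example, not part of the original file: because the
   checks above return 0 for mismatched calling conventions, code like
   the hypothetical assignment below draws an incompatible-pointer-type
   diagnostic.  */
#if 0
extern void __attribute__ ((stdcall)) example_stdcall_target (int);
static void (*example_plain_ptr) (int) = example_stdcall_target;
#endif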
2963 /* Return the regparm value for a function with the indicated TYPE and DECL.
2964 DECL may be NULL when calling function indirectly
2965 or considering a libcall. */
2967 static int
2968 ix86_function_regparm (tree type, tree decl)
2970 tree attr;
2971 int regparm = ix86_regparm;
2972 bool user_convention = false;
2974 if (!TARGET_64BIT)
2976 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2977 if (attr)
2979 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2980 user_convention = true;
2983 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2985 regparm = 2;
2986 user_convention = true;
2989 /* Use register calling convention for local functions when possible. */
2990 if (!TARGET_64BIT && !user_convention && decl
2991 && flag_unit_at_a_time && !profile_flag)
2993 struct cgraph_local_info *i = cgraph_local_info (decl);
2994 if (i && i->local)
2996 int local_regparm, globals = 0, regno;
2998 /* Make sure no regparm register is taken by a global register
2999 variable. */
3000 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3001 if (global_regs[local_regparm])
3002 break;
3003 /* We can't use regparm(3) for nested functions, as these use the
3004 static chain pointer in the third argument. */
3005 if (local_regparm == 3
3006 && decl_function_context (decl)
3007 && !DECL_NO_STATIC_CHAIN (decl))
3008 local_regparm = 2;
3009 /* If the function realigns its stack pointer, the
3010 prologue will clobber %ecx. If we've already
3011 generated code for the callee, the callee
3012 DECL_STRUCT_FUNCTION is gone, so we fall back to
3013 scanning the attributes for the self-realigning
3014 property. */
3015 if ((DECL_STRUCT_FUNCTION (decl)
3016 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3017 || (!DECL_STRUCT_FUNCTION (decl)
3018 && lookup_attribute (ix86_force_align_arg_pointer_string,
3019 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3020 local_regparm = 2;
3021 /* Each global register variable increases register pressure,
3022 so the more global reg vars there are, the less benefit the regparm
3023 optimization provides, unless requested by the user explicitly. */
3024 for (regno = 0; regno < 6; regno++)
3025 if (global_regs[regno])
3026 globals++;
3027 local_regparm
3028 = globals < local_regparm ? local_regparm - globals : 0;
3030 if (local_regparm > regparm)
3031 regparm = local_regparm;
3035 return regparm;
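/* Illustrative example, not part of the original file: with the
   hypothetical declaration below, this function reports 3, and on a
   32-bit target the three arguments travel in %eax, %edx and %ecx
   instead of on the stack.  */
#if 0
int __attribute__ ((regparm (3))) example_regparm_fn (int a, int b, int c);
#endif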
3038 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3039 DFmode (2) arguments in SSE registers for a function with the
3040 indicated TYPE and DECL. DECL may be NULL when calling function
3041 indirectly or considering a libcall. Otherwise return 0. */
3043 static int
3044 ix86_function_sseregparm (tree type, tree decl)
3046 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3047 by the sseregparm attribute. */
3048 if (TARGET_SSEREGPARM
3049 || (type
3050 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3052 if (!TARGET_SSE)
3054 if (decl)
3055 error ("Calling %qD with attribute sseregparm without "
3056 "SSE/SSE2 enabled", decl);
3057 else
3058 error ("Calling %qT with attribute sseregparm without "
3059 "SSE/SSE2 enabled", type);
3060 return 0;
3063 return 2;
3066 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3067 (and DFmode for SSE2) arguments in SSE registers,
3068 even for 32-bit targets. */
3069 if (!TARGET_64BIT && decl
3070 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3072 struct cgraph_local_info *i = cgraph_local_info (decl);
3073 if (i && i->local)
3074 return TARGET_SSE2 ? 2 : 1;
3077 return 0;
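/* As a usage sketch, a hypothetical declaration such as

     double dot (double x, double y) __attribute__ ((sseregparm));

   makes this function return 2 when SSE is enabled, so SFmode and
   DFmode arguments of DOT are passed in SSE registers instead of on
   the stack.  */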
3080 /* Return true if EAX is live at the start of the function. Used by
3081 ix86_expand_prologue to determine if we need special help before
3082 calling allocate_stack_worker. */
3084 static bool
3085 ix86_eax_live_at_start_p (void)
3087 /* Cheat. Don't bother working forward from ix86_function_regparm
3088 to the function type to whether an actual argument is located in
3089 eax. Instead just look at cfg info, which is still close enough
3090 to correct at this point. This gives false positives for broken
3091 functions that might use uninitialized data that happens to be
3092 allocated in eax, but who cares? */
3093 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3096 /* Value is the number of bytes of arguments automatically
3097 popped when returning from a subroutine call.
3098 FUNDECL is the declaration node of the function (as a tree),
3099 FUNTYPE is the data type of the function (as a tree),
3100 or for a library call it is an identifier node for the subroutine name.
3101 SIZE is the number of bytes of arguments passed on the stack.
3103 On the 80386, the RTD insn may be used to pop them if the number
3104 of args is fixed, but if the number is variable then the caller
3105 must pop them all. RTD can't be used for library calls now
3106 because the library is compiled with the Unix compiler.
3107 Use of RTD is a selectable option, since it is incompatible with
3108 standard Unix calling sequences. If the option is not selected,
3109 the caller must always pop the args.
3111 The attribute stdcall is equivalent to RTD on a per module basis. */
3114 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3116 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3118 /* Cdecl functions override -mrtd, and never pop the stack. */
3119 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3121 /* Stdcall and fastcall functions will pop the stack if they do not
3122 take variable args. */
3123 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3124 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3125 rtd = 1;
3127 if (rtd
3128 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3129 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3130 == void_type_node)))
3131 return size;
3134 /* Lose any fake structure return argument if it is passed on the stack. */
3135 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3136 && !TARGET_64BIT
3137 && !KEEP_AGGREGATE_RETURN_POINTER)
3139 int nregs = ix86_function_regparm (funtype, fundecl);
3141 if (!nregs)
3142 return GET_MODE_SIZE (Pmode);
3145 return 0;
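/* As a concrete illustration, a hypothetical prototype

     int __attribute__ ((stdcall)) f (int a, int b);

   has a fixed argument list, so the code above returns SIZE (8 here)
   and the callee pops its own arguments with `ret 8'; a cdecl or
   variadic function returns 0 and leaves the cleanup to the caller.  */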
3148 /* Argument support functions. */
3150 /* Return true when register may be used to pass function parameters. */
3151 bool
3152 ix86_function_arg_regno_p (int regno)
3154 int i;
3155 if (!TARGET_64BIT)
3157 if (TARGET_MACHO)
3158 return (regno < REGPARM_MAX
3159 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3160 else
3161 return (regno < REGPARM_MAX
3162 || (TARGET_MMX && MMX_REGNO_P (regno)
3163 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3164 || (TARGET_SSE && SSE_REGNO_P (regno)
3165 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3168 if (TARGET_MACHO)
3170 if (SSE_REGNO_P (regno) && TARGET_SSE)
3171 return true;
3173 else
3175 if (TARGET_SSE && SSE_REGNO_P (regno)
3176 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3177 return true;
3179 /* RAX is used as hidden argument to va_arg functions. */
3180 if (!regno)
3181 return true;
3182 for (i = 0; i < REGPARM_MAX; i++)
3183 if (regno == x86_64_int_parameter_registers[i])
3184 return true;
3185 return false;
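/* For illustration: in 64-bit mode this accepts %rdi, %rsi, %rdx,
   %rcx, %r8 and %r9 (the integer parameter registers), %rax (which
   carries the hidden SSE-register count for varargs calls) and, when
   SSE is enabled, %xmm0-%xmm7.  */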
3190 /* Return true if we do not know how to pass TYPE solely in registers. */
3190 static bool
3191 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3193 if (must_pass_in_stack_var_size_or_pad (mode, type))
3194 return true;
3196 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3197 The layout_type routine is crafty and tries to trick us into passing
3198 currently unsupported vector types on the stack by using TImode. */
3199 return (!TARGET_64BIT && mode == TImode
3200 && type && TREE_CODE (type) != VECTOR_TYPE);
3203 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3204 for a call to a function whose data type is FNTYPE.
3205 For a library call, FNTYPE is 0. */
3207 void
3208 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3209 tree fntype, /* tree ptr for function decl */
3210 rtx libname, /* SYMBOL_REF of library name or 0 */
3211 tree fndecl)
3213 static CUMULATIVE_ARGS zero_cum;
3214 tree param, next_param;
3216 if (TARGET_DEBUG_ARG)
3218 fprintf (stderr, "\ninit_cumulative_args (");
3219 if (fntype)
3220 fprintf (stderr, "fntype code = %s, ret code = %s",
3221 tree_code_name[(int) TREE_CODE (fntype)],
3222 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3223 else
3224 fprintf (stderr, "no fntype");
3226 if (libname)
3227 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3230 *cum = zero_cum;
3232 /* Set up the number of registers to use for passing arguments. */
3233 cum->nregs = ix86_regparm;
3234 if (TARGET_SSE)
3235 cum->sse_nregs = SSE_REGPARM_MAX;
3236 if (TARGET_MMX)
3237 cum->mmx_nregs = MMX_REGPARM_MAX;
3238 cum->warn_sse = true;
3239 cum->warn_mmx = true;
3240 cum->maybe_vaarg = false;
3242 /* Use ecx and edx registers if function has fastcall attribute,
3243 else look for regparm information. */
3244 if (fntype && !TARGET_64BIT)
3246 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3248 cum->nregs = 2;
3249 cum->fastcall = 1;
3251 else
3252 cum->nregs = ix86_function_regparm (fntype, fndecl);
3255 /* Set up the number of SSE registers used for passing SFmode
3256 and DFmode arguments. Warn for mismatching ABI. */
3257 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3259 /* Determine if this function has variable arguments. This is
3260 indicated by the last argument being 'void_type_node' if there
3261 are no variable arguments. If there are variable arguments, then
3262 we won't pass anything in registers in 32-bit mode. */
3264 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3266 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3267 param != 0; param = next_param)
3269 next_param = TREE_CHAIN (param);
3270 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3272 if (!TARGET_64BIT)
3274 cum->nregs = 0;
3275 cum->sse_nregs = 0;
3276 cum->mmx_nregs = 0;
3277 cum->warn_sse = 0;
3278 cum->warn_mmx = 0;
3279 cum->fastcall = 0;
3280 cum->float_in_sse = 0;
3282 cum->maybe_vaarg = true;
3286 if ((!fntype && !libname)
3287 || (fntype && !TYPE_ARG_TYPES (fntype)))
3288 cum->maybe_vaarg = true;
3290 if (TARGET_DEBUG_ARG)
3291 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3293 return;
3296 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3297 But in the case of vector types, it is some vector mode.
3299 When we have only some of our vector isa extensions enabled, then there
3300 are some modes for which vector_mode_supported_p is false. For these
3301 modes, the generic vector support in gcc will choose some non-vector mode
3302 in order to implement the type. By computing the natural mode, we'll
3303 select the proper ABI location for the operand and not depend on whatever
3304 the middle-end decides to do with these vector types. */
3306 static enum machine_mode
3307 type_natural_mode (tree type)
3309 enum machine_mode mode = TYPE_MODE (type);
3311 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3313 HOST_WIDE_INT size = int_size_in_bytes (type);
3314 if ((size == 8 || size == 16)
3315 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3316 && TYPE_VECTOR_SUBPARTS (type) > 1)
3318 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3320 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3321 mode = MIN_MODE_VECTOR_FLOAT;
3322 else
3323 mode = MIN_MODE_VECTOR_INT;
3325 /* Get the mode which has this inner mode and number of units. */
3326 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3327 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3328 && GET_MODE_INNER (mode) == innermode)
3329 return mode;
3331 gcc_unreachable ();
3335 return mode;
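/* For example, with MMX disabled a type declared as

     typedef short v4hi __attribute__ ((vector_size (8)));

   may be given some non-vector mode (such as DImode) by the
   middle-end, yet the loop above still yields V4HImode, so the
   argument is classified by its natural vector mode.  */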
3338 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3339 this may not agree with the mode that the type system has chosen for the
3340 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3341 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3343 static rtx
3344 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3345 unsigned int regno)
3347 rtx tmp;
3349 if (orig_mode != BLKmode)
3350 tmp = gen_rtx_REG (orig_mode, regno);
3351 else
3353 tmp = gen_rtx_REG (mode, regno);
3354 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3355 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3358 return tmp;
3361 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
3362 of this code is to classify each eightbyte of an incoming argument by register
3363 class and assign registers accordingly. */
3365 /* Return the union class of CLASS1 and CLASS2.
3366 See the x86-64 PS ABI for details. */
3368 static enum x86_64_reg_class
3369 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3371 /* Rule #1: If both classes are equal, this is the resulting class. */
3372 if (class1 == class2)
3373 return class1;
3375 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3376 the other class. */
3377 if (class1 == X86_64_NO_CLASS)
3378 return class2;
3379 if (class2 == X86_64_NO_CLASS)
3380 return class1;
3382 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3383 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3384 return X86_64_MEMORY_CLASS;
3386 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3387 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3388 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3389 return X86_64_INTEGERSI_CLASS;
3390 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3391 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3392 return X86_64_INTEGER_CLASS;
3394 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3395 MEMORY is used. */
3396 if (class1 == X86_64_X87_CLASS
3397 || class1 == X86_64_X87UP_CLASS
3398 || class1 == X86_64_COMPLEX_X87_CLASS
3399 || class2 == X86_64_X87_CLASS
3400 || class2 == X86_64_X87UP_CLASS
3401 || class2 == X86_64_COMPLEX_X87_CLASS)
3402 return X86_64_MEMORY_CLASS;
3404 /* Rule #6: Otherwise class SSE is used. */
3405 return X86_64_SSE_CLASS;
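/* For example, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS
   yields X86_64_INTEGERSI_CLASS (rule #4), while merging
   X86_64_SSE_CLASS with X86_64_X87_CLASS yields X86_64_MEMORY_CLASS
   (rule #5), forcing the whole argument onto the stack.  */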
3408 /* Classify the argument of type TYPE and mode MODE.
3409 CLASSES will be filled by the register class used to pass each word
3410 of the operand. The number of words is returned. In case the parameter
3411 should be passed in memory, 0 is returned. As a special case for zero
3412 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3414 BIT_OFFSET is used internally for handling records and specifies the
3415 offset in bits modulo 256 to avoid overflow cases.
3417 See the x86-64 PS ABI for details.
3420 static int
3421 classify_argument (enum machine_mode mode, tree type,
3422 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3424 HOST_WIDE_INT bytes =
3425 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3426 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3428 /* Variable sized entities are always passed/returned in memory. */
3429 if (bytes < 0)
3430 return 0;
3432 if (mode != VOIDmode
3433 && targetm.calls.must_pass_in_stack (mode, type))
3434 return 0;
3436 if (type && AGGREGATE_TYPE_P (type))
3438 int i;
3439 tree field;
3440 enum x86_64_reg_class subclasses[MAX_CLASSES];
3442 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3443 if (bytes > 16)
3444 return 0;
3446 for (i = 0; i < words; i++)
3447 classes[i] = X86_64_NO_CLASS;
3449 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3450 signal the memory class, so handle it as a special case. */
3451 if (!words)
3453 classes[0] = X86_64_NO_CLASS;
3454 return 1;
3457 /* Classify each field of record and merge classes. */
3458 switch (TREE_CODE (type))
3460 case RECORD_TYPE:
3461 /* And now merge the fields of structure. */
3462 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3464 if (TREE_CODE (field) == FIELD_DECL)
3466 int num;
3468 if (TREE_TYPE (field) == error_mark_node)
3469 continue;
3471 /* Bitfields are always classified as integer. Handle them
3472 early, since later code would consider them to be
3473 misaligned integers. */
3474 if (DECL_BIT_FIELD (field))
3476 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3477 i < ((int_bit_position (field) + (bit_offset % 64))
3478 + tree_low_cst (DECL_SIZE (field), 0)
3479 + 63) / 8 / 8; i++)
3480 classes[i] =
3481 merge_classes (X86_64_INTEGER_CLASS,
3482 classes[i]);
3484 else
3486 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3487 TREE_TYPE (field), subclasses,
3488 (int_bit_position (field)
3489 + bit_offset) % 256);
3490 if (!num)
3491 return 0;
3492 for (i = 0; i < num; i++)
3494 int pos =
3495 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3496 classes[i + pos] =
3497 merge_classes (subclasses[i], classes[i + pos]);
3502 break;
3504 case ARRAY_TYPE:
3505 /* Arrays are handled as small records. */
3507 int num;
3508 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3509 TREE_TYPE (type), subclasses, bit_offset);
3510 if (!num)
3511 return 0;
3513 /* The partial classes are now full classes. */
3514 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3515 subclasses[0] = X86_64_SSE_CLASS;
3516 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3517 subclasses[0] = X86_64_INTEGER_CLASS;
3519 for (i = 0; i < words; i++)
3520 classes[i] = subclasses[i % num];
3522 break;
3524 case UNION_TYPE:
3525 case QUAL_UNION_TYPE:
3526 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3528 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3530 if (TREE_CODE (field) == FIELD_DECL)
3532 int num;
3534 if (TREE_TYPE (field) == error_mark_node)
3535 continue;
3537 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3538 TREE_TYPE (field), subclasses,
3539 bit_offset);
3540 if (!num)
3541 return 0;
3542 for (i = 0; i < num; i++)
3543 classes[i] = merge_classes (subclasses[i], classes[i]);
3546 break;
3548 default:
3549 gcc_unreachable ();
3552 /* Final merger cleanup. */
3553 for (i = 0; i < words; i++)
3555 /* If one class is MEMORY, everything should be passed in
3556 memory. */
3557 if (classes[i] == X86_64_MEMORY_CLASS)
3558 return 0;
3560 /* The X86_64_SSEUP_CLASS should always be preceded by
3561 X86_64_SSE_CLASS. */
3562 if (classes[i] == X86_64_SSEUP_CLASS
3563 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3564 classes[i] = X86_64_SSE_CLASS;
3566 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3567 if (classes[i] == X86_64_X87UP_CLASS
3568 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3569 classes[i] = X86_64_SSE_CLASS;
3571 return words;
3574 /* Compute the alignment needed. We align all types to natural boundaries, with
3575 the exception of XFmode, which is aligned to 64 bits. */
3576 if (mode != VOIDmode && mode != BLKmode)
3578 int mode_alignment = GET_MODE_BITSIZE (mode);
3580 if (mode == XFmode)
3581 mode_alignment = 128;
3582 else if (mode == XCmode)
3583 mode_alignment = 256;
3584 if (COMPLEX_MODE_P (mode))
3585 mode_alignment /= 2;
3586 /* Misaligned fields are always returned in memory. */
3587 if (bit_offset % mode_alignment)
3588 return 0;
3591 /* for V1xx modes, just use the base mode */
3592 if (VECTOR_MODE_P (mode)
3593 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3594 mode = GET_MODE_INNER (mode);
3596 /* Classification of atomic types. */
3597 switch (mode)
3599 case SDmode:
3600 case DDmode:
3601 classes[0] = X86_64_SSE_CLASS;
3602 return 1;
3603 case TDmode:
3604 classes[0] = X86_64_SSE_CLASS;
3605 classes[1] = X86_64_SSEUP_CLASS;
3606 return 2;
3607 case DImode:
3608 case SImode:
3609 case HImode:
3610 case QImode:
3611 case CSImode:
3612 case CHImode:
3613 case CQImode:
3614 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3615 classes[0] = X86_64_INTEGERSI_CLASS;
3616 else
3617 classes[0] = X86_64_INTEGER_CLASS;
3618 return 1;
3619 case CDImode:
3620 case TImode:
3621 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3622 return 2;
3623 case CTImode:
3624 return 0;
3625 case SFmode:
3626 if (!(bit_offset % 64))
3627 classes[0] = X86_64_SSESF_CLASS;
3628 else
3629 classes[0] = X86_64_SSE_CLASS;
3630 return 1;
3631 case DFmode:
3632 classes[0] = X86_64_SSEDF_CLASS;
3633 return 1;
3634 case XFmode:
3635 classes[0] = X86_64_X87_CLASS;
3636 classes[1] = X86_64_X87UP_CLASS;
3637 return 2;
3638 case TFmode:
3639 classes[0] = X86_64_SSE_CLASS;
3640 classes[1] = X86_64_SSEUP_CLASS;
3641 return 2;
3642 case SCmode:
3643 classes[0] = X86_64_SSE_CLASS;
3644 return 1;
3645 case DCmode:
3646 classes[0] = X86_64_SSEDF_CLASS;
3647 classes[1] = X86_64_SSEDF_CLASS;
3648 return 2;
3649 case XCmode:
3650 classes[0] = X86_64_COMPLEX_X87_CLASS;
3651 return 1;
3652 case TCmode:
3653 /* This mode is larger than 16 bytes. */
3654 return 0;
3655 case V4SFmode:
3656 case V4SImode:
3657 case V16QImode:
3658 case V8HImode:
3659 case V2DFmode:
3660 case V2DImode:
3661 classes[0] = X86_64_SSE_CLASS;
3662 classes[1] = X86_64_SSEUP_CLASS;
3663 return 2;
3664 case V2SFmode:
3665 case V2SImode:
3666 case V4HImode:
3667 case V8QImode:
3668 classes[0] = X86_64_SSE_CLASS;
3669 return 1;
3670 case BLKmode:
3671 case VOIDmode:
3672 return 0;
3673 default:
3674 gcc_assert (VECTOR_MODE_P (mode));
3676 if (bytes > 16)
3677 return 0;
3679 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3681 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3682 classes[0] = X86_64_INTEGERSI_CLASS;
3683 else
3684 classes[0] = X86_64_INTEGER_CLASS;
3685 classes[1] = X86_64_INTEGER_CLASS;
3686 return 1 + (bytes > 8);
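/* As a worked example, a structure such as

     struct s { double d; int i; };

   occupies two eightbytes: the first is classified
   X86_64_SSEDF_CLASS from the double and the second
   X86_64_INTEGER_CLASS from the int, so the value is passed partly in
   an SSE register and partly in a general-purpose register.  */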
3690 /* Examine the argument and set the number of registers required in each
3691 class. Return 0 iff the parameter should be passed in memory. */
3692 static int
3693 examine_argument (enum machine_mode mode, tree type, int in_return,
3694 int *int_nregs, int *sse_nregs)
3696 enum x86_64_reg_class class[MAX_CLASSES];
3697 int n = classify_argument (mode, type, class, 0);
3699 *int_nregs = 0;
3700 *sse_nregs = 0;
3701 if (!n)
3702 return 0;
3703 for (n--; n >= 0; n--)
3704 switch (class[n])
3706 case X86_64_INTEGER_CLASS:
3707 case X86_64_INTEGERSI_CLASS:
3708 (*int_nregs)++;
3709 break;
3710 case X86_64_SSE_CLASS:
3711 case X86_64_SSESF_CLASS:
3712 case X86_64_SSEDF_CLASS:
3713 (*sse_nregs)++;
3714 break;
3715 case X86_64_NO_CLASS:
3716 case X86_64_SSEUP_CLASS:
3717 break;
3718 case X86_64_X87_CLASS:
3719 case X86_64_X87UP_CLASS:
3720 if (!in_return)
3721 return 0;
3722 break;
3723 case X86_64_COMPLEX_X87_CLASS:
3724 return in_return ? 2 : 0;
3725 case X86_64_MEMORY_CLASS:
3726 gcc_unreachable ();
3728 return 1;
3731 /* Construct container for the argument used by GCC interface. See
3732 FUNCTION_ARG for the detailed description. */
3734 static rtx
3735 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3736 tree type, int in_return, int nintregs, int nsseregs,
3737 const int *intreg, int sse_regno)
3739 /* The following variables hold the static issued_error state. */
3740 static bool issued_sse_arg_error;
3741 static bool issued_sse_ret_error;
3742 static bool issued_x87_ret_error;
3744 enum machine_mode tmpmode;
3745 int bytes =
3746 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3747 enum x86_64_reg_class class[MAX_CLASSES];
3748 int n;
3749 int i;
3750 int nexps = 0;
3751 int needed_sseregs, needed_intregs;
3752 rtx exp[MAX_CLASSES];
3753 rtx ret;
3755 n = classify_argument (mode, type, class, 0);
3756 if (TARGET_DEBUG_ARG)
3758 if (!n)
3759 fprintf (stderr, "Memory class\n");
3760 else
3762 fprintf (stderr, "Classes:");
3763 for (i = 0; i < n; i++)
3765 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3767 fprintf (stderr, "\n");
3770 if (!n)
3771 return NULL;
3772 if (!examine_argument (mode, type, in_return, &needed_intregs,
3773 &needed_sseregs))
3774 return NULL;
3775 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3776 return NULL;
3778 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3779 some less clueful developer tries to use floating-point anyway. */
3780 if (needed_sseregs && !TARGET_SSE)
3782 if (in_return)
3784 if (!issued_sse_ret_error)
3786 error ("SSE register return with SSE disabled");
3787 issued_sse_ret_error = true;
3790 else if (!issued_sse_arg_error)
3792 error ("SSE register argument with SSE disabled");
3793 issued_sse_arg_error = true;
3795 return NULL;
3798 /* Likewise, error if the ABI requires us to return values in the
3799 x87 registers and the user specified -mno-80387. */
3800 if (!TARGET_80387 && in_return)
3801 for (i = 0; i < n; i++)
3802 if (class[i] == X86_64_X87_CLASS
3803 || class[i] == X86_64_X87UP_CLASS
3804 || class[i] == X86_64_COMPLEX_X87_CLASS)
3806 if (!issued_x87_ret_error)
3808 error ("x87 register return with x87 disabled");
3809 issued_x87_ret_error = true;
3811 return NULL;
3814 /* First construct simple cases. Avoid SCmode, since we want to use
3815 a single register to pass this type. */
3816 if (n == 1 && mode != SCmode)
3817 switch (class[0])
3819 case X86_64_INTEGER_CLASS:
3820 case X86_64_INTEGERSI_CLASS:
3821 return gen_rtx_REG (mode, intreg[0]);
3822 case X86_64_SSE_CLASS:
3823 case X86_64_SSESF_CLASS:
3824 case X86_64_SSEDF_CLASS:
3825 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3826 case X86_64_X87_CLASS:
3827 case X86_64_COMPLEX_X87_CLASS:
3828 return gen_rtx_REG (mode, FIRST_STACK_REG);
3829 case X86_64_NO_CLASS:
3830 /* Zero sized array, struct or class. */
3831 return NULL;
3832 default:
3833 gcc_unreachable ();
3835 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3836 && mode != BLKmode)
3837 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3838 if (n == 2
3839 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3840 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3841 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3842 && class[1] == X86_64_INTEGER_CLASS
3843 && (mode == CDImode || mode == TImode || mode == TFmode)
3844 && intreg[0] + 1 == intreg[1])
3845 return gen_rtx_REG (mode, intreg[0]);
3847 /* Otherwise figure out the entries of the PARALLEL. */
3848 for (i = 0; i < n; i++)
3850 switch (class[i])
3852 case X86_64_NO_CLASS:
3853 break;
3854 case X86_64_INTEGER_CLASS:
3855 case X86_64_INTEGERSI_CLASS:
3856 /* Merge TImodes on aligned occasions here too. */
3857 if (i * 8 + 8 > bytes)
3858 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3859 else if (class[i] == X86_64_INTEGERSI_CLASS)
3860 tmpmode = SImode;
3861 else
3862 tmpmode = DImode;
3863 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3864 if (tmpmode == BLKmode)
3865 tmpmode = DImode;
3866 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3867 gen_rtx_REG (tmpmode, *intreg),
3868 GEN_INT (i*8));
3869 intreg++;
3870 break;
3871 case X86_64_SSESF_CLASS:
3872 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3873 gen_rtx_REG (SFmode,
3874 SSE_REGNO (sse_regno)),
3875 GEN_INT (i*8));
3876 sse_regno++;
3877 break;
3878 case X86_64_SSEDF_CLASS:
3879 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3880 gen_rtx_REG (DFmode,
3881 SSE_REGNO (sse_regno)),
3882 GEN_INT (i*8));
3883 sse_regno++;
3884 break;
3885 case X86_64_SSE_CLASS:
3886 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3887 tmpmode = TImode;
3888 else
3889 tmpmode = DImode;
3890 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3891 gen_rtx_REG (tmpmode,
3892 SSE_REGNO (sse_regno)),
3893 GEN_INT (i*8));
3894 if (tmpmode == TImode)
3895 i++;
3896 sse_regno++;
3897 break;
3898 default:
3899 gcc_unreachable ();
3903 /* Empty aligned struct, union or class. */
3904 if (nexps == 0)
3905 return NULL;
3907 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3908 for (i = 0; i < nexps; i++)
3909 XVECEXP (ret, 0, i) = exp [i];
3910 return ret;
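/* Continuing the struct { double d; int i; } example, the PARALLEL
   built here would look roughly like

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in an SSE register and the int in a
   general-purpose register at byte offset 8.  */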
3913 /* Update the data in CUM to advance over an argument
3914 of mode MODE and data type TYPE.
3915 (TYPE is null for libcalls where that information may not be available.) */
3917 void
3918 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3919 tree type, int named)
3921 int bytes =
3922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3923 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3925 if (type)
3926 mode = type_natural_mode (type);
3928 if (TARGET_DEBUG_ARG)
3929 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3930 "mode=%s, named=%d)\n\n",
3931 words, cum->words, cum->nregs, cum->sse_nregs,
3932 GET_MODE_NAME (mode), named);
3934 if (TARGET_64BIT)
3936 int int_nregs, sse_nregs;
3937 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3938 cum->words += words;
3939 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3941 cum->nregs -= int_nregs;
3942 cum->sse_nregs -= sse_nregs;
3943 cum->regno += int_nregs;
3944 cum->sse_regno += sse_nregs;
3946 else
3947 cum->words += words;
3949 else
3951 switch (mode)
3953 default:
3954 break;
3956 case BLKmode:
3957 if (bytes < 0)
3958 break;
3959 /* FALLTHRU */
3961 case DImode:
3962 case SImode:
3963 case HImode:
3964 case QImode:
3965 cum->words += words;
3966 cum->nregs -= words;
3967 cum->regno += words;
3969 if (cum->nregs <= 0)
3971 cum->nregs = 0;
3972 cum->regno = 0;
3974 break;
3976 case DFmode:
3977 if (cum->float_in_sse < 2)
3978 break;
3979 case SFmode:
3980 if (cum->float_in_sse < 1)
3981 break;
3982 /* FALLTHRU */
3984 case TImode:
3985 case V16QImode:
3986 case V8HImode:
3987 case V4SImode:
3988 case V2DImode:
3989 case V4SFmode:
3990 case V2DFmode:
3991 if (!type || !AGGREGATE_TYPE_P (type))
3993 cum->sse_words += words;
3994 cum->sse_nregs -= 1;
3995 cum->sse_regno += 1;
3996 if (cum->sse_nregs <= 0)
3998 cum->sse_nregs = 0;
3999 cum->sse_regno = 0;
4002 break;
4004 case V8QImode:
4005 case V4HImode:
4006 case V2SImode:
4007 case V2SFmode:
4008 if (!type || !AGGREGATE_TYPE_P (type))
4010 cum->mmx_words += words;
4011 cum->mmx_nregs -= 1;
4012 cum->mmx_regno += 1;
4013 if (cum->mmx_nregs <= 0)
4015 cum->mmx_nregs = 0;
4016 cum->mmx_regno = 0;
4019 break;
4024 /* Define where to put the arguments to a function.
4025 Value is zero to push the argument on the stack,
4026 or a hard register in which to store the argument.
4028 MODE is the argument's machine mode.
4029 TYPE is the data type of the argument (as a tree).
4030 This is null for libcalls where that information may
4031 not be available.
4032 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4033 the preceding args and about the function being called.
4034 NAMED is nonzero if this argument is a named parameter
4035 (otherwise it is an extra parameter matching an ellipsis). */
4038 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
4039 tree type, int named)
4041 enum machine_mode mode = orig_mode;
4042 rtx ret = NULL_RTX;
4043 int bytes =
4044 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4045 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4046 static bool warnedsse, warnedmmx;
4048 /* To simplify the code below, represent vector types with a vector mode
4049 even if MMX/SSE are not active. */
4050 if (type && TREE_CODE (type) == VECTOR_TYPE)
4051 mode = type_natural_mode (type);
4053 /* Handle a hidden AL argument containing the number of SSE registers for varargs
4054 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
4055 any AL settings. */
4056 if (mode == VOIDmode)
4058 if (TARGET_64BIT)
4059 return GEN_INT (cum->maybe_vaarg
4060 ? (cum->sse_nregs < 0
4061 ? SSE_REGPARM_MAX
4062 : cum->sse_regno)
4063 : -1);
4064 else
4065 return constm1_rtx;
4067 if (TARGET_64BIT)
4068 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4069 cum->sse_nregs,
4070 &x86_64_int_parameter_registers [cum->regno],
4071 cum->sse_regno);
4072 else
4073 switch (mode)
4075 /* For now, pass fp/complex values on the stack. */
4076 default:
4077 break;
4079 case BLKmode:
4080 if (bytes < 0)
4081 break;
4082 /* FALLTHRU */
4083 case DImode:
4084 case SImode:
4085 case HImode:
4086 case QImode:
4087 if (words <= cum->nregs)
4089 int regno = cum->regno;
4091 /* Fastcall allocates the first two DWORD (SImode) or
4092 smaller arguments to ECX and EDX. */
4093 if (cum->fastcall)
4095 if (mode == BLKmode || mode == DImode)
4096 break;
4098 /* ECX, not EAX, is the first allocated register. */
4099 if (regno == 0)
4100 regno = 2;
4102 ret = gen_rtx_REG (mode, regno);
4104 break;
4105 case DFmode:
4106 if (cum->float_in_sse < 2)
4107 break;
4108 case SFmode:
4109 if (cum->float_in_sse < 1)
4110 break;
4111 /* FALLTHRU */
4112 case TImode:
4113 case V16QImode:
4114 case V8HImode:
4115 case V4SImode:
4116 case V2DImode:
4117 case V4SFmode:
4118 case V2DFmode:
4119 if (!type || !AGGREGATE_TYPE_P (type))
4121 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4123 warnedsse = true;
4124 warning (0, "SSE vector argument without SSE enabled "
4125 "changes the ABI");
4127 if (cum->sse_nregs)
4128 ret = gen_reg_or_parallel (mode, orig_mode,
4129 cum->sse_regno + FIRST_SSE_REG);
4131 break;
4132 case V8QImode:
4133 case V4HImode:
4134 case V2SImode:
4135 case V2SFmode:
4136 if (!type || !AGGREGATE_TYPE_P (type))
4138 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4140 warnedmmx = true;
4141 warning (0, "MMX vector argument without MMX enabled "
4142 "changes the ABI");
4144 if (cum->mmx_nregs)
4145 ret = gen_reg_or_parallel (mode, orig_mode,
4146 cum->mmx_regno + FIRST_MMX_REG);
4148 break;
4151 if (TARGET_DEBUG_ARG)
4153 fprintf (stderr,
4154 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4155 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4157 if (ret)
4158 print_simple_rtl (stderr, ret);
4159 else
4160 fprintf (stderr, ", stack");
4162 fprintf (stderr, " )\n");
4165 return ret;
4168 /* A C expression that indicates when an argument must be passed by
4169 reference. If nonzero for an argument, a copy of that argument is
4170 made in memory and a pointer to the argument is passed instead of
4171 the argument itself. The pointer is passed in whatever way is
4172 appropriate for passing a pointer to that type. */
4174 static bool
4175 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4176 enum machine_mode mode ATTRIBUTE_UNUSED,
4177 tree type, bool named ATTRIBUTE_UNUSED)
4179 if (!TARGET_64BIT)
4180 return 0;
4182 if (type && int_size_in_bytes (type) == -1)
4184 if (TARGET_DEBUG_ARG)
4185 fprintf (stderr, "function_arg_pass_by_reference\n");
4186 return 1;
4189 return 0;
4192 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument passing
4193 ABI. Only called if TARGET_SSE. */
4194 static bool
4195 contains_128bit_aligned_vector_p (tree type)
4197 enum machine_mode mode = TYPE_MODE (type);
4198 if (SSE_REG_MODE_P (mode)
4199 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4200 return true;
4201 if (TYPE_ALIGN (type) < 128)
4202 return false;
4204 if (AGGREGATE_TYPE_P (type))
4206 /* Walk the aggregates recursively. */
4207 switch (TREE_CODE (type))
4209 case RECORD_TYPE:
4210 case UNION_TYPE:
4211 case QUAL_UNION_TYPE:
4213 tree field;
4215 /* Walk all the structure fields. */
4216 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4218 if (TREE_CODE (field) == FIELD_DECL
4219 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4220 return true;
4222 break;
4225 case ARRAY_TYPE:
4226 /* Just in case some language passes arrays by value. */
4227 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4228 return true;
4229 break;
4231 default:
4232 gcc_unreachable ();
4235 return false;
4238 /* Gives the alignment boundary, in bits, of an argument with the
4239 specified mode and type. */
4242 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4244 int align;
4245 if (type)
4246 align = TYPE_ALIGN (type);
4247 else
4248 align = GET_MODE_ALIGNMENT (mode);
4249 if (align < PARM_BOUNDARY)
4250 align = PARM_BOUNDARY;
4251 if (!TARGET_64BIT)
4253 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4254 make an exception for SSE modes since these require 128bit
4255 alignment.
4257 The handling here differs from field_alignment. ICC aligns MMX
4258 arguments to 4 byte boundaries, while structure fields are aligned
4259 to 8 byte boundaries. */
4260 if (!TARGET_SSE)
4261 align = PARM_BOUNDARY;
4262 else if (!type)
4264 if (!SSE_REG_MODE_P (mode))
4265 align = PARM_BOUNDARY;
4267 else
4269 if (!contains_128bit_aligned_vector_p (type))
4270 align = PARM_BOUNDARY;
4273 if (align > 128)
4274 align = 128;
4275 return align;
4278 /* Return true if N is a possible register number of function value. */
4279 bool
4280 ix86_function_value_regno_p (int regno)
4282 if (TARGET_MACHO)
4284 if (!TARGET_64BIT)
4286 return ((regno) == 0
4287 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4288 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4290 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4291 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4292 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4294 else
4296 if (regno == 0
4297 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4298 || (regno == FIRST_SSE_REG && TARGET_SSE))
4299 return true;
4301 if (!TARGET_64BIT
4302 && (regno == FIRST_MMX_REG && TARGET_MMX))
4303 return true;
4305 return false;
4309 /* Define how to find the value returned by a function.
4310 VALTYPE is the data type of the value (as a tree).
4311 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4312 otherwise, FUNC is 0. */
4314 ix86_function_value (tree valtype, tree fntype_or_decl,
4315 bool outgoing ATTRIBUTE_UNUSED)
4317 enum machine_mode natmode = type_natural_mode (valtype);
4319 if (TARGET_64BIT)
4321 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4322 1, REGPARM_MAX, SSE_REGPARM_MAX,
4323 x86_64_int_return_registers, 0);
4324 /* For zero sized structures, construct_container returns NULL, but we
4325 need to keep the rest of the compiler happy by returning a meaningful value. */
4326 if (!ret)
4327 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4328 return ret;
4330 else
4332 tree fn = NULL_TREE, fntype;
4333 if (fntype_or_decl
4334 && DECL_P (fntype_or_decl))
4335 fn = fntype_or_decl;
4336 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4337 return gen_rtx_REG (TYPE_MODE (valtype),
4338 ix86_value_regno (natmode, fn, fntype));
4342 /* Return true iff type is returned in memory. */
4344 ix86_return_in_memory (tree type)
4346 int needed_intregs, needed_sseregs, size;
4347 enum machine_mode mode = type_natural_mode (type);
4349 if (TARGET_64BIT)
4350 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4352 if (mode == BLKmode)
4353 return 1;
4355 size = int_size_in_bytes (type);
4357 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4358 return 0;
4360 if (VECTOR_MODE_P (mode) || mode == TImode)
4362 /* User-created vectors small enough to fit in EAX. */
4363 if (size < 8)
4364 return 0;
4366 /* MMX/3dNow values are returned in MM0,
4367 except when it doesn't exist. */
4368 if (size == 8)
4369 return (TARGET_MMX ? 0 : 1);
4371 /* SSE values are returned in XMM0, except when it doesn't exist. */
4372 if (size == 16)
4373 return (TARGET_SSE ? 0 : 1);
4376 if (mode == XFmode)
4377 return 0;
4379 if (mode == TDmode)
4380 return 1;
4382 if (size > 12)
4383 return 1;
4384 return 0;
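/* For example, on a 32-bit target a struct { float x, y, z; } (12
   bytes, BLKmode) is returned in memory, while an __m64 value is
   returned in %mm0 when MMX is enabled and in memory otherwise.  */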
4387 /* When returning SSE vector types, we have a choice of either
4388 (1) being abi incompatible with a -march switch, or
4389 (2) generating an error.
4390 Given no good solution, I think the safest thing is one warning.
4391 The user won't be able to use -Werror, but....
4393 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4394 called in response to actually generating a caller or callee that
4395 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4396 via aggregate_value_p for general type probing from tree-ssa. */
4398 static rtx
4399 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4401 static bool warnedsse, warnedmmx;
4403 if (type)
4405 /* Look at the return type of the function, not the function type. */
4406 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4408 if (!TARGET_SSE && !warnedsse)
4410 if (mode == TImode
4411 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4413 warnedsse = true;
4414 warning (0, "SSE vector return without SSE enabled "
4415 "changes the ABI");
4419 if (!TARGET_MMX && !warnedmmx)
4421 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4423 warnedmmx = true;
4424 warning (0, "MMX vector return without MMX enabled "
4425 "changes the ABI");
4430 return NULL;
4433 /* Define how to find the value returned by a library function
4434 assuming the value has mode MODE. */
4436 ix86_libcall_value (enum machine_mode mode)
4438 if (TARGET_64BIT)
4440 switch (mode)
4442 case SFmode:
4443 case SCmode:
4444 case DFmode:
4445 case DCmode:
4446 case TFmode:
4447 case SDmode:
4448 case DDmode:
4449 case TDmode:
4450 return gen_rtx_REG (mode, FIRST_SSE_REG);
4451 case XFmode:
4452 case XCmode:
4453 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4454 case TCmode:
4455 return NULL;
4456 default:
4457 return gen_rtx_REG (mode, 0);
4460 else
4461 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4464 /* Given a mode, return the register to use for a return value. */
4466 static int
4467 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4469 gcc_assert (!TARGET_64BIT);
4471 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4472 we normally prevent this case when mmx is not available. However
4473 some ABIs may require the result to be returned like DImode. */
4474 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4475 return TARGET_MMX ? FIRST_MMX_REG : 0;
4477 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4478 we prevent this case when sse is not available. However some ABIs
4479 may require the result to be returned like integer TImode. */
4480 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4481 return TARGET_SSE ? FIRST_SSE_REG : 0;
4483 /* Decimal floating point values can go in %eax, unlike other float modes. */
4484 if (DECIMAL_FLOAT_MODE_P (mode))
4485 return 0;
4487 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4488 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4489 return 0;
4491 /* Floating point return values in %st(0), except for local functions when
4492 SSE math is enabled or for functions with sseregparm attribute. */
4493 if ((func || fntype)
4494 && (mode == SFmode || mode == DFmode))
4496 int sse_level = ix86_function_sseregparm (fntype, func);
4497 if ((sse_level >= 1 && mode == SFmode)
4498 || (sse_level == 2 && mode == DFmode))
4499 return FIRST_SSE_REG;
4502 return FIRST_FLOAT_REG;
4505 /* Create the va_list data type. */
4507 static tree
4508 ix86_build_builtin_va_list (void)
4510 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4512 /* For i386 we use plain pointer to argument area. */
4513 if (!TARGET_64BIT)
4514 return build_pointer_type (char_type_node);
4516 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4517 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4519 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4520 unsigned_type_node);
4521 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4522 unsigned_type_node);
4523 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4524 ptr_type_node);
4525 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4526 ptr_type_node);
4528 va_list_gpr_counter_field = f_gpr;
4529 va_list_fpr_counter_field = f_fpr;
4531 DECL_FIELD_CONTEXT (f_gpr) = record;
4532 DECL_FIELD_CONTEXT (f_fpr) = record;
4533 DECL_FIELD_CONTEXT (f_ovf) = record;
4534 DECL_FIELD_CONTEXT (f_sav) = record;
4536 TREE_CHAIN (record) = type_decl;
4537 TYPE_NAME (record) = type_decl;
4538 TYPE_FIELDS (record) = f_gpr;
4539 TREE_CHAIN (f_gpr) = f_fpr;
4540 TREE_CHAIN (f_fpr) = f_ovf;
4541 TREE_CHAIN (f_ovf) = f_sav;
4543 layout_type (record);
4545 /* The correct type is an array type of one element. */
4546 return build_array_type (record, build_index_type (size_zero_node));
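/* The record built above corresponds to the familiar x86-64 va_list
   layout, roughly:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
 */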
4549 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4551 static void
4552 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4553 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4554 int no_rtl)
4556 CUMULATIVE_ARGS next_cum;
4557 rtx save_area = NULL_RTX, mem;
4558 rtx label;
4559 rtx label_ref;
4560 rtx tmp_reg;
4561 rtx nsse_reg;
4562 int set;
4563 tree fntype;
4564 int stdarg_p;
4565 int i;
4567 if (!TARGET_64BIT)
4568 return;
4570 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4571 return;
4573 /* Indicate to allocate space on the stack for varargs save area. */
4574 ix86_save_varrargs_registers = 1;
4576 cfun->stack_alignment_needed = 128;
4578 fntype = TREE_TYPE (current_function_decl);
4579 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4580 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4581 != void_type_node));
4583 /* For varargs, we do not want to skip the dummy va_dcl argument.
4584 For stdargs, we do want to skip the last named argument. */
4585 next_cum = *cum;
4586 if (stdarg_p)
4587 function_arg_advance (&next_cum, mode, type, 1);
4589 if (!no_rtl)
4590 save_area = frame_pointer_rtx;
4592 set = get_varargs_alias_set ();
4594 for (i = next_cum.regno;
4595 i < ix86_regparm
4596 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4597 i++)
4599 mem = gen_rtx_MEM (Pmode,
4600 plus_constant (save_area, i * UNITS_PER_WORD));
4601 MEM_NOTRAP_P (mem) = 1;
4602 set_mem_alias_set (mem, set);
4603 emit_move_insn (mem, gen_rtx_REG (Pmode,
4604 x86_64_int_parameter_registers[i]));
4607 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4609 /* Now emit code to save SSE registers. The AX parameter contains the number
4610 of SSE parameter registers used to call this function. We use the
4611 sse_prologue_save insn template that produces a computed jump across the
4612 SSE saves. We need some preparation work to get this working. */
4614 label = gen_label_rtx ();
4615 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4617 /* Compute address to jump to :
4618 label - 5*eax + nnamed_sse_arguments*5 */
4619 tmp_reg = gen_reg_rtx (Pmode);
4620 nsse_reg = gen_reg_rtx (Pmode);
4621 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4622 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4623 gen_rtx_MULT (Pmode, nsse_reg,
4624 GEN_INT (4))));
4625 if (next_cum.sse_regno)
4626 emit_move_insn
4627 (nsse_reg,
4628 gen_rtx_CONST (DImode,
4629 gen_rtx_PLUS (DImode,
4630 label_ref,
4631 GEN_INT (next_cum.sse_regno * 4))));
4632 else
4633 emit_move_insn (nsse_reg, label_ref);
4634 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4636 /* Compute the address of the memory block we save into. We always use a pointer
4637 pointing 127 bytes after the first byte to store - this is needed to keep
4638 the instruction size limited to 4 bytes. */
4639 tmp_reg = gen_reg_rtx (Pmode);
4640 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4641 plus_constant (save_area,
4642 8 * REGPARM_MAX + 127)));
4643 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4644 MEM_NOTRAP_P (mem) = 1;
4645 set_mem_alias_set (mem, set);
4646 set_mem_align (mem, BITS_PER_WORD);
4648 /* And finally do the dirty job! */
4649 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4650 GEN_INT (next_cum.sse_regno), label));
4655 /* Implement va_start. */
4657 void
4658 ix86_va_start (tree valist, rtx nextarg)
4660 HOST_WIDE_INT words, n_gpr, n_fpr;
4661 tree f_gpr, f_fpr, f_ovf, f_sav;
4662 tree gpr, fpr, ovf, sav, t;
4663 tree type;
4665 /* Only 64bit target needs something special. */
4666 if (!TARGET_64BIT)
4668 std_expand_builtin_va_start (valist, nextarg);
4669 return;
4672 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4673 f_fpr = TREE_CHAIN (f_gpr);
4674 f_ovf = TREE_CHAIN (f_fpr);
4675 f_sav = TREE_CHAIN (f_ovf);
4677 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4678 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4679 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4680 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4681 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4683 /* Count number of gp and fp argument registers used. */
4684 words = current_function_args_info.words;
4685 n_gpr = current_function_args_info.regno;
4686 n_fpr = current_function_args_info.sse_regno;
4688 if (TARGET_DEBUG_ARG)
4689 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4690 (int) words, (int) n_gpr, (int) n_fpr);
4692 if (cfun->va_list_gpr_size)
4694 type = TREE_TYPE (gpr);
4695 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4696 build_int_cst (type, n_gpr * 8));
4697 TREE_SIDE_EFFECTS (t) = 1;
4698 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4701 if (cfun->va_list_fpr_size)
4703 type = TREE_TYPE (fpr);
4704 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4705 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4706 TREE_SIDE_EFFECTS (t) = 1;
4707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4710 /* Find the overflow area. */
4711 type = TREE_TYPE (ovf);
4712 t = make_tree (type, virtual_incoming_args_rtx);
4713 if (words != 0)
4714 t = build2 (PLUS_EXPR, type, t,
4715 build_int_cst (type, words * UNITS_PER_WORD));
4716 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4717 TREE_SIDE_EFFECTS (t) = 1;
4718 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4720 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4722 /* Find the register save area.
4723 The function prologue saves it right above the stack frame. */
4724 type = TREE_TYPE (sav);
4725 t = make_tree (type, frame_pointer_rtx);
4726 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4727 TREE_SIDE_EFFECTS (t) = 1;
4728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
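/* For illustration, in a function declared as

     int f (int a, const char *fmt, ...)

   the two named arguments consume two GP registers, so va_start
   leaves gp_offset == 16, fp_offset == 8 * REGPARM_MAX (the start of
   the SSE save area) and overflow_arg_area pointing at the first
   stack-passed argument.  */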
4732 /* Implement va_arg. */
4734 tree
4735 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4737 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4738 tree f_gpr, f_fpr, f_ovf, f_sav;
4739 tree gpr, fpr, ovf, sav, t;
4740 int size, rsize;
4741 tree lab_false, lab_over = NULL_TREE;
4742 tree addr, t2;
4743 rtx container;
4744 int indirect_p = 0;
4745 tree ptrtype;
4746 enum machine_mode nat_mode;
4748 /* Only 64bit target needs something special. */
4749 if (!TARGET_64BIT)
4750 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4752 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4753 f_fpr = TREE_CHAIN (f_gpr);
4754 f_ovf = TREE_CHAIN (f_fpr);
4755 f_sav = TREE_CHAIN (f_ovf);
4757 valist = build_va_arg_indirect_ref (valist);
4758 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4759 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4760 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4761 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4763 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4764 if (indirect_p)
4765 type = build_pointer_type (type);
4766 size = int_size_in_bytes (type);
4767 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4769 nat_mode = type_natural_mode (type);
4770 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4771 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4773 /* Pull the value out of the saved registers. */
4775 addr = create_tmp_var (ptr_type_node, "addr");
4776 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4778 if (container)
4780 int needed_intregs, needed_sseregs;
4781 bool need_temp;
4782 tree int_addr, sse_addr;
4784 lab_false = create_artificial_label ();
4785 lab_over = create_artificial_label ();
4787 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4789 need_temp = (!REG_P (container)
4790 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4791 || TYPE_ALIGN (type) > 128));
4793 /* In case we are passing a structure, verify that it is a consecutive block
4794 on the register save area. If not, we need to do moves. */
4795 if (!need_temp && !REG_P (container))
4798 /* Verify that all registers are strictly consecutive. */
4798 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4800 int i;
4802 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4804 rtx slot = XVECEXP (container, 0, i);
4805 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4806 || INTVAL (XEXP (slot, 1)) != i * 16)
4807 need_temp = 1;
4810 else
4812 int i;
4814 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4816 rtx slot = XVECEXP (container, 0, i);
4817 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4818 || INTVAL (XEXP (slot, 1)) != i * 8)
4819 need_temp = 1;
4823 if (!need_temp)
4825 int_addr = addr;
4826 sse_addr = addr;
4828 else
4830 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4831 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4832 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4833 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4836 /* First ensure that we fit completely in registers. */
4837 if (needed_intregs)
4839 t = build_int_cst (TREE_TYPE (gpr),
4840 (REGPARM_MAX - needed_intregs + 1) * 8);
4841 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4842 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4843 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4844 gimplify_and_add (t, pre_p);
4846 if (needed_sseregs)
4848 t = build_int_cst (TREE_TYPE (fpr),
4849 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4850 + REGPARM_MAX * 8);
4851 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4852 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4853 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4854 gimplify_and_add (t, pre_p);
4857 /* Compute index to start of area used for integer regs. */
4858 if (needed_intregs)
4860 /* int_addr = gpr + sav; */
4861 t = fold_convert (ptr_type_node, gpr);
4862 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4863 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4864 gimplify_and_add (t, pre_p);
4866 if (needed_sseregs)
4868 /* sse_addr = fpr + sav; */
4869 t = fold_convert (ptr_type_node, fpr);
4870 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4871 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4872 gimplify_and_add (t, pre_p);
4874 if (need_temp)
4876 int i;
4877 tree temp = create_tmp_var (type, "va_arg_tmp");
4879 /* addr = &temp; */
4880 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4881 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4882 gimplify_and_add (t, pre_p);
4884 for (i = 0; i < XVECLEN (container, 0); i++)
4886 rtx slot = XVECEXP (container, 0, i);
4887 rtx reg = XEXP (slot, 0);
4888 enum machine_mode mode = GET_MODE (reg);
4889 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4890 tree addr_type = build_pointer_type (piece_type);
4891 tree src_addr, src;
4892 int src_offset;
4893 tree dest_addr, dest;
4895 if (SSE_REGNO_P (REGNO (reg)))
4897 src_addr = sse_addr;
4898 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4900 else
4902 src_addr = int_addr;
4903 src_offset = REGNO (reg) * 8;
4905 src_addr = fold_convert (addr_type, src_addr);
4906 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4907 size_int (src_offset));
4908 src = build_va_arg_indirect_ref (src_addr);
4910 dest_addr = fold_convert (addr_type, addr);
4911 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4912 size_int (INTVAL (XEXP (slot, 1))));
4913 dest = build_va_arg_indirect_ref (dest_addr);
4915 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4916 gimplify_and_add (t, pre_p);
4920 if (needed_intregs)
4922 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4923 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4924 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4925 gimplify_and_add (t, pre_p);
4927 if (needed_sseregs)
4929 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4930 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4931 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4932 gimplify_and_add (t, pre_p);
4935 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4936 gimplify_and_add (t, pre_p);
4938 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4939 append_to_statement_list (t, pre_p);
4942 /* ... otherwise out of the overflow area. */
4944 /* Care for on-stack alignment if needed. */
4945 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4946 || integer_zerop (TYPE_SIZE (type)))
4947 t = ovf;
4948 else
4950 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4951 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4952 build_int_cst (TREE_TYPE (ovf), align - 1));
4953 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4954 build_int_cst (TREE_TYPE (t), -align));
4956 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4958 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4959 gimplify_and_add (t2, pre_p);
4961 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4962 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4963 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4964 gimplify_and_add (t, pre_p);
4966 if (container)
4968 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4969 append_to_statement_list (t, pre_p);
4972 ptrtype = build_pointer_type (type);
4973 addr = fold_convert (ptrtype, addr);
4975 if (indirect_p)
4976 addr = build_va_arg_indirect_ref (addr);
4977 return build_va_arg_indirect_ref (addr);
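/* For a simple va_arg (ap, int) the sequence built above behaves
   roughly like:

     if (ap->gp_offset >= 48)
       goto overflow;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;
 */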
4980 /* Return nonzero if OPNUM's MEM should be matched
4981 in movabs* patterns. */
4984 ix86_check_movabs (rtx insn, int opnum)
4986 rtx set, mem;
4988 set = PATTERN (insn);
4989 if (GET_CODE (set) == PARALLEL)
4990 set = XVECEXP (set, 0, 0);
4991 gcc_assert (GET_CODE (set) == SET);
4992 mem = XEXP (set, opnum);
4993 while (GET_CODE (mem) == SUBREG)
4994 mem = SUBREG_REG (mem);
4995 gcc_assert (MEM_P (mem));
4996 return (volatile_ok || !MEM_VOLATILE_P (mem));
4999 /* Initialize the table of extra 80387 mathematical constants. */
5001 static void
5002 init_ext_80387_constants (void)
5004 static const char * cst[5] =
5006 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5007 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5008 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5009 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5010 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5012 int i;
5014 for (i = 0; i < 5; i++)
5016 real_from_string (&ext_80387_constants_table[i], cst[i]);
5017 /* Ensure each constant is rounded to XFmode precision. */
5018 real_convert (&ext_80387_constants_table[i],
5019 XFmode, &ext_80387_constants_table[i]);
5022 ext_80387_constants_init = 1;
5025 /* Return true if the constant is something that can be loaded with
5026 a special instruction. */
5029 standard_80387_constant_p (rtx x)
5031 REAL_VALUE_TYPE r;
5033 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5034 return -1;
5036 if (x == CONST0_RTX (GET_MODE (x)))
5037 return 1;
5038 if (x == CONST1_RTX (GET_MODE (x)))
5039 return 2;
5041 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5043 /* For XFmode constants, try to find a special 80387 instruction when
5044 optimizing for size or on those CPUs that benefit from them. */
5045 if (GET_MODE (x) == XFmode
5046 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5048 int i;
5050 if (! ext_80387_constants_init)
5051 init_ext_80387_constants ();
5053 for (i = 0; i < 5; i++)
5054 if (real_identical (&r, &ext_80387_constants_table[i]))
5055 return i + 3;
5058 /* A load of the constant -0.0 or -1.0 will be split into an
5059 fldz;fchs or fld1;fchs sequence. */
5060 if (real_isnegzero (&r))
5061 return 8;
5062 if (real_identical (&r, &dconstm1))
5063 return 9;
5065 return 0;
5068 /* Return the opcode of the special instruction to be used to load
5069 the constant X. */
5071 const char *
5072 standard_80387_constant_opcode (rtx x)
5074 switch (standard_80387_constant_p (x))
5076 case 1:
5077 return "fldz";
5078 case 2:
5079 return "fld1";
5080 case 3:
5081 return "fldlg2";
5082 case 4:
5083 return "fldln2";
5084 case 5:
5085 return "fldl2e";
5086 case 6:
5087 return "fldl2t";
5088 case 7:
5089 return "fldpi";
5090 case 8:
5091 case 9:
5092 return "#";
5093 default:
5094 gcc_unreachable ();
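/* For example, the XFmode representation of pi makes standard_80387_constant_p
   return 7, so the template above yields "fldpi"; -1.0 yields 9, and the "#"
   template tells the splitter to emit the fld1;fchs sequence described in
   standard_80387_constant_p instead of a single opcode.  */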
5098 /* Return the CONST_DOUBLE representing the 80387 constant that is
5099 loaded by the specified special instruction. The argument IDX
5100 matches the return value from standard_80387_constant_p. */
5103 standard_80387_constant_rtx (int idx)
5105 int i;
5107 if (! ext_80387_constants_init)
5108 init_ext_80387_constants ();
5110 switch (idx)
5112 case 3:
5113 case 4:
5114 case 5:
5115 case 6:
5116 case 7:
5117 i = idx - 3;
5118 break;
5120 default:
5121 gcc_unreachable ();
5124 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5125 XFmode);
5128 /* Return 1 if mode is a valid mode for sse. */
5129 static int
5130 standard_sse_mode_p (enum machine_mode mode)
5132 switch (mode)
5134 case V16QImode:
5135 case V8HImode:
5136 case V4SImode:
5137 case V2DImode:
5138 case V4SFmode:
5139 case V2DFmode:
5140 return 1;
5142 default:
5143 return 0;
5147 /* Return 1 if X is an FP constant that we can load into an SSE register without using memory.
5150 standard_sse_constant_p (rtx x)
5152 enum machine_mode mode = GET_MODE (x);
5154 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5155 return 1;
5156 if (vector_all_ones_operand (x, mode)
5157 && standard_sse_mode_p (mode))
5158 return TARGET_SSE2 ? 2 : -1;
5160 return 0;
5163 /* Return the opcode of the special instruction to be used to load
5164 the constant X. */
5166 const char *
5167 standard_sse_constant_opcode (rtx insn, rtx x)
5169 switch (standard_sse_constant_p (x))
5171 case 1:
5172 if (get_attr_mode (insn) == MODE_V4SF)
5173 return "xorps\t%0, %0";
5174 else if (get_attr_mode (insn) == MODE_V2DF)
5175 return "xorpd\t%0, %0";
5176 else
5177 return "pxor\t%0, %0";
5178 case 2:
5179 return "pcmpeqd\t%0, %0";
5181 gcc_unreachable ();
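/* As an illustration: an all-zero constant whose insn mode is MODE_V4SF is
   emitted as "xorps %xmm0, %xmm0" (taking %xmm0 as operand 0 for the sake of
   the example), while an all-ones vector constant on SSE2 is materialized
   with "pcmpeqd %xmm0, %xmm0".  */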
5184 /* Returns 1 if OP contains a symbol reference. */
5187 symbolic_reference_mentioned_p (rtx op)
5189 const char *fmt;
5190 int i;
5192 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5193 return 1;
5195 fmt = GET_RTX_FORMAT (GET_CODE (op));
5196 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5198 if (fmt[i] == 'E')
5200 int j;
5202 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5203 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5204 return 1;
5207 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5208 return 1;
5211 return 0;
5214 /* Return 1 if it is appropriate to emit `ret' instructions in the
5215 body of a function. Do this only if the epilogue is simple, needing a
5216 couple of insns. Prior to reloading, we can't tell how many registers
5217 must be saved, so return 0 then. Return 0 if there is no frame
5218 marker to de-allocate. */
5221 ix86_can_use_return_insn_p (void)
5223 struct ix86_frame frame;
5225 if (! reload_completed || frame_pointer_needed)
5226 return 0;
5228 /* Don't allow more than 32K of pop, since that's all we can do
5229 with one instruction. */
5230 if (current_function_pops_args
5231 && current_function_args_size >= 32768)
5232 return 0;
5234 ix86_compute_frame_layout (&frame);
5235 return frame.to_allocate == 0 && frame.nregs == 0;
5238 /* Value should be nonzero if functions must have frame pointers.
5239 Zero means the frame pointer need not be set up (and parms may
5240 be accessed via the stack pointer) in functions that seem suitable. */
5243 ix86_frame_pointer_required (void)
5245 /* If we accessed previous frames, then the generated code expects
5246 to be able to access the saved ebp value in our frame. */
5247 if (cfun->machine->accesses_prev_frame)
5248 return 1;
5250 /* Several x86 OSes need a frame pointer for other reasons,
5251 usually pertaining to setjmp. */
5252 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5253 return 1;
5255 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5256 the frame pointer by default. Turn it back on now if we've not
5257 got a leaf function. */
5258 if (TARGET_OMIT_LEAF_FRAME_POINTER
5259 && (!current_function_is_leaf
5260 || ix86_current_function_calls_tls_descriptor))
5261 return 1;
5263 if (current_function_profile)
5264 return 1;
5266 return 0;
5269 /* Record that the current function accesses previous call frames. */
5271 void
5272 ix86_setup_frame_addresses (void)
5274 cfun->machine->accesses_prev_frame = 1;
5277 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5278 # define USE_HIDDEN_LINKONCE 1
5279 #else
5280 # define USE_HIDDEN_LINKONCE 0
5281 #endif
5283 static int pic_labels_used;
5285 /* Fills in the label name that should be used for a pc thunk for
5286 the given register. */
5288 static void
5289 get_pc_thunk_name (char name[32], unsigned int regno)
5291 gcc_assert (!TARGET_64BIT);
5293 if (USE_HIDDEN_LINKONCE)
5294 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5295 else
5296 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
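/* With USE_HIDDEN_LINKONCE the thunk for, say, %ebx is named
   "__i686.get_pc_thunk.bx" (reg_names[regno] supplies the suffix); otherwise
   an internal label derived from "LPR" and the register number is used.  */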
5300 /* This function generates the pc thunks used for -fpic: each one loads
5301 its register with the return address of the caller and then returns. */
5303 void
5304 ix86_file_end (void)
5306 rtx xops[2];
5307 int regno;
5309 for (regno = 0; regno < 8; ++regno)
5311 char name[32];
5313 if (! ((pic_labels_used >> regno) & 1))
5314 continue;
5316 get_pc_thunk_name (name, regno);
5318 #if TARGET_MACHO
5319 if (TARGET_MACHO)
5321 switch_to_section (darwin_sections[text_coal_section]);
5322 fputs ("\t.weak_definition\t", asm_out_file);
5323 assemble_name (asm_out_file, name);
5324 fputs ("\n\t.private_extern\t", asm_out_file);
5325 assemble_name (asm_out_file, name);
5326 fputs ("\n", asm_out_file);
5327 ASM_OUTPUT_LABEL (asm_out_file, name);
5329 else
5330 #endif
5331 if (USE_HIDDEN_LINKONCE)
5333 tree decl;
5335 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5336 error_mark_node);
5337 TREE_PUBLIC (decl) = 1;
5338 TREE_STATIC (decl) = 1;
5339 DECL_ONE_ONLY (decl) = 1;
5341 (*targetm.asm_out.unique_section) (decl, 0);
5342 switch_to_section (get_named_section (decl, NULL, 0));
5344 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5345 fputs ("\t.hidden\t", asm_out_file);
5346 assemble_name (asm_out_file, name);
5347 fputc ('\n', asm_out_file);
5348 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5350 else
5352 switch_to_section (text_section);
5353 ASM_OUTPUT_LABEL (asm_out_file, name);
5356 xops[0] = gen_rtx_REG (SImode, regno);
5357 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5358 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5359 output_asm_insn ("ret", xops);
5362 if (NEED_INDICATE_EXEC_STACK)
5363 file_end_indicate_exec_stack ();
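/* Each thunk emitted above is therefore just "movl (%esp), <reg>; ret": it
   copies the caller's return address (left on the stack by the call) into
   the requested register and returns.  */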
5366 /* Emit code for the SET_GOT patterns. */
5368 const char *
5369 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5371 rtx xops[3];
5373 xops[0] = dest;
5375 if (TARGET_VXWORKS_RTP && flag_pic)
5377 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5378 xops[2] = gen_rtx_MEM (Pmode,
5379 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5380 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5382 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5383 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5384 an unadorned address. */
5385 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5386 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5387 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5388 return "";
5391 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5393 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5395 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5397 if (!flag_pic)
5398 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5399 else
5400 output_asm_insn ("call\t%a2", xops);
5402 #if TARGET_MACHO
5403 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5404 is what will be referenced by the Mach-O PIC subsystem. */
5405 if (!label)
5406 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5407 #endif
5409 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5410 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5412 if (flag_pic)
5413 output_asm_insn ("pop{l}\t%0", xops);
5415 else
5417 char name[32];
5418 get_pc_thunk_name (name, REGNO (dest));
5419 pic_labels_used |= 1 << REGNO (dest);
5421 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5422 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5423 output_asm_insn ("call\t%X2", xops);
5424 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5425 is what will be referenced by the Mach-O PIC subsystem. */
5426 #if TARGET_MACHO
5427 if (!label)
5428 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5429 else
5430 targetm.asm_out.internal_label (asm_out_file, "L",
5431 CODE_LABEL_NUMBER (label));
5432 #endif
5435 if (TARGET_MACHO)
5436 return "";
5438 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5439 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5440 else
5441 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5443 return "";
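/* The net effect in the common 32-bit PIC case is either a call/pop pair
   followed by "addl $_GLOBAL_OFFSET_TABLE_+[.-label], %reg", or a call to the
   pc thunk followed by "addl $_GLOBAL_OFFSET_TABLE_, %reg", leaving the GOT
   pointer in DEST.  */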
5446 /* Generate a "push" pattern for input ARG. */
5448 static rtx
5449 gen_push (rtx arg)
5451 return gen_rtx_SET (VOIDmode,
5452 gen_rtx_MEM (Pmode,
5453 gen_rtx_PRE_DEC (Pmode,
5454 stack_pointer_rtx)),
5455 arg);
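/* In 32-bit mode this produces (set (mem:SI (pre_dec:SI (reg:SI sp))) ARG),
   i.e. an ordinary push of ARG onto the stack.  */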
5458 /* Return the number of an unused call-clobbered register if one is available
5459 for the entire function; return INVALID_REGNUM otherwise. */
5461 static unsigned int
5462 ix86_select_alt_pic_regnum (void)
5464 if (current_function_is_leaf && !current_function_profile
5465 && !ix86_current_function_calls_tls_descriptor)
5467 int i;
5468 for (i = 2; i >= 0; --i)
5469 if (!regs_ever_live[i])
5470 return i;
5473 return INVALID_REGNUM;
5476 /* Return 1 if we need to save REGNO. */
5477 static int
5478 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5480 if (pic_offset_table_rtx
5481 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5482 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5483 || current_function_profile
5484 || current_function_calls_eh_return
5485 || current_function_uses_const_pool))
5487 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5488 return 0;
5489 return 1;
5492 if (current_function_calls_eh_return && maybe_eh_return)
5494 unsigned i;
5495 for (i = 0; ; i++)
5497 unsigned test = EH_RETURN_DATA_REGNO (i);
5498 if (test == INVALID_REGNUM)
5499 break;
5500 if (test == regno)
5501 return 1;
5505 if (cfun->machine->force_align_arg_pointer
5506 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5507 return 1;
5509 return (regs_ever_live[regno]
5510 && !call_used_regs[regno]
5511 && !fixed_regs[regno]
5512 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5515 /* Return the number of registers to be saved on the stack. */
5517 static int
5518 ix86_nsaved_regs (void)
5520 int nregs = 0;
5521 int regno;
5523 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5524 if (ix86_save_reg (regno, true))
5525 nregs++;
5526 return nregs;
5529 /* Return the offset between two registers, one to be eliminated, and the other
5530 its replacement, at the start of a routine. */
5532 HOST_WIDE_INT
5533 ix86_initial_elimination_offset (int from, int to)
5535 struct ix86_frame frame;
5536 ix86_compute_frame_layout (&frame);
5538 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5539 return frame.hard_frame_pointer_offset;
5540 else if (from == FRAME_POINTER_REGNUM
5541 && to == HARD_FRAME_POINTER_REGNUM)
5542 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5543 else
5545 gcc_assert (to == STACK_POINTER_REGNUM);
5547 if (from == ARG_POINTER_REGNUM)
5548 return frame.stack_pointer_offset;
5550 gcc_assert (from == FRAME_POINTER_REGNUM);
5551 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5555 /* Fill the ix86_frame structure describing the frame of the current function. */
5557 static void
5558 ix86_compute_frame_layout (struct ix86_frame *frame)
5560 HOST_WIDE_INT total_size;
5561 unsigned int stack_alignment_needed;
5562 HOST_WIDE_INT offset;
5563 unsigned int preferred_alignment;
5564 HOST_WIDE_INT size = get_frame_size ();
5566 frame->nregs = ix86_nsaved_regs ();
5567 total_size = size;
5569 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5570 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5572 /* During reload iteration the number of registers saved can change.
5573 Recompute the value as needed. Do not recompute when the number of registers
5574 didn't change, as reload makes multiple calls to this function and does not
5575 expect the decision to change within a single iteration. */
5576 if (!optimize_size
5577 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5579 int count = frame->nregs;
5581 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5582 /* The fast prologue uses moves instead of pushes to save registers. This
5583 is significantly longer, but also executes faster, as modern hardware
5584 can execute the moves in parallel but can't do that for push/pop.
5586 Be careful about choosing which prologue to emit: when the function takes
5587 many instructions to execute, we may as well use the slow version, likewise
5588 when the function is known to be outside a hot spot (this is known with
5589 feedback only). Weight the size of the function by the number of registers
5590 to save, as it is cheap to use one or two push instructions but very
5591 slow to use many of them. */
5592 if (count)
5593 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5594 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5595 || (flag_branch_probabilities
5596 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5597 cfun->machine->use_fast_prologue_epilogue = false;
5598 else
5599 cfun->machine->use_fast_prologue_epilogue
5600 = !expensive_function_p (count);
5602 if (TARGET_PROLOGUE_USING_MOVE
5603 && cfun->machine->use_fast_prologue_epilogue)
5604 frame->save_regs_using_mov = true;
5605 else
5606 frame->save_regs_using_mov = false;
5609 /* Skip return address and saved base pointer. */
5610 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5612 frame->hard_frame_pointer_offset = offset;
5614 /* Do some sanity checking of stack_alignment_needed and
5615 preferred_alignment, since the i386 port is the only one using these
5616 features, and they may break easily. */
5618 gcc_assert (!size || stack_alignment_needed);
5619 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5620 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5621 gcc_assert (stack_alignment_needed
5622 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5624 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5625 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5627 /* Register save area */
5628 offset += frame->nregs * UNITS_PER_WORD;
5630 /* Va-arg area */
5631 if (ix86_save_varrargs_registers)
5633 offset += X86_64_VARARGS_SIZE;
5634 frame->va_arg_size = X86_64_VARARGS_SIZE;
5636 else
5637 frame->va_arg_size = 0;
5639 /* Align start of frame for local function. */
5640 frame->padding1 = ((offset + stack_alignment_needed - 1)
5641 & -stack_alignment_needed) - offset;
5643 offset += frame->padding1;
5645 /* Frame pointer points here. */
5646 frame->frame_pointer_offset = offset;
5648 offset += size;
5650 /* Add the outgoing arguments area. It can be skipped if we eliminated
5651 all the function calls as dead code.
5652 Skipping is however impossible when the function calls alloca: the alloca
5653 expander assumes that the last current_function_outgoing_args_size bytes
5654 of the stack frame are unused. */
5655 if (ACCUMULATE_OUTGOING_ARGS
5656 && (!current_function_is_leaf || current_function_calls_alloca
5657 || ix86_current_function_calls_tls_descriptor))
5659 offset += current_function_outgoing_args_size;
5660 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5662 else
5663 frame->outgoing_arguments_size = 0;
5665 /* Align stack boundary. Only needed if we're calling another function
5666 or using alloca. */
5667 if (!current_function_is_leaf || current_function_calls_alloca
5668 || ix86_current_function_calls_tls_descriptor)
5669 frame->padding2 = ((offset + preferred_alignment - 1)
5670 & -preferred_alignment) - offset;
5671 else
5672 frame->padding2 = 0;
5674 offset += frame->padding2;
5676 /* We've reached end of stack frame. */
5677 frame->stack_pointer_offset = offset;
5679 /* Size the prologue needs to allocate. */
5680 frame->to_allocate =
5681 (size + frame->padding1 + frame->padding2
5682 + frame->outgoing_arguments_size + frame->va_arg_size);
5684 if ((!frame->to_allocate && frame->nregs <= 1)
5685 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5686 frame->save_regs_using_mov = false;
5688 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5689 && current_function_is_leaf
5690 && !ix86_current_function_calls_tls_descriptor)
5692 frame->red_zone_size = frame->to_allocate;
5693 if (frame->save_regs_using_mov)
5694 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5695 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5696 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5698 else
5699 frame->red_zone_size = 0;
5700 frame->to_allocate -= frame->red_zone_size;
5701 frame->stack_pointer_offset -= frame->red_zone_size;
5702 #if 0
5703 fprintf (stderr, "\n");
5704 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5705 fprintf (stderr, "size: %ld\n", (long)size);
5706 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5707 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5708 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5709 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5710 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5711 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5712 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5713 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5714 (long)frame->hard_frame_pointer_offset);
5715 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5716 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5717 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5718 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5719 #endif
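/* To summarize the layout computed above, from higher to lower addresses the
   frame holds: the return address, the saved frame pointer (if needed), the
   register save area, the va-arg register save area, padding1, the local
   variables, the outgoing argument area, and padding2; to_allocate covers
   everything below the register save area, minus any red zone.  */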
5722 /* Emit code to save registers in the prologue. */
5724 static void
5725 ix86_emit_save_regs (void)
5727 unsigned int regno;
5728 rtx insn;
5730 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5731 if (ix86_save_reg (regno, true))
5733 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5734 RTX_FRAME_RELATED_P (insn) = 1;
5738 /* Emit code to save registers using MOV insns. The first register
5739 is saved at POINTER + OFFSET. */
5740 static void
5741 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5743 unsigned int regno;
5744 rtx insn;
5746 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5747 if (ix86_save_reg (regno, true))
5749 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5750 Pmode, offset),
5751 gen_rtx_REG (Pmode, regno));
5752 RTX_FRAME_RELATED_P (insn) = 1;
5753 offset += UNITS_PER_WORD;
5757 /* Expand a prologue or epilogue stack adjustment.
5758 The pattern exists to put a dependency on all ebp-based memory accesses.
5759 STYLE should be negative if the instructions should be marked as frame
5760 related, zero if the %r11 register is live and cannot be freely used, and
5761 positive otherwise. */
5763 static void
5764 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5766 rtx insn;
5768 if (! TARGET_64BIT)
5769 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5770 else if (x86_64_immediate_operand (offset, DImode))
5771 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5772 else
5774 rtx r11;
5775 /* r11 is used by indirect sibcall return as well, set before the
5776 epilogue and used after the epilogue. ATM indirect sibcall
5777 shouldn't be used together with huge frame sizes in one
5778 function because of the frame_size check in sibcall.c. */
5779 gcc_assert (style);
5780 r11 = gen_rtx_REG (DImode, R11_REG);
5781 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5782 if (style < 0)
5783 RTX_FRAME_RELATED_P (insn) = 1;
5784 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5785 offset));
5787 if (style < 0)
5788 RTX_FRAME_RELATED_P (insn) = 1;
5791 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5793 static rtx
5794 ix86_internal_arg_pointer (void)
5796 bool has_force_align_arg_pointer =
5797 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5798 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5799 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5800 && DECL_NAME (current_function_decl)
5801 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5802 && DECL_FILE_SCOPE_P (current_function_decl))
5803 || ix86_force_align_arg_pointer
5804 || has_force_align_arg_pointer)
5806 /* Nested functions can't realign the stack due to a register
5807 conflict. */
5808 if (DECL_CONTEXT (current_function_decl)
5809 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5811 if (ix86_force_align_arg_pointer)
5812 warning (0, "-mstackrealign ignored for nested functions");
5813 if (has_force_align_arg_pointer)
5814 error ("%s not supported for nested functions",
5815 ix86_force_align_arg_pointer_string);
5816 return virtual_incoming_args_rtx;
5818 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5819 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5821 else
5822 return virtual_incoming_args_rtx;
5825 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5826 This is called from dwarf2out.c to emit call frame instructions
5827 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5828 static void
5829 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5831 rtx unspec = SET_SRC (pattern);
5832 gcc_assert (GET_CODE (unspec) == UNSPEC);
5834 switch (index)
5836 case UNSPEC_REG_SAVE:
5837 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5838 SET_DEST (pattern));
5839 break;
5840 case UNSPEC_DEF_CFA:
5841 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5842 INTVAL (XVECEXP (unspec, 0, 0)));
5843 break;
5844 default:
5845 gcc_unreachable ();
5849 /* Expand the prologue into a bunch of separate insns. */
5851 void
5852 ix86_expand_prologue (void)
5854 rtx insn;
5855 bool pic_reg_used;
5856 struct ix86_frame frame;
5857 HOST_WIDE_INT allocate;
5859 ix86_compute_frame_layout (&frame);
5861 if (cfun->machine->force_align_arg_pointer)
5863 rtx x, y;
5865 /* Grab the argument pointer. */
5866 x = plus_constant (stack_pointer_rtx, 4);
5867 y = cfun->machine->force_align_arg_pointer;
5868 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5869 RTX_FRAME_RELATED_P (insn) = 1;
5871 /* The unwind info consists of two parts: install the fafp as the cfa,
5872 and record the fafp as the "save register" of the stack pointer.
5873 The latter is there so that the unwinder can see where it
5874 should restore the stack pointer across the stack-aligning and insn. */
5875 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5876 x = gen_rtx_SET (VOIDmode, y, x);
5877 RTX_FRAME_RELATED_P (x) = 1;
5878 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5879 UNSPEC_REG_SAVE);
5880 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5881 RTX_FRAME_RELATED_P (y) = 1;
5882 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5883 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5884 REG_NOTES (insn) = x;
5886 /* Align the stack. */
5887 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5888 GEN_INT (-16)));
5890 /* And here we cheat like madmen with the unwind info. We force the
5891 cfa register back to sp+4, which is exactly what it was at the
5892 start of the function. Re-pushing the return address results in
5893 the return at the same spot relative to the cfa, and thus is
5894 correct wrt the unwind info. */
5895 x = cfun->machine->force_align_arg_pointer;
5896 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5897 insn = emit_insn (gen_push (x));
5898 RTX_FRAME_RELATED_P (insn) = 1;
5900 x = GEN_INT (4);
5901 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5902 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5903 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5904 REG_NOTES (insn) = x;
5907 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5908 slower on all targets. Also sdb doesn't like it. */
5910 if (frame_pointer_needed)
5912 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5913 RTX_FRAME_RELATED_P (insn) = 1;
5915 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5916 RTX_FRAME_RELATED_P (insn) = 1;
5919 allocate = frame.to_allocate;
5921 if (!frame.save_regs_using_mov)
5922 ix86_emit_save_regs ();
5923 else
5924 allocate += frame.nregs * UNITS_PER_WORD;
5926 /* When using the red zone we may start saving registers before allocating
5927 the stack frame, saving one cycle of the prologue. */
5928 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5929 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5930 : stack_pointer_rtx,
5931 -frame.nregs * UNITS_PER_WORD);
5933 if (allocate == 0)
5935 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5936 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5937 GEN_INT (-allocate), -1);
5938 else
5940 /* Only valid for Win32. */
5941 rtx eax = gen_rtx_REG (SImode, 0);
5942 bool eax_live = ix86_eax_live_at_start_p ();
5943 rtx t;
5945 gcc_assert (!TARGET_64BIT);
5947 if (eax_live)
5949 emit_insn (gen_push (eax));
5950 allocate -= 4;
5953 emit_move_insn (eax, GEN_INT (allocate));
5955 insn = emit_insn (gen_allocate_stack_worker (eax));
5956 RTX_FRAME_RELATED_P (insn) = 1;
5957 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5958 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5959 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5960 t, REG_NOTES (insn));
5962 if (eax_live)
5964 if (frame_pointer_needed)
5965 t = plus_constant (hard_frame_pointer_rtx,
5966 allocate
5967 - frame.to_allocate
5968 - frame.nregs * UNITS_PER_WORD);
5969 else
5970 t = plus_constant (stack_pointer_rtx, allocate);
5971 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5975 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5977 if (!frame_pointer_needed || !frame.to_allocate)
5978 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5979 else
5980 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5981 -frame.nregs * UNITS_PER_WORD);
5984 pic_reg_used = false;
5985 if (pic_offset_table_rtx
5986 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5987 || current_function_profile))
5989 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5991 if (alt_pic_reg_used != INVALID_REGNUM)
5992 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5994 pic_reg_used = true;
5997 if (pic_reg_used)
5999 if (TARGET_64BIT)
6001 if (ix86_cmodel == CM_LARGE_PIC)
6003 rtx tmp_reg = gen_rtx_REG (DImode,
6004 FIRST_REX_INT_REG + 3 /* R11 */);
6005 rtx label = gen_label_rtx ();
6006 emit_label (label);
6007 LABEL_PRESERVE_P (label) = 1;
6008 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6009 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6010 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6011 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6012 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6013 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6014 pic_offset_table_rtx, tmp_reg));
6016 else
6017 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6019 else
6020 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6022 /* Even with accurate pre-reload life analysis, we can wind up
6023 deleting all references to the pic register after reload.
6024 Consider the case where cross-jumping unifies two sides of a branch
6025 controlled by a comparison vs. the only read from a global.
6026 In that case, allow the set_got to be deleted, though we're
6027 too late to do anything about the ebx save in the prologue. */
6028 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6031 /* Prevent function calls from being scheduled before the call to mcount.
6032 In the pic_reg_used case, make sure that the got load isn't deleted. */
6033 if (current_function_profile)
6034 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6037 /* Emit code to restore saved registers using MOV insns. First register
6038 is restored from POINTER + OFFSET. */
6039 static void
6040 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6041 int maybe_eh_return)
6043 int regno;
6044 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6046 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6047 if (ix86_save_reg (regno, maybe_eh_return))
6049 /* Ensure that adjust_address won't be forced to produce a pointer
6050 outside the range allowed by the x86-64 instruction set. */
6051 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6053 rtx r11;
6055 r11 = gen_rtx_REG (DImode, R11_REG);
6056 emit_move_insn (r11, GEN_INT (offset));
6057 emit_insn (gen_adddi3 (r11, r11, pointer));
6058 base_address = gen_rtx_MEM (Pmode, r11);
6059 offset = 0;
6061 emit_move_insn (gen_rtx_REG (Pmode, regno),
6062 adjust_address (base_address, Pmode, offset));
6063 offset += UNITS_PER_WORD;
6067 /* Restore function stack, frame, and registers. */
6069 void
6070 ix86_expand_epilogue (int style)
6072 int regno;
6073 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6074 struct ix86_frame frame;
6075 HOST_WIDE_INT offset;
6077 ix86_compute_frame_layout (&frame);
6079 /* Calculate start of saved registers relative to ebp. Special care
6080 must be taken for the normal return case of a function using
6081 eh_return: the eax and edx registers are marked as saved, but not
6082 restored along this path. */
6083 offset = frame.nregs;
6084 if (current_function_calls_eh_return && style != 2)
6085 offset -= 2;
6086 offset *= -UNITS_PER_WORD;
6088 /* If we're only restoring one register and sp is not valid, then
6089 use a move instruction to restore the register, since it's
6090 less work than reloading sp and popping the register.
6092 The default code results in a stack adjustment using an add/lea instruction,
6093 while this code results in a LEAVE instruction (or discrete equivalent),
6094 so it is profitable in some other cases as well, especially when there
6095 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6096 and there is exactly one register to pop. This heuristic may need some
6097 tuning in the future. */
6098 if ((!sp_valid && frame.nregs <= 1)
6099 || (TARGET_EPILOGUE_USING_MOVE
6100 && cfun->machine->use_fast_prologue_epilogue
6101 && (frame.nregs > 1 || frame.to_allocate))
6102 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6103 || (frame_pointer_needed && TARGET_USE_LEAVE
6104 && cfun->machine->use_fast_prologue_epilogue
6105 && frame.nregs == 1)
6106 || current_function_calls_eh_return)
6108 /* Restore registers. We can use ebp or esp to address the memory
6109 locations. If both are available, default to ebp, since offsets
6110 are known to be small. The only exception is esp pointing directly to
6111 the end of the block of saved registers, where we may simplify the
6112 addressing mode. */
6114 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6115 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6116 frame.to_allocate, style == 2);
6117 else
6118 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6119 offset, style == 2);
6121 /* eh_return epilogues need %ecx added to the stack pointer. */
6122 if (style == 2)
6124 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6126 if (frame_pointer_needed)
6128 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6129 tmp = plus_constant (tmp, UNITS_PER_WORD);
6130 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6132 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6133 emit_move_insn (hard_frame_pointer_rtx, tmp);
6135 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6136 const0_rtx, style);
6138 else
6140 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6141 tmp = plus_constant (tmp, (frame.to_allocate
6142 + frame.nregs * UNITS_PER_WORD));
6143 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6146 else if (!frame_pointer_needed)
6147 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6148 GEN_INT (frame.to_allocate
6149 + frame.nregs * UNITS_PER_WORD),
6150 style);
6151 /* If not an i386, mov & pop is faster than "leave". */
6152 else if (TARGET_USE_LEAVE || optimize_size
6153 || !cfun->machine->use_fast_prologue_epilogue)
6154 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6155 else
6157 pro_epilogue_adjust_stack (stack_pointer_rtx,
6158 hard_frame_pointer_rtx,
6159 const0_rtx, style);
6160 if (TARGET_64BIT)
6161 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6162 else
6163 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6166 else
6168 /* First step is to deallocate the stack frame so that we can
6169 pop the registers. */
6170 if (!sp_valid)
6172 gcc_assert (frame_pointer_needed);
6173 pro_epilogue_adjust_stack (stack_pointer_rtx,
6174 hard_frame_pointer_rtx,
6175 GEN_INT (offset), style);
6177 else if (frame.to_allocate)
6178 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6179 GEN_INT (frame.to_allocate), style);
6181 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6182 if (ix86_save_reg (regno, false))
6184 if (TARGET_64BIT)
6185 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6186 else
6187 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6189 if (frame_pointer_needed)
6191 /* Leave results in shorter dependency chains on CPUs that are
6192 able to grok it fast. */
6193 if (TARGET_USE_LEAVE)
6194 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6195 else if (TARGET_64BIT)
6196 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6197 else
6198 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6202 if (cfun->machine->force_align_arg_pointer)
6204 emit_insn (gen_addsi3 (stack_pointer_rtx,
6205 cfun->machine->force_align_arg_pointer,
6206 GEN_INT (-4)));
6209 /* Sibcall epilogues don't want a return instruction. */
6210 if (style == 0)
6211 return;
6213 if (current_function_pops_args && current_function_args_size)
6215 rtx popc = GEN_INT (current_function_pops_args);
6217 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6218 return address, do an explicit add, and jump indirectly to the
6219 caller. */
6221 if (current_function_pops_args >= 65536)
6223 rtx ecx = gen_rtx_REG (SImode, 2);
6225 /* There is no "pascal" calling convention in the 64-bit ABI. */
6226 gcc_assert (!TARGET_64BIT);
6228 emit_insn (gen_popsi1 (ecx));
6229 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6230 emit_jump_insn (gen_return_indirect_internal (ecx));
6232 else
6233 emit_jump_insn (gen_return_pop_internal (popc));
6235 else
6236 emit_jump_insn (gen_return_internal ());
6239 /* Reset from the function's potential modifications. */
6241 static void
6242 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6243 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6245 if (pic_offset_table_rtx)
6246 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6247 #if TARGET_MACHO
6248 /* Mach-O doesn't support labels at the end of objects, so if
6249 it looks like we might want one, insert a NOP. */
6251 rtx insn = get_last_insn ();
6252 while (insn
6253 && NOTE_P (insn)
6254 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6255 insn = PREV_INSN (insn);
6256 if (insn
6257 && (LABEL_P (insn)
6258 || (NOTE_P (insn)
6259 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6260 fputs ("\tnop\n", file);
6262 #endif
6266 /* Extract the parts of an RTL expression that is a valid memory address
6267 for an instruction. Return 0 if the structure of the address is
6268 grossly off. Return -1 if the address contains an ASHIFT, so it is not
6269 strictly valid, but is still used for computing the length of a lea insn. */
6272 ix86_decompose_address (rtx addr, struct ix86_address *out)
6274 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6275 rtx base_reg, index_reg;
6276 HOST_WIDE_INT scale = 1;
6277 rtx scale_rtx = NULL_RTX;
6278 int retval = 1;
6279 enum ix86_address_seg seg = SEG_DEFAULT;
6281 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6282 base = addr;
6283 else if (GET_CODE (addr) == PLUS)
6285 rtx addends[4], op;
6286 int n = 0, i;
6288 op = addr;
6291 if (n >= 4)
6292 return 0;
6293 addends[n++] = XEXP (op, 1);
6294 op = XEXP (op, 0);
6296 while (GET_CODE (op) == PLUS);
6297 if (n >= 4)
6298 return 0;
6299 addends[n] = op;
6301 for (i = n; i >= 0; --i)
6303 op = addends[i];
6304 switch (GET_CODE (op))
6306 case MULT:
6307 if (index)
6308 return 0;
6309 index = XEXP (op, 0);
6310 scale_rtx = XEXP (op, 1);
6311 break;
6313 case UNSPEC:
6314 if (XINT (op, 1) == UNSPEC_TP
6315 && TARGET_TLS_DIRECT_SEG_REFS
6316 && seg == SEG_DEFAULT)
6317 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6318 else
6319 return 0;
6320 break;
6322 case REG:
6323 case SUBREG:
6324 if (!base)
6325 base = op;
6326 else if (!index)
6327 index = op;
6328 else
6329 return 0;
6330 break;
6332 case CONST:
6333 case CONST_INT:
6334 case SYMBOL_REF:
6335 case LABEL_REF:
6336 if (disp)
6337 return 0;
6338 disp = op;
6339 break;
6341 default:
6342 return 0;
6346 else if (GET_CODE (addr) == MULT)
6348 index = XEXP (addr, 0); /* index*scale */
6349 scale_rtx = XEXP (addr, 1);
6351 else if (GET_CODE (addr) == ASHIFT)
6353 rtx tmp;
6355 /* We're called for lea too, which implements ashift on occasion. */
6356 index = XEXP (addr, 0);
6357 tmp = XEXP (addr, 1);
6358 if (!CONST_INT_P (tmp))
6359 return 0;
6360 scale = INTVAL (tmp);
6361 if ((unsigned HOST_WIDE_INT) scale > 3)
6362 return 0;
6363 scale = 1 << scale;
6364 retval = -1;
6366 else
6367 disp = addr; /* displacement */
6369 /* Extract the integral value of scale. */
6370 if (scale_rtx)
6372 if (!CONST_INT_P (scale_rtx))
6373 return 0;
6374 scale = INTVAL (scale_rtx);
6377 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6378 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6380 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6381 if (base_reg && index_reg && scale == 1
6382 && (index_reg == arg_pointer_rtx
6383 || index_reg == frame_pointer_rtx
6384 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6386 rtx tmp;
6387 tmp = base, base = index, index = tmp;
6388 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6391 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6392 if ((base_reg == hard_frame_pointer_rtx
6393 || base_reg == frame_pointer_rtx
6394 || base_reg == arg_pointer_rtx) && !disp)
6395 disp = const0_rtx;
6397 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6398 Avoid this by transforming it to [%esi+0]. */
6399 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6400 && base_reg && !index_reg && !disp
6401 && REG_P (base_reg)
6402 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6403 disp = const0_rtx;
6405 /* Special case: encode reg+reg instead of reg*2. */
6406 if (!base && index && scale && scale == 2)
6407 base = index, base_reg = index_reg, scale = 1;
6409 /* Special case: scaling cannot be encoded without base or displacement. */
6410 if (!base && !disp && index && scale != 1)
6411 disp = const0_rtx;
6413 out->base = base;
6414 out->index = index;
6415 out->disp = disp;
6416 out->scale = scale;
6417 out->seg = seg;
6419 return retval;
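/* For example, an address such as (plus (plus (mult (reg %eax) (const_int 4))
   (reg %ebp)) (const_int 12)) decomposes into base = %ebp, index = %eax,
   scale = 4, disp = 12 and seg = SEG_DEFAULT, i.e. the operand
   12(%ebp,%eax,4).  */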
6422 /* Return the cost of the memory address X.
6423 For i386, it is better to use a complex address than let gcc copy
6424 the address into a reg and make a new pseudo. But not if the address
6425 requires two regs - that would mean more pseudos with longer
6426 lifetimes. */
6427 static int
6428 ix86_address_cost (rtx x)
6430 struct ix86_address parts;
6431 int cost = 1;
6432 int ok = ix86_decompose_address (x, &parts);
6434 gcc_assert (ok);
6436 if (parts.base && GET_CODE (parts.base) == SUBREG)
6437 parts.base = SUBREG_REG (parts.base);
6438 if (parts.index && GET_CODE (parts.index) == SUBREG)
6439 parts.index = SUBREG_REG (parts.index);
6441 /* More complex memory references are better. */
6442 if (parts.disp && parts.disp != const0_rtx)
6443 cost--;
6444 if (parts.seg != SEG_DEFAULT)
6445 cost--;
6447 /* Attempt to minimize number of registers in the address. */
6448 if ((parts.base
6449 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6450 || (parts.index
6451 && (!REG_P (parts.index)
6452 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6453 cost++;
6455 if (parts.base
6456 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6457 && parts.index
6458 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6459 && parts.base != parts.index)
6460 cost++;
6462 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6463 since its predecode logic can't detect the length of such instructions
6464 and decoding degenerates to the vector decoder. Increase the cost of such
6465 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6466 to split such addresses or even to refuse them entirely.
6468 The following addressing modes are affected:
6469 [base+scale*index]
6470 [scale*index+disp]
6471 [base+index]
6473 The first and last case may be avoidable by explicitly coding a zero
6474 displacement in the memory address, but I don't have an AMD K6 machine
6475 handy to check this theory. */
6477 if (TARGET_K6
6478 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6479 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6480 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6481 cost += 10;
6483 return cost;
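/* On K6, for instance, the address (%esi,%edi,2) (base+scale*index, no
   displacement) receives the extra cost above, whereas 4(%esi,%edi,2) does
   not, because the explicit displacement avoids the 00_xxx_100b ModR/M
   encoding.  */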
6486 /* If X is a machine specific address (i.e. a symbol or label being
6487 referenced as a displacement from the GOT implemented using an
6488 UNSPEC), then return the base term. Otherwise return X. */
6491 ix86_find_base_term (rtx x)
6493 rtx term;
6495 if (TARGET_64BIT)
6497 if (GET_CODE (x) != CONST)
6498 return x;
6499 term = XEXP (x, 0);
6500 if (GET_CODE (term) == PLUS
6501 && (CONST_INT_P (XEXP (term, 1))
6502 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6503 term = XEXP (term, 0);
6504 if (GET_CODE (term) != UNSPEC
6505 || XINT (term, 1) != UNSPEC_GOTPCREL)
6506 return x;
6508 term = XVECEXP (term, 0, 0);
6510 if (GET_CODE (term) != SYMBOL_REF
6511 && GET_CODE (term) != LABEL_REF)
6512 return x;
6514 return term;
6517 term = ix86_delegitimize_address (x);
6519 if (GET_CODE (term) != SYMBOL_REF
6520 && GET_CODE (term) != LABEL_REF)
6521 return x;
6523 return term;
6526 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6527 this is used to form addresses to local data when -fPIC is in
6528 use. */
6530 static bool
6531 darwin_local_data_pic (rtx disp)
6533 if (GET_CODE (disp) == MINUS)
6535 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6536 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6537 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6539 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6540 if (! strcmp (sym_name, "<pic base>"))
6541 return true;
6545 return false;
6548 /* Determine if a given RTX is a valid constant. We already know this
6549 satisfies CONSTANT_P. */
6551 bool
6552 legitimate_constant_p (rtx x)
6554 switch (GET_CODE (x))
6556 case CONST:
6557 x = XEXP (x, 0);
6559 if (GET_CODE (x) == PLUS)
6561 if (!CONST_INT_P (XEXP (x, 1)))
6562 return false;
6563 x = XEXP (x, 0);
6566 if (TARGET_MACHO && darwin_local_data_pic (x))
6567 return true;
6569 /* Only some unspecs are valid as "constants". */
6570 if (GET_CODE (x) == UNSPEC)
6571 switch (XINT (x, 1))
6573 case UNSPEC_GOT:
6574 case UNSPEC_GOTOFF:
6575 case UNSPEC_PLTOFF:
6576 return TARGET_64BIT;
6577 case UNSPEC_TPOFF:
6578 case UNSPEC_NTPOFF:
6579 x = XVECEXP (x, 0, 0);
6580 return (GET_CODE (x) == SYMBOL_REF
6581 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6582 case UNSPEC_DTPOFF:
6583 x = XVECEXP (x, 0, 0);
6584 return (GET_CODE (x) == SYMBOL_REF
6585 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6586 default:
6587 return false;
6590 /* We must have drilled down to a symbol. */
6591 if (GET_CODE (x) == LABEL_REF)
6592 return true;
6593 if (GET_CODE (x) != SYMBOL_REF)
6594 return false;
6595 /* FALLTHRU */
6597 case SYMBOL_REF:
6598 /* TLS symbols are never valid. */
6599 if (SYMBOL_REF_TLS_MODEL (x))
6600 return false;
6601 break;
6603 case CONST_DOUBLE:
6604 if (GET_MODE (x) == TImode
6605 && x != CONST0_RTX (TImode)
6606 && !TARGET_64BIT)
6607 return false;
6608 break;
6610 case CONST_VECTOR:
6611 if (x == CONST0_RTX (GET_MODE (x)))
6612 return true;
6613 return false;
6615 default:
6616 break;
6619 /* Otherwise we handle everything else in the move patterns. */
6620 return true;
6623 /* Determine if it's legal to put X into the constant pool. This
6624 is not possible for the address of thread-local symbols, which
6625 is checked above. */
6627 static bool
6628 ix86_cannot_force_const_mem (rtx x)
6630 /* We can always put integral constants and vectors in memory. */
6631 switch (GET_CODE (x))
6633 case CONST_INT:
6634 case CONST_DOUBLE:
6635 case CONST_VECTOR:
6636 return false;
6638 default:
6639 break;
6641 return !legitimate_constant_p (x);
6644 /* Determine if a given RTX is a valid constant address. */
6646 bool
6647 constant_address_p (rtx x)
6649 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6652 /* Nonzero if the constant value X is a legitimate general operand
6653 when generating PIC code. It is given that flag_pic is on and
6654 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6656 bool
6657 legitimate_pic_operand_p (rtx x)
6659 rtx inner;
6661 switch (GET_CODE (x))
6663 case CONST:
6664 inner = XEXP (x, 0);
6665 if (GET_CODE (inner) == PLUS
6666 && CONST_INT_P (XEXP (inner, 1)))
6667 inner = XEXP (inner, 0);
6669 /* Only some unspecs are valid as "constants". */
6670 if (GET_CODE (inner) == UNSPEC)
6671 switch (XINT (inner, 1))
6673 case UNSPEC_GOT:
6674 case UNSPEC_GOTOFF:
6675 case UNSPEC_PLTOFF:
6676 return TARGET_64BIT;
6677 case UNSPEC_TPOFF:
6678 x = XVECEXP (inner, 0, 0);
6679 return (GET_CODE (x) == SYMBOL_REF
6680 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6681 default:
6682 return false;
6684 /* FALLTHRU */
6686 case SYMBOL_REF:
6687 case LABEL_REF:
6688 return legitimate_pic_address_disp_p (x);
6690 default:
6691 return true;
6695 /* Determine if a given CONST RTX is a valid memory displacement
6696 in PIC mode. */
6699 legitimate_pic_address_disp_p (rtx disp)
6701 bool saw_plus;
6703 /* In 64bit mode we can allow direct addresses of symbols and labels
6704 when they are not dynamic symbols. */
6705 if (TARGET_64BIT)
6707 rtx op0 = disp, op1;
6709 switch (GET_CODE (disp))
6711 case LABEL_REF:
6712 return true;
6714 case CONST:
6715 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6716 break;
6717 op0 = XEXP (XEXP (disp, 0), 0);
6718 op1 = XEXP (XEXP (disp, 0), 1);
6719 if (!CONST_INT_P (op1)
6720 || INTVAL (op1) >= 16*1024*1024
6721 || INTVAL (op1) < -16*1024*1024)
6722 break;
6723 if (GET_CODE (op0) == LABEL_REF)
6724 return true;
6725 if (GET_CODE (op0) != SYMBOL_REF)
6726 break;
6727 /* FALLTHRU */
6729 case SYMBOL_REF:
6730 /* TLS references should always be enclosed in UNSPEC. */
6731 if (SYMBOL_REF_TLS_MODEL (op0))
6732 return false;
6733 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6734 && ix86_cmodel != CM_LARGE_PIC)
6735 return true;
6736 break;
6738 default:
6739 break;
6742 if (GET_CODE (disp) != CONST)
6743 return 0;
6744 disp = XEXP (disp, 0);
6746 if (TARGET_64BIT)
6748 /* It is not safe to allow PLUS expressions here; this limits the allowed
6749 distance to GOT table entries, but we should not need them anyway. */
6750 if (GET_CODE (disp) != UNSPEC
6751 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6752 && XINT (disp, 1) != UNSPEC_GOTOFF
6753 && XINT (disp, 1) != UNSPEC_PLTOFF))
6754 return 0;
6756 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6757 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6758 return 0;
6759 return 1;
6762 saw_plus = false;
6763 if (GET_CODE (disp) == PLUS)
6765 if (!CONST_INT_P (XEXP (disp, 1)))
6766 return 0;
6767 disp = XEXP (disp, 0);
6768 saw_plus = true;
6771 if (TARGET_MACHO && darwin_local_data_pic (disp))
6772 return 1;
6774 if (GET_CODE (disp) != UNSPEC)
6775 return 0;
6777 switch (XINT (disp, 1))
6779 case UNSPEC_GOT:
6780 if (saw_plus)
6781 return false;
6782 /* We need to check for both symbols and labels because VxWorks loads
6783 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6784 details. */
6785 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6786 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6787 case UNSPEC_GOTOFF:
6788 /* Refuse GOTOFF in 64-bit mode, since it is always 64 bits wide when used.
6789 The ABI also specifies a 32-bit relocation, but we don't produce it in
6790 the small PIC model at all. */
6791 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6792 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6793 && !TARGET_64BIT)
6794 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6795 return false;
6796 case UNSPEC_GOTTPOFF:
6797 case UNSPEC_GOTNTPOFF:
6798 case UNSPEC_INDNTPOFF:
6799 if (saw_plus)
6800 return false;
6801 disp = XVECEXP (disp, 0, 0);
6802 return (GET_CODE (disp) == SYMBOL_REF
6803 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6804 case UNSPEC_NTPOFF:
6805 disp = XVECEXP (disp, 0, 0);
6806 return (GET_CODE (disp) == SYMBOL_REF
6807 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6808 case UNSPEC_DTPOFF:
6809 disp = XVECEXP (disp, 0, 0);
6810 return (GET_CODE (disp) == SYMBOL_REF
6811 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6814 return 0;
6817 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6818 memory address for an instruction. The MODE argument is the machine mode
6819 for the MEM expression that wants to use this address.
6821 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6822 convert common non-canonical forms to canonical form so that they will
6823 be recognized. */
6826 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6828 struct ix86_address parts;
6829 rtx base, index, disp;
6830 HOST_WIDE_INT scale;
6831 const char *reason = NULL;
6832 rtx reason_rtx = NULL_RTX;
6834 if (TARGET_DEBUG_ADDR)
6836 fprintf (stderr,
6837 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6838 GET_MODE_NAME (mode), strict);
6839 debug_rtx (addr);
6842 if (ix86_decompose_address (addr, &parts) <= 0)
6844 reason = "decomposition failed";
6845 goto report_error;
6848 base = parts.base;
6849 index = parts.index;
6850 disp = parts.disp;
6851 scale = parts.scale;
6853 /* Validate the base register.
6855 Don't allow SUBREGs that span more than a word here; that can lead to spill
6856 failures when the base is one word out of a two-word structure, which is
6857 represented internally as a DImode int. */
6859 if (base)
6861 rtx reg;
6862 reason_rtx = base;
6864 if (REG_P (base))
6865 reg = base;
6866 else if (GET_CODE (base) == SUBREG
6867 && REG_P (SUBREG_REG (base))
6868 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6869 <= UNITS_PER_WORD)
6870 reg = SUBREG_REG (base);
6871 else
6873 reason = "base is not a register";
6874 goto report_error;
6877 if (GET_MODE (base) != Pmode)
6879 reason = "base is not in Pmode";
6880 goto report_error;
6883 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6884 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6886 reason = "base is not valid";
6887 goto report_error;
6891 /* Validate the index register.
6893 Don't allow SUBREGs that span more than a word here -- same as above. */
6895 if (index)
6897 rtx reg;
6898 reason_rtx = index;
6900 if (REG_P (index))
6901 reg = index;
6902 else if (GET_CODE (index) == SUBREG
6903 && REG_P (SUBREG_REG (index))
6904 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6905 <= UNITS_PER_WORD)
6906 reg = SUBREG_REG (index);
6907 else
6909 reason = "index is not a register";
6910 goto report_error;
6913 if (GET_MODE (index) != Pmode)
6915 reason = "index is not in Pmode";
6916 goto report_error;
6919 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6920 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6922 reason = "index is not valid";
6923 goto report_error;
6927 /* Validate scale factor. */
6928 if (scale != 1)
6930 reason_rtx = GEN_INT (scale);
6931 if (!index)
6933 reason = "scale without index";
6934 goto report_error;
6937 if (scale != 2 && scale != 4 && scale != 8)
6939 reason = "scale is not a valid multiplier";
6940 goto report_error;
6944 /* Validate displacement. */
6945 if (disp)
6947 reason_rtx = disp;
6949 if (GET_CODE (disp) == CONST
6950 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6951 switch (XINT (XEXP (disp, 0), 1))
6953 /* Refuse GOTOFF and GOT in 64-bit mode, since they are always 64 bits
6954 wide when used. The ABI also specifies 32-bit relocations, but we don't
6955 produce them at all and use IP-relative addressing instead. */
6956 case UNSPEC_GOT:
6957 case UNSPEC_GOTOFF:
6958 gcc_assert (flag_pic);
6959 if (!TARGET_64BIT)
6960 goto is_legitimate_pic;
6961 reason = "64bit address unspec";
6962 goto report_error;
6964 case UNSPEC_GOTPCREL:
6965 gcc_assert (flag_pic);
6966 goto is_legitimate_pic;
6968 case UNSPEC_GOTTPOFF:
6969 case UNSPEC_GOTNTPOFF:
6970 case UNSPEC_INDNTPOFF:
6971 case UNSPEC_NTPOFF:
6972 case UNSPEC_DTPOFF:
6973 break;
6975 default:
6976 reason = "invalid address unspec";
6977 goto report_error;
6980 else if (SYMBOLIC_CONST (disp)
6981 && (flag_pic
6982 || (TARGET_MACHO
6983 #if TARGET_MACHO
6984 && MACHOPIC_INDIRECT
6985 && !machopic_operand_p (disp)
6986 #endif
6990 is_legitimate_pic:
6991 if (TARGET_64BIT && (index || base))
6993 /* foo@dtpoff(%rX) is ok. */
6994 if (GET_CODE (disp) != CONST
6995 || GET_CODE (XEXP (disp, 0)) != PLUS
6996 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6997 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6998 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6999 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7001 reason = "non-constant pic memory reference";
7002 goto report_error;
7005 else if (! legitimate_pic_address_disp_p (disp))
7007 reason = "displacement is an invalid pic construct";
7008 goto report_error;
7011 /* This code used to verify that a symbolic pic displacement
7012 includes the pic_offset_table_rtx register.
7014 While this is a good idea, unfortunately these constructs may
7015 be created by the "adds using lea" optimization for incorrect
7016 code like:
7018 int a;
7019 int foo(int i)
7021 return *(&a+i);
7024 This code is nonsensical, but results in addressing the
7025 GOT table with pic_offset_table_rtx as the base. We can't
7026 just refuse it easily, since it gets matched by the
7027 "addsi3" pattern, which later gets split to lea when the
7028 output register differs from the input. While this
7029 could be handled by a separate addsi pattern for this case
7030 that never results in lea, disabling this test seems to be
7031 the easier and correct fix for the crash. */
7033 else if (GET_CODE (disp) != LABEL_REF
7034 && !CONST_INT_P (disp)
7035 && (GET_CODE (disp) != CONST
7036 || !legitimate_constant_p (disp))
7037 && (GET_CODE (disp) != SYMBOL_REF
7038 || !legitimate_constant_p (disp)))
7040 reason = "displacement is not constant";
7041 goto report_error;
7043 else if (TARGET_64BIT
7044 && !x86_64_immediate_operand (disp, VOIDmode))
7046 reason = "displacement is out of range";
7047 goto report_error;
7051 /* Everything looks valid. */
7052 if (TARGET_DEBUG_ADDR)
7053 fprintf (stderr, "Success.\n");
7054 return TRUE;
7056 report_error:
7057 if (TARGET_DEBUG_ADDR)
7059 fprintf (stderr, "Error: %s\n", reason);
7060 debug_rtx (reason_rtx);
7062 return FALSE;
7065 /* Return a unique alias set for the GOT. */
7067 static HOST_WIDE_INT
7068 ix86_GOT_alias_set (void)
7070 static HOST_WIDE_INT set = -1;
7071 if (set == -1)
7072 set = new_alias_set ();
7073 return set;
7076 /* Return a legitimate reference for ORIG (an address) using the
7077 register REG. If REG is 0, a new pseudo is generated.
7079 There are two types of references that must be handled:
7081 1. Global data references must load the address from the GOT, via
7082 the PIC reg. An insn is emitted to do this load, and the reg is
7083 returned.
7085 2. Static data references, constant pool addresses, and code labels
7086 compute the address as an offset from the GOT, whose base is in
7087 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7088 differentiate them from global data objects. The returned
7089 address is the PIC reg + an unspec constant.
7091 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7092 reg also appears in the address. */
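/* For illustration (not exhaustive), the two shapes produced below are
   roughly:
     global data:  (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))
     local data:   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
   where pic_reg stands for pic_offset_table_rtx.  */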
7094 static rtx
7095 legitimize_pic_address (rtx orig, rtx reg)
7097 rtx addr = orig;
7098 rtx new = orig;
7099 rtx base;
7101 #if TARGET_MACHO
7102 if (TARGET_MACHO && !TARGET_64BIT)
7104 if (reg == 0)
7105 reg = gen_reg_rtx (Pmode);
7106 /* Use the generic Mach-O PIC machinery. */
7107 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7109 #endif
7111 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7112 new = addr;
7113 else if (TARGET_64BIT
7114 && ix86_cmodel != CM_SMALL_PIC
7115 && gotoff_operand (addr, Pmode))
7117 rtx tmpreg;
7118 /* This symbol may be referenced via a displacement from the PIC
7119 base address (@GOTOFF). */
7121 if (reload_in_progress)
7122 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7123 if (GET_CODE (addr) == CONST)
7124 addr = XEXP (addr, 0);
7125 if (GET_CODE (addr) == PLUS)
7127 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7128 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7130 else
7131 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7132 new = gen_rtx_CONST (Pmode, new);
7133 if (!reg)
7134 tmpreg = gen_reg_rtx (Pmode);
7135 else
7136 tmpreg = reg;
7137 emit_move_insn (tmpreg, new);
7139 if (reg != 0)
7141 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7142 tmpreg, 1, OPTAB_DIRECT);
7143 new = reg;
7145 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7147 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7149 /* This symbol may be referenced via a displacement from the PIC
7150 base address (@GOTOFF). */
7152 if (reload_in_progress)
7153 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7154 if (GET_CODE (addr) == CONST)
7155 addr = XEXP (addr, 0);
7156 if (GET_CODE (addr) == PLUS)
7158 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7159 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7161 else
7162 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7163 new = gen_rtx_CONST (Pmode, new);
7164 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7166 if (reg != 0)
7168 emit_move_insn (reg, new);
7169 new = reg;
7172 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7173 /* We can't use @GOTOFF for text labels on VxWorks;
7174 see gotoff_operand. */
7175 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7177 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7179 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7180 new = gen_rtx_CONST (Pmode, new);
7181 new = gen_const_mem (Pmode, new);
7182 set_mem_alias_set (new, ix86_GOT_alias_set ());
7184 if (reg == 0)
7185 reg = gen_reg_rtx (Pmode);
7186 /* Use gen_movsi directly, otherwise the address is loaded
7187 into a register for CSE. We don't want to CSE these addresses;
7188 instead we CSE addresses from the GOT table, so skip this. */
7189 emit_insn (gen_movsi (reg, new));
7190 new = reg;
7192 else
7194 /* This symbol must be referenced via a load from the
7195 Global Offset Table (@GOT). */
7197 if (reload_in_progress)
7198 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7199 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7200 new = gen_rtx_CONST (Pmode, new);
7201 if (TARGET_64BIT)
7202 new = force_reg (Pmode, new);
7203 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7204 new = gen_const_mem (Pmode, new);
7205 set_mem_alias_set (new, ix86_GOT_alias_set ());
7207 if (reg == 0)
7208 reg = gen_reg_rtx (Pmode);
7209 emit_move_insn (reg, new);
7210 new = reg;
7213 else
7215 if (CONST_INT_P (addr)
7216 && !x86_64_immediate_operand (addr, VOIDmode))
7218 if (reg)
7220 emit_move_insn (reg, addr);
7221 new = reg;
7223 else
7224 new = force_reg (Pmode, addr);
7226 else if (GET_CODE (addr) == CONST)
7228 addr = XEXP (addr, 0);
7230 /* We must match stuff we generate before. Assume the only
7231 unspecs that can get here are ours. Not that we could do
7232 anything with them anyway.... */
7233 if (GET_CODE (addr) == UNSPEC
7234 || (GET_CODE (addr) == PLUS
7235 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7236 return orig;
7237 gcc_assert (GET_CODE (addr) == PLUS);
7239 if (GET_CODE (addr) == PLUS)
7241 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7243 /* Check first to see if this is a constant offset from a @GOTOFF
7244 symbol reference. */
7245 if (gotoff_operand (op0, Pmode)
7246 && CONST_INT_P (op1))
7248 if (!TARGET_64BIT)
7250 if (reload_in_progress)
7251 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7252 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7253 UNSPEC_GOTOFF);
7254 new = gen_rtx_PLUS (Pmode, new, op1);
7255 new = gen_rtx_CONST (Pmode, new);
7256 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7258 if (reg != 0)
7260 emit_move_insn (reg, new);
7261 new = reg;
7264 else
7266 if (INTVAL (op1) < -16*1024*1024
7267 || INTVAL (op1) >= 16*1024*1024)
7269 if (!x86_64_immediate_operand (op1, Pmode))
7270 op1 = force_reg (Pmode, op1);
7271 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7275 else
7277 base = legitimize_pic_address (XEXP (addr, 0), reg);
7278 new = legitimize_pic_address (XEXP (addr, 1),
7279 base == reg ? NULL_RTX : reg);
7281 if (CONST_INT_P (new))
7282 new = plus_constant (base, INTVAL (new));
7283 else
7285 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7287 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7288 new = XEXP (new, 1);
7290 new = gen_rtx_PLUS (Pmode, base, new);
7295 return new;
7298 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7300 static rtx
7301 get_thread_pointer (int to_reg)
7303 rtx tp, reg, insn;
7305 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7306 if (!to_reg)
7307 return tp;
7309 reg = gen_reg_rtx (Pmode);
7310 insn = gen_rtx_SET (VOIDmode, reg, tp);
7311 insn = emit_insn (insn);
7313 return reg;
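/* A note on UNSPEC_TP, assuming the usual GNU/Linux configuration: the
   constant is matched by the thread-pointer load patterns in i386.md and
   ends up as an access through the %gs segment in 32-bit mode or the %fs
   segment in 64-bit mode.  */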
7316 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7317 false if we expect this to be used for a memory address and true if
7318 we expect to load the address into a register. */
7320 static rtx
7321 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7323 rtx dest, base, off, pic, tp;
7324 int type;
7326 switch (model)
7328 case TLS_MODEL_GLOBAL_DYNAMIC:
7329 dest = gen_reg_rtx (Pmode);
7330 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7332 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7334 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7336 start_sequence ();
7337 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7338 insns = get_insns ();
7339 end_sequence ();
7341 emit_libcall_block (insns, dest, rax, x);
7343 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7344 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7345 else
7346 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7348 if (TARGET_GNU2_TLS)
7350 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7352 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7354 break;
7356 case TLS_MODEL_LOCAL_DYNAMIC:
7357 base = gen_reg_rtx (Pmode);
7358 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7360 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7362 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7364 start_sequence ();
7365 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7366 insns = get_insns ();
7367 end_sequence ();
7369 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7370 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7371 emit_libcall_block (insns, base, rax, note);
7373 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7374 emit_insn (gen_tls_local_dynamic_base_64 (base));
7375 else
7376 emit_insn (gen_tls_local_dynamic_base_32 (base));
7378 if (TARGET_GNU2_TLS)
7380 rtx x = ix86_tls_module_base ();
7382 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7383 gen_rtx_MINUS (Pmode, x, tp));
7386 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7387 off = gen_rtx_CONST (Pmode, off);
7389 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7391 if (TARGET_GNU2_TLS)
7393 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7395 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7398 break;
7400 case TLS_MODEL_INITIAL_EXEC:
7401 if (TARGET_64BIT)
7403 pic = NULL;
7404 type = UNSPEC_GOTNTPOFF;
7406 else if (flag_pic)
7408 if (reload_in_progress)
7409 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7410 pic = pic_offset_table_rtx;
7411 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7413 else if (!TARGET_ANY_GNU_TLS)
7415 pic = gen_reg_rtx (Pmode);
7416 emit_insn (gen_set_got (pic));
7417 type = UNSPEC_GOTTPOFF;
7419 else
7421 pic = NULL;
7422 type = UNSPEC_INDNTPOFF;
7425 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7426 off = gen_rtx_CONST (Pmode, off);
7427 if (pic)
7428 off = gen_rtx_PLUS (Pmode, pic, off);
7429 off = gen_const_mem (Pmode, off);
7430 set_mem_alias_set (off, ix86_GOT_alias_set ());
7432 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7434 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7435 off = force_reg (Pmode, off);
7436 return gen_rtx_PLUS (Pmode, base, off);
7438 else
7440 base = get_thread_pointer (true);
7441 dest = gen_reg_rtx (Pmode);
7442 emit_insn (gen_subsi3 (dest, base, off));
7444 break;
7446 case TLS_MODEL_LOCAL_EXEC:
7447 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7448 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7449 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7450 off = gen_rtx_CONST (Pmode, off);
7452 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7454 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7455 return gen_rtx_PLUS (Pmode, base, off);
7457 else
7459 base = get_thread_pointer (true);
7460 dest = gen_reg_rtx (Pmode);
7461 emit_insn (gen_subsi3 (dest, base, off));
7463 break;
7465 default:
7466 gcc_unreachable ();
7469 return dest;
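/* To make the cases above concrete: on 64-bit targets
   TLS_MODEL_INITIAL_EXEC returns the thread pointer plus an offset loaded
   from x@GOTTPOFF(%rip), while TLS_MODEL_LOCAL_EXEC returns
   (plus tp (const (unspec [x] UNSPEC_NTPOFF))), i.e. the thread pointer
   plus a link-time constant offset.  */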
7472 /* Try machine-dependent ways of modifying an illegitimate address
7473 to be legitimate. If we find one, return the new, valid address.
7474 This macro is used in only one place: `memory_address' in explow.c.
7476 OLDX is the address as it was before break_out_memory_refs was called.
7477 In some cases it is useful to look at this to decide what needs to be done.
7479 MODE and WIN are passed so that this macro can use
7480 GO_IF_LEGITIMATE_ADDRESS.
7482 It is always safe for this macro to do nothing. It exists to recognize
7483 opportunities to optimize the output.
7485 For the 80386, we handle X+REG by loading X into a register R and
7486 using R+REG. R will go in a general reg and indexing will be used.
7487 However, if REG is a broken-out memory address or multiplication,
7488 nothing needs to be done because REG can certainly go in a general reg.
7490 When -fpic is used, special handling is needed for symbolic references.
7491 See comments by legitimize_pic_address in i386.c for details. */
7494 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7496 int changed = 0;
7497 unsigned log;
7499 if (TARGET_DEBUG_ADDR)
7501 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7502 GET_MODE_NAME (mode));
7503 debug_rtx (x);
7506 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7507 if (log)
7508 return legitimize_tls_address (x, log, false);
7509 if (GET_CODE (x) == CONST
7510 && GET_CODE (XEXP (x, 0)) == PLUS
7511 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7512 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7514 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7515 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7518 if (flag_pic && SYMBOLIC_CONST (x))
7519 return legitimize_pic_address (x, 0);
7521 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7522 if (GET_CODE (x) == ASHIFT
7523 && CONST_INT_P (XEXP (x, 1))
7524 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7526 changed = 1;
7527 log = INTVAL (XEXP (x, 1));
7528 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7529 GEN_INT (1 << log));
7532 if (GET_CODE (x) == PLUS)
7534 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7536 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7537 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7538 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7540 changed = 1;
7541 log = INTVAL (XEXP (XEXP (x, 0), 1));
7542 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7543 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7544 GEN_INT (1 << log));
7547 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7548 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7549 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7551 changed = 1;
7552 log = INTVAL (XEXP (XEXP (x, 1), 1));
7553 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7554 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7555 GEN_INT (1 << log));
7558 /* Put multiply first if it isn't already. */
7559 if (GET_CODE (XEXP (x, 1)) == MULT)
7561 rtx tmp = XEXP (x, 0);
7562 XEXP (x, 0) = XEXP (x, 1);
7563 XEXP (x, 1) = tmp;
7564 changed = 1;
7567 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7568 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7569 created by virtual register instantiation, register elimination, and
7570 similar optimizations. */
7571 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7573 changed = 1;
7574 x = gen_rtx_PLUS (Pmode,
7575 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7576 XEXP (XEXP (x, 1), 0)),
7577 XEXP (XEXP (x, 1), 1));
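/* For example, (plus (mult %eax 4) (plus %ebx 8)) is rewritten here into
   (plus (plus (mult %eax 4) %ebx) 8), which matches the
   base + index*scale + displacement addressing form.  */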
7580 /* Canonicalize
7581 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7582 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7583 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7584 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7585 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7586 && CONSTANT_P (XEXP (x, 1)))
7588 rtx constant;
7589 rtx other = NULL_RTX;
7591 if (CONST_INT_P (XEXP (x, 1)))
7593 constant = XEXP (x, 1);
7594 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7596 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7598 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7599 other = XEXP (x, 1);
7601 else
7602 constant = 0;
7604 if (constant)
7606 changed = 1;
7607 x = gen_rtx_PLUS (Pmode,
7608 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7609 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7610 plus_constant (other, INTVAL (constant)));
7614 if (changed && legitimate_address_p (mode, x, FALSE))
7615 return x;
7617 if (GET_CODE (XEXP (x, 0)) == MULT)
7619 changed = 1;
7620 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7623 if (GET_CODE (XEXP (x, 1)) == MULT)
7625 changed = 1;
7626 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7629 if (changed
7630 && REG_P (XEXP (x, 1))
7631 && REG_P (XEXP (x, 0)))
7632 return x;
7634 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7636 changed = 1;
7637 x = legitimize_pic_address (x, 0);
7640 if (changed && legitimate_address_p (mode, x, FALSE))
7641 return x;
7643 if (REG_P (XEXP (x, 0)))
7645 rtx temp = gen_reg_rtx (Pmode);
7646 rtx val = force_operand (XEXP (x, 1), temp);
7647 if (val != temp)
7648 emit_move_insn (temp, val);
7650 XEXP (x, 1) = temp;
7651 return x;
7654 else if (REG_P (XEXP (x, 1)))
7656 rtx temp = gen_reg_rtx (Pmode);
7657 rtx val = force_operand (XEXP (x, 0), temp);
7658 if (val != temp)
7659 emit_move_insn (temp, val);
7661 XEXP (x, 0) = temp;
7662 return x;
7666 return x;
7669 /* Print an integer constant expression in assembler syntax. Addition
7670 and subtraction are the only arithmetic that may appear in these
7671 expressions. FILE is the stdio stream to write to, X is the rtx, and
7672 CODE is the operand print code from the output string. */
7674 static void
7675 output_pic_addr_const (FILE *file, rtx x, int code)
7677 char buf[256];
7679 switch (GET_CODE (x))
7681 case PC:
7682 gcc_assert (flag_pic);
7683 putc ('.', file);
7684 break;
7686 case SYMBOL_REF:
7687 if (! TARGET_MACHO || TARGET_64BIT)
7688 output_addr_const (file, x);
7689 else
7691 const char *name = XSTR (x, 0);
7693 /* Mark the decl as referenced so that cgraph will output the function. */
7694 if (SYMBOL_REF_DECL (x))
7695 mark_decl_referenced (SYMBOL_REF_DECL (x));
7697 #if TARGET_MACHO
7698 if (MACHOPIC_INDIRECT
7699 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7700 name = machopic_indirection_name (x, /*stub_p=*/true);
7701 #endif
7702 assemble_name (file, name);
7704 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7705 fputs ("@PLT", file);
7706 break;
7708 case LABEL_REF:
7709 x = XEXP (x, 0);
7710 /* FALLTHRU */
7711 case CODE_LABEL:
7712 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7713 assemble_name (asm_out_file, buf);
7714 break;
7716 case CONST_INT:
7717 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7718 break;
7720 case CONST:
7721 /* This used to output parentheses around the expression,
7722 but that does not work on the 386 (either ATT or BSD assembler). */
7723 output_pic_addr_const (file, XEXP (x, 0), code);
7724 break;
7726 case CONST_DOUBLE:
7727 if (GET_MODE (x) == VOIDmode)
7729 /* We can use %d if the number is <32 bits and positive. */
7730 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7731 fprintf (file, "0x%lx%08lx",
7732 (unsigned long) CONST_DOUBLE_HIGH (x),
7733 (unsigned long) CONST_DOUBLE_LOW (x));
7734 else
7735 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7737 else
7738 /* We can't handle floating point constants;
7739 PRINT_OPERAND must handle them. */
7740 output_operand_lossage ("floating constant misused");
7741 break;
7743 case PLUS:
7744 /* Some assemblers need integer constants to appear first. */
7745 if (CONST_INT_P (XEXP (x, 0)))
7747 output_pic_addr_const (file, XEXP (x, 0), code);
7748 putc ('+', file);
7749 output_pic_addr_const (file, XEXP (x, 1), code);
7751 else
7753 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7754 output_pic_addr_const (file, XEXP (x, 1), code);
7755 putc ('+', file);
7756 output_pic_addr_const (file, XEXP (x, 0), code);
7758 break;
7760 case MINUS:
7761 if (!TARGET_MACHO)
7762 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7763 output_pic_addr_const (file, XEXP (x, 0), code);
7764 putc ('-', file);
7765 output_pic_addr_const (file, XEXP (x, 1), code);
7766 if (!TARGET_MACHO)
7767 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7768 break;
7770 case UNSPEC:
7771 gcc_assert (XVECLEN (x, 0) == 1);
7772 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7773 switch (XINT (x, 1))
7775 case UNSPEC_GOT:
7776 fputs ("@GOT", file);
7777 break;
7778 case UNSPEC_GOTOFF:
7779 fputs ("@GOTOFF", file);
7780 break;
7781 case UNSPEC_PLTOFF:
7782 fputs ("@PLTOFF", file);
7783 break;
7784 case UNSPEC_GOTPCREL:
7785 fputs ("@GOTPCREL(%rip)", file);
7786 break;
7787 case UNSPEC_GOTTPOFF:
7788 /* FIXME: This might be @TPOFF in Sun ld too. */
7789 fputs ("@GOTTPOFF", file);
7790 break;
7791 case UNSPEC_TPOFF:
7792 fputs ("@TPOFF", file);
7793 break;
7794 case UNSPEC_NTPOFF:
7795 if (TARGET_64BIT)
7796 fputs ("@TPOFF", file);
7797 else
7798 fputs ("@NTPOFF", file);
7799 break;
7800 case UNSPEC_DTPOFF:
7801 fputs ("@DTPOFF", file);
7802 break;
7803 case UNSPEC_GOTNTPOFF:
7804 if (TARGET_64BIT)
7805 fputs ("@GOTTPOFF(%rip)", file);
7806 else
7807 fputs ("@GOTNTPOFF", file);
7808 break;
7809 case UNSPEC_INDNTPOFF:
7810 fputs ("@INDNTPOFF", file);
7811 break;
7812 default:
7813 output_operand_lossage ("invalid UNSPEC as operand");
7814 break;
7816 break;
7818 default:
7819 output_operand_lossage ("invalid expression as operand");
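/* As a worked example, x = (plus (unspec [foo] UNSPEC_GOTOFF) (const_int 4))
   is printed as "4+foo@GOTOFF": the UNSPEC supplies the @GOTOFF suffix and
   the PLUS case above puts the integer constant first.  */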
7823 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7824 We need to emit DTP-relative relocations. */
7826 static void
7827 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7829 fputs (ASM_LONG, file);
7830 output_addr_const (file, x);
7831 fputs ("@DTPOFF", file);
7832 switch (size)
7834 case 4:
7835 break;
7836 case 8:
7837 fputs (", 0", file);
7838 break;
7839 default:
7840 gcc_unreachable ();
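/* Assuming ASM_LONG is the usual ".long" directive, this emits for example
   ".long foo@DTPOFF" for a 4-byte entry and ".long foo@DTPOFF, 0" for an
   8-byte one.  */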
7844 /* In the name of slightly smaller debug output, and to cater to
7845 general assembler lossage, recognize PIC+GOTOFF and turn it back
7846 into a direct symbol reference.
7848 On Darwin, this is necessary to avoid a crash, because Darwin
7849 has a different PIC label for each routine but the DWARF debugging
7850 information is not associated with any particular routine, so it's
7851 necessary to remove references to the PIC label from RTL stored by
7852 the DWARF output code. */
7854 static rtx
7855 ix86_delegitimize_address (rtx orig_x)
7857 rtx x = orig_x;
7858 /* reg_addend is NULL or a multiple of some register. */
7859 rtx reg_addend = NULL_RTX;
7860 /* const_addend is NULL or a const_int. */
7861 rtx const_addend = NULL_RTX;
7862 /* This is the result, or NULL. */
7863 rtx result = NULL_RTX;
7865 if (MEM_P (x))
7866 x = XEXP (x, 0);
7868 if (TARGET_64BIT)
7870 if (GET_CODE (x) != CONST
7871 || GET_CODE (XEXP (x, 0)) != UNSPEC
7872 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7873 || !MEM_P (orig_x))
7874 return orig_x;
7875 return XVECEXP (XEXP (x, 0), 0, 0);
7878 if (GET_CODE (x) != PLUS
7879 || GET_CODE (XEXP (x, 1)) != CONST)
7880 return orig_x;
7882 if (REG_P (XEXP (x, 0))
7883 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7884 /* %ebx + GOT/GOTOFF */
7886 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7888 /* %ebx + %reg * scale + GOT/GOTOFF */
7889 reg_addend = XEXP (x, 0);
7890 if (REG_P (XEXP (reg_addend, 0))
7891 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7892 reg_addend = XEXP (reg_addend, 1);
7893 else if (REG_P (XEXP (reg_addend, 1))
7894 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7895 reg_addend = XEXP (reg_addend, 0);
7896 else
7897 return orig_x;
7898 if (!REG_P (reg_addend)
7899 && GET_CODE (reg_addend) != MULT
7900 && GET_CODE (reg_addend) != ASHIFT)
7901 return orig_x;
7903 else
7904 return orig_x;
7906 x = XEXP (XEXP (x, 1), 0);
7907 if (GET_CODE (x) == PLUS
7908 && CONST_INT_P (XEXP (x, 1)))
7910 const_addend = XEXP (x, 1);
7911 x = XEXP (x, 0);
7914 if (GET_CODE (x) == UNSPEC
7915 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7916 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7917 result = XVECEXP (x, 0, 0);
7919 if (TARGET_MACHO && darwin_local_data_pic (x)
7920 && !MEM_P (orig_x))
7921 result = XEXP (x, 0);
7923 if (! result)
7924 return orig_x;
7926 if (const_addend)
7927 result = gen_rtx_PLUS (Pmode, result, const_addend);
7928 if (reg_addend)
7929 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7930 return result;
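/* For example, a 32-bit address of the form
   (plus pic_reg (const (unspec [foo] UNSPEC_GOTOFF))) is turned back into
   plain "foo" here, with any constant or register addend re-attached to
   the result.  */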
7933 static void
7934 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7935 int fp, FILE *file)
7937 const char *suffix;
7939 if (mode == CCFPmode || mode == CCFPUmode)
7941 enum rtx_code second_code, bypass_code;
7942 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7943 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7944 code = ix86_fp_compare_code_to_integer (code);
7945 mode = CCmode;
7947 if (reverse)
7948 code = reverse_condition (code);
7950 switch (code)
7952 case EQ:
7953 suffix = "e";
7954 break;
7955 case NE:
7956 suffix = "ne";
7957 break;
7958 case GT:
7959 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7960 suffix = "g";
7961 break;
7962 case GTU:
7963 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7964 Those same assemblers have the same but opposite lossage on cmov. */
7965 gcc_assert (mode == CCmode);
7966 suffix = fp ? "nbe" : "a";
7967 break;
7968 case LT:
7969 switch (mode)
7971 case CCNOmode:
7972 case CCGOCmode:
7973 suffix = "s";
7974 break;
7976 case CCmode:
7977 case CCGCmode:
7978 suffix = "l";
7979 break;
7981 default:
7982 gcc_unreachable ();
7984 break;
7985 case LTU:
7986 gcc_assert (mode == CCmode);
7987 suffix = "b";
7988 break;
7989 case GE:
7990 switch (mode)
7992 case CCNOmode:
7993 case CCGOCmode:
7994 suffix = "ns";
7995 break;
7997 case CCmode:
7998 case CCGCmode:
7999 suffix = "ge";
8000 break;
8002 default:
8003 gcc_unreachable ();
8005 break;
8006 case GEU:
8007 /* ??? As above. */
8008 gcc_assert (mode == CCmode);
8009 suffix = fp ? "nb" : "ae";
8010 break;
8011 case LE:
8012 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8013 suffix = "le";
8014 break;
8015 case LEU:
8016 gcc_assert (mode == CCmode);
8017 suffix = "be";
8018 break;
8019 case UNORDERED:
8020 suffix = fp ? "u" : "p";
8021 break;
8022 case ORDERED:
8023 suffix = fp ? "nu" : "np";
8024 break;
8025 default:
8026 gcc_unreachable ();
8028 fputs (suffix, file);
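/* For instance, EQ yields the suffix "e" and GTU yields "a" (or "nbe" in
   the fp case), so the callers below end up emitting mnemonics such as
   sete, ja or cmovne.  */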
8031 /* Print the name of register X to FILE based on its machine mode and number.
8032 If CODE is 'w', pretend the mode is HImode.
8033 If CODE is 'b', pretend the mode is QImode.
8034 If CODE is 'k', pretend the mode is SImode.
8035 If CODE is 'q', pretend the mode is DImode.
8036 If CODE is 'h', pretend the reg is the 'high' byte register.
8037 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8039 void
8040 print_reg (rtx x, int code, FILE *file)
8042 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8043 && REGNO (x) != FRAME_POINTER_REGNUM
8044 && REGNO (x) != FLAGS_REG
8045 && REGNO (x) != FPSR_REG
8046 && REGNO (x) != FPCR_REG);
8048 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8049 putc ('%', file);
8051 if (code == 'w' || MMX_REG_P (x))
8052 code = 2;
8053 else if (code == 'b')
8054 code = 1;
8055 else if (code == 'k')
8056 code = 4;
8057 else if (code == 'q')
8058 code = 8;
8059 else if (code == 'y')
8060 code = 3;
8061 else if (code == 'h')
8062 code = 0;
8063 else
8064 code = GET_MODE_SIZE (GET_MODE (x));
8066 /* Irritatingly, AMD extended registers use a different naming convention
8067 from the normal registers. */
8068 if (REX_INT_REG_P (x))
8070 gcc_assert (TARGET_64BIT);
8071 switch (code)
8073 case 0:
8074 error ("extended registers have no high halves");
8075 break;
8076 case 1:
8077 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8078 break;
8079 case 2:
8080 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8081 break;
8082 case 4:
8083 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8084 break;
8085 case 8:
8086 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8087 break;
8088 default:
8089 error ("unsupported operand size for extended register");
8090 break;
8092 return;
8094 switch (code)
8096 case 3:
8097 if (STACK_TOP_P (x))
8099 fputs ("st(0)", file);
8100 break;
8102 /* FALLTHRU */
8103 case 8:
8104 case 4:
8105 case 12:
8106 if (! ANY_FP_REG_P (x))
8107 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8108 /* FALLTHRU */
8109 case 16:
8110 case 2:
8111 normal:
8112 fputs (hi_reg_name[REGNO (x)], file);
8113 break;
8114 case 1:
8115 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8116 goto normal;
8117 fputs (qi_reg_name[REGNO (x)], file);
8118 break;
8119 case 0:
8120 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8121 goto normal;
8122 fputs (qi_high_reg_name[REGNO (x)], file);
8123 break;
8124 default:
8125 gcc_unreachable ();
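/* Example, assuming AT&T syntax: the first REX register printed with code
   'q' comes out as "%r8", with 'k' as "%r8d" and with 'b' as "%r8b", while
   hard register 0 printed with code 'b' comes out as "%al".  */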
8129 /* Locate some local-dynamic symbol still in use by this function
8130 so that we can print its name in some tls_local_dynamic_base
8131 pattern. */
8133 static const char *
8134 get_some_local_dynamic_name (void)
8136 rtx insn;
8138 if (cfun->machine->some_ld_name)
8139 return cfun->machine->some_ld_name;
8141 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8142 if (INSN_P (insn)
8143 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8144 return cfun->machine->some_ld_name;
8146 gcc_unreachable ();
8149 static int
8150 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8152 rtx x = *px;
8154 if (GET_CODE (x) == SYMBOL_REF
8155 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8157 cfun->machine->some_ld_name = XSTR (x, 0);
8158 return 1;
8161 return 0;
8164 /* Meaning of CODE:
8165 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8166 C -- print opcode suffix for set/cmov insn.
8167 c -- like C, but print reversed condition
8168 F,f -- likewise, but for floating-point.
8169 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8170 otherwise nothing
8171 R -- print the prefix for register names.
8172 z -- print the opcode suffix for the size of the current operand.
8173 * -- print a star (in certain assembler syntax)
8174 A -- print an absolute memory reference.
8175 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8176 s -- print a shift double count, followed by the assembler's argument
8177 delimiter.
8178 b -- print the QImode name of the register for the indicated operand.
8179 %b0 would print %al if operands[0] is reg 0.
8180 w -- likewise, print the HImode name of the register.
8181 k -- likewise, print the SImode name of the register.
8182 q -- likewise, print the DImode name of the register.
8183 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8184 y -- print "st(0)" instead of "st" as a register.
8185 D -- print condition for SSE cmp instruction.
8186 P -- if PIC, print an @PLT suffix.
8187 X -- don't print any sort of PIC '@' suffix for a symbol.
8188 & -- print some in-use local-dynamic symbol name.
8189 H -- print a memory address offset by 8; used for sse high-parts. */
8192 void
8193 print_operand (FILE *file, rtx x, int code)
8195 if (code)
8197 switch (code)
8199 case '*':
8200 if (ASSEMBLER_DIALECT == ASM_ATT)
8201 putc ('*', file);
8202 return;
8204 case '&':
8205 assemble_name (file, get_some_local_dynamic_name ());
8206 return;
8208 case 'A':
8209 switch (ASSEMBLER_DIALECT)
8211 case ASM_ATT:
8212 putc ('*', file);
8213 break;
8215 case ASM_INTEL:
8216 /* Intel syntax. For absolute addresses, registers should not
8217 be surrounded by brackets. */
8218 if (!REG_P (x))
8220 putc ('[', file);
8221 PRINT_OPERAND (file, x, 0);
8222 putc (']', file);
8223 return;
8225 break;
8227 default:
8228 gcc_unreachable ();
8231 PRINT_OPERAND (file, x, 0);
8232 return;
8235 case 'L':
8236 if (ASSEMBLER_DIALECT == ASM_ATT)
8237 putc ('l', file);
8238 return;
8240 case 'W':
8241 if (ASSEMBLER_DIALECT == ASM_ATT)
8242 putc ('w', file);
8243 return;
8245 case 'B':
8246 if (ASSEMBLER_DIALECT == ASM_ATT)
8247 putc ('b', file);
8248 return;
8250 case 'Q':
8251 if (ASSEMBLER_DIALECT == ASM_ATT)
8252 putc ('l', file);
8253 return;
8255 case 'S':
8256 if (ASSEMBLER_DIALECT == ASM_ATT)
8257 putc ('s', file);
8258 return;
8260 case 'T':
8261 if (ASSEMBLER_DIALECT == ASM_ATT)
8262 putc ('t', file);
8263 return;
8265 case 'z':
8266 /* 387 opcodes don't get size suffixes if the operands are
8267 registers. */
8268 if (STACK_REG_P (x))
8269 return;
8271 /* Likewise if using Intel opcodes. */
8272 if (ASSEMBLER_DIALECT == ASM_INTEL)
8273 return;
8275 /* This is the size of op from size of operand. */
8276 switch (GET_MODE_SIZE (GET_MODE (x)))
8278 case 1:
8279 putc ('b', file);
8280 return;
8282 case 2:
8283 #ifdef HAVE_GAS_FILDS_FISTS
8284 putc ('s', file);
8285 #endif
8286 return;
8288 case 4:
8289 if (GET_MODE (x) == SFmode)
8291 putc ('s', file);
8292 return;
8294 else
8295 putc ('l', file);
8296 return;
8298 case 12:
8299 case 16:
8300 putc ('t', file);
8301 return;
8303 case 8:
8304 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8306 #ifdef GAS_MNEMONICS
8307 putc ('q', file);
8308 #else
8309 putc ('l', file);
8310 putc ('l', file);
8311 #endif
8313 else
8314 putc ('l', file);
8315 return;
8317 default:
8318 gcc_unreachable ();
8321 case 'b':
8322 case 'w':
8323 case 'k':
8324 case 'q':
8325 case 'h':
8326 case 'y':
8327 case 'X':
8328 case 'P':
8329 break;
8331 case 's':
8332 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8334 PRINT_OPERAND (file, x, 0);
8335 putc (',', file);
8337 return;
8339 case 'D':
8340 /* Little bit of braindamage here. The SSE compare instructions
8341 use completely different names for the comparisons than the
8342 fp conditional moves do. */
8343 switch (GET_CODE (x))
8345 case EQ:
8346 case UNEQ:
8347 fputs ("eq", file);
8348 break;
8349 case LT:
8350 case UNLT:
8351 fputs ("lt", file);
8352 break;
8353 case LE:
8354 case UNLE:
8355 fputs ("le", file);
8356 break;
8357 case UNORDERED:
8358 fputs ("unord", file);
8359 break;
8360 case NE:
8361 case LTGT:
8362 fputs ("neq", file);
8363 break;
8364 case UNGE:
8365 case GE:
8366 fputs ("nlt", file);
8367 break;
8368 case UNGT:
8369 case GT:
8370 fputs ("nle", file);
8371 break;
8372 case ORDERED:
8373 fputs ("ord", file);
8374 break;
8375 default:
8376 gcc_unreachable ();
8378 return;
8379 case 'O':
8380 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8381 if (ASSEMBLER_DIALECT == ASM_ATT)
8383 switch (GET_MODE (x))
8385 case HImode: putc ('w', file); break;
8386 case SImode:
8387 case SFmode: putc ('l', file); break;
8388 case DImode:
8389 case DFmode: putc ('q', file); break;
8390 default: gcc_unreachable ();
8392 putc ('.', file);
8394 #endif
8395 return;
8396 case 'C':
8397 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8398 return;
8399 case 'F':
8400 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8401 if (ASSEMBLER_DIALECT == ASM_ATT)
8402 putc ('.', file);
8403 #endif
8404 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8405 return;
8407 /* Like above, but reverse condition */
8408 case 'c':
8409 /* Check to see if argument to %c is really a constant
8410 and not a condition code which needs to be reversed. */
8411 if (!COMPARISON_P (x))
8413 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8414 return;
8416 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8417 return;
8418 case 'f':
8419 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8420 if (ASSEMBLER_DIALECT == ASM_ATT)
8421 putc ('.', file);
8422 #endif
8423 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8424 return;
8426 case 'H':
8427 /* It doesn't actually matter what mode we use here, as we're
8428 only going to use this for printing. */
8429 x = adjust_address_nv (x, DImode, 8);
8430 break;
8432 case '+':
8434 rtx x;
8436 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8437 return;
8439 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8440 if (x)
8442 int pred_val = INTVAL (XEXP (x, 0));
8444 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8445 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8447 int taken = pred_val > REG_BR_PROB_BASE / 2;
8448 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8450 /* Emit hints only in the case where the default branch prediction
8451 heuristics would fail. */
8452 if (taken != cputaken)
8454 /* We use the 3e (DS) prefix for taken branches and
8455 the 2e (CS) prefix for not-taken branches. */
8456 if (taken)
8457 fputs ("ds ; ", file);
8458 else
8459 fputs ("cs ; ", file);
8463 return;
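/* Background, not specific to this file: 2e and 3e are the cs/ds segment
   override bytes that Pentium 4 class processors interpret as static
   not-taken/taken branch hints; the final_forward_branch_p check above
   means a hint is emitted only when the default backward-taken,
   forward-not-taken rule would guess wrong.  */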
8465 default:
8466 output_operand_lossage ("invalid operand code '%c'", code);
8470 if (REG_P (x))
8471 print_reg (x, code, file);
8473 else if (MEM_P (x))
8475 /* No `byte ptr' prefix for call instructions. */
8476 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8478 const char * size;
8479 switch (GET_MODE_SIZE (GET_MODE (x)))
8481 case 1: size = "BYTE"; break;
8482 case 2: size = "WORD"; break;
8483 case 4: size = "DWORD"; break;
8484 case 8: size = "QWORD"; break;
8485 case 12: size = "XWORD"; break;
8486 case 16: size = "XMMWORD"; break;
8487 default:
8488 gcc_unreachable ();
8491 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8492 if (code == 'b')
8493 size = "BYTE";
8494 else if (code == 'w')
8495 size = "WORD";
8496 else if (code == 'k')
8497 size = "DWORD";
8499 fputs (size, file);
8500 fputs (" PTR ", file);
8503 x = XEXP (x, 0);
8504 /* Avoid (%rip) for call operands. */
8505 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8506 && !CONST_INT_P (x))
8507 output_addr_const (file, x);
8508 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8509 output_operand_lossage ("invalid constraints for operand");
8510 else
8511 output_address (x);
8514 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8516 REAL_VALUE_TYPE r;
8517 long l;
8519 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8520 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8522 if (ASSEMBLER_DIALECT == ASM_ATT)
8523 putc ('$', file);
8524 fprintf (file, "0x%08lx", l);
8527 /* These float cases don't actually occur as immediate operands. */
8528 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8530 char dstr[30];
8532 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8533 fprintf (file, "%s", dstr);
8536 else if (GET_CODE (x) == CONST_DOUBLE
8537 && GET_MODE (x) == XFmode)
8539 char dstr[30];
8541 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8542 fprintf (file, "%s", dstr);
8545 else
8547 /* We have patterns that allow zero sets of memory, for instance.
8548 In 64-bit mode, we should probably support all 8-byte vectors,
8549 since we can in fact encode that into an immediate. */
8550 if (GET_CODE (x) == CONST_VECTOR)
8552 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8553 x = const0_rtx;
8556 if (code != 'P')
8558 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8560 if (ASSEMBLER_DIALECT == ASM_ATT)
8561 putc ('$', file);
8563 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8564 || GET_CODE (x) == LABEL_REF)
8566 if (ASSEMBLER_DIALECT == ASM_ATT)
8567 putc ('$', file);
8568 else
8569 fputs ("OFFSET FLAT:", file);
8572 if (CONST_INT_P (x))
8573 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8574 else if (flag_pic)
8575 output_pic_addr_const (file, x, code);
8576 else
8577 output_addr_const (file, x);
8581 /* Print a memory operand whose address is ADDR. */
8583 void
8584 print_operand_address (FILE *file, rtx addr)
8586 struct ix86_address parts;
8587 rtx base, index, disp;
8588 int scale;
8589 int ok = ix86_decompose_address (addr, &parts);
8591 gcc_assert (ok);
8593 base = parts.base;
8594 index = parts.index;
8595 disp = parts.disp;
8596 scale = parts.scale;
8598 switch (parts.seg)
8600 case SEG_DEFAULT:
8601 break;
8602 case SEG_FS:
8603 case SEG_GS:
8604 if (USER_LABEL_PREFIX[0] == 0)
8605 putc ('%', file);
8606 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8607 break;
8608 default:
8609 gcc_unreachable ();
8612 if (!base && !index)
8614 /* A displacement-only address requires special attention. */
8616 if (CONST_INT_P (disp))
8618 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8620 if (USER_LABEL_PREFIX[0] == 0)
8621 putc ('%', file);
8622 fputs ("ds:", file);
8624 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8626 else if (flag_pic)
8627 output_pic_addr_const (file, disp, 0);
8628 else
8629 output_addr_const (file, disp);
8631 /* Use the one byte shorter RIP-relative addressing in 64bit mode. */
8632 if (TARGET_64BIT)
8634 if (GET_CODE (disp) == CONST
8635 && GET_CODE (XEXP (disp, 0)) == PLUS
8636 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8637 disp = XEXP (XEXP (disp, 0), 0);
8638 if (GET_CODE (disp) == LABEL_REF
8639 || (GET_CODE (disp) == SYMBOL_REF
8640 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8641 fputs ("(%rip)", file);
8644 else
8646 if (ASSEMBLER_DIALECT == ASM_ATT)
8648 if (disp)
8650 if (flag_pic)
8651 output_pic_addr_const (file, disp, 0);
8652 else if (GET_CODE (disp) == LABEL_REF)
8653 output_asm_label (disp);
8654 else
8655 output_addr_const (file, disp);
8658 putc ('(', file);
8659 if (base)
8660 print_reg (base, 0, file);
8661 if (index)
8663 putc (',', file);
8664 print_reg (index, 0, file);
8665 if (scale != 1)
8666 fprintf (file, ",%d", scale);
8668 putc (')', file);
8670 else
8672 rtx offset = NULL_RTX;
8674 if (disp)
8676 /* Pull out the offset of a symbol; print any symbol itself. */
8677 if (GET_CODE (disp) == CONST
8678 && GET_CODE (XEXP (disp, 0)) == PLUS
8679 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8681 offset = XEXP (XEXP (disp, 0), 1);
8682 disp = gen_rtx_CONST (VOIDmode,
8683 XEXP (XEXP (disp, 0), 0));
8686 if (flag_pic)
8687 output_pic_addr_const (file, disp, 0);
8688 else if (GET_CODE (disp) == LABEL_REF)
8689 output_asm_label (disp);
8690 else if (CONST_INT_P (disp))
8691 offset = disp;
8692 else
8693 output_addr_const (file, disp);
8696 putc ('[', file);
8697 if (base)
8699 print_reg (base, 0, file);
8700 if (offset)
8702 if (INTVAL (offset) >= 0)
8703 putc ('+', file);
8704 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8707 else if (offset)
8708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8709 else
8710 putc ('0', file);
8712 if (index)
8714 putc ('+', file);
8715 print_reg (index, 0, file);
8716 if (scale != 1)
8717 fprintf (file, "*%d", scale);
8719 putc (']', file);
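/* For instance, with base %ebx, index %esi, scale 4 and displacement 12,
   the AT&T branch above prints "12(%ebx,%esi,4)", while the Intel branch
   prints the bracketed base-first form along the lines of "[ebx+12+esi*4]"
   (register prefixing there depends on USER_LABEL_PREFIX).  */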
8724 bool
8725 output_addr_const_extra (FILE *file, rtx x)
8727 rtx op;
8729 if (GET_CODE (x) != UNSPEC)
8730 return false;
8732 op = XVECEXP (x, 0, 0);
8733 switch (XINT (x, 1))
8735 case UNSPEC_GOTTPOFF:
8736 output_addr_const (file, op);
8737 /* FIXME: This might be @TPOFF in Sun ld. */
8738 fputs ("@GOTTPOFF", file);
8739 break;
8740 case UNSPEC_TPOFF:
8741 output_addr_const (file, op);
8742 fputs ("@TPOFF", file);
8743 break;
8744 case UNSPEC_NTPOFF:
8745 output_addr_const (file, op);
8746 if (TARGET_64BIT)
8747 fputs ("@TPOFF", file);
8748 else
8749 fputs ("@NTPOFF", file);
8750 break;
8751 case UNSPEC_DTPOFF:
8752 output_addr_const (file, op);
8753 fputs ("@DTPOFF", file);
8754 break;
8755 case UNSPEC_GOTNTPOFF:
8756 output_addr_const (file, op);
8757 if (TARGET_64BIT)
8758 fputs ("@GOTTPOFF(%rip)", file);
8759 else
8760 fputs ("@GOTNTPOFF", file);
8761 break;
8762 case UNSPEC_INDNTPOFF:
8763 output_addr_const (file, op);
8764 fputs ("@INDNTPOFF", file);
8765 break;
8767 default:
8768 return false;
8771 return true;
8774 /* Split one or more DImode RTL references into pairs of SImode
8775 references. The RTL can be REG, offsettable MEM, integer constant, or
8776 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8777 split and "num" is its length. lo_half and hi_half are output arrays
8778 that parallel "operands". */
8780 void
8781 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8783 while (num--)
8785 rtx op = operands[num];
8787 /* simplify_subreg refuses to split volatile memory addresses,
8788 but we still have to handle them. */
8789 if (MEM_P (op))
8791 lo_half[num] = adjust_address (op, SImode, 0);
8792 hi_half[num] = adjust_address (op, SImode, 4);
8794 else
8796 lo_half[num] = simplify_gen_subreg (SImode, op,
8797 GET_MODE (op) == VOIDmode
8798 ? DImode : GET_MODE (op), 0);
8799 hi_half[num] = simplify_gen_subreg (SImode, op,
8800 GET_MODE (op) == VOIDmode
8801 ? DImode : GET_MODE (op), 4);
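/* For example, a DImode memory operand (mem:DI addr) is split into
   lo_half = (mem:SI addr) and hi_half = (mem:SI addr+4); registers and
   constants go through simplify_gen_subreg instead.  */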
8805 /* Split one or more TImode RTL references into pairs of DImode
8806 references. The RTL can be REG, offsettable MEM, integer constant, or
8807 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8808 split and "num" is its length. lo_half and hi_half are output arrays
8809 that parallel "operands". */
8811 void
8812 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8814 while (num--)
8816 rtx op = operands[num];
8818 /* simplify_subreg refuses to split volatile memory addresses, but we
8819 still have to handle them. */
8820 if (MEM_P (op))
8822 lo_half[num] = adjust_address (op, DImode, 0);
8823 hi_half[num] = adjust_address (op, DImode, 8);
8825 else
8827 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8828 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8833 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8834 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8835 is the expression of the binary operation. The output may either be
8836 emitted here, or returned to the caller, like all output_* functions.
8838 There is no guarantee that the operands are the same mode, as they
8839 might be within FLOAT or FLOAT_EXTEND expressions. */
8841 #ifndef SYSV386_COMPAT
8842 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8843 wants to fix the assemblers because that causes incompatibility
8844 with gcc. No-one wants to fix gcc because that causes
8845 incompatibility with assemblers... You can use the option of
8846 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8847 #define SYSV386_COMPAT 1
8848 #endif
8850 const char *
8851 output_387_binary_op (rtx insn, rtx *operands)
8853 static char buf[30];
8854 const char *p;
8855 const char *ssep;
8856 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8858 #ifdef ENABLE_CHECKING
8859 /* Even if we do not want to check the inputs, this documents the input
8860 constraints, which helps in understanding the following code. */
8861 if (STACK_REG_P (operands[0])
8862 && ((REG_P (operands[1])
8863 && REGNO (operands[0]) == REGNO (operands[1])
8864 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8865 || (REG_P (operands[2])
8866 && REGNO (operands[0]) == REGNO (operands[2])
8867 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8868 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8869 ; /* ok */
8870 else
8871 gcc_assert (is_sse);
8872 #endif
8874 switch (GET_CODE (operands[3]))
8876 case PLUS:
8877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8879 p = "fiadd";
8880 else
8881 p = "fadd";
8882 ssep = "add";
8883 break;
8885 case MINUS:
8886 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8887 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8888 p = "fisub";
8889 else
8890 p = "fsub";
8891 ssep = "sub";
8892 break;
8894 case MULT:
8895 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8896 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8897 p = "fimul";
8898 else
8899 p = "fmul";
8900 ssep = "mul";
8901 break;
8903 case DIV:
8904 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8905 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8906 p = "fidiv";
8907 else
8908 p = "fdiv";
8909 ssep = "div";
8910 break;
8912 default:
8913 gcc_unreachable ();
8916 if (is_sse)
8918 strcpy (buf, ssep);
8919 if (GET_MODE (operands[0]) == SFmode)
8920 strcat (buf, "ss\t{%2, %0|%0, %2}");
8921 else
8922 strcat (buf, "sd\t{%2, %0|%0, %2}");
8923 return buf;
8925 strcpy (buf, p);
8927 switch (GET_CODE (operands[3]))
8929 case MULT:
8930 case PLUS:
8931 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8933 rtx temp = operands[2];
8934 operands[2] = operands[1];
8935 operands[1] = temp;
8938 /* We know operands[0] == operands[1]. */
8940 if (MEM_P (operands[2]))
8942 p = "%z2\t%2";
8943 break;
8946 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8948 if (STACK_TOP_P (operands[0]))
8949 /* How is it that we are storing to a dead operand[2]?
8950 Well, presumably operands[1] is dead too. We can't
8951 store the result to st(0) as st(0) gets popped on this
8952 instruction. Instead store to operands[2] (which I
8953 think has to be st(1)). st(1) will be popped later.
8954 gcc <= 2.8.1 didn't have this check and generated
8955 assembly code that the Unixware assembler rejected. */
8956 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8957 else
8958 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8959 break;
8962 if (STACK_TOP_P (operands[0]))
8963 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8964 else
8965 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8966 break;
8968 case MINUS:
8969 case DIV:
8970 if (MEM_P (operands[1]))
8972 p = "r%z1\t%1";
8973 break;
8976 if (MEM_P (operands[2]))
8978 p = "%z2\t%2";
8979 break;
8982 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8984 #if SYSV386_COMPAT
8985 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8986 derived assemblers, confusingly reverse the direction of
8987 the operation for fsub{r} and fdiv{r} when the
8988 destination register is not st(0). The Intel assembler
8989 doesn't have this brain damage. Read !SYSV386_COMPAT to
8990 figure out what the hardware really does. */
8991 if (STACK_TOP_P (operands[0]))
8992 p = "{p\t%0, %2|rp\t%2, %0}";
8993 else
8994 p = "{rp\t%2, %0|p\t%0, %2}";
8995 #else
8996 if (STACK_TOP_P (operands[0]))
8997 /* As above for fmul/fadd, we can't store to st(0). */
8998 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8999 else
9000 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9001 #endif
9002 break;
9005 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9007 #if SYSV386_COMPAT
9008 if (STACK_TOP_P (operands[0]))
9009 p = "{rp\t%0, %1|p\t%1, %0}";
9010 else
9011 p = "{p\t%1, %0|rp\t%0, %1}";
9012 #else
9013 if (STACK_TOP_P (operands[0]))
9014 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9015 else
9016 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9017 #endif
9018 break;
9021 if (STACK_TOP_P (operands[0]))
9023 if (STACK_TOP_P (operands[1]))
9024 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9025 else
9026 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9027 break;
9029 else if (STACK_TOP_P (operands[1]))
9031 #if SYSV386_COMPAT
9032 p = "{\t%1, %0|r\t%0, %1}";
9033 #else
9034 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9035 #endif
9037 else
9039 #if SYSV386_COMPAT
9040 p = "{r\t%2, %0|\t%0, %2}";
9041 #else
9042 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9043 #endif
9045 break;
9047 default:
9048 gcc_unreachable ();
9051 strcat (buf, p);
9052 return buf;
9055 /* Return needed mode for entity in optimize_mode_switching pass. */
9058 ix86_mode_needed (int entity, rtx insn)
9060 enum attr_i387_cw mode;
9062 /* The mode UNINITIALIZED is used to store the control word after a
9063 function call or ASM pattern. The mode ANY specifies that the function
9064 has no requirements on the control word and makes no changes in the
9065 bits we are interested in. */
9067 if (CALL_P (insn)
9068 || (NONJUMP_INSN_P (insn)
9069 && (asm_noperands (PATTERN (insn)) >= 0
9070 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9071 return I387_CW_UNINITIALIZED;
9073 if (recog_memoized (insn) < 0)
9074 return I387_CW_ANY;
9076 mode = get_attr_i387_cw (insn);
9078 switch (entity)
9080 case I387_TRUNC:
9081 if (mode == I387_CW_TRUNC)
9082 return mode;
9083 break;
9085 case I387_FLOOR:
9086 if (mode == I387_CW_FLOOR)
9087 return mode;
9088 break;
9090 case I387_CEIL:
9091 if (mode == I387_CW_CEIL)
9092 return mode;
9093 break;
9095 case I387_MASK_PM:
9096 if (mode == I387_CW_MASK_PM)
9097 return mode;
9098 break;
9100 default:
9101 gcc_unreachable ();
9104 return I387_CW_ANY;
9107 /* Output code to initialize control word copies used by trunc?f?i and
9108 rounding patterns. MODE selects the rounding or masking variant; the
9109 current control word is copied, adjusted for MODE, and stored in a new stack slot. */
9111 void
9112 emit_i387_cw_initialization (int mode)
9114 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9115 rtx new_mode;
9117 int slot;
9119 rtx reg = gen_reg_rtx (HImode);
9121 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9122 emit_move_insn (reg, copy_rtx (stored_mode));
9124 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9126 switch (mode)
9128 case I387_CW_TRUNC:
9129 /* round toward zero (truncate) */
9130 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9131 slot = SLOT_CW_TRUNC;
9132 break;
9134 case I387_CW_FLOOR:
9135 /* round down toward -oo */
9136 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9137 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9138 slot = SLOT_CW_FLOOR;
9139 break;
9141 case I387_CW_CEIL:
9142 /* round up toward +oo */
9143 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9144 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9145 slot = SLOT_CW_CEIL;
9146 break;
9148 case I387_CW_MASK_PM:
9149 /* mask precision exception for nearbyint() */
9150 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9151 slot = SLOT_CW_MASK_PM;
9152 break;
9154 default:
9155 gcc_unreachable ();
9158 else
9160 switch (mode)
9162 case I387_CW_TRUNC:
9163 /* round toward zero (truncate) */
9164 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9165 slot = SLOT_CW_TRUNC;
9166 break;
9168 case I387_CW_FLOOR:
9169 /* round down toward -oo */
9170 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9171 slot = SLOT_CW_FLOOR;
9172 break;
9174 case I387_CW_CEIL:
9175 /* round up toward +oo */
9176 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9177 slot = SLOT_CW_CEIL;
9178 break;
9180 case I387_CW_MASK_PM:
9181 /* mask precision exception for nearbyint() */
9182 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9183 slot = SLOT_CW_MASK_PM;
9184 break;
9186 default:
9187 gcc_unreachable ();
9191 gcc_assert (slot < MAX_386_STACK_LOCALS);
9193 new_mode = assign_386_stack_local (HImode, slot);
9194 emit_move_insn (new_mode, reg);
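/* The masks used above follow the x87 control word layout: bits 10-11 are
   the rounding control (0x0c00 = truncate, 0x0400 = round down,
   0x0800 = round up) and bit 5 (0x0020) is the precision exception mask
   needed for nearbyint.  */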
9197 /* Output code for INSN to convert a float to a signed int. OPERANDS
9198 are the insn operands. The output may be [HSD]Imode and the input
9199 operand may be [SDX]Fmode. */
9201 const char *
9202 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9204 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9205 int dimode_p = GET_MODE (operands[0]) == DImode;
9206 int round_mode = get_attr_i387_cw (insn);
9208 /* Jump through a hoop or two for DImode, since the hardware has no
9209 non-popping instruction. We used to do this a different way, but
9210 that was somewhat fragile and broke with post-reload splitters. */
9211 if ((dimode_p || fisttp) && !stack_top_dies)
9212 output_asm_insn ("fld\t%y1", operands);
9214 gcc_assert (STACK_TOP_P (operands[1]));
9215 gcc_assert (MEM_P (operands[0]));
9217 if (fisttp)
9218 output_asm_insn ("fisttp%z0\t%0", operands);
9219 else
9221 if (round_mode != I387_CW_ANY)
9222 output_asm_insn ("fldcw\t%3", operands);
9223 if (stack_top_dies || dimode_p)
9224 output_asm_insn ("fistp%z0\t%0", operands);
9225 else
9226 output_asm_insn ("fist%z0\t%0", operands);
9227 if (round_mode != I387_CW_ANY)
9228 output_asm_insn ("fldcw\t%2", operands);
9231 return "";
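/* Assuming the usual operand layout of the fix_trunc patterns, a typical
   non-fisttp sequence for an SImode result is therefore
   "fldcw %3" / "fistpl %0" / "fldcw %2": switch to the truncating control
   word, store and pop, then restore the previous control word.  */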
9234 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9235 have the values zero or one, indicates the ffreep insn's operand
9236 from the OPERANDS array. */
9238 static const char *
9239 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9241 if (TARGET_USE_FFREEP)
9242 #if HAVE_AS_IX86_FFREEP
9243 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9244 #else
9246 static char retval[] = ".word\t0xc_df";
9247 int regno = REGNO (operands[opno]);
9249 gcc_assert (FP_REGNO_P (regno));
9251 retval[9] = '0' + (regno - FIRST_STACK_REG);
9252 return retval;
9254 #endif
9256 return opno ? "fstp\t%y1" : "fstp\t%y0";
9260 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9261 should be used. UNORDERED_P is true when fucom should be used. */
9263 const char *
9264 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9266 int stack_top_dies;
9267 rtx cmp_op0, cmp_op1;
9268 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9270 if (eflags_p)
9272 cmp_op0 = operands[0];
9273 cmp_op1 = operands[1];
9275 else
9277 cmp_op0 = operands[1];
9278 cmp_op1 = operands[2];
9281 if (is_sse)
9283 if (GET_MODE (operands[0]) == SFmode)
9284 if (unordered_p)
9285 return "ucomiss\t{%1, %0|%0, %1}";
9286 else
9287 return "comiss\t{%1, %0|%0, %1}";
9288 else
9289 if (unordered_p)
9290 return "ucomisd\t{%1, %0|%0, %1}";
9291 else
9292 return "comisd\t{%1, %0|%0, %1}";
9295 gcc_assert (STACK_TOP_P (cmp_op0));
9297 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9299 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9301 if (stack_top_dies)
9303 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9304 return output_387_ffreep (operands, 1);
9306 else
9307 return "ftst\n\tfnstsw\t%0";
9310 if (STACK_REG_P (cmp_op1)
9311 && stack_top_dies
9312 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9313 && REGNO (cmp_op1) != FIRST_STACK_REG)
9315 /* If the top of the 387 stack dies, and the other operand
9316 is also a stack register that dies, then this must be an
9317 `fcompp' float compare. */
9319 if (eflags_p)
9321 /* There is no double popping fcomi variant. Fortunately,
9322 eflags is immune from the fstp's cc clobbering. */
9323 if (unordered_p)
9324 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9325 else
9326 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9327 return output_387_ffreep (operands, 0);
9329 else
9331 if (unordered_p)
9332 return "fucompp\n\tfnstsw\t%0";
9333 else
9334 return "fcompp\n\tfnstsw\t%0";
9337 else
9339 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
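/* A worked example (illustrative, not part of the original comment):
   an fcomi-style compare of an fp operand that should use the
   non-signaling form and whose stack top dies -- eflags_p = 1,
   integer operand = 0, unordered_p = 1, stack_top_dies = 1 --
   gives mask = 8 | 0 | 2 | 1 = 11, selecting alt[11], "fucomip".  */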
9341 static const char * const alt[16] =
9343 "fcom%z2\t%y2\n\tfnstsw\t%0",
9344 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9345 "fucom%z2\t%y2\n\tfnstsw\t%0",
9346 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9348 "ficom%z2\t%y2\n\tfnstsw\t%0",
9349 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9350 NULL,
9351 NULL,
9353 "fcomi\t{%y1, %0|%0, %y1}",
9354 "fcomip\t{%y1, %0|%0, %y1}",
9355 "fucomi\t{%y1, %0|%0, %y1}",
9356 "fucomip\t{%y1, %0|%0, %y1}",
9358 NULL,
9359 NULL,
9360 NULL,
9361 NULL
9364 int mask;
9365 const char *ret;
9367 mask = eflags_p << 3;
9368 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9369 mask |= unordered_p << 1;
9370 mask |= stack_top_dies;
9372 gcc_assert (mask < 16);
9373 ret = alt[mask];
9374 gcc_assert (ret);
9376 return ret;
9380 void
9381 ix86_output_addr_vec_elt (FILE *file, int value)
9383 const char *directive = ASM_LONG;
9385 #ifdef ASM_QUAD
9386 if (TARGET_64BIT)
9387 directive = ASM_QUAD;
9388 #else
9389 gcc_assert (!TARGET_64BIT);
9390 #endif
9392 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9395 void
9396 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9398 const char *directive = ASM_LONG;
9400 #ifdef ASM_QUAD
9401 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9402 directive = ASM_QUAD;
9403 #else
9404 gcc_assert (!TARGET_64BIT);
9405 #endif
9406 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9407 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9408 fprintf (file, "%s%s%d-%s%d\n",
9409 directive, LPREFIX, value, LPREFIX, rel);
9410 else if (HAVE_AS_GOTOFF_IN_DATA)
9411 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9412 #if TARGET_MACHO
9413 else if (TARGET_MACHO)
9415 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9416 machopic_output_function_base_name (file);
9417 fprintf(file, "\n");
9419 #endif
9420 else
9421 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9422 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9425 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9426 for the target. */
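/* For reference (sizes assumed from the usual ia32 encodings):
   "xorl %eax, %eax" is 2 bytes while "movl $0, %eax" is 5, so the
   xor form is preferred unless TARGET_USE_MOV0 asks otherwise; the
   price is that xor clobbers the flags, hence the CLOBBER of the
   flags register attached below.  */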
9428 void
9429 ix86_expand_clear (rtx dest)
9431 rtx tmp;
9433 /* We play register width games, which are only valid after reload. */
9434 gcc_assert (reload_completed);
9436 /* Avoid HImode and its attendant prefix byte. */
9437 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9438 dest = gen_rtx_REG (SImode, REGNO (dest));
9440 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9442 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9443 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9445 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9446 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9449 emit_insn (tmp);
9452 /* X is an unchanging MEM. If it is a constant pool reference, return
9453 the constant pool rtx, else NULL. */
9456 maybe_get_pool_constant (rtx x)
9458 x = ix86_delegitimize_address (XEXP (x, 0));
9460 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9461 return get_pool_constant (x);
9463 return NULL_RTX;
9466 void
9467 ix86_expand_move (enum machine_mode mode, rtx operands[])
9469 int strict = (reload_in_progress || reload_completed);
9470 rtx op0, op1;
9471 enum tls_model model;
9473 op0 = operands[0];
9474 op1 = operands[1];
9476 if (GET_CODE (op1) == SYMBOL_REF)
9478 model = SYMBOL_REF_TLS_MODEL (op1);
9479 if (model)
9481 op1 = legitimize_tls_address (op1, model, true);
9482 op1 = force_operand (op1, op0);
9483 if (op1 == op0)
9484 return;
9487 else if (GET_CODE (op1) == CONST
9488 && GET_CODE (XEXP (op1, 0)) == PLUS
9489 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9491 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9492 if (model)
9494 rtx addend = XEXP (XEXP (op1, 0), 1);
9495 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9496 op1 = force_operand (op1, NULL);
9497 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9498 op0, 1, OPTAB_DIRECT);
9499 if (op1 == op0)
9500 return;
9504 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9506 if (TARGET_MACHO && !TARGET_64BIT)
9508 #if TARGET_MACHO
9509 if (MACHOPIC_PURE)
9511 rtx temp = ((reload_in_progress
9512 || ((op0 && REG_P (op0))
9513 && mode == Pmode))
9514 ? op0 : gen_reg_rtx (Pmode));
9515 op1 = machopic_indirect_data_reference (op1, temp);
9516 op1 = machopic_legitimize_pic_address (op1, mode,
9517 temp == op1 ? 0 : temp);
9519 else if (MACHOPIC_INDIRECT)
9520 op1 = machopic_indirect_data_reference (op1, 0);
9521 if (op0 == op1)
9522 return;
9523 #endif
9525 else
9527 if (MEM_P (op0))
9528 op1 = force_reg (Pmode, op1);
9529 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9531 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9532 op1 = legitimize_pic_address (op1, reg);
9533 if (op0 == op1)
9534 return;
9538 else
9540 if (MEM_P (op0)
9541 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9542 || !push_operand (op0, mode))
9543 && MEM_P (op1))
9544 op1 = force_reg (mode, op1);
9546 if (push_operand (op0, mode)
9547 && ! general_no_elim_operand (op1, mode))
9548 op1 = copy_to_mode_reg (mode, op1);
9550 /* Force large constants in 64-bit compilation into a register
9551 to get them CSEed. */
9552 if (TARGET_64BIT && mode == DImode
9553 && immediate_operand (op1, mode)
9554 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9555 && !register_operand (op0, mode)
9556 && optimize && !reload_completed && !reload_in_progress)
9557 op1 = copy_to_mode_reg (mode, op1);
9559 if (FLOAT_MODE_P (mode))
9561 /* If we are loading a floating point constant to a register,
9562 force the value to memory now, since we'll get better code
9563 out of the back end. */
9565 if (strict)
9567 else if (GET_CODE (op1) == CONST_DOUBLE)
9569 op1 = validize_mem (force_const_mem (mode, op1));
9570 if (!register_operand (op0, mode))
9572 rtx temp = gen_reg_rtx (mode);
9573 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9574 emit_move_insn (op0, temp);
9575 return;
9581 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9584 void
9585 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9587 rtx op0 = operands[0], op1 = operands[1];
9589 /* Force constants other than zero into memory. We do not know how
9590 the instructions used to build constants modify the upper 64 bits
9591 of the register; once we have that information we may be able
9592 to handle some of them more efficiently. */
9593 if ((reload_in_progress | reload_completed) == 0
9594 && register_operand (op0, mode)
9595 && CONSTANT_P (op1)
9596 && standard_sse_constant_p (op1) <= 0)
9597 op1 = validize_mem (force_const_mem (mode, op1));
9599 /* Make operand1 a register if it isn't already. */
9600 if (!no_new_pseudos
9601 && !register_operand (op0, mode)
9602 && !register_operand (op1, mode))
9604 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9605 return;
9608 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9611 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9612 straight to ix86_expand_vector_move. */
9613 /* Code generation for scalar reg-reg moves of single and double precision data:
9614 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9615 movaps reg, reg
9616 else
9617 movss reg, reg
9618 if (x86_sse_partial_reg_dependency == true)
9619 movapd reg, reg
9620 else
9621 movsd reg, reg
9623 Code generation for scalar loads of double precision data:
9624 if (x86_sse_split_regs == true)
9625 movlpd mem, reg (gas syntax)
9626 else
9627 movsd mem, reg
9629 Code generation for unaligned packed loads of single precision data
9630 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9631 if (x86_sse_unaligned_move_optimal)
9632 movups mem, reg
9634 if (x86_sse_partial_reg_dependency == true)
9636 xorps reg, reg
9637 movlps mem, reg
9638 movhps mem+8, reg
9640 else
9642 movlps mem, reg
9643 movhps mem+8, reg
9646 Code generation for unaligned packed loads of double precision data
9647 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9648 if (x86_sse_unaligned_move_optimal)
9649 movupd mem, reg
9651 if (x86_sse_split_regs == true)
9653 movlpd mem, reg
9654 movhpd mem+8, reg
9656 else
9658 movsd mem, reg
9659 movhpd mem+8, reg
9663 void
9664 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9666 rtx op0, op1, m;
9668 op0 = operands[0];
9669 op1 = operands[1];
9671 if (MEM_P (op1))
9673 /* If we're optimizing for size, movups is the smallest. */
9674 if (optimize_size)
9676 op0 = gen_lowpart (V4SFmode, op0);
9677 op1 = gen_lowpart (V4SFmode, op1);
9678 emit_insn (gen_sse_movups (op0, op1));
9679 return;
9682 /* ??? If we have typed data, then it would appear that using
9683 movdqu is the only way to get unaligned data loaded with
9684 integer type. */
9685 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9687 op0 = gen_lowpart (V16QImode, op0);
9688 op1 = gen_lowpart (V16QImode, op1);
9689 emit_insn (gen_sse2_movdqu (op0, op1));
9690 return;
9693 if (TARGET_SSE2 && mode == V2DFmode)
9695 rtx zero;
9697 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9699 op0 = gen_lowpart (V2DFmode, op0);
9700 op1 = gen_lowpart (V2DFmode, op1);
9701 emit_insn (gen_sse2_movupd (op0, op1));
9702 return;
9705 /* When SSE registers are split into halves, we can avoid
9706 writing to the top half twice. */
9707 if (TARGET_SSE_SPLIT_REGS)
9709 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9710 zero = op0;
9712 else
9714 /* ??? Not sure about the best option for the Intel chips.
9715 The following would seem to satisfy; the register is
9716 entirely cleared, breaking the dependency chain. We
9717 then store to the upper half, with a dependency depth
9718 of one. A rumor has it that Intel recommends two movsd
9719 followed by an unpacklpd, but this is unconfirmed. And
9720 given that the dependency depth of the unpacklpd would
9721 still be one, I'm not sure why this would be better. */
9722 zero = CONST0_RTX (V2DFmode);
9725 m = adjust_address (op1, DFmode, 0);
9726 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9727 m = adjust_address (op1, DFmode, 8);
9728 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9730 else
9732 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9734 op0 = gen_lowpart (V4SFmode, op0);
9735 op1 = gen_lowpart (V4SFmode, op1);
9736 emit_insn (gen_sse_movups (op0, op1));
9737 return;
9740 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9741 emit_move_insn (op0, CONST0_RTX (mode));
9742 else
9743 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9745 if (mode != V4SFmode)
9746 op0 = gen_lowpart (V4SFmode, op0);
9747 m = adjust_address (op1, V2SFmode, 0);
9748 emit_insn (gen_sse_loadlps (op0, op0, m));
9749 m = adjust_address (op1, V2SFmode, 8);
9750 emit_insn (gen_sse_loadhps (op0, op0, m));
9753 else if (MEM_P (op0))
9755 /* If we're optimizing for size, movups is the smallest. */
9756 if (optimize_size)
9758 op0 = gen_lowpart (V4SFmode, op0);
9759 op1 = gen_lowpart (V4SFmode, op1);
9760 emit_insn (gen_sse_movups (op0, op1));
9761 return;
9764 /* ??? Similar to above, only less clear because of
9765 "typeless stores". */
9766 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9767 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9769 op0 = gen_lowpart (V16QImode, op0);
9770 op1 = gen_lowpart (V16QImode, op1);
9771 emit_insn (gen_sse2_movdqu (op0, op1));
9772 return;
9775 if (TARGET_SSE2 && mode == V2DFmode)
9777 m = adjust_address (op0, DFmode, 0);
9778 emit_insn (gen_sse2_storelpd (m, op1));
9779 m = adjust_address (op0, DFmode, 8);
9780 emit_insn (gen_sse2_storehpd (m, op1));
9782 else
9784 if (mode != V4SFmode)
9785 op1 = gen_lowpart (V4SFmode, op1);
9786 m = adjust_address (op0, V2SFmode, 0);
9787 emit_insn (gen_sse_storelps (m, op1));
9788 m = adjust_address (op0, V2SFmode, 8);
9789 emit_insn (gen_sse_storehps (m, op1));
9792 else
9793 gcc_unreachable ();
9796 /* Expand a push in MODE. This is some mode for which we do not support
9797 proper push instructions, at least from the registers that we expect
9798 the value to live in. */
9800 void
9801 ix86_expand_push (enum machine_mode mode, rtx x)
9803 rtx tmp;
9805 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9806 GEN_INT (-GET_MODE_SIZE (mode)),
9807 stack_pointer_rtx, 1, OPTAB_DIRECT);
9808 if (tmp != stack_pointer_rtx)
9809 emit_move_insn (stack_pointer_rtx, tmp);
9811 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9812 emit_move_insn (tmp, x);
9815 /* Helper function of ix86_fixup_binary_operands to canonicalize
9816 operand order. Returns true if the operands should be swapped. */
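/* An illustrative case (hypothetical operands): for
   (set (reg A) (plus (mem B) (reg A))) the destination matches
   src2, so swapping the sources yields (plus (reg A) (mem B)),
   which maps directly onto the two-operand "add mem, reg" form
   with a matching destination.  */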
9818 static bool
9819 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9820 rtx operands[])
9822 rtx dst = operands[0];
9823 rtx src1 = operands[1];
9824 rtx src2 = operands[2];
9826 /* If the operation is not commutative, we can't do anything. */
9827 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9828 return false;
9830 /* Highest priority is that src1 should match dst. */
9831 if (rtx_equal_p (dst, src1))
9832 return false;
9833 if (rtx_equal_p (dst, src2))
9834 return true;
9836 /* Next highest priority is that immediate constants come second. */
9837 if (immediate_operand (src2, mode))
9838 return false;
9839 if (immediate_operand (src1, mode))
9840 return true;
9842 /* Lowest priority is that memory references should come second. */
9843 if (MEM_P (src2))
9844 return false;
9845 if (MEM_P (src1))
9846 return true;
9848 return false;
9852 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9853 destination to use for the operation. If different from the true
9854 destination in operands[0], a copy operation will be required. */
9857 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9858 rtx operands[])
9860 rtx dst = operands[0];
9861 rtx src1 = operands[1];
9862 rtx src2 = operands[2];
9864 /* Canonicalize operand order. */
9865 if (ix86_swap_binary_operands_p (code, mode, operands))
9867 rtx temp = src1;
9868 src1 = src2;
9869 src2 = temp;
9872 /* The source operands cannot both be in memory. */
9873 if (MEM_P (src1) && MEM_P (src2))
9875 /* Optimization: Only read from memory once. */
9876 if (rtx_equal_p (src1, src2))
9878 src2 = force_reg (mode, src2);
9879 src1 = src2;
9881 else
9882 src2 = force_reg (mode, src2);
9885 /* If the destination is memory, and we do not have matching source
9886 operands, do things in registers. */
9887 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9888 dst = gen_reg_rtx (mode);
9890 /* Source 1 cannot be a constant. */
9891 if (CONSTANT_P (src1))
9892 src1 = force_reg (mode, src1);
9894 /* Source 1 cannot be a non-matching memory. */
9895 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9896 src1 = force_reg (mode, src1);
9898 operands[1] = src1;
9899 operands[2] = src2;
9900 return dst;
9903 /* Similarly, but assume that the destination has already been
9904 set up properly. */
9906 void
9907 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9908 enum machine_mode mode, rtx operands[])
9910 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9911 gcc_assert (dst == operands[0]);
9914 /* Attempt to expand a binary operator. Make the expansion closer to the
9915 actual machine than just general_operand, which would allow 3 separate
9916 memory references (one output, two input) in a single insn. */
9918 void
9919 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9920 rtx operands[])
9922 rtx src1, src2, dst, op, clob;
9924 dst = ix86_fixup_binary_operands (code, mode, operands);
9925 src1 = operands[1];
9926 src2 = operands[2];
9928 /* Emit the instruction. */
9930 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9931 if (reload_in_progress)
9933 /* Reload doesn't know about the flags register, and doesn't know that
9934 it doesn't want to clobber it. We can only do this with PLUS. */
9935 gcc_assert (code == PLUS);
9936 emit_insn (op);
9938 else
9940 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9941 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9944 /* Fix up the destination if needed. */
9945 if (dst != operands[0])
9946 emit_move_insn (operands[0], dst);
9949 /* Return TRUE or FALSE depending on whether the binary operator meets the
9950 appropriate constraints. */
9953 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9954 rtx operands[3])
9956 rtx dst = operands[0];
9957 rtx src1 = operands[1];
9958 rtx src2 = operands[2];
9960 /* The source operands cannot both be in memory. */
9961 if (MEM_P (src1) && MEM_P (src2))
9962 return 0;
9964 /* Canonicalize operand order for commutative operators. */
9965 if (ix86_swap_binary_operands_p (code, mode, operands))
9967 rtx temp = src1;
9968 src1 = src2;
9969 src2 = temp;
9972 /* If the destination is memory, we must have a matching source operand. */
9973 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9974 return 0;
9976 /* Source 1 cannot be a constant. */
9977 if (CONSTANT_P (src1))
9978 return 0;
9980 /* Source 1 cannot be a non-matching memory. */
9981 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9982 return 0;
9984 return 1;
9987 /* Attempt to expand a unary operator. Make the expansion closer to the
9988 actual machine than just general_operand, which would allow 2 separate
9989 memory references (one output, one input) in a single insn. */
9991 void
9992 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9993 rtx operands[])
9995 int matching_memory;
9996 rtx src, dst, op, clob;
9998 dst = operands[0];
9999 src = operands[1];
10001 /* If the destination is memory, and we do not have matching source
10002 operands, do things in registers. */
10003 matching_memory = 0;
10004 if (MEM_P (dst))
10006 if (rtx_equal_p (dst, src))
10007 matching_memory = 1;
10008 else
10009 dst = gen_reg_rtx (mode);
10012 /* When the source operand is memory, the destination must match. */
10013 if (MEM_P (src) && !matching_memory)
10014 src = force_reg (mode, src);
10016 /* Emit the instruction. */
10018 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10019 if (reload_in_progress || code == NOT)
10021 /* Reload doesn't know about the flags register, and doesn't know that
10022 it doesn't want to clobber it. */
10023 gcc_assert (code == NOT);
10024 emit_insn (op);
10026 else
10028 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10029 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10032 /* Fix up the destination if needed. */
10033 if (dst != operands[0])
10034 emit_move_insn (operands[0], dst);
10037 /* Return TRUE or FALSE depending on whether the unary operator meets the
10038 appropriate constraints. */
10041 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10042 enum machine_mode mode ATTRIBUTE_UNUSED,
10043 rtx operands[2] ATTRIBUTE_UNUSED)
10045 /* If one of operands is memory, source and destination must match. */
10046 if ((MEM_P (operands[0])
10047 || MEM_P (operands[1]))
10048 && ! rtx_equal_p (operands[0], operands[1]))
10049 return FALSE;
10050 return TRUE;
10053 /* Post-reload splitter for converting an SF or DFmode value in an
10054 SSE register into an unsigned SImode. */
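/* A worked example (illustrative): for an input of 3000000000.0,
   which is >= 0x1.0p31, the comparison mask selects two31, so
   2147483648.0 is subtracted before the signed cvtt conversion
   (giving 852516352), and the final xor with the mask shifted left
   by 31 adds the 0x80000000 bit back, yielding 3000000000 as an
   unsigned SImode value.  */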
10056 void
10057 ix86_split_convert_uns_si_sse (rtx operands[])
10059 enum machine_mode vecmode;
10060 rtx value, large, zero_or_two31, input, two31, x;
10062 large = operands[1];
10063 zero_or_two31 = operands[2];
10064 input = operands[3];
10065 two31 = operands[4];
10066 vecmode = GET_MODE (large);
10067 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10069 /* Load up the value into the low element. We must ensure that the other
10070 elements are valid floats -- zero is the easiest such value. */
10071 if (MEM_P (input))
10073 if (vecmode == V4SFmode)
10074 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10075 else
10076 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10078 else
10080 input = gen_rtx_REG (vecmode, REGNO (input));
10081 emit_move_insn (value, CONST0_RTX (vecmode));
10082 if (vecmode == V4SFmode)
10083 emit_insn (gen_sse_movss (value, value, input));
10084 else
10085 emit_insn (gen_sse2_movsd (value, value, input));
10088 emit_move_insn (large, two31);
10089 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10091 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10092 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10094 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10095 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10097 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10098 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10100 large = gen_rtx_REG (V4SImode, REGNO (large));
10101 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10103 x = gen_rtx_REG (V4SImode, REGNO (value));
10104 if (vecmode == V4SFmode)
10105 emit_insn (gen_sse2_cvttps2dq (x, value));
10106 else
10107 emit_insn (gen_sse2_cvttpd2dq (x, value));
10108 value = x;
10110 emit_insn (gen_xorv4si3 (value, value, large));
10113 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10114 Expects the 64-bit DImode to be supplied in a pair of integral
10115 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10116 -mfpmath=sse, !optimize_size only. */
10118 void
10119 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10121 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10122 rtx int_xmm, fp_xmm;
10123 rtx biases, exponents;
10124 rtx x;
10126 int_xmm = gen_reg_rtx (V4SImode);
10127 if (TARGET_INTER_UNIT_MOVES)
10128 emit_insn (gen_movdi_to_sse (int_xmm, input));
10129 else if (TARGET_SSE_SPLIT_REGS)
10131 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10132 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10134 else
10136 x = gen_reg_rtx (V2DImode);
10137 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10138 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10141 x = gen_rtx_CONST_VECTOR (V4SImode,
10142 gen_rtvec (4, GEN_INT (0x43300000UL),
10143 GEN_INT (0x45300000UL),
10144 const0_rtx, const0_rtx));
10145 exponents = validize_mem (force_const_mem (V4SImode, x));
10147 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10148 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10150 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10151 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10152 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10153 (0x1.0p84 + double(fp_value_hi_xmm)).
10154 Note these exponents differ by 32. */
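/* A worked example (numbers chosen for illustration): for the input
   0x0000000500000007 the low double is 0x1.0p52 + 7 and the high
   double is 0x1.0p84 + 5 * 0x1.0p32.  Subtracting the biases below
   leaves { 7.0, 5 * 2^32 }, and the final addition produces
   21474836487.0 == 5 * 2^32 + 7, the unsigned value of the input.  */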
10156 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10158 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10159 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10160 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10161 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10162 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10163 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10164 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10165 biases = validize_mem (force_const_mem (V2DFmode, biases));
10166 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10168 /* Add the upper and lower DFmode values together. */
10169 if (TARGET_SSE3)
10170 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10171 else
10173 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10174 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10175 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10178 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10181 /* Convert an unsigned SImode value into a DFmode. Only currently used
10182 for SSE, but applicable anywhere. */
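/* A worked example (illustrative): for input 0xffffffff the PLUS
   below wraps to 0x7fffffff, floatsidf yields 2147483647.0, and
   adding 0x1.0p31 back gives 4294967295.0, the unsigned value.  */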
10184 void
10185 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10187 REAL_VALUE_TYPE TWO31r;
10188 rtx x, fp;
10190 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10191 NULL, 1, OPTAB_DIRECT);
10193 fp = gen_reg_rtx (DFmode);
10194 emit_insn (gen_floatsidf2 (fp, x));
10196 real_ldexp (&TWO31r, &dconst1, 31);
10197 x = const_double_from_real_value (TWO31r, DFmode);
10199 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10200 if (x != target)
10201 emit_move_insn (target, x);
10204 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10205 32-bit mode; otherwise we have a direct convert instruction. */
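/* A worked example (illustrative): for the input -1
   (0xffffffffffffffff), the high word converts to -1.0 and is
   scaled to -0x1.0p32, the low word converts as unsigned to
   4294967295.0, and their sum is the expected -1.0.  */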
10207 void
10208 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10210 REAL_VALUE_TYPE TWO32r;
10211 rtx fp_lo, fp_hi, x;
10213 fp_lo = gen_reg_rtx (DFmode);
10214 fp_hi = gen_reg_rtx (DFmode);
10216 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10218 real_ldexp (&TWO32r, &dconst1, 32);
10219 x = const_double_from_real_value (TWO32r, DFmode);
10220 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10222 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10224 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10225 0, OPTAB_DIRECT);
10226 if (x != target)
10227 emit_move_insn (target, x);
10230 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10231 For x86_32, -mfpmath=sse, !optimize_size only. */
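/* A worked example (illustrative): the input is split into exact
   16-bit halves, e.g. 0x00030005 gives int_hi = 3 and int_lo = 5,
   and the result is 3.0 * 65536.0 + 5.0 = 196613.0.  Only the final
   addition can round, so the conversion is correctly rounded.  */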
10232 void
10233 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10235 REAL_VALUE_TYPE ONE16r;
10236 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10238 real_ldexp (&ONE16r, &dconst1, 16);
10239 x = const_double_from_real_value (ONE16r, SFmode);
10240 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10241 NULL, 0, OPTAB_DIRECT);
10242 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10243 NULL, 0, OPTAB_DIRECT);
10244 fp_hi = gen_reg_rtx (SFmode);
10245 fp_lo = gen_reg_rtx (SFmode);
10246 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10247 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10248 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10249 0, OPTAB_DIRECT);
10250 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10251 0, OPTAB_DIRECT);
10252 if (!rtx_equal_p (target, fp_hi))
10253 emit_move_insn (target, fp_hi);
10256 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10257 then replicate VALUE for all elements of the vector register;
10258 otherwise only the low element is set and the rest are zero. */
10261 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10263 rtvec v;
10264 switch (mode)
10266 case SFmode:
10267 if (vect)
10268 v = gen_rtvec (4, value, value, value, value);
10269 else
10270 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10271 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10272 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10274 case DFmode:
10275 if (vect)
10276 v = gen_rtvec (2, value, value);
10277 else
10278 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10279 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10281 default:
10282 gcc_unreachable ();
10286 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10287 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10288 true, then replicate the mask for all elements of the vector register.
10289 If INVERT is true, then create a mask excluding the sign bit. */
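/* Concretely (values implied by the code below): for SFmode each
   selected element of the mask is 0x80000000 and for DFmode it is
   0x8000000000000000; with INVERT the complements 0x7fffffff and
   0x7fffffffffffffff are used instead.  */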
10292 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10294 enum machine_mode vec_mode;
10295 HOST_WIDE_INT hi, lo;
10296 int shift = 63;
10297 rtx v;
10298 rtx mask;
10300 /* Find the sign bit, sign extended to 2*HWI. */
10301 if (mode == SFmode)
10302 lo = 0x80000000, hi = lo < 0;
10303 else if (HOST_BITS_PER_WIDE_INT >= 64)
10304 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10305 else
10306 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10308 if (invert)
10309 lo = ~lo, hi = ~hi;
10311 /* Force this value into the low part of a fp vector constant. */
10312 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10313 mask = gen_lowpart (mode, mask);
10315 v = ix86_build_const_vector (mode, vect, mask);
10316 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10317 return force_reg (vec_mode, v);
10320 /* Generate code for floating point ABS or NEG. */
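/* A rough sketch of the SSE case (labels assumed for illustration):
   ABS is an AND with the inverted sign-bit mask and NEG an XOR with
   the sign-bit mask built by ix86_build_signbit_mask below, e.g. for
   SFmode

       andps .LC_absmask, %xmm0    # clears bit 31 of each element
       xorps .LC_negmask, %xmm0    # flips bit 31 of each element

   while the x87 path emits the plain abs/neg rtx, which ends up as
   fabs/fchs.  */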
10322 void
10323 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10324 rtx operands[])
10326 rtx mask, set, use, clob, dst, src;
10327 bool matching_memory;
10328 bool use_sse = false;
10329 bool vector_mode = VECTOR_MODE_P (mode);
10330 enum machine_mode elt_mode = mode;
10332 if (vector_mode)
10334 elt_mode = GET_MODE_INNER (mode);
10335 use_sse = true;
10337 else if (TARGET_SSE_MATH)
10338 use_sse = SSE_FLOAT_MODE_P (mode);
10340 /* NEG and ABS performed with SSE use bitwise mask operations.
10341 Create the appropriate mask now. */
10342 if (use_sse)
10343 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10344 else
10345 mask = NULL_RTX;
10347 dst = operands[0];
10348 src = operands[1];
10350 /* If the destination is memory, and we don't have matching source
10351 operands or we're using the x87, do things in registers. */
10352 matching_memory = false;
10353 if (MEM_P (dst))
10355 if (use_sse && rtx_equal_p (dst, src))
10356 matching_memory = true;
10357 else
10358 dst = gen_reg_rtx (mode);
10360 if (MEM_P (src) && !matching_memory)
10361 src = force_reg (mode, src);
10363 if (vector_mode)
10365 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10366 set = gen_rtx_SET (VOIDmode, dst, set);
10367 emit_insn (set);
10369 else
10371 set = gen_rtx_fmt_e (code, mode, src);
10372 set = gen_rtx_SET (VOIDmode, dst, set);
10373 if (mask)
10375 use = gen_rtx_USE (VOIDmode, mask);
10376 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10377 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10378 gen_rtvec (3, set, use, clob)));
10380 else
10381 emit_insn (set);
10384 if (dst != operands[0])
10385 emit_move_insn (operands[0], dst);
10388 /* Expand a copysign operation. Special case operand 0 being a constant. */
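/* The underlying identity (for reference): copysign (x, y) is
   computed bitwise as (x & ~signbit_mask) | (y & signbit_mask),
   with both masks coming from ix86_build_signbit_mask below.  */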
10390 void
10391 ix86_expand_copysign (rtx operands[])
10393 enum machine_mode mode, vmode;
10394 rtx dest, op0, op1, mask, nmask;
10396 dest = operands[0];
10397 op0 = operands[1];
10398 op1 = operands[2];
10400 mode = GET_MODE (dest);
10401 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10403 if (GET_CODE (op0) == CONST_DOUBLE)
10405 rtvec v;
10407 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10408 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10410 if (op0 == CONST0_RTX (mode))
10411 op0 = CONST0_RTX (vmode);
10412 else
10414 if (mode == SFmode)
10415 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10416 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10417 else
10418 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10419 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10422 mask = ix86_build_signbit_mask (mode, 0, 0);
10424 if (mode == SFmode)
10425 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10426 else
10427 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10429 else
10431 nmask = ix86_build_signbit_mask (mode, 0, 1);
10432 mask = ix86_build_signbit_mask (mode, 0, 0);
10434 if (mode == SFmode)
10435 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10436 else
10437 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10441 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10442 be a constant, and so has already been expanded into a vector constant. */
10444 void
10445 ix86_split_copysign_const (rtx operands[])
10447 enum machine_mode mode, vmode;
10448 rtx dest, op0, op1, mask, x;
10450 dest = operands[0];
10451 op0 = operands[1];
10452 op1 = operands[2];
10453 mask = operands[3];
10455 mode = GET_MODE (dest);
10456 vmode = GET_MODE (mask);
10458 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10459 x = gen_rtx_AND (vmode, dest, mask);
10460 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10462 if (op0 != CONST0_RTX (vmode))
10464 x = gen_rtx_IOR (vmode, dest, op0);
10465 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10469 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10470 so we have to do two masks. */
10472 void
10473 ix86_split_copysign_var (rtx operands[])
10475 enum machine_mode mode, vmode;
10476 rtx dest, scratch, op0, op1, mask, nmask, x;
10478 dest = operands[0];
10479 scratch = operands[1];
10480 op0 = operands[2];
10481 op1 = operands[3];
10482 nmask = operands[4];
10483 mask = operands[5];
10485 mode = GET_MODE (dest);
10486 vmode = GET_MODE (mask);
10488 if (rtx_equal_p (op0, op1))
10490 /* Shouldn't happen often (it's useless, obviously), but when it does
10491 we'd generate incorrect code if we continue below. */
10492 emit_move_insn (dest, op0);
10493 return;
10496 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10498 gcc_assert (REGNO (op1) == REGNO (scratch));
10500 x = gen_rtx_AND (vmode, scratch, mask);
10501 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10503 dest = mask;
10504 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10505 x = gen_rtx_NOT (vmode, dest);
10506 x = gen_rtx_AND (vmode, x, op0);
10507 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10509 else
10511 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10513 x = gen_rtx_AND (vmode, scratch, mask);
10515 else /* alternative 2,4 */
10517 gcc_assert (REGNO (mask) == REGNO (scratch));
10518 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10519 x = gen_rtx_AND (vmode, scratch, op1);
10521 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10523 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10525 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10526 x = gen_rtx_AND (vmode, dest, nmask);
10528 else /* alternative 3,4 */
10530 gcc_assert (REGNO (nmask) == REGNO (dest));
10531 dest = nmask;
10532 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10533 x = gen_rtx_AND (vmode, dest, op0);
10535 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10538 x = gen_rtx_IOR (vmode, dest, scratch);
10539 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10542 /* Return TRUE or FALSE depending on whether the first SET in INSN
10543 has source and destination with matching CC modes, and that the
10544 CC mode is at least as constrained as REQ_MODE. */
10547 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10549 rtx set;
10550 enum machine_mode set_mode;
10552 set = PATTERN (insn);
10553 if (GET_CODE (set) == PARALLEL)
10554 set = XVECEXP (set, 0, 0);
10555 gcc_assert (GET_CODE (set) == SET);
10556 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10558 set_mode = GET_MODE (SET_DEST (set));
10559 switch (set_mode)
10561 case CCNOmode:
10562 if (req_mode != CCNOmode
10563 && (req_mode != CCmode
10564 || XEXP (SET_SRC (set), 1) != const0_rtx))
10565 return 0;
10566 break;
10567 case CCmode:
10568 if (req_mode == CCGCmode)
10569 return 0;
10570 /* FALLTHRU */
10571 case CCGCmode:
10572 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10573 return 0;
10574 /* FALLTHRU */
10575 case CCGOCmode:
10576 if (req_mode == CCZmode)
10577 return 0;
10578 /* FALLTHRU */
10579 case CCZmode:
10580 break;
10582 default:
10583 gcc_unreachable ();
10586 return (GET_MODE (SET_SRC (set)) == set_mode);
10589 /* Generate insn patterns to do an integer compare of OPERANDS. */
10591 static rtx
10592 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10594 enum machine_mode cmpmode;
10595 rtx tmp, flags;
10597 cmpmode = SELECT_CC_MODE (code, op0, op1);
10598 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10600 /* This is very simple, but making the interface the same as in the
10601 FP case makes the rest of the code easier. */
10602 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10603 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10605 /* Return the test that should be put into the flags user, i.e.
10606 the bcc, scc, or cmov instruction. */
10607 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10610 /* Figure out whether to use ordered or unordered fp comparisons.
10611 Return the appropriate mode to use. */
10613 enum machine_mode
10614 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10616 /* ??? In order to make all comparisons reversible, we do all comparisons
10617 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10618 between trapping and nontrapping forms of all comparisons, we can make
10619 inequality comparisons trapping again, since that results in better code
10620 when using FCOM based compares. */
10621 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10624 enum machine_mode
10625 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10627 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10628 return ix86_fp_compare_mode (code);
10629 switch (code)
10631 /* Only zero flag is needed. */
10632 case EQ: /* ZF=0 */
10633 case NE: /* ZF!=0 */
10634 return CCZmode;
10635 /* Codes needing carry flag. */
10636 case GEU: /* CF=0 */
10637 case GTU: /* CF=0 & ZF=0 */
10638 case LTU: /* CF=1 */
10639 case LEU: /* CF=1 | ZF=1 */
10640 return CCmode;
10641 /* Codes possibly doable only with sign flag when
10642 comparing against zero. */
10643 case GE: /* SF=OF or SF=0 */
10644 case LT: /* SF<>OF or SF=1 */
10645 if (op1 == const0_rtx)
10646 return CCGOCmode;
10647 else
10648 /* For other cases Carry flag is not required. */
10649 return CCGCmode;
10650 /* Codes doable only with the sign flag when comparing
10651 against zero, but we miss the jump instruction for it,
10652 so we need to use relational tests against overflow,
10653 which thus needs to be zero. */
10654 case GT: /* ZF=0 & SF=OF */
10655 case LE: /* ZF=1 | SF<>OF */
10656 if (op1 == const0_rtx)
10657 return CCNOmode;
10658 else
10659 return CCGCmode;
10660 /* The strcmp pattern does (use flags), and combine may ask us for the
10661 proper mode. */
10662 case USE:
10663 return CCmode;
10664 default:
10665 gcc_unreachable ();
10669 /* Return the fixed registers used for condition codes. */
10671 static bool
10672 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10674 *p1 = FLAGS_REG;
10675 *p2 = FPSR_REG;
10676 return true;
10679 /* If two condition code modes are compatible, return a condition code
10680 mode which is compatible with both. Otherwise, return
10681 VOIDmode. */
10683 static enum machine_mode
10684 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10686 if (m1 == m2)
10687 return m1;
10689 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10690 return VOIDmode;
10692 if ((m1 == CCGCmode && m2 == CCGOCmode)
10693 || (m1 == CCGOCmode && m2 == CCGCmode))
10694 return CCGCmode;
10696 switch (m1)
10698 default:
10699 gcc_unreachable ();
10701 case CCmode:
10702 case CCGCmode:
10703 case CCGOCmode:
10704 case CCNOmode:
10705 case CCZmode:
10706 switch (m2)
10708 default:
10709 return VOIDmode;
10711 case CCmode:
10712 case CCGCmode:
10713 case CCGOCmode:
10714 case CCNOmode:
10715 case CCZmode:
10716 return CCmode;
10719 case CCFPmode:
10720 case CCFPUmode:
10721 /* These are only compatible with themselves, which we already
10722 checked above. */
10723 return VOIDmode;
10727 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10730 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10732 enum rtx_code swapped_code = swap_condition (code);
10733 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10734 || (ix86_fp_comparison_cost (swapped_code)
10735 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10738 /* Swap, force into registers, or otherwise massage the two operands
10739 to a fp comparison. The operands are updated in place; the new
10740 comparison code is returned. */
10742 static enum rtx_code
10743 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10745 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10746 rtx op0 = *pop0, op1 = *pop1;
10747 enum machine_mode op_mode = GET_MODE (op0);
10748 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10750 /* All of the unordered compare instructions only work on registers.
10751 The same is true of the fcomi compare instructions. The XFmode
10752 compare instructions require registers except when comparing
10753 against zero or when converting operand 1 from fixed point to
10754 floating point. */
10756 if (!is_sse
10757 && (fpcmp_mode == CCFPUmode
10758 || (op_mode == XFmode
10759 && ! (standard_80387_constant_p (op0) == 1
10760 || standard_80387_constant_p (op1) == 1)
10761 && GET_CODE (op1) != FLOAT)
10762 || ix86_use_fcomi_compare (code)))
10764 op0 = force_reg (op_mode, op0);
10765 op1 = force_reg (op_mode, op1);
10767 else
10769 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10770 things around if they appear profitable, otherwise force op0
10771 into a register. */
10773 if (standard_80387_constant_p (op0) == 0
10774 || (MEM_P (op0)
10775 && ! (standard_80387_constant_p (op1) == 0
10776 || MEM_P (op1))))
10778 rtx tmp;
10779 tmp = op0, op0 = op1, op1 = tmp;
10780 code = swap_condition (code);
10783 if (!REG_P (op0))
10784 op0 = force_reg (op_mode, op0);
10786 if (CONSTANT_P (op1))
10788 int tmp = standard_80387_constant_p (op1);
10789 if (tmp == 0)
10790 op1 = validize_mem (force_const_mem (op_mode, op1));
10791 else if (tmp == 1)
10793 if (TARGET_CMOVE)
10794 op1 = force_reg (op_mode, op1);
10796 else
10797 op1 = force_reg (op_mode, op1);
10801 /* Try to rearrange the comparison to make it cheaper. */
10802 if (ix86_fp_comparison_cost (code)
10803 > ix86_fp_comparison_cost (swap_condition (code))
10804 && (REG_P (op1) || !no_new_pseudos))
10806 rtx tmp;
10807 tmp = op0, op0 = op1, op1 = tmp;
10808 code = swap_condition (code);
10809 if (!REG_P (op0))
10810 op0 = force_reg (op_mode, op0);
10813 *pop0 = op0;
10814 *pop1 = op1;
10815 return code;
10818 /* Convert the comparison codes we use to represent FP comparisons to the
10819 integer code that will result in a proper branch. Return UNKNOWN if no
10820 such code is available. */
10822 enum rtx_code
10823 ix86_fp_compare_code_to_integer (enum rtx_code code)
10825 switch (code)
10827 case GT:
10828 return GTU;
10829 case GE:
10830 return GEU;
10831 case ORDERED:
10832 case UNORDERED:
10833 return code;
10834 break;
10835 case UNEQ:
10836 return EQ;
10837 break;
10838 case UNLT:
10839 return LTU;
10840 break;
10841 case UNLE:
10842 return LEU;
10843 break;
10844 case LTGT:
10845 return NE;
10846 break;
10847 default:
10848 return UNKNOWN;
10852 /* Split comparison code CODE into comparisons we can do using branch
10853 instructions. BYPASS_CODE is the comparison code for the branch that
10854 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10855 is not required, its code is set to UNKNOWN.
10856 We never require more than two branches. */
10858 void
10859 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10860 enum rtx_code *first_code,
10861 enum rtx_code *second_code)
10863 *first_code = code;
10864 *bypass_code = UNKNOWN;
10865 *second_code = UNKNOWN;
10867 /* The fcomi comparison sets flags as follows:
10869 cmp ZF PF CF
10870 > 0 0 0
10871 < 0 0 1
10872 = 1 0 0
10873 un 1 1 1 */
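/* For example (labels and exact mnemonics assumed for illustration):
   a plain LT under TARGET_IEEE_FP is split below into
   first_code = UNLT and bypass_code = UNORDERED, so the eventual
   branch sequence is roughly

       fcomi/fucomi ...
       jp   .Lskip        # unordered operands bypass the branch
       jb   .Ltarget      # CF=1, i.e. below
   .Lskip:

   so the branch to .Ltarget is taken only for ordered,
   strictly-smaller operands.  */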
10875 switch (code)
10877 case GT: /* GTU - CF=0 & ZF=0 */
10878 case GE: /* GEU - CF=0 */
10879 case ORDERED: /* PF=0 */
10880 case UNORDERED: /* PF=1 */
10881 case UNEQ: /* EQ - ZF=1 */
10882 case UNLT: /* LTU - CF=1 */
10883 case UNLE: /* LEU - CF=1 | ZF=1 */
10884 case LTGT: /* EQ - ZF=0 */
10885 break;
10886 case LT: /* LTU - CF=1 - fails on unordered */
10887 *first_code = UNLT;
10888 *bypass_code = UNORDERED;
10889 break;
10890 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10891 *first_code = UNLE;
10892 *bypass_code = UNORDERED;
10893 break;
10894 case EQ: /* EQ - ZF=1 - fails on unordered */
10895 *first_code = UNEQ;
10896 *bypass_code = UNORDERED;
10897 break;
10898 case NE: /* NE - ZF=0 - fails on unordered */
10899 *first_code = LTGT;
10900 *second_code = UNORDERED;
10901 break;
10902 case UNGE: /* GEU - CF=0 - fails on unordered */
10903 *first_code = GE;
10904 *second_code = UNORDERED;
10905 break;
10906 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10907 *first_code = GT;
10908 *second_code = UNORDERED;
10909 break;
10910 default:
10911 gcc_unreachable ();
10913 if (!TARGET_IEEE_FP)
10915 *second_code = UNKNOWN;
10916 *bypass_code = UNKNOWN;
10920 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10921 All of the following functions use the number of instructions as a cost metric.
10922 In the future this should be tweaked to compute bytes for optimize_size and to
10923 take into account the performance of various instructions on various CPUs. */
10924 static int
10925 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10927 if (!TARGET_IEEE_FP)
10928 return 4;
10929 /* The cost of code output by ix86_expand_fp_compare. */
10930 switch (code)
10932 case UNLE:
10933 case UNLT:
10934 case LTGT:
10935 case GT:
10936 case GE:
10937 case UNORDERED:
10938 case ORDERED:
10939 case UNEQ:
10940 return 4;
10941 break;
10942 case LT:
10943 case NE:
10944 case EQ:
10945 case UNGE:
10946 return 5;
10947 break;
10948 case LE:
10949 case UNGT:
10950 return 6;
10951 break;
10952 default:
10953 gcc_unreachable ();
10957 /* Return cost of comparison done using fcomi operation.
10958 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10959 static int
10960 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10962 enum rtx_code bypass_code, first_code, second_code;
10963 /* Return an arbitrarily high cost when the instruction is not supported -
10964 this prevents gcc from using it. */
10965 if (!TARGET_CMOVE)
10966 return 1024;
10967 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10968 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10971 /* Return cost of comparison done using sahf operation.
10972 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10973 static int
10974 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10976 enum rtx_code bypass_code, first_code, second_code;
10977 /* Return an arbitrarily high cost when the instruction is not preferred -
10978 this prevents gcc from using it. */
10979 if (!TARGET_USE_SAHF && !optimize_size)
10980 return 1024;
10981 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10982 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10985 /* Compute cost of the comparison done using any method.
10986 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10987 static int
10988 ix86_fp_comparison_cost (enum rtx_code code)
10990 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10991 int min;
10993 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10994 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10996 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10997 if (min > sahf_cost)
10998 min = sahf_cost;
10999 if (min > fcomi_cost)
11000 min = fcomi_cost;
11001 return min;
11004 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11006 static rtx
11007 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11008 rtx *second_test, rtx *bypass_test)
11010 enum machine_mode fpcmp_mode, intcmp_mode;
11011 rtx tmp, tmp2;
11012 int cost = ix86_fp_comparison_cost (code);
11013 enum rtx_code bypass_code, first_code, second_code;
11015 fpcmp_mode = ix86_fp_compare_mode (code);
11016 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11018 if (second_test)
11019 *second_test = NULL_RTX;
11020 if (bypass_test)
11021 *bypass_test = NULL_RTX;
11023 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11025 /* Do fcomi/sahf based test when profitable. */
11026 if ((bypass_code == UNKNOWN || bypass_test)
11027 && (second_code == UNKNOWN || second_test)
11028 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11030 if (TARGET_CMOVE)
11032 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11033 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11034 tmp);
11035 emit_insn (tmp);
11037 else
11039 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11040 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11041 if (!scratch)
11042 scratch = gen_reg_rtx (HImode);
11043 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11044 emit_insn (gen_x86_sahf_1 (scratch));
11047 /* The FP codes work out to act like unsigned. */
11048 intcmp_mode = fpcmp_mode;
11049 code = first_code;
11050 if (bypass_code != UNKNOWN)
11051 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11052 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11053 const0_rtx);
11054 if (second_code != UNKNOWN)
11055 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11056 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11057 const0_rtx);
11059 else
11061 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11062 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11063 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11064 if (!scratch)
11065 scratch = gen_reg_rtx (HImode);
11066 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11068 /* In the unordered case, we have to check C2 for NaNs, which
11069 doesn't happen to work out to anything nice combination-wise.
11070 So do some bit twiddling on the value we've got in AH to come
11071 up with an appropriate set of condition codes. */
11073 intcmp_mode = CCNOmode;
11074 switch (code)
11076 case GT:
11077 case UNGT:
11078 if (code == GT || !TARGET_IEEE_FP)
11080 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11081 code = EQ;
11083 else
11085 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11086 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11087 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11088 intcmp_mode = CCmode;
11089 code = GEU;
11091 break;
11092 case LT:
11093 case UNLT:
11094 if (code == LT && TARGET_IEEE_FP)
11096 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11097 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11098 intcmp_mode = CCmode;
11099 code = EQ;
11101 else
11103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11104 code = NE;
11106 break;
11107 case GE:
11108 case UNGE:
11109 if (code == GE || !TARGET_IEEE_FP)
11111 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11112 code = EQ;
11114 else
11116 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11117 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11118 GEN_INT (0x01)));
11119 code = NE;
11121 break;
11122 case LE:
11123 case UNLE:
11124 if (code == LE && TARGET_IEEE_FP)
11126 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11127 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11128 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11129 intcmp_mode = CCmode;
11130 code = LTU;
11132 else
11134 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11135 code = NE;
11137 break;
11138 case EQ:
11139 case UNEQ:
11140 if (code == EQ && TARGET_IEEE_FP)
11142 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11143 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11144 intcmp_mode = CCmode;
11145 code = EQ;
11147 else
11149 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11150 code = NE;
11151 break;
11153 break;
11154 case NE:
11155 case LTGT:
11156 if (code == NE && TARGET_IEEE_FP)
11158 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11159 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11160 GEN_INT (0x40)));
11161 code = NE;
11163 else
11165 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11166 code = EQ;
11168 break;
11170 case UNORDERED:
11171 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11172 code = NE;
11173 break;
11174 case ORDERED:
11175 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11176 code = EQ;
11177 break;
11179 default:
11180 gcc_unreachable ();
11184 /* Return the test that should be put into the flags user, i.e.
11185 the bcc, scc, or cmov instruction. */
11186 return gen_rtx_fmt_ee (code, VOIDmode,
11187 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11188 const0_rtx);
11192 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11194 rtx op0, op1, ret;
11195 op0 = ix86_compare_op0;
11196 op1 = ix86_compare_op1;
11198 if (second_test)
11199 *second_test = NULL_RTX;
11200 if (bypass_test)
11201 *bypass_test = NULL_RTX;
11203 if (ix86_compare_emitted)
11205 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11206 ix86_compare_emitted = NULL_RTX;
11208 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11209 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11210 second_test, bypass_test);
11211 else
11212 ret = ix86_expand_int_compare (code, op0, op1);
11214 return ret;
11217 /* Return true if the CODE will result in nontrivial jump sequence. */
11218 bool
11219 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11221 enum rtx_code bypass_code, first_code, second_code;
11222 if (!TARGET_CMOVE)
11223 return true;
11224 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11225 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11228 void
11229 ix86_expand_branch (enum rtx_code code, rtx label)
11231 rtx tmp;
11233 /* If we have emitted a compare insn, go straight to simple.
11234 ix86_expand_compare won't emit anything if ix86_compare_emitted
11235 is non-NULL. */
11236 if (ix86_compare_emitted)
11237 goto simple;
11239 switch (GET_MODE (ix86_compare_op0))
11241 case QImode:
11242 case HImode:
11243 case SImode:
11244 simple:
11245 tmp = ix86_expand_compare (code, NULL, NULL);
11246 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11247 gen_rtx_LABEL_REF (VOIDmode, label),
11248 pc_rtx);
11249 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11250 return;
11252 case SFmode:
11253 case DFmode:
11254 case XFmode:
11256 rtvec vec;
11257 int use_fcomi;
11258 enum rtx_code bypass_code, first_code, second_code;
11260 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11261 &ix86_compare_op1);
11263 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11265 /* Check whether we will use the natural sequence with one jump. If
11266 so, we can expand the jump early. Otherwise delay expansion by
11267 creating a compound insn so as not to confuse the optimizers. */
11268 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11269 && TARGET_CMOVE)
11271 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11272 gen_rtx_LABEL_REF (VOIDmode, label),
11273 pc_rtx, NULL_RTX, NULL_RTX);
11275 else
11277 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11278 ix86_compare_op0, ix86_compare_op1);
11279 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11280 gen_rtx_LABEL_REF (VOIDmode, label),
11281 pc_rtx);
11282 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11284 use_fcomi = ix86_use_fcomi_compare (code);
11285 vec = rtvec_alloc (3 + !use_fcomi);
11286 RTVEC_ELT (vec, 0) = tmp;
11287 RTVEC_ELT (vec, 1)
11288 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11289 RTVEC_ELT (vec, 2)
11290 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11291 if (! use_fcomi)
11292 RTVEC_ELT (vec, 3)
11293 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11295 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11297 return;
11300 case DImode:
11301 if (TARGET_64BIT)
11302 goto simple;
11303 case TImode:
11304 /* Expand DImode branch into multiple compare+branch. */
11306 rtx lo[2], hi[2], label2;
11307 enum rtx_code code1, code2, code3;
11308 enum machine_mode submode;
11310 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11312 tmp = ix86_compare_op0;
11313 ix86_compare_op0 = ix86_compare_op1;
11314 ix86_compare_op1 = tmp;
11315 code = swap_condition (code);
11317 if (GET_MODE (ix86_compare_op0) == DImode)
11319 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11320 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11321 submode = SImode;
11323 else
11325 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11326 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11327 submode = DImode;
11330 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11331 avoid two branches. This costs one extra insn, so disable when
11332 optimizing for size. */
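/* Roughly (registers assumed, ia32 DImode EQ case):

       xorl hi1, hi0
       xorl lo1, lo0
       orl  lo0, hi0        # the or already sets ZF
       je   .Ltarget        # taken iff all 64 bits compare equal

   i.e. one conditional branch instead of two.  */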
11334 if ((code == EQ || code == NE)
11335 && (!optimize_size
11336 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11338 rtx xor0, xor1;
11340 xor1 = hi[0];
11341 if (hi[1] != const0_rtx)
11342 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11343 NULL_RTX, 0, OPTAB_WIDEN);
11345 xor0 = lo[0];
11346 if (lo[1] != const0_rtx)
11347 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11348 NULL_RTX, 0, OPTAB_WIDEN);
11350 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11351 NULL_RTX, 0, OPTAB_WIDEN);
11353 ix86_compare_op0 = tmp;
11354 ix86_compare_op1 = const0_rtx;
11355 ix86_expand_branch (code, label);
11356 return;
11359 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
11360 op1 is a constant and the low word is zero, then we can just
11361 examine the high word. */
11363 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11364 switch (code)
11366 case LT: case LTU: case GE: case GEU:
11367 ix86_compare_op0 = hi[0];
11368 ix86_compare_op1 = hi[1];
11369 ix86_expand_branch (code, label);
11370 return;
11371 default:
11372 break;
11375 /* Otherwise, we need two or three jumps. */
11377 label2 = gen_label_rtx ();
11379 code1 = code;
11380 code2 = swap_condition (code);
11381 code3 = unsigned_condition (code);
11383 switch (code)
11385 case LT: case GT: case LTU: case GTU:
11386 break;
11388 case LE: code1 = LT; code2 = GT; break;
11389 case GE: code1 = GT; code2 = LT; break;
11390 case LEU: code1 = LTU; code2 = GTU; break;
11391 case GEU: code1 = GTU; code2 = LTU; break;
11393 case EQ: code1 = UNKNOWN; code2 = NE; break;
11394 case NE: code2 = UNKNOWN; break;
11396 default:
11397 gcc_unreachable ();
11401 * a < b =>
11402 * if (hi(a) < hi(b)) goto true;
11403 * if (hi(a) > hi(b)) goto false;
11404 * if (lo(a) < lo(b)) goto true;
11405 * false:
11408 ix86_compare_op0 = hi[0];
11409 ix86_compare_op1 = hi[1];
11411 if (code1 != UNKNOWN)
11412 ix86_expand_branch (code1, label);
11413 if (code2 != UNKNOWN)
11414 ix86_expand_branch (code2, label2);
11416 ix86_compare_op0 = lo[0];
11417 ix86_compare_op1 = lo[1];
11418 ix86_expand_branch (code3, label);
11420 if (code2 != UNKNOWN)
11421 emit_label (label2);
11422 return;
11425 default:
11426 gcc_unreachable ();
11430 /* Split branch based on floating point condition. */
11431 void
11432 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11433 rtx target1, rtx target2, rtx tmp, rtx pushed)
11435 rtx second, bypass;
11436 rtx label = NULL_RTX;
11437 rtx condition;
11438 int bypass_probability = -1, second_probability = -1, probability = -1;
11439 rtx i;
11441 if (target2 != pc_rtx)
11443 rtx tmp = target2;
11444 code = reverse_condition_maybe_unordered (code);
11445 target2 = target1;
11446 target1 = tmp;
11449 condition = ix86_expand_fp_compare (code, op1, op2,
11450 tmp, &second, &bypass);
11452 /* Remove pushed operand from stack. */
11453 if (pushed)
11454 ix86_free_from_memory (GET_MODE (pushed));
11456 if (split_branch_probability >= 0)
11458 /* Distribute the probabilities across the jumps.
11459 Assume that BYPASS and SECOND always test
11460 for UNORDERED. */
11461 probability = split_branch_probability;
11463 /* A value of 1 is low enough that the probability does not need
11464 to be updated. Later we may run some experiments and see
11465 whether unordered values are more frequent in practice. */
11466 if (bypass)
11467 bypass_probability = 1;
11468 if (second)
11469 second_probability = 1;
11471 if (bypass != NULL_RTX)
11473 label = gen_label_rtx ();
11474 i = emit_jump_insn (gen_rtx_SET
11475 (VOIDmode, pc_rtx,
11476 gen_rtx_IF_THEN_ELSE (VOIDmode,
11477 bypass,
11478 gen_rtx_LABEL_REF (VOIDmode,
11479 label),
11480 pc_rtx)));
11481 if (bypass_probability >= 0)
11482 REG_NOTES (i)
11483 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11484 GEN_INT (bypass_probability),
11485 REG_NOTES (i));
11487 i = emit_jump_insn (gen_rtx_SET
11488 (VOIDmode, pc_rtx,
11489 gen_rtx_IF_THEN_ELSE (VOIDmode,
11490 condition, target1, target2)));
11491 if (probability >= 0)
11492 REG_NOTES (i)
11493 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11494 GEN_INT (probability),
11495 REG_NOTES (i));
11496 if (second != NULL_RTX)
11498 i = emit_jump_insn (gen_rtx_SET
11499 (VOIDmode, pc_rtx,
11500 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11501 target2)));
11502 if (second_probability >= 0)
11503 REG_NOTES (i)
11504 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11505 GEN_INT (second_probability),
11506 REG_NOTES (i));
11508 if (label != NULL_RTX)
11509 emit_label (label);
11513 ix86_expand_setcc (enum rtx_code code, rtx dest)
11515 rtx ret, tmp, tmpreg, equiv;
11516 rtx second_test, bypass_test;
11518 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11519 return 0; /* FAIL */
11521 gcc_assert (GET_MODE (dest) == QImode);
11523 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11524 PUT_MODE (ret, QImode);
11526 tmp = dest;
11527 tmpreg = dest;
11529 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11530 if (bypass_test || second_test)
11532 rtx test = second_test;
11533 int bypass = 0;
11534 rtx tmp2 = gen_reg_rtx (QImode);
11535 if (bypass_test)
11537 gcc_assert (!second_test);
11538 test = bypass_test;
11539 bypass = 1;
11540 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11542 PUT_MODE (test, QImode);
11543 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11545 if (bypass)
11546 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11547 else
11548 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11551 /* Attach a REG_EQUAL note describing the comparison result. */
11552 if (ix86_compare_op0 && ix86_compare_op1)
11554 equiv = simplify_gen_relational (code, QImode,
11555 GET_MODE (ix86_compare_op0),
11556 ix86_compare_op0, ix86_compare_op1);
11557 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11560 return 1; /* DONE */
11563 /* Expand a comparison setting or clearing the carry flag. Return true when
11564 successful and set *POP to the comparison operation. */
11565 static bool
11566 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11568 enum machine_mode mode =
11569 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11571 /* Do not handle double-word compares, which go through a special path.
11572 FP compares are handled below only when they can be done via the carry flag. */
11573 if (mode == (TARGET_64BIT ? TImode : DImode))
11574 return false;
11575 if (FLOAT_MODE_P (mode))
11577 rtx second_test = NULL, bypass_test = NULL;
11578 rtx compare_op, compare_seq;
11580 /* Shortcut: the following common codes never translate into carry-flag compares. */
11581 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11582 || code == ORDERED || code == UNORDERED)
11583 return false;
11585 /* These comparisons require the zero flag; swap operands so they won't. */
11586 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11587 && !TARGET_IEEE_FP)
11589 rtx tmp = op0;
11590 op0 = op1;
11591 op1 = tmp;
11592 code = swap_condition (code);
11595 /* Try to expand the comparison and verify that we end up with a carry-flag
11596 based comparison. This fails only when we decide to expand the
11597 comparison using arithmetic, which is not a common scenario. */
11598 start_sequence ();
11599 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11600 &second_test, &bypass_test);
11601 compare_seq = get_insns ();
11602 end_sequence ();
11604 if (second_test || bypass_test)
11605 return false;
11606 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11607 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11608 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11609 else
11610 code = GET_CODE (compare_op);
11611 if (code != LTU && code != GEU)
11612 return false;
11613 emit_insn (compare_seq);
11614 *pop = compare_op;
11615 return true;
11617 if (!INTEGRAL_MODE_P (mode))
11618 return false;
11619 switch (code)
11621 case LTU:
11622 case GEU:
11623 break;
11625 /* Convert a==0 into (unsigned)a<1. */
11626 case EQ:
11627 case NE:
11628 if (op1 != const0_rtx)
11629 return false;
11630 op1 = const1_rtx;
11631 code = (code == EQ ? LTU : GEU);
11632 break;
11634 /* Convert a>b into b<a or a>=b+1. */
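/* E.g. an unsigned "a > 5" becomes "a >= 6", which maps onto the carry
   flag (GEU); with a non-constant operand the operands are swapped
   instead, turning "a > b" into "b < a" (LTU).  */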
11635 case GTU:
11636 case LEU:
11637 if (CONST_INT_P (op1))
11639 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11640 /* Bail out on overflow. We could still swap the operands, but that
11641 would force loading of the constant into a register. */
11642 if (op1 == const0_rtx
11643 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11644 return false;
11645 code = (code == GTU ? GEU : LTU);
11647 else
11649 rtx tmp = op1;
11650 op1 = op0;
11651 op0 = tmp;
11652 code = (code == GTU ? LTU : GEU);
11654 break;
11656 /* Convert a>=0 into (unsigned)a<0x80000000. */
11657 case LT:
11658 case GE:
11659 if (mode == DImode || op1 != const0_rtx)
11660 return false;
11661 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11662 code = (code == LT ? GEU : LTU);
11663 break;
11664 case LE:
11665 case GT:
11666 if (mode == DImode || op1 != constm1_rtx)
11667 return false;
11668 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11669 code = (code == LE ? GEU : LTU);
11670 break;
11672 default:
11673 return false;
11675 /* Swapping operands may cause a constant to appear as the first operand. */
11676 if (!nonimmediate_operand (op0, VOIDmode))
11678 if (no_new_pseudos)
11679 return false;
11680 op0 = force_reg (mode, op0);
11682 ix86_compare_op0 = op0;
11683 ix86_compare_op1 = op1;
11684 *pop = ix86_expand_compare (code, NULL, NULL);
11685 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11686 return true;
11690 ix86_expand_int_movcc (rtx operands[])
11692 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11693 rtx compare_seq, compare_op;
11694 rtx second_test, bypass_test;
11695 enum machine_mode mode = GET_MODE (operands[0]);
11696 bool sign_bit_compare_p = false;
11698 start_sequence ();
11699 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11700 compare_seq = get_insns ();
11701 end_sequence ();
11703 compare_code = GET_CODE (compare_op);
11705 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11706 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11707 sign_bit_compare_p = true;
11709 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11710 HImode insns, we'd be swallowed in word prefix ops. */
11712 if ((mode != HImode || TARGET_FAST_PREFIX)
11713 && (mode != (TARGET_64BIT ? TImode : DImode))
11714 && CONST_INT_P (operands[2])
11715 && CONST_INT_P (operands[3]))
11717 rtx out = operands[0];
11718 HOST_WIDE_INT ct = INTVAL (operands[2]);
11719 HOST_WIDE_INT cf = INTVAL (operands[3]);
11720 HOST_WIDE_INT diff;
11722 diff = ct - cf;
11723 /* Sign bit compares are better done using shifts than using
11724 sbb. */
11725 if (sign_bit_compare_p
11726 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11727 ix86_compare_op1, &compare_op))
11729 /* Detect overlap between destination and compare sources. */
11730 rtx tmp = out;
11732 if (!sign_bit_compare_p)
11734 bool fpcmp = false;
11736 compare_code = GET_CODE (compare_op);
11738 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11739 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11741 fpcmp = true;
11742 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11745 /* To simplify the rest of the code, restrict to the GEU case. */
11746 if (compare_code == LTU)
11748 HOST_WIDE_INT tmp = ct;
11749 ct = cf;
11750 cf = tmp;
11751 compare_code = reverse_condition (compare_code);
11752 code = reverse_condition (code);
11754 else
11756 if (fpcmp)
11757 PUT_CODE (compare_op,
11758 reverse_condition_maybe_unordered
11759 (GET_CODE (compare_op)));
11760 else
11761 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11763 diff = ct - cf;
11765 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11766 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11767 tmp = gen_reg_rtx (mode);
11769 if (mode == DImode)
11770 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11771 else
11772 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11774 else
11776 if (code == GT || code == GE)
11777 code = reverse_condition (code);
11778 else
11780 HOST_WIDE_INT tmp = ct;
11781 ct = cf;
11782 cf = tmp;
11783 diff = ct - cf;
11785 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11786 ix86_compare_op1, VOIDmode, 0, -1);
11789 if (diff == 1)
11792 * cmpl op0,op1
11793 * sbbl dest,dest
11794 * [addl dest, ct]
11796 * Size 5 - 8.
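*
* In other words dest = ct - carry; since ct - cf == 1 here, this
* yields ct when the carry is clear and cf when it is set, with no
* branch at all.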
11798 if (ct)
11799 tmp = expand_simple_binop (mode, PLUS,
11800 tmp, GEN_INT (ct),
11801 copy_rtx (tmp), 1, OPTAB_DIRECT);
11803 else if (cf == -1)
11806 * cmpl op0,op1
11807 * sbbl dest,dest
11808 * orl $ct, dest
11810 * Size 8.
11812 tmp = expand_simple_binop (mode, IOR,
11813 tmp, GEN_INT (ct),
11814 copy_rtx (tmp), 1, OPTAB_DIRECT);
11816 else if (diff == -1 && ct)
11819 * cmpl op0,op1
11820 * sbbl dest,dest
11821 * notl dest
11822 * [addl dest, cf]
11824 * Size 8 - 11.
11826 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11827 if (cf)
11828 tmp = expand_simple_binop (mode, PLUS,
11829 copy_rtx (tmp), GEN_INT (cf),
11830 copy_rtx (tmp), 1, OPTAB_DIRECT);
11832 else
11835 * cmpl op0,op1
11836 * sbbl dest,dest
11837 * [notl dest]
11838 * andl cf - ct, dest
11839 * [addl dest, ct]
11841 * Size 8 - 11.
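*
* In C terms this computes dest = (mask & (cf - ct)) + ct, where mask
* is the 0 / -1 result of the sbb (possibly complemented), so dest
* ends up as ct for a zero mask and cf for an all-ones mask.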
11844 if (cf == 0)
11846 cf = ct;
11847 ct = 0;
11848 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11851 tmp = expand_simple_binop (mode, AND,
11852 copy_rtx (tmp),
11853 gen_int_mode (cf - ct, mode),
11854 copy_rtx (tmp), 1, OPTAB_DIRECT);
11855 if (ct)
11856 tmp = expand_simple_binop (mode, PLUS,
11857 copy_rtx (tmp), GEN_INT (ct),
11858 copy_rtx (tmp), 1, OPTAB_DIRECT);
11861 if (!rtx_equal_p (tmp, out))
11862 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11864 return 1; /* DONE */
11867 if (diff < 0)
11869 HOST_WIDE_INT tmp;
11870 tmp = ct, ct = cf, cf = tmp;
11871 diff = -diff;
11872 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11874 /* We may be reversing an unordered compare to a normal compare, which
11875 is not valid in general (we may convert a non-trapping condition
11876 to a trapping one); however, on i386 we currently emit all
11877 comparisons unordered. */
11878 compare_code = reverse_condition_maybe_unordered (compare_code);
11879 code = reverse_condition_maybe_unordered (code);
11881 else
11883 compare_code = reverse_condition (compare_code);
11884 code = reverse_condition (code);
11888 compare_code = UNKNOWN;
11889 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11890 && CONST_INT_P (ix86_compare_op1))
11892 if (ix86_compare_op1 == const0_rtx
11893 && (code == LT || code == GE))
11894 compare_code = code;
11895 else if (ix86_compare_op1 == constm1_rtx)
11897 if (code == LE)
11898 compare_code = LT;
11899 else if (code == GT)
11900 compare_code = GE;
11904 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11905 if (compare_code != UNKNOWN
11906 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11907 && (cf == -1 || ct == -1))
11909 /* If lea code below could be used, only optimize
11910 if it results in a 2 insn sequence. */
11912 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11913 || diff == 3 || diff == 5 || diff == 9)
11914 || (compare_code == LT && ct == -1)
11915 || (compare_code == GE && cf == -1))
11918 * notl op1 (if necessary)
11919 * sarl $31, op1
11920 * orl cf, op1
11922 if (ct != -1)
11924 cf = ct;
11925 ct = -1;
11926 code = reverse_condition (code);
11929 out = emit_store_flag (out, code, ix86_compare_op0,
11930 ix86_compare_op1, VOIDmode, 0, -1);
11932 out = expand_simple_binop (mode, IOR,
11933 out, GEN_INT (cf),
11934 out, 1, OPTAB_DIRECT);
11935 if (out != operands[0])
11936 emit_move_insn (operands[0], out);
11938 return 1; /* DONE */
11943 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11944 || diff == 3 || diff == 5 || diff == 9)
11945 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11946 && (mode != DImode
11947 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11950 * xorl dest,dest
11951 * cmpl op1,op2
11952 * setcc dest
11953 * lea cf(dest*(ct-cf)),dest
11955 * Size 14.
11957 * This also catches the degenerate setcc-only case.
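*
* In C terms: dest = cf + cond * (ct - cf), where cond is the 0/1
* setcc result, i.e. cf when the condition is false and ct when it
* is true.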
11960 rtx tmp;
11961 int nops;
11963 out = emit_store_flag (out, code, ix86_compare_op0,
11964 ix86_compare_op1, VOIDmode, 0, 1);
11966 nops = 0;
11967 /* On x86_64 the lea instruction operates on Pmode, so we need
11968 to get the arithmetic done in the proper mode to match. */
11969 if (diff == 1)
11970 tmp = copy_rtx (out);
11971 else
11973 rtx out1;
11974 out1 = copy_rtx (out);
11975 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11976 nops++;
11977 if (diff & 1)
11979 tmp = gen_rtx_PLUS (mode, tmp, out1);
11980 nops++;
11983 if (cf != 0)
11985 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11986 nops++;
11988 if (!rtx_equal_p (tmp, out))
11990 if (nops == 1)
11991 out = force_operand (tmp, copy_rtx (out));
11992 else
11993 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11995 if (!rtx_equal_p (out, operands[0]))
11996 emit_move_insn (operands[0], copy_rtx (out));
11998 return 1; /* DONE */
12002 * General case: Jumpful:
12003 * xorl dest,dest cmpl op1, op2
12004 * cmpl op1, op2 movl ct, dest
12005 * setcc dest jcc 1f
12006 * decl dest movl cf, dest
12007 * andl (cf-ct),dest 1:
12008 * addl ct,dest
12010 * Size 20. Size 14.
12012 * This is reasonably steep, but branch mispredict costs are
12013 * high on modern cpus, so consider failing only if optimizing
12014 * for space.
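*
* The branchless form on the left computes
* dest = ((cond - 1) & (cf - ct)) + ct with cond being the 0/1 setcc
* result: ct when cond is 1, cf when cond is 0.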
12017 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12018 && BRANCH_COST >= 2)
12020 if (cf == 0)
12022 cf = ct;
12023 ct = 0;
12024 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
12025 /* We may be reversing an unordered compare to a normal compare,
12026 which is not valid in general (we may convert a non-trapping
12027 condition to a trapping one); however, on i386 we currently
12028 emit all comparisons unordered. */
12029 code = reverse_condition_maybe_unordered (code);
12030 else
12032 code = reverse_condition (code);
12033 if (compare_code != UNKNOWN)
12034 compare_code = reverse_condition (compare_code);
12038 if (compare_code != UNKNOWN)
12040 /* notl op1 (if needed)
12041 sarl $31, op1
12042 andl (cf-ct), op1
12043 addl ct, op1
12045 For x < 0 (resp. x <= -1) there will be no notl,
12046 so if possible swap the constants to get rid of the
12047 complement.
12048 True/false will be -1/0 while code below (store flag
12049 followed by decrement) is 0/-1, so the constants need
12050 to be exchanged once more. */
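/* E.g. when comparing x against 0 the 0/-1 mask comes directly from
   "sarl $31, x" (sign extension) rather than from setcc followed by a
   decrement; the constant swap above keeps the selected values
   correct.  */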
12052 if (compare_code == GE || !cf)
12054 code = reverse_condition (code);
12055 compare_code = LT;
12057 else
12059 HOST_WIDE_INT tmp = cf;
12060 cf = ct;
12061 ct = tmp;
12064 out = emit_store_flag (out, code, ix86_compare_op0,
12065 ix86_compare_op1, VOIDmode, 0, -1);
12067 else
12069 out = emit_store_flag (out, code, ix86_compare_op0,
12070 ix86_compare_op1, VOIDmode, 0, 1);
12072 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12073 copy_rtx (out), 1, OPTAB_DIRECT);
12076 out = expand_simple_binop (mode, AND, copy_rtx (out),
12077 gen_int_mode (cf - ct, mode),
12078 copy_rtx (out), 1, OPTAB_DIRECT);
12079 if (ct)
12080 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12081 copy_rtx (out), 1, OPTAB_DIRECT);
12082 if (!rtx_equal_p (out, operands[0]))
12083 emit_move_insn (operands[0], copy_rtx (out));
12085 return 1; /* DONE */
12089 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12091 /* Try a few things more with specific constants and a variable. */
12093 optab op;
12094 rtx var, orig_out, out, tmp;
12096 if (BRANCH_COST <= 2)
12097 return 0; /* FAIL */
12099 /* If one of the two operands is an interesting constant, load a
12100 constant via the sequence above and mask the variable in with a logical operation. */
12102 if (CONST_INT_P (operands[2]))
12104 var = operands[3];
12105 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12106 operands[3] = constm1_rtx, op = and_optab;
12107 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12108 operands[3] = const0_rtx, op = ior_optab;
12109 else
12110 return 0; /* FAIL */
12112 else if (CONST_INT_P (operands[3]))
12114 var = operands[2];
12115 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12116 operands[2] = constm1_rtx, op = and_optab;
12117 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12118 operands[2] = const0_rtx, op = ior_optab;
12119 else
12120 return 0; /* FAIL */
12122 else
12123 return 0; /* FAIL */
12125 orig_out = operands[0];
12126 tmp = gen_reg_rtx (mode);
12127 operands[0] = tmp;
12129 /* Recurse to get the constant loaded. */
12130 if (ix86_expand_int_movcc (operands) == 0)
12131 return 0; /* FAIL */
12133 /* Mask in the interesting variable. */
12134 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12135 OPTAB_WIDEN);
12136 if (!rtx_equal_p (out, orig_out))
12137 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12139 return 1; /* DONE */
12143 * For comparison with above,
12145 * movl cf,dest
12146 * movl ct,tmp
12147 * cmpl op1,op2
12148 * cmovcc tmp,dest
12150 * Size 15.
12153 if (! nonimmediate_operand (operands[2], mode))
12154 operands[2] = force_reg (mode, operands[2]);
12155 if (! nonimmediate_operand (operands[3], mode))
12156 operands[3] = force_reg (mode, operands[3]);
12158 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12160 rtx tmp = gen_reg_rtx (mode);
12161 emit_move_insn (tmp, operands[3]);
12162 operands[3] = tmp;
12164 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12166 rtx tmp = gen_reg_rtx (mode);
12167 emit_move_insn (tmp, operands[2]);
12168 operands[2] = tmp;
12171 if (! register_operand (operands[2], VOIDmode)
12172 && (mode == QImode
12173 || ! register_operand (operands[3], VOIDmode)))
12174 operands[2] = force_reg (mode, operands[2]);
12176 if (mode == QImode
12177 && ! register_operand (operands[3], VOIDmode))
12178 operands[3] = force_reg (mode, operands[3]);
12180 emit_insn (compare_seq);
12181 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12182 gen_rtx_IF_THEN_ELSE (mode,
12183 compare_op, operands[2],
12184 operands[3])));
12185 if (bypass_test)
12186 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12187 gen_rtx_IF_THEN_ELSE (mode,
12188 bypass_test,
12189 copy_rtx (operands[3]),
12190 copy_rtx (operands[0]))));
12191 if (second_test)
12192 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12193 gen_rtx_IF_THEN_ELSE (mode,
12194 second_test,
12195 copy_rtx (operands[2]),
12196 copy_rtx (operands[0]))));
12198 return 1; /* DONE */
12201 /* Swap, force into registers, or otherwise massage the two operands
12202 to an sse comparison with a mask result. Thus we differ a bit from
12203 ix86_prepare_fp_compare_args which expects to produce a flags result.
12205 The DEST operand exists to help determine whether to commute commutative
12206 operators. The POP0/POP1 operands are updated in place. The new
12207 comparison code is returned, or UNKNOWN if not implementable. */
12209 static enum rtx_code
12210 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12211 rtx *pop0, rtx *pop1)
12213 rtx tmp;
12215 switch (code)
12217 case LTGT:
12218 case UNEQ:
12219 /* We have no LTGT as an operator. We could implement it with
12220 NE & ORDERED, but this requires an extra temporary. It's
12221 not clear that it's worth it. */
12222 return UNKNOWN;
12224 case LT:
12225 case LE:
12226 case UNGT:
12227 case UNGE:
12228 /* These are supported directly. */
12229 break;
12231 case EQ:
12232 case NE:
12233 case UNORDERED:
12234 case ORDERED:
12235 /* For commutative operators, try to canonicalize the destination
12236 operand to be first in the comparison - this helps reload to
12237 avoid extra moves. */
12238 if (!dest || !rtx_equal_p (dest, *pop1))
12239 break;
12240 /* FALLTHRU */
12242 case GE:
12243 case GT:
12244 case UNLE:
12245 case UNLT:
12246 /* These are not supported directly. Swap the comparison operands
12247 to transform into something that is supported. */
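/* The SSE compare predicates provide eq/lt/le/unord and their
   negations, but not gt/ge, hence the operand swap.  */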
12248 tmp = *pop0;
12249 *pop0 = *pop1;
12250 *pop1 = tmp;
12251 code = swap_condition (code);
12252 break;
12254 default:
12255 gcc_unreachable ();
12258 return code;
12261 /* Detect conditional moves that exactly match min/max operational
12262 semantics. Note that this is IEEE safe, as long as we don't
12263 interchange the operands.
12265 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12266 and TRUE if the operation is successful and instructions are emitted. */
12268 static bool
12269 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12270 rtx cmp_op1, rtx if_true, rtx if_false)
12272 enum machine_mode mode;
12273 bool is_min;
12274 rtx tmp;
12276 if (code == LT)
12278 else if (code == UNGE)
12280 tmp = if_true;
12281 if_true = if_false;
12282 if_false = tmp;
12284 else
12285 return false;
12287 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12288 is_min = true;
12289 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12290 is_min = false;
12291 else
12292 return false;
12294 mode = GET_MODE (dest);
12296 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12297 but MODE may be a vector mode and thus not appropriate. */
12298 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12300 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12301 rtvec v;
12303 if_true = force_reg (mode, if_true);
12304 v = gen_rtvec (2, if_true, if_false);
12305 tmp = gen_rtx_UNSPEC (mode, v, u);
12307 else
12309 code = is_min ? SMIN : SMAX;
12310 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12313 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12314 return true;
12317 /* Expand an sse vector comparison. Return the register with the result. */
12319 static rtx
12320 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12321 rtx op_true, rtx op_false)
12323 enum machine_mode mode = GET_MODE (dest);
12324 rtx x;
12326 cmp_op0 = force_reg (mode, cmp_op0);
12327 if (!nonimmediate_operand (cmp_op1, mode))
12328 cmp_op1 = force_reg (mode, cmp_op1);
12330 if (optimize
12331 || reg_overlap_mentioned_p (dest, op_true)
12332 || reg_overlap_mentioned_p (dest, op_false))
12333 dest = gen_reg_rtx (mode);
12335 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12336 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12338 return dest;
12341 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12342 operations. This is used for both scalar and vector conditional moves. */
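/* The general case below is the classic mask blend

       dest = (cmp & op_true) | (~cmp & op_false);

   the two special cases avoid one AND when either arm is already zero.  */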
12344 static void
12345 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12347 enum machine_mode mode = GET_MODE (dest);
12348 rtx t2, t3, x;
12350 if (op_false == CONST0_RTX (mode))
12352 op_true = force_reg (mode, op_true);
12353 x = gen_rtx_AND (mode, cmp, op_true);
12354 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12356 else if (op_true == CONST0_RTX (mode))
12358 op_false = force_reg (mode, op_false);
12359 x = gen_rtx_NOT (mode, cmp);
12360 x = gen_rtx_AND (mode, x, op_false);
12361 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12363 else
12365 op_true = force_reg (mode, op_true);
12366 op_false = force_reg (mode, op_false);
12368 t2 = gen_reg_rtx (mode);
12369 if (optimize)
12370 t3 = gen_reg_rtx (mode);
12371 else
12372 t3 = dest;
12374 x = gen_rtx_AND (mode, op_true, cmp);
12375 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12377 x = gen_rtx_NOT (mode, cmp);
12378 x = gen_rtx_AND (mode, x, op_false);
12379 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12381 x = gen_rtx_IOR (mode, t3, t2);
12382 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12386 /* Expand a floating-point conditional move. Return true if successful. */
12389 ix86_expand_fp_movcc (rtx operands[])
12391 enum machine_mode mode = GET_MODE (operands[0]);
12392 enum rtx_code code = GET_CODE (operands[1]);
12393 rtx tmp, compare_op, second_test, bypass_test;
12395 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12397 enum machine_mode cmode;
12399 /* Since we've no cmove for sse registers, don't force bad register
12400 allocation just to gain access to it. Deny movcc when the
12401 comparison mode doesn't match the move mode. */
12402 cmode = GET_MODE (ix86_compare_op0);
12403 if (cmode == VOIDmode)
12404 cmode = GET_MODE (ix86_compare_op1);
12405 if (cmode != mode)
12406 return 0;
12408 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12409 &ix86_compare_op0,
12410 &ix86_compare_op1);
12411 if (code == UNKNOWN)
12412 return 0;
12414 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12415 ix86_compare_op1, operands[2],
12416 operands[3]))
12417 return 1;
12419 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12420 ix86_compare_op1, operands[2], operands[3]);
12421 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12422 return 1;
12425 /* The floating point conditional move instructions don't directly
12426 support conditions resulting from a signed integer comparison. */
12428 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12433 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12435 gcc_assert (!second_test && !bypass_test);
12436 tmp = gen_reg_rtx (QImode);
12437 ix86_expand_setcc (code, tmp);
12438 code = NE;
12439 ix86_compare_op0 = tmp;
12440 ix86_compare_op1 = const0_rtx;
12441 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12443 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12445 tmp = gen_reg_rtx (mode);
12446 emit_move_insn (tmp, operands[3]);
12447 operands[3] = tmp;
12449 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12451 tmp = gen_reg_rtx (mode);
12452 emit_move_insn (tmp, operands[2]);
12453 operands[2] = tmp;
12456 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12457 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12458 operands[2], operands[3])));
12459 if (bypass_test)
12460 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12461 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12462 operands[3], operands[0])));
12463 if (second_test)
12464 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12465 gen_rtx_IF_THEN_ELSE (mode, second_test,
12466 operands[2], operands[0])));
12468 return 1;
12471 /* Expand a floating-point vector conditional move; a vcond operation
12472 rather than a movcc operation. */
12474 bool
12475 ix86_expand_fp_vcond (rtx operands[])
12477 enum rtx_code code = GET_CODE (operands[3]);
12478 rtx cmp;
12480 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12481 &operands[4], &operands[5]);
12482 if (code == UNKNOWN)
12483 return false;
12485 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12486 operands[5], operands[1], operands[2]))
12487 return true;
12489 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12490 operands[1], operands[2]);
12491 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12492 return true;
12495 /* Expand a signed integral vector conditional move. */
12497 bool
12498 ix86_expand_int_vcond (rtx operands[])
12500 enum machine_mode mode = GET_MODE (operands[0]);
12501 enum rtx_code code = GET_CODE (operands[3]);
12502 bool negate = false;
12503 rtx x, cop0, cop1;
12505 cop0 = operands[4];
12506 cop1 = operands[5];
12508 /* Canonicalize the comparison to EQ, GT, GTU. */
12509 switch (code)
12511 case EQ:
12512 case GT:
12513 case GTU:
12514 break;
12516 case NE:
12517 case LE:
12518 case LEU:
12519 code = reverse_condition (code);
12520 negate = true;
12521 break;
12523 case GE:
12524 case GEU:
12525 code = reverse_condition (code);
12526 negate = true;
12527 /* FALLTHRU */
12529 case LT:
12530 case LTU:
12531 code = swap_condition (code);
12532 x = cop0, cop0 = cop1, cop1 = x;
12533 break;
12535 default:
12536 gcc_unreachable ();
12539 /* Unsigned parallel compare is not supported by the hardware. Play some
12540 tricks to turn this into a signed comparison against 0. */
12541 if (code == GTU)
12543 cop0 = force_reg (mode, cop0);
12545 switch (mode)
12547 case V4SImode:
12549 rtx t1, t2, mask;
12551 /* Perform a parallel modulo subtraction. */
12552 t1 = gen_reg_rtx (mode);
12553 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12555 /* Extract the original sign bit of op0. */
12556 mask = GEN_INT (-0x80000000);
12557 mask = gen_rtx_CONST_VECTOR (mode,
12558 gen_rtvec (4, mask, mask, mask, mask));
12559 mask = force_reg (mode, mask);
12560 t2 = gen_reg_rtx (mode);
12561 emit_insn (gen_andv4si3 (t2, cop0, mask));
12563 /* XOR it back into the result of the subtraction. This results
12564 in the sign bit set iff we saw unsigned underflow. */
12565 x = gen_reg_rtx (mode);
12566 emit_insn (gen_xorv4si3 (x, t1, t2));
12568 code = GT;
12570 break;
12572 case V16QImode:
12573 case V8HImode:
12574 /* Perform a parallel unsigned saturating subtraction. */
12575 x = gen_reg_rtx (mode);
12576 emit_insn (gen_rtx_SET (VOIDmode, x,
12577 gen_rtx_US_MINUS (mode, cop0, cop1)));
12579 code = EQ;
12580 negate = !negate;
12581 break;
12583 default:
12584 gcc_unreachable ();
12587 cop0 = x;
12588 cop1 = CONST0_RTX (mode);
12591 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12592 operands[1+negate], operands[2-negate]);
12594 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12595 operands[2-negate]);
12596 return true;
12599 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12600 true if we should do zero extension, else sign extension. HIGH_P is
12601 true if we want the N/2 high elements, else the low elements. */
12603 void
12604 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12606 enum machine_mode imode = GET_MODE (operands[1]);
12607 rtx (*unpack)(rtx, rtx, rtx);
12608 rtx se, dest;
12610 switch (imode)
12612 case V16QImode:
12613 if (high_p)
12614 unpack = gen_vec_interleave_highv16qi;
12615 else
12616 unpack = gen_vec_interleave_lowv16qi;
12617 break;
12618 case V8HImode:
12619 if (high_p)
12620 unpack = gen_vec_interleave_highv8hi;
12621 else
12622 unpack = gen_vec_interleave_lowv8hi;
12623 break;
12624 case V4SImode:
12625 if (high_p)
12626 unpack = gen_vec_interleave_highv4si;
12627 else
12628 unpack = gen_vec_interleave_lowv4si;
12629 break;
12630 default:
12631 gcc_unreachable ();
12634 dest = gen_lowpart (imode, operands[0]);
12636 if (unsigned_p)
12637 se = force_reg (imode, CONST0_RTX (imode));
12638 else
12639 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12640 operands[1], pc_rtx, pc_rtx);
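/* SE is now either a zero vector or a 0/-1 mask of the sign bits
   (0 > x), so interleaving each element with SE yields the zero- or
   sign-extended double-width elements.  */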
12642 emit_insn (unpack (dest, operands[1], se));
12645 /* Expand conditional increment or decrement using adc/sbb instructions.
12646 The default case using setcc followed by the conditional move can be
12647 done by generic code. */
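/* Roughly, "x = y + (a < b)" with an unsigned compare becomes
   "cmp a, b; adc $0, x", and the decrement flavor uses sbb.  */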
12649 ix86_expand_int_addcc (rtx operands[])
12651 enum rtx_code code = GET_CODE (operands[1]);
12652 rtx compare_op;
12653 rtx val = const0_rtx;
12654 bool fpcmp = false;
12655 enum machine_mode mode = GET_MODE (operands[0]);
12657 if (operands[3] != const1_rtx
12658 && operands[3] != constm1_rtx)
12659 return 0;
12660 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12661 ix86_compare_op1, &compare_op))
12662 return 0;
12663 code = GET_CODE (compare_op);
12665 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12666 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12668 fpcmp = true;
12669 code = ix86_fp_compare_code_to_integer (code);
12672 if (code != LTU)
12674 val = constm1_rtx;
12675 if (fpcmp)
12676 PUT_CODE (compare_op,
12677 reverse_condition_maybe_unordered
12678 (GET_CODE (compare_op)));
12679 else
12680 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12682 PUT_MODE (compare_op, mode);
12684 /* Construct either adc or sbb insn. */
12685 if ((code == LTU) == (operands[3] == constm1_rtx))
12687 switch (GET_MODE (operands[0]))
12689 case QImode:
12690 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12691 break;
12692 case HImode:
12693 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12694 break;
12695 case SImode:
12696 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12697 break;
12698 case DImode:
12699 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12700 break;
12701 default:
12702 gcc_unreachable ();
12705 else
12707 switch (GET_MODE (operands[0]))
12709 case QImode:
12710 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12711 break;
12712 case HImode:
12713 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12714 break;
12715 case SImode:
12716 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12717 break;
12718 case DImode:
12719 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12720 break;
12721 default:
12722 gcc_unreachable ();
12725 return 1; /* DONE */
12729 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12730 works for floating point parameters and non-offsettable memories.
12731 For pushes, it returns just stack offsets; the values will be saved
12732 in the right order. At most three parts are generated. */
12734 static int
12735 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12737 int size;
12739 if (!TARGET_64BIT)
12740 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12741 else
12742 size = (GET_MODE_SIZE (mode) + 4) / 8;
12744 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12745 gcc_assert (size >= 2 && size <= 3);
12747 /* Optimize constant pool reference to immediates. This is used by fp
12748 moves, which force all constants to memory to allow combining. */
12749 if (MEM_P (operand) && MEM_READONLY_P (operand))
12751 rtx tmp = maybe_get_pool_constant (operand);
12752 if (tmp)
12753 operand = tmp;
12756 if (MEM_P (operand) && !offsettable_memref_p (operand))
12758 /* The only non-offsettable memories we handle are pushes. */
12759 int ok = push_operand (operand, VOIDmode);
12761 gcc_assert (ok);
12763 operand = copy_rtx (operand);
12764 PUT_MODE (operand, Pmode);
12765 parts[0] = parts[1] = parts[2] = operand;
12766 return size;
12769 if (GET_CODE (operand) == CONST_VECTOR)
12771 enum machine_mode imode = int_mode_for_mode (mode);
12772 /* Caution: if we looked through a constant pool memory above,
12773 the operand may actually have a different mode now. That's
12774 ok, since we want to pun this all the way back to an integer. */
12775 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12776 gcc_assert (operand != NULL);
12777 mode = imode;
12780 if (!TARGET_64BIT)
12782 if (mode == DImode)
12783 split_di (&operand, 1, &parts[0], &parts[1]);
12784 else
12786 if (REG_P (operand))
12788 gcc_assert (reload_completed);
12789 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12790 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12791 if (size == 3)
12792 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12794 else if (offsettable_memref_p (operand))
12796 operand = adjust_address (operand, SImode, 0);
12797 parts[0] = operand;
12798 parts[1] = adjust_address (operand, SImode, 4);
12799 if (size == 3)
12800 parts[2] = adjust_address (operand, SImode, 8);
12802 else if (GET_CODE (operand) == CONST_DOUBLE)
12804 REAL_VALUE_TYPE r;
12805 long l[4];
12807 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12808 switch (mode)
12810 case XFmode:
12811 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12812 parts[2] = gen_int_mode (l[2], SImode);
12813 break;
12814 case DFmode:
12815 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12816 break;
12817 default:
12818 gcc_unreachable ();
12820 parts[1] = gen_int_mode (l[1], SImode);
12821 parts[0] = gen_int_mode (l[0], SImode);
12823 else
12824 gcc_unreachable ();
12827 else
12829 if (mode == TImode)
12830 split_ti (&operand, 1, &parts[0], &parts[1]);
12831 if (mode == XFmode || mode == TFmode)
12833 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12834 if (REG_P (operand))
12836 gcc_assert (reload_completed);
12837 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12838 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12840 else if (offsettable_memref_p (operand))
12842 operand = adjust_address (operand, DImode, 0);
12843 parts[0] = operand;
12844 parts[1] = adjust_address (operand, upper_mode, 8);
12846 else if (GET_CODE (operand) == CONST_DOUBLE)
12848 REAL_VALUE_TYPE r;
12849 long l[4];
12851 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12852 real_to_target (l, &r, mode);
12854 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12855 if (HOST_BITS_PER_WIDE_INT >= 64)
12856 parts[0]
12857 = gen_int_mode
12858 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12859 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12860 DImode);
12861 else
12862 parts[0] = immed_double_const (l[0], l[1], DImode);
12864 if (upper_mode == SImode)
12865 parts[1] = gen_int_mode (l[2], SImode);
12866 else if (HOST_BITS_PER_WIDE_INT >= 64)
12867 parts[1]
12868 = gen_int_mode
12869 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12870 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12871 DImode);
12872 else
12873 parts[1] = immed_double_const (l[2], l[3], DImode);
12875 else
12876 gcc_unreachable ();
12880 return size;
12883 /* Emit insns to perform a move or push of DI, DF, and XF values.
12884 Operands 2-4 contain the destination parts in the correct order;
12885 operands 5-7 contain the corresponding source parts. */
12888 void
12889 ix86_split_long_move (rtx operands[])
12891 rtx part[2][3];
12892 int nparts;
12893 int push = 0;
12894 int collisions = 0;
12895 enum machine_mode mode = GET_MODE (operands[0]);
12897 /* The DFmode expanders may ask us to move a double.
12898 For a 64-bit target this is a single move. By hiding the fact
12899 here we simplify the i386.md splitters. */
12900 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12902 /* Optimize constant pool reference to immediates. This is used by
12903 fp moves, which force all constants to memory to allow combining. */
12905 if (MEM_P (operands[1])
12906 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12907 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12908 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12909 if (push_operand (operands[0], VOIDmode))
12911 operands[0] = copy_rtx (operands[0]);
12912 PUT_MODE (operands[0], Pmode);
12914 else
12915 operands[0] = gen_lowpart (DImode, operands[0]);
12916 operands[1] = gen_lowpart (DImode, operands[1]);
12917 emit_move_insn (operands[0], operands[1]);
12918 return;
12921 /* The only non-offsettable memory we handle is push. */
12922 if (push_operand (operands[0], VOIDmode))
12923 push = 1;
12924 else
12925 gcc_assert (!MEM_P (operands[0])
12926 || offsettable_memref_p (operands[0]));
12928 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12929 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12931 /* When emitting a push, take care of source operands that live on the stack. */
12932 if (push && MEM_P (operands[1])
12933 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12935 if (nparts == 3)
12936 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12937 XEXP (part[1][2], 0));
12938 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12939 XEXP (part[1][1], 0));
12942 /* We need to do the copy in the right order in case an address register
12943 of the source overlaps the destination. */
12944 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12946 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12947 collisions++;
12948 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12949 collisions++;
12950 if (nparts == 3
12951 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12952 collisions++;
12954 /* Collision in the middle part can be handled by reordering. */
12955 if (collisions == 1 && nparts == 3
12956 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12958 rtx tmp;
12959 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12960 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12963 /* If there are more collisions, we can't handle it by reordering.
12964 Do an lea to the last part and use only one colliding move. */
12965 else if (collisions > 1)
12967 rtx base;
12969 collisions = 1;
12971 base = part[0][nparts - 1];
12973 /* Handle the case when the last part isn't valid for lea.
12974 Happens in 64-bit mode storing the 12-byte XFmode. */
12975 if (GET_MODE (base) != Pmode)
12976 base = gen_rtx_REG (Pmode, REGNO (base));
12978 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12979 part[1][0] = replace_equiv_address (part[1][0], base);
12980 part[1][1] = replace_equiv_address (part[1][1],
12981 plus_constant (base, UNITS_PER_WORD));
12982 if (nparts == 3)
12983 part[1][2] = replace_equiv_address (part[1][2],
12984 plus_constant (base, 8));
12988 if (push)
12990 if (!TARGET_64BIT)
12992 if (nparts == 3)
12994 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12995 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12996 emit_move_insn (part[0][2], part[1][2]);
12999 else
13001 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
13002 register, it is OK - we will just use the larger counterpart. We also
13003 retype memory - this comes from an attempt to avoid a REX prefix on
13004 moving the second half of a TFmode value. */
13005 if (GET_MODE (part[1][1]) == SImode)
13007 switch (GET_CODE (part[1][1]))
13009 case MEM:
13010 part[1][1] = adjust_address (part[1][1], DImode, 0);
13011 break;
13013 case REG:
13014 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13015 break;
13017 default:
13018 gcc_unreachable ();
13021 if (GET_MODE (part[1][0]) == SImode)
13022 part[1][0] = part[1][1];
13025 emit_move_insn (part[0][1], part[1][1]);
13026 emit_move_insn (part[0][0], part[1][0]);
13027 return;
13030 /* Choose the correct order so as not to overwrite the source before it is copied. */
13031 if ((REG_P (part[0][0])
13032 && REG_P (part[1][1])
13033 && (REGNO (part[0][0]) == REGNO (part[1][1])
13034 || (nparts == 3
13035 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13036 || (collisions > 0
13037 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13039 if (nparts == 3)
13041 operands[2] = part[0][2];
13042 operands[3] = part[0][1];
13043 operands[4] = part[0][0];
13044 operands[5] = part[1][2];
13045 operands[6] = part[1][1];
13046 operands[7] = part[1][0];
13048 else
13050 operands[2] = part[0][1];
13051 operands[3] = part[0][0];
13052 operands[5] = part[1][1];
13053 operands[6] = part[1][0];
13056 else
13058 if (nparts == 3)
13060 operands[2] = part[0][0];
13061 operands[3] = part[0][1];
13062 operands[4] = part[0][2];
13063 operands[5] = part[1][0];
13064 operands[6] = part[1][1];
13065 operands[7] = part[1][2];
13067 else
13069 operands[2] = part[0][0];
13070 operands[3] = part[0][1];
13071 operands[5] = part[1][0];
13072 operands[6] = part[1][1];
13076 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13077 if (optimize_size)
13079 if (CONST_INT_P (operands[5])
13080 && operands[5] != const0_rtx
13081 && REG_P (operands[2]))
13083 if (CONST_INT_P (operands[6])
13084 && INTVAL (operands[6]) == INTVAL (operands[5]))
13085 operands[6] = operands[2];
13087 if (nparts == 3
13088 && CONST_INT_P (operands[7])
13089 && INTVAL (operands[7]) == INTVAL (operands[5]))
13090 operands[7] = operands[2];
13093 if (nparts == 3
13094 && CONST_INT_P (operands[6])
13095 && operands[6] != const0_rtx
13096 && REG_P (operands[3])
13097 && CONST_INT_P (operands[7])
13098 && INTVAL (operands[7]) == INTVAL (operands[6]))
13099 operands[7] = operands[3];
13102 emit_move_insn (operands[2], operands[5]);
13103 emit_move_insn (operands[3], operands[6]);
13104 if (nparts == 3)
13105 emit_move_insn (operands[4], operands[7]);
13107 return;
13110 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13111 left shift by a constant, either using a single shift or
13112 a sequence of add instructions. */
13114 static void
13115 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13117 if (count == 1)
13119 emit_insn ((mode == DImode
13120 ? gen_addsi3
13121 : gen_adddi3) (operand, operand, operand));
13123 else if (!optimize_size
13124 && count * ix86_cost->add <= ix86_cost->shift_const)
13126 int i;
13127 for (i=0; i<count; i++)
13129 emit_insn ((mode == DImode
13130 ? gen_addsi3
13131 : gen_adddi3) (operand, operand, operand));
13134 else
13135 emit_insn ((mode == DImode
13136 ? gen_ashlsi3
13137 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13140 void
13141 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13143 rtx low[2], high[2];
13144 int count;
13145 const int single_width = mode == DImode ? 32 : 64;
13147 if (CONST_INT_P (operands[2]))
13149 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13150 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13152 if (count >= single_width)
13154 emit_move_insn (high[0], low[1]);
13155 emit_move_insn (low[0], const0_rtx);
13157 if (count > single_width)
13158 ix86_expand_ashl_const (high[0], count - single_width, mode);
13160 else
13162 if (!rtx_equal_p (operands[0], operands[1]))
13163 emit_move_insn (operands[0], operands[1]);
13164 emit_insn ((mode == DImode
13165 ? gen_x86_shld_1
13166 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13167 ix86_expand_ashl_const (low[0], count, mode);
13169 return;
13172 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13174 if (operands[1] == const1_rtx)
13176 /* Assuming we've chosen QImode-capable registers, 1 << N
13177 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13178 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13180 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13182 ix86_expand_clear (low[0]);
13183 ix86_expand_clear (high[0]);
13184 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13186 d = gen_lowpart (QImode, low[0]);
13187 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13188 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13189 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13191 d = gen_lowpart (QImode, high[0]);
13192 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13193 s = gen_rtx_NE (QImode, flags, const0_rtx);
13194 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13197 /* Otherwise, we can get the same results by manually performing
13198 a bit extract operation on bit 5/6, and then performing the two
13199 shifts. The two methods of getting 0/1 into low/high are exactly
13200 the same size. Avoiding the shift in the bit extract case helps
13201 pentium4 a bit; no one else seems to care much either way. */
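/* Concretely, for a DImode shift on a 32-bit target bit 5 of the
   count selects which half receives the 1:
       high = (count >> 5) & 1;  low = high ^ 1;
   and both halves are then shifted left by the count (mod 32).  */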
13202 else
13204 rtx x;
13206 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13207 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13208 else
13209 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13210 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13212 emit_insn ((mode == DImode
13213 ? gen_lshrsi3
13214 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13215 emit_insn ((mode == DImode
13216 ? gen_andsi3
13217 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13218 emit_move_insn (low[0], high[0]);
13219 emit_insn ((mode == DImode
13220 ? gen_xorsi3
13221 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13224 emit_insn ((mode == DImode
13225 ? gen_ashlsi3
13226 : gen_ashldi3) (low[0], low[0], operands[2]));
13227 emit_insn ((mode == DImode
13228 ? gen_ashlsi3
13229 : gen_ashldi3) (high[0], high[0], operands[2]));
13230 return;
13233 if (operands[1] == constm1_rtx)
13235 /* For -1 << N, we can avoid the shld instruction, because we
13236 know that we're shifting 0...31/63 ones into a -1. */
13237 emit_move_insn (low[0], constm1_rtx);
13238 if (optimize_size)
13239 emit_move_insn (high[0], low[0]);
13240 else
13241 emit_move_insn (high[0], constm1_rtx);
13243 else
13245 if (!rtx_equal_p (operands[0], operands[1]))
13246 emit_move_insn (operands[0], operands[1]);
13248 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13249 emit_insn ((mode == DImode
13250 ? gen_x86_shld_1
13251 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13254 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13256 if (TARGET_CMOVE && scratch)
13258 ix86_expand_clear (scratch);
13259 emit_insn ((mode == DImode
13260 ? gen_x86_shift_adj_1
13261 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13263 else
13264 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13267 void
13268 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13270 rtx low[2], high[2];
13271 int count;
13272 const int single_width = mode == DImode ? 32 : 64;
13274 if (CONST_INT_P (operands[2]))
13276 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13277 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13279 if (count == single_width * 2 - 1)
13281 emit_move_insn (high[0], high[1]);
13282 emit_insn ((mode == DImode
13283 ? gen_ashrsi3
13284 : gen_ashrdi3) (high[0], high[0],
13285 GEN_INT (single_width - 1)));
13286 emit_move_insn (low[0], high[0]);
13289 else if (count >= single_width)
13291 emit_move_insn (low[0], high[1]);
13292 emit_move_insn (high[0], low[0]);
13293 emit_insn ((mode == DImode
13294 ? gen_ashrsi3
13295 : gen_ashrdi3) (high[0], high[0],
13296 GEN_INT (single_width - 1)));
13297 if (count > single_width)
13298 emit_insn ((mode == DImode
13299 ? gen_ashrsi3
13300 : gen_ashrdi3) (low[0], low[0],
13301 GEN_INT (count - single_width)));
13303 else
13305 if (!rtx_equal_p (operands[0], operands[1]))
13306 emit_move_insn (operands[0], operands[1]);
13307 emit_insn ((mode == DImode
13308 ? gen_x86_shrd_1
13309 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13310 emit_insn ((mode == DImode
13311 ? gen_ashrsi3
13312 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13315 else
13317 if (!rtx_equal_p (operands[0], operands[1]))
13318 emit_move_insn (operands[0], operands[1]);
13320 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13322 emit_insn ((mode == DImode
13323 ? gen_x86_shrd_1
13324 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13325 emit_insn ((mode == DImode
13326 ? gen_ashrsi3
13327 : gen_ashrdi3) (high[0], high[0], operands[2]));
13329 if (TARGET_CMOVE && scratch)
13331 emit_move_insn (scratch, high[0]);
13332 emit_insn ((mode == DImode
13333 ? gen_ashrsi3
13334 : gen_ashrdi3) (scratch, scratch,
13335 GEN_INT (single_width - 1)));
13336 emit_insn ((mode == DImode
13337 ? gen_x86_shift_adj_1
13338 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13339 scratch));
13341 else
13342 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13346 void
13347 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13349 rtx low[2], high[2];
13350 int count;
13351 const int single_width = mode == DImode ? 32 : 64;
13353 if (CONST_INT_P (operands[2]))
13355 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13356 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13358 if (count >= single_width)
13360 emit_move_insn (low[0], high[1]);
13361 ix86_expand_clear (high[0]);
13363 if (count > single_width)
13364 emit_insn ((mode == DImode
13365 ? gen_lshrsi3
13366 : gen_lshrdi3) (low[0], low[0],
13367 GEN_INT (count - single_width)));
13369 else
13371 if (!rtx_equal_p (operands[0], operands[1]))
13372 emit_move_insn (operands[0], operands[1]);
13373 emit_insn ((mode == DImode
13374 ? gen_x86_shrd_1
13375 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13376 emit_insn ((mode == DImode
13377 ? gen_lshrsi3
13378 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13381 else
13383 if (!rtx_equal_p (operands[0], operands[1]))
13384 emit_move_insn (operands[0], operands[1]);
13386 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13388 emit_insn ((mode == DImode
13389 ? gen_x86_shrd_1
13390 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13391 emit_insn ((mode == DImode
13392 ? gen_lshrsi3
13393 : gen_lshrdi3) (high[0], high[0], operands[2]));
13395 /* Heh. By reversing the arguments, we can reuse this pattern. */
13396 if (TARGET_CMOVE && scratch)
13398 ix86_expand_clear (scratch);
13399 emit_insn ((mode == DImode
13400 ? gen_x86_shift_adj_1
13401 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13402 scratch));
13404 else
13405 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13409 /* Predict just emitted jump instruction to be taken with probability PROB. */
13410 static void
13411 predict_jump (int prob)
13413 rtx insn = get_last_insn ();
13414 gcc_assert (JUMP_P (insn));
13415 REG_NOTES (insn)
13416 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13417 GEN_INT (prob),
13418 REG_NOTES (insn));
13421 /* Helper function for the string operations below.  Emit code that tests
13422 whether (VARIABLE & VALUE) is zero and, if so, jumps to the returned label. */
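/* For example, ix86_expand_aligntest (count, 4, true) emits code roughly
   equivalent to the following sketch (illustrative only, not literal RTL):

     tmp = count & 4;
     if (tmp == 0)
       goto label;        // skip the 4-byte move/store that the caller emits

   and returns "label" so the caller can emit the guarded code followed by
   the label itself.  */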
13423 static rtx
13424 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13426 rtx label = gen_label_rtx ();
13427 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13428 if (GET_MODE (variable) == DImode)
13429 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13430 else
13431 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13432 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13433 1, label);
13434 if (epilogue)
13435 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13436 else
13437 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13438 return label;
13441 /* Decrease COUNTREG by VALUE. */
13442 static void
13443 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13445 if (GET_MODE (countreg) == DImode)
13446 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13447 else
13448 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13451 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
13453 ix86_zero_extend_to_Pmode (rtx exp)
13455 rtx r;
13456 if (GET_MODE (exp) == VOIDmode)
13457 return force_reg (Pmode, exp);
13458 if (GET_MODE (exp) == Pmode)
13459 return copy_to_mode_reg (Pmode, exp);
13460 r = gen_reg_rtx (Pmode);
13461 emit_insn (gen_zero_extendsidi2 (r, exp));
13462 return r;
13465 /* Divide COUNTREG by SCALE. */
13466 static rtx
13467 scale_counter (rtx countreg, int scale)
13469 rtx sc;
13470 rtx piece_size_mask;
13472 if (scale == 1)
13473 return countreg;
13474 if (CONST_INT_P (countreg))
13475 return GEN_INT (INTVAL (countreg) / scale);
13476 gcc_assert (REG_P (countreg));
13478 piece_size_mask = GEN_INT (scale - 1);
13479 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13480 GEN_INT (exact_log2 (scale)),
13481 NULL, 1, OPTAB_DIRECT);
13482 return sc;
13485 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over DImode
13486 for constant loop counts. */
13488 static enum machine_mode
13489 counter_mode (rtx count_exp)
13491 if (GET_MODE (count_exp) != VOIDmode)
13492 return GET_MODE (count_exp);
13493 if (GET_CODE (count_exp) != CONST_INT)
13494 return Pmode;
13495 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13496 return DImode;
13497 return SImode;
13500 /* When SRCPTR is non-NULL, output a simple loop that moves memory
13501 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13502 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
13503 the equivalent loop that sets memory to VALUE (expected to be in MODE).
13505 The size is rounded down to a whole number of chunks moved at once.
13506 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
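/* A rough C-level sketch of the loop emitted below for a move with
   MODE == SImode and UNROLL == 2 (names are illustrative only; callers
   arrange that at least one whole chunk remains, and a zero-size guard is
   additionally emitted when the chunk size is a single byte):

     size = count & ~7;                       // whole 8-byte iterations only
     iter = 0;
     do
       {
         *(int *) (dest + iter)     = *(int *) (src + iter);
         *(int *) (dest + iter + 4) = *(int *) (src + iter + 4);
         iter += 8;
       }
     while (iter < size);
     dest += iter;  src += iter;              // advance past the copied part
 */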
13509 static void
13510 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13511 rtx destptr, rtx srcptr, rtx value,
13512 rtx count, enum machine_mode mode, int unroll,
13513 int expected_size)
13515 rtx out_label, top_label, iter, tmp;
13516 enum machine_mode iter_mode = counter_mode (count);
13517 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13518 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13519 rtx size;
13520 rtx x_addr;
13521 rtx y_addr;
13522 int i;
13524 top_label = gen_label_rtx ();
13525 out_label = gen_label_rtx ();
13526 iter = gen_reg_rtx (iter_mode);
13528 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13529 NULL, 1, OPTAB_DIRECT);
13530 /* Those two should combine. */
13531 if (piece_size == const1_rtx)
13533 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13534 true, out_label);
13535 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13537 emit_move_insn (iter, const0_rtx);
13539 emit_label (top_label);
13541 tmp = convert_modes (Pmode, iter_mode, iter, true);
13542 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13543 destmem = change_address (destmem, mode, x_addr);
13545 if (srcmem)
13547 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13548 srcmem = change_address (srcmem, mode, y_addr);
13550 /* When unrolling for chips that reorder memory reads and writes,
13551 we can save registers by using a single temporary.
13552 Also, using 4 temporaries is overkill in 32-bit mode. */
13553 if (!TARGET_64BIT && 0)
13555 for (i = 0; i < unroll; i++)
13557 if (i)
13559 destmem =
13560 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13561 srcmem =
13562 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13564 emit_move_insn (destmem, srcmem);
13567 else
13569 rtx tmpreg[4];
13570 gcc_assert (unroll <= 4);
13571 for (i = 0; i < unroll; i++)
13573 tmpreg[i] = gen_reg_rtx (mode);
13574 if (i)
13576 srcmem =
13577 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13579 emit_move_insn (tmpreg[i], srcmem);
13581 for (i = 0; i < unroll; i++)
13583 if (i)
13585 destmem =
13586 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13588 emit_move_insn (destmem, tmpreg[i]);
13592 else
13593 for (i = 0; i < unroll; i++)
13595 if (i)
13596 destmem =
13597 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13598 emit_move_insn (destmem, value);
13601 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13602 true, OPTAB_LIB_WIDEN);
13603 if (tmp != iter)
13604 emit_move_insn (iter, tmp);
13606 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13607 true, top_label);
13608 if (expected_size != -1)
13610 expected_size /= GET_MODE_SIZE (mode) * unroll;
13611 if (expected_size == 0)
13612 predict_jump (0);
13613 else if (expected_size > REG_BR_PROB_BASE)
13614 predict_jump (REG_BR_PROB_BASE - 1);
13615 else
13616 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13618 else
13619 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13620 iter = ix86_zero_extend_to_Pmode (iter);
13621 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13622 true, OPTAB_LIB_WIDEN);
13623 if (tmp != destptr)
13624 emit_move_insn (destptr, tmp);
13625 if (srcptr)
13627 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13628 true, OPTAB_LIB_WIDEN);
13629 if (tmp != srcptr)
13630 emit_move_insn (srcptr, tmp);
13632 emit_label (out_label);
13635 /* Output "rep; mov" instruction.
13636 Arguments have the same meaning as for the previous function.  */
13637 static void
13638 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13639 rtx destptr, rtx srcptr,
13640 rtx count,
13641 enum machine_mode mode)
13643 rtx destexp;
13644 rtx srcexp;
13645 rtx countreg;
13647 /* If the size is known and a multiple of 4, a dword-wide rep movs is shorter. */
13648 if (mode == QImode && CONST_INT_P (count)
13649 && !(INTVAL (count) & 3))
13650 mode = SImode;
13652 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13653 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13654 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13655 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13656 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13657 if (mode != QImode)
13659 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13660 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13661 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13662 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13663 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13664 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13666 else
13668 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13669 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13671 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13672 destexp, srcexp));
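/* For example, with MODE == SImode the net effect is roughly (a sketch,
   AT&T syntax, 32-bit register names assumed):

     shrl  $2, %ecx          // scale the byte count down to a dword count
     rep movsl               // copy 4*%ecx bytes, advancing %esi and %edi
 */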
13675 /* Output "rep; stos" instruction.
13676 Arguments have the same meaning as for the previous function.  */
13677 static void
13678 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13679 rtx count,
13680 enum machine_mode mode)
13682 rtx destexp;
13683 rtx countreg;
13685 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13686 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13687 value = force_reg (mode, gen_lowpart (mode, value));
13688 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13689 if (mode != QImode)
13691 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13692 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13693 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13695 else
13696 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13697 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13700 static void
13701 emit_strmov (rtx destmem, rtx srcmem,
13702 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13704 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13705 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13706 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13709 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13710 static void
13711 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13712 rtx destptr, rtx srcptr, rtx count, int max_size)
13714 rtx src, dest;
13715 if (CONST_INT_P (count))
13717 HOST_WIDE_INT countval = INTVAL (count);
13718 int offset = 0;
13720 if ((countval & 0x10) && max_size > 16)
13722 if (TARGET_64BIT)
13724 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13725 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13727 else
13728 gcc_unreachable ();
13729 offset += 16;
13731 if ((countval & 0x08) && max_size > 8)
13733 if (TARGET_64BIT)
13734 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13735 else
13737 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13738 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13740 offset += 8;
13742 if ((countval & 0x04) && max_size > 4)
13744 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13745 offset += 4;
13747 if ((countval & 0x02) && max_size > 2)
13749 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13750 offset += 2;
13752 if ((countval & 0x01) && max_size > 1)
13754 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13755 offset += 1;
13757 return;
13759 if (max_size > 8)
13761 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13762 count, 1, OPTAB_DIRECT);
13763 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13764 count, QImode, 1, 4);
13765 return;
13768 /* When there are stringops, we can cheaply increase dest and src pointers.
13769 Otherwise we save code size by maintaining offset (zero is readily
13770 available from the preceding rep operation) and using x86 addressing modes.  */
13772 if (TARGET_SINGLE_STRINGOP)
13774 if (max_size > 4)
13776 rtx label = ix86_expand_aligntest (count, 4, true);
13777 src = change_address (srcmem, SImode, srcptr);
13778 dest = change_address (destmem, SImode, destptr);
13779 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13780 emit_label (label);
13781 LABEL_NUSES (label) = 1;
13783 if (max_size > 2)
13785 rtx label = ix86_expand_aligntest (count, 2, true);
13786 src = change_address (srcmem, HImode, srcptr);
13787 dest = change_address (destmem, HImode, destptr);
13788 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13789 emit_label (label);
13790 LABEL_NUSES (label) = 1;
13792 if (max_size > 1)
13794 rtx label = ix86_expand_aligntest (count, 1, true);
13795 src = change_address (srcmem, QImode, srcptr);
13796 dest = change_address (destmem, QImode, destptr);
13797 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13798 emit_label (label);
13799 LABEL_NUSES (label) = 1;
13802 else
13804 rtx offset = force_reg (Pmode, const0_rtx);
13805 rtx tmp;
13807 if (max_size > 4)
13809 rtx label = ix86_expand_aligntest (count, 4, true);
13810 src = change_address (srcmem, SImode, srcptr);
13811 dest = change_address (destmem, SImode, destptr);
13812 emit_move_insn (dest, src);
13813 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13814 true, OPTAB_LIB_WIDEN);
13815 if (tmp != offset)
13816 emit_move_insn (offset, tmp);
13817 emit_label (label);
13818 LABEL_NUSES (label) = 1;
13820 if (max_size > 2)
13822 rtx label = ix86_expand_aligntest (count, 2, true);
13823 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13824 src = change_address (srcmem, HImode, tmp);
13825 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13826 dest = change_address (destmem, HImode, tmp);
13827 emit_move_insn (dest, src);
13828 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13829 true, OPTAB_LIB_WIDEN);
13830 if (tmp != offset)
13831 emit_move_insn (offset, tmp);
13832 emit_label (label);
13833 LABEL_NUSES (label) = 1;
13835 if (max_size > 1)
13837 rtx label = ix86_expand_aligntest (count, 1, true);
13838 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13839 src = change_address (srcmem, QImode, tmp);
13840 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13841 dest = change_address (destmem, QImode, tmp);
13842 emit_move_insn (dest, src);
13843 emit_label (label);
13844 LABEL_NUSES (label) = 1;
13849 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13850 static void
13851 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13852 rtx count, int max_size)
13854 count =
13855 expand_simple_binop (counter_mode (count), AND, count,
13856 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13857 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13858 gen_lowpart (QImode, value), count, QImode,
13859 1, max_size / 2);
13862 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13863 static void
13864 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13866 rtx dest;
13868 if (CONST_INT_P (count))
13870 HOST_WIDE_INT countval = INTVAL (count);
13871 int offset = 0;
13873 if ((countval & 0x10) && max_size > 16)
13875 if (TARGET_64BIT)
13877 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13878 emit_insn (gen_strset (destptr, dest, value));
13879 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13880 emit_insn (gen_strset (destptr, dest, value));
13882 else
13883 gcc_unreachable ();
13884 offset += 16;
13886 if ((countval & 0x08) && max_size > 8)
13888 if (TARGET_64BIT)
13890 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13891 emit_insn (gen_strset (destptr, dest, value));
13893 else
13895 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13896 emit_insn (gen_strset (destptr, dest, value));
13897 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13898 emit_insn (gen_strset (destptr, dest, value));
13900 offset += 8;
13902 if ((countval & 0x04) && max_size > 4)
13904 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13905 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13906 offset += 4;
13908 if ((countval & 0x02) && max_size > 2)
13910 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13911 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13912 offset += 2;
13914 if ((countval & 0x01) && max_size > 1)
13916 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13917 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13918 offset += 1;
13920 return;
13922 if (max_size > 32)
13924 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13925 return;
13927 if (max_size > 16)
13929 rtx label = ix86_expand_aligntest (count, 16, true);
13930 if (TARGET_64BIT)
13932 dest = change_address (destmem, DImode, destptr);
13933 emit_insn (gen_strset (destptr, dest, value));
13934 emit_insn (gen_strset (destptr, dest, value));
13936 else
13938 dest = change_address (destmem, SImode, destptr);
13939 emit_insn (gen_strset (destptr, dest, value));
13940 emit_insn (gen_strset (destptr, dest, value));
13941 emit_insn (gen_strset (destptr, dest, value));
13942 emit_insn (gen_strset (destptr, dest, value));
13944 emit_label (label);
13945 LABEL_NUSES (label) = 1;
13947 if (max_size > 8)
13949 rtx label = ix86_expand_aligntest (count, 8, true);
13950 if (TARGET_64BIT)
13952 dest = change_address (destmem, DImode, destptr);
13953 emit_insn (gen_strset (destptr, dest, value));
13955 else
13957 dest = change_address (destmem, SImode, destptr);
13958 emit_insn (gen_strset (destptr, dest, value));
13959 emit_insn (gen_strset (destptr, dest, value));
13961 emit_label (label);
13962 LABEL_NUSES (label) = 1;
13964 if (max_size > 4)
13966 rtx label = ix86_expand_aligntest (count, 4, true);
13967 dest = change_address (destmem, SImode, destptr);
13968 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13969 emit_label (label);
13970 LABEL_NUSES (label) = 1;
13972 if (max_size > 2)
13974 rtx label = ix86_expand_aligntest (count, 2, true);
13975 dest = change_address (destmem, HImode, destptr);
13976 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13977 emit_label (label);
13978 LABEL_NUSES (label) = 1;
13980 if (max_size > 1)
13982 rtx label = ix86_expand_aligntest (count, 1, true);
13983 dest = change_address (destmem, QImode, destptr);
13984 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13985 emit_label (label);
13986 LABEL_NUSES (label) = 1;
13990 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
13991 to DESIRED_ALIGNMENT. */
13992 static void
13993 expand_movmem_prologue (rtx destmem, rtx srcmem,
13994 rtx destptr, rtx srcptr, rtx count,
13995 int align, int desired_alignment)
13997 if (align <= 1 && desired_alignment > 1)
13999 rtx label = ix86_expand_aligntest (destptr, 1, false);
14000 srcmem = change_address (srcmem, QImode, srcptr);
14001 destmem = change_address (destmem, QImode, destptr);
14002 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14003 ix86_adjust_counter (count, 1);
14004 emit_label (label);
14005 LABEL_NUSES (label) = 1;
14007 if (align <= 2 && desired_alignment > 2)
14009 rtx label = ix86_expand_aligntest (destptr, 2, false);
14010 srcmem = change_address (srcmem, HImode, srcptr);
14011 destmem = change_address (destmem, HImode, destptr);
14012 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14013 ix86_adjust_counter (count, 2);
14014 emit_label (label);
14015 LABEL_NUSES (label) = 1;
14017 if (align <= 4 && desired_alignment > 4)
14019 rtx label = ix86_expand_aligntest (destptr, 4, false);
14020 srcmem = change_address (srcmem, SImode, srcptr);
14021 destmem = change_address (destmem, SImode, destptr);
14022 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14023 ix86_adjust_counter (count, 4);
14024 emit_label (label);
14025 LABEL_NUSES (label) = 1;
14027 gcc_assert (desired_alignment <= 8);
14030 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN,
14031 to DESIRED_ALIGNMENT. */
14032 static void
14033 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14034 int align, int desired_alignment)
14036 if (align <= 1 && desired_alignment > 1)
14038 rtx label = ix86_expand_aligntest (destptr, 1, false);
14039 destmem = change_address (destmem, QImode, destptr);
14040 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14041 ix86_adjust_counter (count, 1);
14042 emit_label (label);
14043 LABEL_NUSES (label) = 1;
14045 if (align <= 2 && desired_alignment > 2)
14047 rtx label = ix86_expand_aligntest (destptr, 2, false);
14048 destmem = change_address (destmem, HImode, destptr);
14049 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14050 ix86_adjust_counter (count, 2);
14051 emit_label (label);
14052 LABEL_NUSES (label) = 1;
14054 if (align <= 4 && desired_alignment > 4)
14056 rtx label = ix86_expand_aligntest (destptr, 4, false);
14057 destmem = change_address (destmem, SImode, destptr);
14058 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14059 ix86_adjust_counter (count, 4);
14060 emit_label (label);
14061 LABEL_NUSES (label) = 1;
14063 gcc_assert (desired_alignment <= 8);
14066 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14067 static enum stringop_alg
14068 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14069 int *dynamic_check)
14071 const struct stringop_algs * algs;
14073 *dynamic_check = -1;
14074 if (memset)
14075 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14076 else
14077 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14078 if (stringop_alg != no_stringop)
14079 return stringop_alg;
14080 /* rep; movq or rep; movl is the smallest variant. */
14081 else if (optimize_size)
14083 if (!count || (count & 3))
14084 return rep_prefix_1_byte;
14085 else
14086 return rep_prefix_4_byte;
14088 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
14090 else if (expected_size != -1 && expected_size < 4)
14091 return loop_1_byte;
14092 else if (expected_size != -1)
14094 unsigned int i;
14095 enum stringop_alg alg = libcall;
14096 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14098 gcc_assert (algs->size[i].max);
14099 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14101 if (algs->size[i].alg != libcall)
14102 alg = algs->size[i].alg;
14103 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14104 last non-libcall inline algorithm. */
14105 if (TARGET_INLINE_ALL_STRINGOPS)
14107 /* When the current size is best copied by a libcall,
14108 but we are still forced to inline, run the heuristic below
14109 that picks code for medium-sized blocks. */
14110 if (alg != libcall)
14111 return alg;
14112 break;
14114 else
14115 return algs->size[i].alg;
14118 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14120 /* When asked to inline the call anyway, try to pick a meaningful choice.
14121 We look for the maximal size of block that is faster to copy by hand and
14122 take blocks of at most that size, guessing that the average size will
14123 be roughly half of the block.
14125 If this turns out to be bad, we might simply specify the preferred
14126 choice in ix86_costs. */
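/* A worked example with a hypothetical cost table: if it says
   "rep_prefix_4_byte up to 1024 bytes, libcall above", we recurse with an
   expected size of 512 and pick rep_prefix_4_byte; with
   -minline-stringops-dynamically we additionally record a dynamic check of
   1024 so the expander emits a runtime size test that falls back to the
   library call for larger blocks.  */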
14127 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14128 && algs->unknown_size == libcall)
14130 int max = -1;
14131 enum stringop_alg alg;
14132 int i;
14134 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14135 if (algs->size[i].alg != libcall && algs->size[i].alg)
14136 max = algs->size[i].max;
14137 if (max == -1)
14138 max = 4096;
14139 alg = decide_alg (count, max / 2, memset, dynamic_check);
14140 gcc_assert (*dynamic_check == -1);
14141 gcc_assert (alg != libcall);
14142 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14143 *dynamic_check = max;
14144 return alg;
14146 return algs->unknown_size;
14149 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14150 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14151 static int
14152 decide_alignment (int align,
14153 enum stringop_alg alg,
14154 int expected_size)
14156 int desired_align = 0;
14157 switch (alg)
14159 case no_stringop:
14160 gcc_unreachable ();
14161 case loop:
14162 case unrolled_loop:
14163 desired_align = GET_MODE_SIZE (Pmode);
14164 break;
14165 case rep_prefix_8_byte:
14166 desired_align = 8;
14167 break;
14168 case rep_prefix_4_byte:
14169 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14170 copying whole cache lines at once. */
14171 if (TARGET_PENTIUMPRO)
14172 desired_align = 8;
14173 else
14174 desired_align = 4;
14175 break;
14176 case rep_prefix_1_byte:
14177 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14178 copying whole cache lines at once. */
14179 if (TARGET_PENTIUMPRO)
14180 desired_align = 8;
14181 else
14182 desired_align = 1;
14183 break;
14184 case loop_1_byte:
14185 desired_align = 1;
14186 break;
14187 case libcall:
14188 return 0;
14191 if (optimize_size)
14192 desired_align = 1;
14193 if (desired_align < align)
14194 desired_align = align;
14195 if (expected_size != -1 && expected_size < 4)
14196 desired_align = align;
14197 return desired_align;
14200 /* Return the smallest power of 2 greater than VAL. */
14201 static int
14202 smallest_pow2_greater_than (int val)
14204 int ret = 1;
14205 while (ret <= val)
14206 ret <<= 1;
14207 return ret;
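/* E.g. smallest_pow2_greater_than (0) == 1, smallest_pow2_greater_than (5) == 8
   and smallest_pow2_greater_than (8) == 16; the result is always strictly
   greater than VAL.  */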
14210 /* Expand string move (memcpy) operation. Use i386 string operations when
14211 profitable.  ix86_expand_setmem contains similar code.  The code depends upon
14212 architecture, block size and alignment, but always has the same
14213 overall structure:
14215 1) Prologue guard: Conditional that jumps up to epilogues for small
14216 blocks that can be handled by epilogue alone. This is faster but
14217 also needed for correctness, since the prologue assumes the block is larger
14218 than the desired alignment.
14220 Optional dynamic check for size and libcall for large
14221 blocks is emitted here too, with -minline-stringops-dynamically.
14223 2) Prologue: copy first few bytes in order to get destination aligned
14224 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14225 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14226 We emit either a jump tree on power of two sized blocks, or a byte loop.
14228 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14229 with specified algorithm.
14231 4) Epilogue: code copying tail of the block that is too small to be
14232 handled by main body (or up to size guarded by prologue guard). */
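/* A condensed sketch of the emitted shape (illustrative pseudo-C only):

     if (count < epilogue_size_needed)              // 1) prologue guard
       goto epilogue;
     if (count >= dynamic_check)                    //    optional, with
       { memcpy (dst, src, count); goto done; }     //    -minline-stringops-dynamically
     copy 1/2/4-byte pieces until dst is aligned;   // 2) prologue
     main loop or "rep mov" in size_needed chunks;  // 3) body
   epilogue:
     copy the remaining count & (epilogue_size_needed - 1) bytes;   // 4) epilogue
   done: ;
 */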
14235 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14236 rtx expected_align_exp, rtx expected_size_exp)
14238 rtx destreg;
14239 rtx srcreg;
14240 rtx label = NULL;
14241 rtx tmp;
14242 rtx jump_around_label = NULL;
14243 HOST_WIDE_INT align = 1;
14244 unsigned HOST_WIDE_INT count = 0;
14245 HOST_WIDE_INT expected_size = -1;
14246 int size_needed = 0, epilogue_size_needed;
14247 int desired_align = 0;
14248 enum stringop_alg alg;
14249 int dynamic_check;
14251 if (CONST_INT_P (align_exp))
14252 align = INTVAL (align_exp);
14253 /* i386 can do misaligned access at a reasonably increased cost. */
14254 if (CONST_INT_P (expected_align_exp)
14255 && INTVAL (expected_align_exp) > align)
14256 align = INTVAL (expected_align_exp);
14257 if (CONST_INT_P (count_exp))
14258 count = expected_size = INTVAL (count_exp);
14259 if (CONST_INT_P (expected_size_exp) && count == 0)
14260 expected_size = INTVAL (expected_size_exp);
14262 /* Step 0: Decide on preferred algorithm, desired alignment and
14263 size of chunks to be copied by main loop. */
14265 alg = decide_alg (count, expected_size, false, &dynamic_check);
14266 desired_align = decide_alignment (align, alg, expected_size);
14268 if (!TARGET_ALIGN_STRINGOPS)
14269 align = desired_align;
14271 if (alg == libcall)
14272 return 0;
14273 gcc_assert (alg != no_stringop);
14274 if (!count)
14275 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14276 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14277 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14278 switch (alg)
14280 case libcall:
14281 case no_stringop:
14282 gcc_unreachable ();
14283 case loop:
14284 size_needed = GET_MODE_SIZE (Pmode);
14285 break;
14286 case unrolled_loop:
14287 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14288 break;
14289 case rep_prefix_8_byte:
14290 size_needed = 8;
14291 break;
14292 case rep_prefix_4_byte:
14293 size_needed = 4;
14294 break;
14295 case rep_prefix_1_byte:
14296 case loop_1_byte:
14297 size_needed = 1;
14298 break;
14301 epilogue_size_needed = size_needed;
14303 /* Step 1: Prologue guard. */
14305 /* Alignment code needs count to be in register. */
14306 if (CONST_INT_P (count_exp) && desired_align > align)
14308 enum machine_mode mode = SImode;
14309 if (TARGET_64BIT && (count & ~0xffffffff))
14310 mode = DImode;
14311 count_exp = force_reg (mode, count_exp);
14313 gcc_assert (desired_align >= 1 && align >= 1);
14315 /* Ensure that alignment prologue won't copy past end of block. */
14316 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14318 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14319 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14320 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
14321 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14323 label = gen_label_rtx ();
14324 emit_cmp_and_jump_insns (count_exp,
14325 GEN_INT (epilogue_size_needed),
14326 LTU, 0, counter_mode (count_exp), 1, label);
14327 if (GET_CODE (count_exp) == CONST_INT)
14329 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14330 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14331 else
14332 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14334 /* Emit code to decide on runtime whether library call or inline should be
14335 used. */
14336 if (dynamic_check != -1)
14338 rtx hot_label = gen_label_rtx ();
14339 jump_around_label = gen_label_rtx ();
14340 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14341 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14342 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14343 emit_block_move_via_libcall (dst, src, count_exp, false);
14344 emit_jump (jump_around_label);
14345 emit_label (hot_label);
14348 /* Step 2: Alignment prologue. */
14350 if (desired_align > align)
14352 /* Except for the first move in the epilogue, we no longer know
14353 the constant offset in the aliasing info.  It doesn't seem worth
14354 the pain to maintain it for the first move, so throw away
14355 the info early. */
14356 src = change_address (src, BLKmode, srcreg);
14357 dst = change_address (dst, BLKmode, destreg);
14358 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14359 desired_align);
14361 if (label && size_needed == 1)
14363 emit_label (label);
14364 LABEL_NUSES (label) = 1;
14365 label = NULL;
14368 /* Step 3: Main loop. */
14370 switch (alg)
14372 case libcall:
14373 case no_stringop:
14374 gcc_unreachable ();
14375 case loop_1_byte:
14376 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14377 count_exp, QImode, 1, expected_size);
14378 break;
14379 case loop:
14380 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14381 count_exp, Pmode, 1, expected_size);
14382 break;
14383 case unrolled_loop:
14384 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14385 registers for 4 temporaries anyway. */
14386 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14387 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14388 expected_size);
14389 break;
14390 case rep_prefix_8_byte:
14391 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14392 DImode);
14393 break;
14394 case rep_prefix_4_byte:
14395 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14396 SImode);
14397 break;
14398 case rep_prefix_1_byte:
14399 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14400 QImode);
14401 break;
14403 /* Adjust properly the offset of src and dest memory for aliasing. */
14404 if (CONST_INT_P (count_exp))
14406 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14407 (count / size_needed) * size_needed);
14408 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14409 (count / size_needed) * size_needed);
14411 else
14413 src = change_address (src, BLKmode, srcreg);
14414 dst = change_address (dst, BLKmode, destreg);
14417 /* Step 4: Epilogue to copy the remaining bytes. */
14419 if (label)
14421 /* When the main loop is done, COUNT_EXP might hold the original count,
14422 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14423 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14424 bytes. Compensate if needed. */
14426 if (size_needed < epilogue_size_needed)
14428 tmp =
14429 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14430 GEN_INT (size_needed - 1), count_exp, 1,
14431 OPTAB_DIRECT);
14432 if (tmp != count_exp)
14433 emit_move_insn (count_exp, tmp);
14435 emit_label (label);
14436 LABEL_NUSES (label) = 1;
14439 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14440 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14441 epilogue_size_needed);
14442 if (jump_around_label)
14443 emit_label (jump_around_label);
14444 return 1;
14447 /* Helper function for memset.  For a QImode value 0xXY produce
14448 0xXYXYXYXY of the width specified by MODE.  This is essentially
14449 a * 0x01010101, but we can do slightly better than
14450 synth_mult by unwinding the sequence by hand on CPUs with
14451 slow multiply. */
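/* A sketch of the hand-unwound sequence for SImode, assuming "v" already
   holds the zero-extended byte:

     v |= v << 8;     // 0x000000XY -> 0x0000XYXY
     v |= v << 16;    // 0x0000XYXY -> 0xXYXYXYXY

   with one more "v |= v << 32" step for DImode; the multiply variant simply
   computes v * 0x01010101 (or 0x0101010101010101 for DImode) instead when
   the cost tables say that is cheaper.  */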
14452 static rtx
14453 promote_duplicated_reg (enum machine_mode mode, rtx val)
14455 enum machine_mode valmode = GET_MODE (val);
14456 rtx tmp;
14457 int nops = mode == DImode ? 3 : 2;
14459 gcc_assert (mode == SImode || mode == DImode);
14460 if (val == const0_rtx)
14461 return copy_to_mode_reg (mode, const0_rtx);
14462 if (CONST_INT_P (val))
14464 HOST_WIDE_INT v = INTVAL (val) & 255;
14466 v |= v << 8;
14467 v |= v << 16;
14468 if (mode == DImode)
14469 v |= (v << 16) << 16;
14470 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14473 if (valmode == VOIDmode)
14474 valmode = QImode;
14475 if (valmode != QImode)
14476 val = gen_lowpart (QImode, val);
14477 if (mode == QImode)
14478 return val;
14479 if (!TARGET_PARTIAL_REG_STALL)
14480 nops--;
14481 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14482 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14483 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14484 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14486 rtx reg = convert_modes (mode, QImode, val, true);
14487 tmp = promote_duplicated_reg (mode, const1_rtx);
14488 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14489 OPTAB_DIRECT);
14491 else
14493 rtx reg = convert_modes (mode, QImode, val, true);
14495 if (!TARGET_PARTIAL_REG_STALL)
14496 if (mode == SImode)
14497 emit_insn (gen_movsi_insv_1 (reg, reg));
14498 else
14499 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14500 else
14502 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14503 NULL, 1, OPTAB_DIRECT);
14504 reg =
14505 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14507 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14508 NULL, 1, OPTAB_DIRECT);
14509 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14510 if (mode == SImode)
14511 return reg;
14512 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14513 NULL, 1, OPTAB_DIRECT);
14514 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14515 return reg;
14519 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14520 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14521 alignment from ALIGN to DESIRED_ALIGN. */
14522 static rtx
14523 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14525 rtx promoted_val;
14527 if (TARGET_64BIT
14528 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14529 promoted_val = promote_duplicated_reg (DImode, val);
14530 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14531 promoted_val = promote_duplicated_reg (SImode, val);
14532 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14533 promoted_val = promote_duplicated_reg (HImode, val);
14534 else
14535 promoted_val = val;
14537 return promoted_val;
14540 /* Expand string set operation (memset).  Use i386 string operations when
14541 profitable.  See the ix86_expand_movmem comment for an explanation of the
14542 individual steps performed. */
14544 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14545 rtx expected_align_exp, rtx expected_size_exp)
14547 rtx destreg;
14548 rtx label = NULL;
14549 rtx tmp;
14550 rtx jump_around_label = NULL;
14551 HOST_WIDE_INT align = 1;
14552 unsigned HOST_WIDE_INT count = 0;
14553 HOST_WIDE_INT expected_size = -1;
14554 int size_needed = 0, epilogue_size_needed;
14555 int desired_align = 0;
14556 enum stringop_alg alg;
14557 rtx promoted_val = NULL;
14558 bool force_loopy_epilogue = false;
14559 int dynamic_check;
14561 if (CONST_INT_P (align_exp))
14562 align = INTVAL (align_exp);
14563 /* i386 can do misaligned access at a reasonably increased cost. */
14564 if (CONST_INT_P (expected_align_exp)
14565 && INTVAL (expected_align_exp) > align)
14566 align = INTVAL (expected_align_exp);
14567 if (CONST_INT_P (count_exp))
14568 count = expected_size = INTVAL (count_exp);
14569 if (CONST_INT_P (expected_size_exp) && count == 0)
14570 expected_size = INTVAL (expected_size_exp);
14572 /* Step 0: Decide on preferred algorithm, desired alignment and
14573 size of chunks to be copied by main loop. */
14575 alg = decide_alg (count, expected_size, true, &dynamic_check);
14576 desired_align = decide_alignment (align, alg, expected_size);
14578 if (!TARGET_ALIGN_STRINGOPS)
14579 align = desired_align;
14581 if (alg == libcall)
14582 return 0;
14583 gcc_assert (alg != no_stringop);
14584 if (!count)
14585 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14586 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14587 switch (alg)
14589 case libcall:
14590 case no_stringop:
14591 gcc_unreachable ();
14592 case loop:
14593 size_needed = GET_MODE_SIZE (Pmode);
14594 break;
14595 case unrolled_loop:
14596 size_needed = GET_MODE_SIZE (Pmode) * 4;
14597 break;
14598 case rep_prefix_8_byte:
14599 size_needed = 8;
14600 break;
14601 case rep_prefix_4_byte:
14602 size_needed = 4;
14603 break;
14604 case rep_prefix_1_byte:
14605 case loop_1_byte:
14606 size_needed = 1;
14607 break;
14609 epilogue_size_needed = size_needed;
14611 /* Step 1: Prologue guard. */
14613 /* Alignment code needs count to be in register. */
14614 if (CONST_INT_P (count_exp) && desired_align > align)
14616 enum machine_mode mode = SImode;
14617 if (TARGET_64BIT && (count & ~0xffffffff))
14618 mode = DImode;
14619 count_exp = force_reg (mode, count_exp);
14621 /* Do the cheap promotion to allow better CSE across the
14622 main loop and epilogue (i.e. one load of the big constant in
14623 front of all the code). */
14624 if (CONST_INT_P (val_exp))
14625 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14626 desired_align, align);
14627 /* Ensure that alignment prologue won't copy past end of block. */
14628 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14630 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14631 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14632 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
14633 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14635 /* To improve performance of small blocks, we jump around the VAL
14636 promoting code.  This means that if the promoted VAL is not constant,
14637 we might not use it in the epilogue and have to use the byte
14638 loop variant. */
14639 if (epilogue_size_needed > 2 && !promoted_val)
14640 force_loopy_epilogue = true;
14641 label = gen_label_rtx ();
14642 emit_cmp_and_jump_insns (count_exp,
14643 GEN_INT (epilogue_size_needed),
14644 LTU, 0, counter_mode (count_exp), 1, label);
14645 if (GET_CODE (count_exp) == CONST_INT)
14647 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14648 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14649 else
14650 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14652 if (dynamic_check != -1)
14654 rtx hot_label = gen_label_rtx ();
14655 jump_around_label = gen_label_rtx ();
14656 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14657 LEU, 0, counter_mode (count_exp), 1, hot_label);
14658 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14659 set_storage_via_libcall (dst, count_exp, val_exp, false);
14660 emit_jump (jump_around_label);
14661 emit_label (hot_label);
14664 /* Step 2: Alignment prologue. */
14666 /* Do the expensive promotion once we branched off the small blocks. */
14667 if (!promoted_val)
14668 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14669 desired_align, align);
14670 gcc_assert (desired_align >= 1 && align >= 1);
14672 if (desired_align > align)
14674 /* Except for the first move in the epilogue, we no longer know
14675 the constant offset in the aliasing info.  It doesn't seem worth
14676 the pain to maintain it for the first move, so throw away
14677 the info early. */
14678 dst = change_address (dst, BLKmode, destreg);
14679 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14680 desired_align);
14682 if (label && size_needed == 1)
14684 emit_label (label);
14685 LABEL_NUSES (label) = 1;
14686 label = NULL;
14689 /* Step 3: Main loop. */
14691 switch (alg)
14693 case libcall:
14694 case no_stringop:
14695 gcc_unreachable ();
14696 case loop_1_byte:
14697 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14698 count_exp, QImode, 1, expected_size);
14699 break;
14700 case loop:
14701 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14702 count_exp, Pmode, 1, expected_size);
14703 break;
14704 case unrolled_loop:
14705 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14706 count_exp, Pmode, 4, expected_size);
14707 break;
14708 case rep_prefix_8_byte:
14709 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14710 DImode);
14711 break;
14712 case rep_prefix_4_byte:
14713 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14714 SImode);
14715 break;
14716 case rep_prefix_1_byte:
14717 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14718 QImode);
14719 break;
14721 /* Adjust properly the offset of src and dest memory for aliasing. */
14722 if (CONST_INT_P (count_exp))
14723 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14724 (count / size_needed) * size_needed);
14725 else
14726 dst = change_address (dst, BLKmode, destreg);
14728 /* Step 4: Epilogue to copy the remaining bytes. */
14730 if (label)
14732 /* When the main loop is done, COUNT_EXP might hold the original count,
14733 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14734 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14735 bytes. Compensate if needed. */
14737 if (size_needed < desired_align - align)
14739 tmp =
14740 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14741 GEN_INT (size_needed - 1), count_exp, 1,
14742 OPTAB_DIRECT);
14743 size_needed = desired_align - align + 1;
14744 if (tmp != count_exp)
14745 emit_move_insn (count_exp, tmp);
14747 emit_label (label);
14748 LABEL_NUSES (label) = 1;
14750 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14752 if (force_loopy_epilogue)
14753 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14754 size_needed);
14755 else
14756 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14757 size_needed);
14759 if (jump_around_label)
14760 emit_label (jump_around_label);
14761 return 1;
14764 /* Expand strlen. */
14766 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14768 rtx addr, scratch1, scratch2, scratch3, scratch4;
14770 /* The generic case of the strlen expander is long.  Avoid expanding it
14771 unless TARGET_INLINE_ALL_STRINGOPS. */
14773 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14774 && !TARGET_INLINE_ALL_STRINGOPS
14775 && !optimize_size
14776 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14777 return 0;
14779 addr = force_reg (Pmode, XEXP (src, 0));
14780 scratch1 = gen_reg_rtx (Pmode);
14782 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14783 && !optimize_size)
14785 /* Well, it seems that some optimizer does not combine a call like
14786 foo(strlen(bar), strlen(bar));
14787 when the move and the subtraction are done here.  It does calculate
14788 the length just once when these instructions are done inside
14789 output_strlen_unroll().  But since &bar[strlen(bar)] is
14790 often used and I use one fewer register for the lifetime of
14791 output_strlen_unroll(), this is better. */
14793 emit_move_insn (out, addr);
14795 ix86_expand_strlensi_unroll_1 (out, src, align);
14797 /* strlensi_unroll_1 returns the address of the zero at the end of
14798 the string, like memchr(), so compute the length by subtracting
14799 the start address. */
14800 if (TARGET_64BIT)
14801 emit_insn (gen_subdi3 (out, out, addr));
14802 else
14803 emit_insn (gen_subsi3 (out, out, addr));
14805 else
14807 rtx unspec;
14808 scratch2 = gen_reg_rtx (Pmode);
14809 scratch3 = gen_reg_rtx (Pmode);
14810 scratch4 = force_reg (Pmode, constm1_rtx);
14812 emit_move_insn (scratch3, addr);
14813 eoschar = force_reg (QImode, eoschar);
14815 src = replace_equiv_address_nv (src, scratch3);
14817 /* If .md starts supporting :P, this can be done in .md. */
14818 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14819 scratch4), UNSPEC_SCAS);
14820 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14821 if (TARGET_64BIT)
14823 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14824 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14826 else
14828 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14829 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14832 return 1;
14835 /* Expand the appropriate insns for doing strlen if not just doing
14836 repnz; scasb
14838 out = result, initialized with the start address
14839 align_rtx = alignment of the address.
14840 scratch = scratch register, initialized with the start address when
14841 not aligned, otherwise undefined
14843 This is just the body. It needs the initializations mentioned above and
14844 some address computing at the end. These things are done in i386.md. */
14846 static void
14847 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14849 int align;
14850 rtx tmp;
14851 rtx align_2_label = NULL_RTX;
14852 rtx align_3_label = NULL_RTX;
14853 rtx align_4_label = gen_label_rtx ();
14854 rtx end_0_label = gen_label_rtx ();
14855 rtx mem;
14856 rtx tmpreg = gen_reg_rtx (SImode);
14857 rtx scratch = gen_reg_rtx (SImode);
14858 rtx cmp;
14860 align = 0;
14861 if (CONST_INT_P (align_rtx))
14862 align = INTVAL (align_rtx);
14864 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14866 /* Is there a known alignment and is it less than 4? */
14867 if (align < 4)
14869 rtx scratch1 = gen_reg_rtx (Pmode);
14870 emit_move_insn (scratch1, out);
14871 /* Is there a known alignment and is it not 2? */
14872 if (align != 2)
14874 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14875 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14877 /* Leave just the 3 lower bits. */
14878 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14879 NULL_RTX, 0, OPTAB_WIDEN);
14881 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14882 Pmode, 1, align_4_label);
14883 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14884 Pmode, 1, align_2_label);
14885 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14886 Pmode, 1, align_3_label);
14888 else
14890 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14891 check whether it is aligned to a 4-byte boundary. */
14893 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14894 NULL_RTX, 0, OPTAB_WIDEN);
14896 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14897 Pmode, 1, align_4_label);
14900 mem = change_address (src, QImode, out);
14902 /* Now compare the bytes. */
14904 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14905 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14906 QImode, 1, end_0_label);
14908 /* Increment the address. */
14909 if (TARGET_64BIT)
14910 emit_insn (gen_adddi3 (out, out, const1_rtx));
14911 else
14912 emit_insn (gen_addsi3 (out, out, const1_rtx));
14914 /* Not needed with an alignment of 2 */
14915 if (align != 2)
14917 emit_label (align_2_label);
14919 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14920 end_0_label);
14922 if (TARGET_64BIT)
14923 emit_insn (gen_adddi3 (out, out, const1_rtx));
14924 else
14925 emit_insn (gen_addsi3 (out, out, const1_rtx));
14927 emit_label (align_3_label);
14930 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14931 end_0_label);
14933 if (TARGET_64BIT)
14934 emit_insn (gen_adddi3 (out, out, const1_rtx));
14935 else
14936 emit_insn (gen_addsi3 (out, out, const1_rtx));
14939 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
14940 align this loop; it only makes the program larger and does not help to
14941 speed it up. */
14942 emit_label (align_4_label);
14944 mem = change_address (src, SImode, out);
14945 emit_move_insn (scratch, mem);
14946 if (TARGET_64BIT)
14947 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14948 else
14949 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14951 /* This formula yields a nonzero result iff one of the bytes is zero.
14952 This saves three branches inside the loop and many cycles. */
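/* Spelled out in C, the test computed below is

     ((x - 0x01010101) & ~x & 0x80808080) != 0

   which is nonzero exactly when some byte of x is zero; e.g. for
   x = 0x626f6f00 it evaluates to 0x00000080, flagging the zero byte.  */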
14954 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14955 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14956 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14957 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14958 gen_int_mode (0x80808080, SImode)));
14959 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14960 align_4_label);
14962 if (TARGET_CMOVE)
14964 rtx reg = gen_reg_rtx (SImode);
14965 rtx reg2 = gen_reg_rtx (Pmode);
14966 emit_move_insn (reg, tmpreg);
14967 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14969 /* If zero is not in the first two bytes, move two bytes forward. */
14970 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14971 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14972 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14973 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14974 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14975 reg,
14976 tmpreg)));
14977 /* Emit lea manually to avoid clobbering of flags. */
14978 emit_insn (gen_rtx_SET (SImode, reg2,
14979 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14981 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14982 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14983 emit_insn (gen_rtx_SET (VOIDmode, out,
14984 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14985 reg2,
14986 out)));
14989 else
14991 rtx end_2_label = gen_label_rtx ();
14992 /* Is zero in the first two bytes? */
14994 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14995 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14996 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14997 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14998 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14999 pc_rtx);
15000 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15001 JUMP_LABEL (tmp) = end_2_label;
15003 /* Not in the first two. Move two bytes forward. */
15004 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15005 if (TARGET_64BIT)
15006 emit_insn (gen_adddi3 (out, out, const2_rtx));
15007 else
15008 emit_insn (gen_addsi3 (out, out, const2_rtx));
15010 emit_label (end_2_label);
15014 /* Avoid branch in fixing the byte. */
15015 tmpreg = gen_lowpart (QImode, tmpreg);
15016 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15017 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15018 if (TARGET_64BIT)
15019 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15020 else
15021 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15023 emit_label (end_0_label);
15026 /* For a given symbol (function), construct code to compute the address of its
15027 PLT entry in the large x86-64 PIC model. */
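/* The computation below is roughly (a sketch):

     tmp  = <symbol>@PLTOFF;        // 64-bit offset, wrapped in UNSPEC_PLTOFF
     tmp += pic_offset_table_rtx;   // add the PIC/GOT base register

   so that an indirect call through TMP reaches the function's PLT entry.  */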
15029 construct_plt_address (rtx symbol)
15031 rtx tmp = gen_reg_rtx (Pmode);
15032 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15034 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15035 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15037 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15038 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15039 return tmp;
15042 void
15043 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15044 rtx callarg2 ATTRIBUTE_UNUSED,
15045 rtx pop, int sibcall)
15047 rtx use = NULL, call;
15049 if (pop == const0_rtx)
15050 pop = NULL;
15051 gcc_assert (!TARGET_64BIT || !pop);
15053 if (TARGET_MACHO && !TARGET_64BIT)
15055 #if TARGET_MACHO
15056 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15057 fnaddr = machopic_indirect_call_target (fnaddr);
15058 #endif
15060 else
15062 /* Static functions and indirect calls don't need the pic register. */
15063 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15064 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15065 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15066 use_reg (&use, pic_offset_table_rtx);
15069 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15071 rtx al = gen_rtx_REG (QImode, 0);
15072 emit_move_insn (al, callarg2);
15073 use_reg (&use, al);
15076 if (ix86_cmodel == CM_LARGE_PIC
15077 && GET_CODE (fnaddr) == MEM
15078 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15079 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15080 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15081 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15083 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15084 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15086 if (sibcall && TARGET_64BIT
15087 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15089 rtx addr;
15090 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15091 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15092 emit_move_insn (fnaddr, addr);
15093 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15096 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15097 if (retval)
15098 call = gen_rtx_SET (VOIDmode, retval, call);
15099 if (pop)
15101 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15102 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15103 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15106 call = emit_call_insn (call);
15107 if (use)
15108 CALL_INSN_FUNCTION_USAGE (call) = use;
15112 /* Clear stack slot assignments remembered from previous functions.
15113 This is called from INIT_EXPANDERS once before RTL is emitted for each
15114 function. */
15116 static struct machine_function *
15117 ix86_init_machine_status (void)
15119 struct machine_function *f;
15121 f = ggc_alloc_cleared (sizeof (struct machine_function));
15122 f->use_fast_prologue_epilogue_nregs = -1;
15123 f->tls_descriptor_call_expanded_p = 0;
15125 return f;
15128 /* Return a MEM corresponding to a stack slot with mode MODE.
15129 Allocate a new slot if necessary.
15131 The RTL for a function can have several slots available: N is
15132 which slot to use. */
15135 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15137 struct stack_local_entry *s;
15139 gcc_assert (n < MAX_386_STACK_LOCALS);
15141 for (s = ix86_stack_locals; s; s = s->next)
15142 if (s->mode == mode && s->n == n)
15143 return copy_rtx (s->rtl);
15145 s = (struct stack_local_entry *)
15146 ggc_alloc (sizeof (struct stack_local_entry));
15147 s->n = n;
15148 s->mode = mode;
15149 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15151 s->next = ix86_stack_locals;
15152 ix86_stack_locals = s;
15153 return s->rtl;
15156 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15158 static GTY(()) rtx ix86_tls_symbol;
15160 ix86_tls_get_addr (void)
15163 if (!ix86_tls_symbol)
15165 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15166 (TARGET_ANY_GNU_TLS
15167 && !TARGET_64BIT)
15168 ? "___tls_get_addr"
15169 : "__tls_get_addr");
15172 return ix86_tls_symbol;
15175 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15177 static GTY(()) rtx ix86_tls_module_base_symbol;
15179 ix86_tls_module_base (void)
15182 if (!ix86_tls_module_base_symbol)
15184 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15185 "_TLS_MODULE_BASE_");
15186 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15187 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15190 return ix86_tls_module_base_symbol;
15193 /* Calculate the length of the memory address in the instruction
15194 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15197 memory_address_length (rtx addr)
15199 struct ix86_address parts;
15200 rtx base, index, disp;
15201 int len;
15202 int ok;
15204 if (GET_CODE (addr) == PRE_DEC
15205 || GET_CODE (addr) == POST_INC
15206 || GET_CODE (addr) == PRE_MODIFY
15207 || GET_CODE (addr) == POST_MODIFY)
15208 return 0;
15210 ok = ix86_decompose_address (addr, &parts);
15211 gcc_assert (ok);
15213 if (parts.base && GET_CODE (parts.base) == SUBREG)
15214 parts.base = SUBREG_REG (parts.base);
15215 if (parts.index && GET_CODE (parts.index) == SUBREG)
15216 parts.index = SUBREG_REG (parts.index);
15218 base = parts.base;
15219 index = parts.index;
15220 disp = parts.disp;
15221 len = 0;
15223 /* Rule of thumb:
15224 - esp as the base always wants an index,
15225 - ebp as the base always wants a displacement. */
15227 /* Register Indirect. */
15228 if (base && !index && !disp)
15230 /* esp (for its index) and ebp (for its displacement) need
15231 the two-byte modrm form. */
15232 if (addr == stack_pointer_rtx
15233 || addr == arg_pointer_rtx
15234 || addr == frame_pointer_rtx
15235 || addr == hard_frame_pointer_rtx)
15236 len = 1;
15239 /* Direct Addressing. */
15240 else if (disp && !base && !index)
15241 len = 4;
15243 else
15245 /* Find the length of the displacement constant. */
15246 if (disp)
15248 if (base && satisfies_constraint_K (disp))
15249 len = 1;
15250 else
15251 len = 4;
15253 /* ebp always wants a displacement. */
15254 else if (base == hard_frame_pointer_rtx)
15255 len = 1;
15257 /* An index requires the two-byte modrm form.... */
15258 if (index
15259 /* ...like esp, which always wants an index. */
15260 || base == stack_pointer_rtx
15261 || base == arg_pointer_rtx
15262 || base == frame_pointer_rtx)
15263 len += 1;
15266 return len;
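/* Worked examples (a rough sketch, assuming plain 32-bit operand encodings;
   the counts are the operand bytes that follow the modrm byte):

       movl (%ecx), %eax          -> 0  (modrm alone)
       movl (%esp), %eax          -> 1  (needs a SIB byte)
       movl 8(%ebp), %eax         -> 1  (8-bit displacement)
       movl foo, %edx             -> 4  (32-bit absolute displacement)
       movl 8(%ebx,%esi,4), %eax  -> 2  (SIB byte + 8-bit displacement)  */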
15269 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15270 is set, expect that the insn has an 8-bit immediate alternative. */
15272 ix86_attr_length_immediate_default (rtx insn, int shortform)
15274 int len = 0;
15275 int i;
15276 extract_insn_cached (insn);
15277 for (i = recog_data.n_operands - 1; i >= 0; --i)
15278 if (CONSTANT_P (recog_data.operand[i]))
15280 gcc_assert (!len);
15281 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15282 len = 1;
15283 else
15285 switch (get_attr_mode (insn))
15287 case MODE_QI:
15288 len+=1;
15289 break;
15290 case MODE_HI:
15291 len+=2;
15292 break;
15293 case MODE_SI:
15294 len+=4;
15295 break;
15296 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15297 case MODE_DI:
15298 len+=4;
15299 break;
15300 default:
15301 fatal_insn ("unknown insn mode", insn);
15305 return len;
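/* Rough example of the distinction above: with SHORTFORM, "addl $4, %eax"
   can use the sign-extended 8-bit immediate form (1 byte of immediate),
   while "addl $300, %eax" needs the full 32-bit immediate (4 bytes).
   DImode immediates are likewise limited to the 32-bit sign-extended
   encoding.  */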
15307 /* Compute default value for "length_address" attribute. */
15309 ix86_attr_length_address_default (rtx insn)
15311 int i;
15313 if (get_attr_type (insn) == TYPE_LEA)
15315 rtx set = PATTERN (insn);
15317 if (GET_CODE (set) == PARALLEL)
15318 set = XVECEXP (set, 0, 0);
15320 gcc_assert (GET_CODE (set) == SET);
15322 return memory_address_length (SET_SRC (set));
15325 extract_insn_cached (insn);
15326 for (i = recog_data.n_operands - 1; i >= 0; --i)
15327 if (MEM_P (recog_data.operand[i]))
15329 return memory_address_length (XEXP (recog_data.operand[i], 0));
15330 break;
15332 return 0;
15335 /* Return the maximum number of instructions a cpu can issue. */
15337 static int
15338 ix86_issue_rate (void)
15340 switch (ix86_tune)
15342 case PROCESSOR_PENTIUM:
15343 case PROCESSOR_K6:
15344 return 2;
15346 case PROCESSOR_PENTIUMPRO:
15347 case PROCESSOR_PENTIUM4:
15348 case PROCESSOR_ATHLON:
15349 case PROCESSOR_K8:
15350 case PROCESSOR_AMDFAM10:
15351 case PROCESSOR_NOCONA:
15352 case PROCESSOR_GENERIC32:
15353 case PROCESSOR_GENERIC64:
15354 return 3;
15356 case PROCESSOR_CORE2:
15357 return 4;
15359 default:
15360 return 1;
15364 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15365 by DEP_INSN and nothing else set by DEP_INSN. */
15367 static int
15368 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15370 rtx set, set2;
15372 /* Simplify the test for uninteresting insns. */
15373 if (insn_type != TYPE_SETCC
15374 && insn_type != TYPE_ICMOV
15375 && insn_type != TYPE_FCMOV
15376 && insn_type != TYPE_IBR)
15377 return 0;
15379 if ((set = single_set (dep_insn)) != 0)
15381 set = SET_DEST (set);
15382 set2 = NULL_RTX;
15384 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15385 && XVECLEN (PATTERN (dep_insn), 0) == 2
15386 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15387 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15389 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15390 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15392 else
15393 return 0;
15395 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15396 return 0;
15398 /* This test is true if the dependent insn reads the flags but
15399 not any other potentially set register. */
15400 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15401 return 0;
15403 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15404 return 0;
15406 return 1;
15409 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15410 address with operands set by DEP_INSN. */
15412 static int
15413 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15415 rtx addr;
15417 if (insn_type == TYPE_LEA
15418 && TARGET_PENTIUM)
15420 addr = PATTERN (insn);
15422 if (GET_CODE (addr) == PARALLEL)
15423 addr = XVECEXP (addr, 0, 0);
15425 gcc_assert (GET_CODE (addr) == SET);
15427 addr = SET_SRC (addr);
15429 else
15431 int i;
15432 extract_insn_cached (insn);
15433 for (i = recog_data.n_operands - 1; i >= 0; --i)
15434 if (MEM_P (recog_data.operand[i]))
15436 addr = XEXP (recog_data.operand[i], 0);
15437 goto found;
15439 return 0;
15440 found:;
15443 return modified_in_p (addr, dep_insn);
15446 static int
15447 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15449 enum attr_type insn_type, dep_insn_type;
15450 enum attr_memory memory;
15451 rtx set, set2;
15452 int dep_insn_code_number;
15454 /* Anti and output dependencies have zero cost on all CPUs. */
15455 if (REG_NOTE_KIND (link) != 0)
15456 return 0;
15458 dep_insn_code_number = recog_memoized (dep_insn);
15460 /* If we can't recognize the insns, we can't really do anything. */
15461 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15462 return cost;
15464 insn_type = get_attr_type (insn);
15465 dep_insn_type = get_attr_type (dep_insn);
15467 switch (ix86_tune)
15469 case PROCESSOR_PENTIUM:
15470 /* Address Generation Interlock adds a cycle of latency. */
15471 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15472 cost += 1;
15474 /* ??? Compares pair with jump/setcc. */
15475 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15476 cost = 0;
15478 /* Floating point stores require the value to be ready one cycle earlier. */
15479 if (insn_type == TYPE_FMOV
15480 && get_attr_memory (insn) == MEMORY_STORE
15481 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15482 cost += 1;
15483 break;
15485 case PROCESSOR_PENTIUMPRO:
15486 memory = get_attr_memory (insn);
15488 /* INT->FP conversion is expensive. */
15489 if (get_attr_fp_int_src (dep_insn))
15490 cost += 5;
15492 /* There is one cycle extra latency between an FP op and a store. */
15493 if (insn_type == TYPE_FMOV
15494 && (set = single_set (dep_insn)) != NULL_RTX
15495 && (set2 = single_set (insn)) != NULL_RTX
15496 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15497 && MEM_P (SET_DEST (set2)))
15498 cost += 1;
15500 /* Show the ability of the reorder buffer to hide the latency of a load
15501 by executing it in parallel with the previous instruction when the
15502 previous instruction is not needed to compute the address. */
15503 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15504 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15506 /* Claim moves to take one cycle, as the core can issue one load
15507 at a time and the next load can start a cycle later. */
15508 if (dep_insn_type == TYPE_IMOV
15509 || dep_insn_type == TYPE_FMOV)
15510 cost = 1;
15511 else if (cost > 1)
15512 cost--;
15514 break;
15516 case PROCESSOR_K6:
15517 memory = get_attr_memory (insn);
15519 /* The esp dependency is resolved before the instruction is really
15520 finished. */
15521 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15522 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15523 return 1;
15525 /* INT->FP conversion is expensive. */
15526 if (get_attr_fp_int_src (dep_insn))
15527 cost += 5;
15529 /* Show the ability of the reorder buffer to hide the latency of a load
15530 by executing it in parallel with the previous instruction when the
15531 previous instruction is not needed to compute the address. */
15532 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15533 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15535 /* Claim moves to take one cycle, as the core can issue one load
15536 at a time and the next load can start a cycle later. */
15537 if (dep_insn_type == TYPE_IMOV
15538 || dep_insn_type == TYPE_FMOV)
15539 cost = 1;
15540 else if (cost > 2)
15541 cost -= 2;
15542 else
15543 cost = 1;
15545 break;
15547 case PROCESSOR_ATHLON:
15548 case PROCESSOR_K8:
15549 case PROCESSOR_AMDFAM10:
15550 case PROCESSOR_GENERIC32:
15551 case PROCESSOR_GENERIC64:
15552 memory = get_attr_memory (insn);
15554 /* Show the ability of the reorder buffer to hide the latency of a load
15555 by executing it in parallel with the previous instruction when the
15556 previous instruction is not needed to compute the address. */
15557 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15558 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15560 enum attr_unit unit = get_attr_unit (insn);
15561 int loadcost = 3;
15563 /* Because of the difference between the length of integer and
15564 floating unit pipeline preparation stages, the memory operands
15565 for floating point are cheaper.
15567 ??? For Athlon the difference is most probably 2. */
15568 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15569 loadcost = 3;
15570 else
15571 loadcost = TARGET_ATHLON ? 2 : 0;
15573 if (cost >= loadcost)
15574 cost -= loadcost;
15575 else
15576 cost = 0;
15579 default:
15580 break;
15583 return cost;
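/* Rough illustration of the PROCESSOR_PENTIUM case above: an address
   generation interlock arises in a sequence such as

       addl $8, %ebx
       movl (%ebx), %eax

   where the load must wait an extra cycle for %ebx, hence the +1 added to
   the dependence cost.  */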
15586 /* How many alternative schedules to try. This should be as wide as the
15587 scheduling freedom in the DFA, but no wider. Making this value too
15588 large results in extra work for the scheduler. */
15590 static int
15591 ia32_multipass_dfa_lookahead (void)
15593 if (ix86_tune == PROCESSOR_PENTIUM)
15594 return 2;
15596 if (ix86_tune == PROCESSOR_PENTIUMPRO
15597 || ix86_tune == PROCESSOR_K6)
15598 return 1;
15600 else
15601 return 0;
15605 /* Compute the alignment given to a constant that is being placed in memory.
15606 EXP is the constant and ALIGN is the alignment that the object would
15607 ordinarily have.
15608 The value of this function is used instead of that alignment to align
15609 the object. */
15612 ix86_constant_alignment (tree exp, int align)
15614 if (TREE_CODE (exp) == REAL_CST)
15616 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15617 return 64;
15618 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15619 return 128;
15621 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15622 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15623 return BITS_PER_WORD;
15625 return align;
15628 /* Compute the alignment for a static variable.
15629 TYPE is the data type, and ALIGN is the alignment that
15630 the object would ordinarily have. The value of this function is used
15631 instead of that alignment to align the object. */
15634 ix86_data_alignment (tree type, int align)
15636 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15638 if (AGGREGATE_TYPE_P (type)
15639 && TYPE_SIZE (type)
15640 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15641 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15642 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15643 && align < max_align)
15644 align = max_align;
15646 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
15647 to a 16-byte boundary. */
15648 if (TARGET_64BIT)
15650 if (AGGREGATE_TYPE_P (type)
15651 && TYPE_SIZE (type)
15652 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15653 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15654 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15655 return 128;
15658 if (TREE_CODE (type) == ARRAY_TYPE)
15660 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15661 return 64;
15662 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15663 return 128;
15665 else if (TREE_CODE (type) == COMPLEX_TYPE)
15668 if (TYPE_MODE (type) == DCmode && align < 64)
15669 return 64;
15670 if (TYPE_MODE (type) == XCmode && align < 128)
15671 return 128;
15673 else if ((TREE_CODE (type) == RECORD_TYPE
15674 || TREE_CODE (type) == UNION_TYPE
15675 || TREE_CODE (type) == QUAL_UNION_TYPE)
15676 && TYPE_FIELDS (type))
15678 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15679 return 64;
15680 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15681 return 128;
15683 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15684 || TREE_CODE (type) == INTEGER_TYPE)
15686 if (TYPE_MODE (type) == DFmode && align < 64)
15687 return 64;
15688 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15689 return 128;
15692 return align;
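/* Rough example of the rules above: on x86-64 a static "char buf[20]"
   (20 bytes) is raised to 128-bit alignment by the aggregate rule, while a
   lone "double" keeps 64-bit alignment via the REAL_TYPE case.  */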
15695 /* Compute the alignment for a local variable.
15696 TYPE is the data type, and ALIGN is the alignment that
15697 the object would ordinarily have. The value of this macro is used
15698 instead of that alignment to align the object. */
15701 ix86_local_alignment (tree type, int align)
15703 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
15704 to a 16-byte boundary. */
15705 if (TARGET_64BIT)
15707 if (AGGREGATE_TYPE_P (type)
15708 && TYPE_SIZE (type)
15709 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15710 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15711 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15712 return 128;
15714 if (TREE_CODE (type) == ARRAY_TYPE)
15716 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15717 return 64;
15718 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15719 return 128;
15721 else if (TREE_CODE (type) == COMPLEX_TYPE)
15723 if (TYPE_MODE (type) == DCmode && align < 64)
15724 return 64;
15725 if (TYPE_MODE (type) == XCmode && align < 128)
15726 return 128;
15728 else if ((TREE_CODE (type) == RECORD_TYPE
15729 || TREE_CODE (type) == UNION_TYPE
15730 || TREE_CODE (type) == QUAL_UNION_TYPE)
15731 && TYPE_FIELDS (type))
15733 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15734 return 64;
15735 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15736 return 128;
15738 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15739 || TREE_CODE (type) == INTEGER_TYPE)
15742 if (TYPE_MODE (type) == DFmode && align < 64)
15743 return 64;
15744 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15745 return 128;
15747 return align;
15750 /* Emit RTL insns to initialize the variable parts of a trampoline.
15751 FNADDR is an RTX for the address of the function's pure code.
15752 CXT is an RTX for the static chain value for the function. */
15753 void
15754 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15756 if (!TARGET_64BIT)
15758 /* Compute offset from the end of the jmp to the target function. */
15759 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15760 plus_constant (tramp, 10),
15761 NULL_RTX, 1, OPTAB_DIRECT);
15762 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15763 gen_int_mode (0xb9, QImode));
15764 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15765 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15766 gen_int_mode (0xe9, QImode));
15767 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15769 else
15771 int offset = 0;
15772 /* Try to load address using shorter movl instead of movabs.
15773 We may want to support movq for kernel mode, but the kernel does not use
15774 trampolines at the moment. */
15775 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15777 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15778 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15779 gen_int_mode (0xbb41, HImode));
15780 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15781 gen_lowpart (SImode, fnaddr));
15782 offset += 6;
15784 else
15786 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15787 gen_int_mode (0xbb49, HImode));
15788 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15789 fnaddr);
15790 offset += 10;
15792 /* Load static chain using movabs to r10. */
15793 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15794 gen_int_mode (0xba49, HImode));
15795 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15796 cxt);
15797 offset += 10;
15798 /* Jump to r11. */
15799 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15800 gen_int_mode (0xff49, HImode));
15801 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15802 gen_int_mode (0xe3, QImode));
15803 offset += 3;
15804 gcc_assert (offset <= TRAMPOLINE_SIZE);
15807 #ifdef ENABLE_EXECUTE_STACK
15808 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15809 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15810 #endif
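/* For reference, a sketch of the bytes emitted above (reconstructed from
   the constants used; offsets are from the start of the trampoline):

     32-bit:   b9 <cxt:4>            movl   $cxt, %ecx
               e9 <disp:4>           jmp    fnaddr        (pc-relative)

     64-bit:   41 bb <fnaddr:4>      movl   $fnaddr, %r11d   (short form)
           or  49 bb <fnaddr:8>      movabs $fnaddr, %r11
               49 ba <cxt:8>         movabs $cxt, %r10
               49 ff e3              jmp    *%r11  */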
15813 /* Codes for all the SSE/MMX builtins. */
15814 enum ix86_builtins
15816 IX86_BUILTIN_ADDPS,
15817 IX86_BUILTIN_ADDSS,
15818 IX86_BUILTIN_DIVPS,
15819 IX86_BUILTIN_DIVSS,
15820 IX86_BUILTIN_MULPS,
15821 IX86_BUILTIN_MULSS,
15822 IX86_BUILTIN_SUBPS,
15823 IX86_BUILTIN_SUBSS,
15825 IX86_BUILTIN_CMPEQPS,
15826 IX86_BUILTIN_CMPLTPS,
15827 IX86_BUILTIN_CMPLEPS,
15828 IX86_BUILTIN_CMPGTPS,
15829 IX86_BUILTIN_CMPGEPS,
15830 IX86_BUILTIN_CMPNEQPS,
15831 IX86_BUILTIN_CMPNLTPS,
15832 IX86_BUILTIN_CMPNLEPS,
15833 IX86_BUILTIN_CMPNGTPS,
15834 IX86_BUILTIN_CMPNGEPS,
15835 IX86_BUILTIN_CMPORDPS,
15836 IX86_BUILTIN_CMPUNORDPS,
15837 IX86_BUILTIN_CMPEQSS,
15838 IX86_BUILTIN_CMPLTSS,
15839 IX86_BUILTIN_CMPLESS,
15840 IX86_BUILTIN_CMPNEQSS,
15841 IX86_BUILTIN_CMPNLTSS,
15842 IX86_BUILTIN_CMPNLESS,
15843 IX86_BUILTIN_CMPNGTSS,
15844 IX86_BUILTIN_CMPNGESS,
15845 IX86_BUILTIN_CMPORDSS,
15846 IX86_BUILTIN_CMPUNORDSS,
15848 IX86_BUILTIN_COMIEQSS,
15849 IX86_BUILTIN_COMILTSS,
15850 IX86_BUILTIN_COMILESS,
15851 IX86_BUILTIN_COMIGTSS,
15852 IX86_BUILTIN_COMIGESS,
15853 IX86_BUILTIN_COMINEQSS,
15854 IX86_BUILTIN_UCOMIEQSS,
15855 IX86_BUILTIN_UCOMILTSS,
15856 IX86_BUILTIN_UCOMILESS,
15857 IX86_BUILTIN_UCOMIGTSS,
15858 IX86_BUILTIN_UCOMIGESS,
15859 IX86_BUILTIN_UCOMINEQSS,
15861 IX86_BUILTIN_CVTPI2PS,
15862 IX86_BUILTIN_CVTPS2PI,
15863 IX86_BUILTIN_CVTSI2SS,
15864 IX86_BUILTIN_CVTSI642SS,
15865 IX86_BUILTIN_CVTSS2SI,
15866 IX86_BUILTIN_CVTSS2SI64,
15867 IX86_BUILTIN_CVTTPS2PI,
15868 IX86_BUILTIN_CVTTSS2SI,
15869 IX86_BUILTIN_CVTTSS2SI64,
15871 IX86_BUILTIN_MAXPS,
15872 IX86_BUILTIN_MAXSS,
15873 IX86_BUILTIN_MINPS,
15874 IX86_BUILTIN_MINSS,
15876 IX86_BUILTIN_LOADUPS,
15877 IX86_BUILTIN_STOREUPS,
15878 IX86_BUILTIN_MOVSS,
15880 IX86_BUILTIN_MOVHLPS,
15881 IX86_BUILTIN_MOVLHPS,
15882 IX86_BUILTIN_LOADHPS,
15883 IX86_BUILTIN_LOADLPS,
15884 IX86_BUILTIN_STOREHPS,
15885 IX86_BUILTIN_STORELPS,
15887 IX86_BUILTIN_MASKMOVQ,
15888 IX86_BUILTIN_MOVMSKPS,
15889 IX86_BUILTIN_PMOVMSKB,
15891 IX86_BUILTIN_MOVNTPS,
15892 IX86_BUILTIN_MOVNTQ,
15894 IX86_BUILTIN_LOADDQU,
15895 IX86_BUILTIN_STOREDQU,
15897 IX86_BUILTIN_PACKSSWB,
15898 IX86_BUILTIN_PACKSSDW,
15899 IX86_BUILTIN_PACKUSWB,
15901 IX86_BUILTIN_PADDB,
15902 IX86_BUILTIN_PADDW,
15903 IX86_BUILTIN_PADDD,
15904 IX86_BUILTIN_PADDQ,
15905 IX86_BUILTIN_PADDSB,
15906 IX86_BUILTIN_PADDSW,
15907 IX86_BUILTIN_PADDUSB,
15908 IX86_BUILTIN_PADDUSW,
15909 IX86_BUILTIN_PSUBB,
15910 IX86_BUILTIN_PSUBW,
15911 IX86_BUILTIN_PSUBD,
15912 IX86_BUILTIN_PSUBQ,
15913 IX86_BUILTIN_PSUBSB,
15914 IX86_BUILTIN_PSUBSW,
15915 IX86_BUILTIN_PSUBUSB,
15916 IX86_BUILTIN_PSUBUSW,
15918 IX86_BUILTIN_PAND,
15919 IX86_BUILTIN_PANDN,
15920 IX86_BUILTIN_POR,
15921 IX86_BUILTIN_PXOR,
15923 IX86_BUILTIN_PAVGB,
15924 IX86_BUILTIN_PAVGW,
15926 IX86_BUILTIN_PCMPEQB,
15927 IX86_BUILTIN_PCMPEQW,
15928 IX86_BUILTIN_PCMPEQD,
15929 IX86_BUILTIN_PCMPGTB,
15930 IX86_BUILTIN_PCMPGTW,
15931 IX86_BUILTIN_PCMPGTD,
15933 IX86_BUILTIN_PMADDWD,
15935 IX86_BUILTIN_PMAXSW,
15936 IX86_BUILTIN_PMAXUB,
15937 IX86_BUILTIN_PMINSW,
15938 IX86_BUILTIN_PMINUB,
15940 IX86_BUILTIN_PMULHUW,
15941 IX86_BUILTIN_PMULHW,
15942 IX86_BUILTIN_PMULLW,
15944 IX86_BUILTIN_PSADBW,
15945 IX86_BUILTIN_PSHUFW,
15947 IX86_BUILTIN_PSLLW,
15948 IX86_BUILTIN_PSLLD,
15949 IX86_BUILTIN_PSLLQ,
15950 IX86_BUILTIN_PSRAW,
15951 IX86_BUILTIN_PSRAD,
15952 IX86_BUILTIN_PSRLW,
15953 IX86_BUILTIN_PSRLD,
15954 IX86_BUILTIN_PSRLQ,
15955 IX86_BUILTIN_PSLLWI,
15956 IX86_BUILTIN_PSLLDI,
15957 IX86_BUILTIN_PSLLQI,
15958 IX86_BUILTIN_PSRAWI,
15959 IX86_BUILTIN_PSRADI,
15960 IX86_BUILTIN_PSRLWI,
15961 IX86_BUILTIN_PSRLDI,
15962 IX86_BUILTIN_PSRLQI,
15964 IX86_BUILTIN_PUNPCKHBW,
15965 IX86_BUILTIN_PUNPCKHWD,
15966 IX86_BUILTIN_PUNPCKHDQ,
15967 IX86_BUILTIN_PUNPCKLBW,
15968 IX86_BUILTIN_PUNPCKLWD,
15969 IX86_BUILTIN_PUNPCKLDQ,
15971 IX86_BUILTIN_SHUFPS,
15973 IX86_BUILTIN_RCPPS,
15974 IX86_BUILTIN_RCPSS,
15975 IX86_BUILTIN_RSQRTPS,
15976 IX86_BUILTIN_RSQRTSS,
15977 IX86_BUILTIN_SQRTPS,
15978 IX86_BUILTIN_SQRTSS,
15980 IX86_BUILTIN_UNPCKHPS,
15981 IX86_BUILTIN_UNPCKLPS,
15983 IX86_BUILTIN_ANDPS,
15984 IX86_BUILTIN_ANDNPS,
15985 IX86_BUILTIN_ORPS,
15986 IX86_BUILTIN_XORPS,
15988 IX86_BUILTIN_EMMS,
15989 IX86_BUILTIN_LDMXCSR,
15990 IX86_BUILTIN_STMXCSR,
15991 IX86_BUILTIN_SFENCE,
15993 /* 3DNow! Original */
15994 IX86_BUILTIN_FEMMS,
15995 IX86_BUILTIN_PAVGUSB,
15996 IX86_BUILTIN_PF2ID,
15997 IX86_BUILTIN_PFACC,
15998 IX86_BUILTIN_PFADD,
15999 IX86_BUILTIN_PFCMPEQ,
16000 IX86_BUILTIN_PFCMPGE,
16001 IX86_BUILTIN_PFCMPGT,
16002 IX86_BUILTIN_PFMAX,
16003 IX86_BUILTIN_PFMIN,
16004 IX86_BUILTIN_PFMUL,
16005 IX86_BUILTIN_PFRCP,
16006 IX86_BUILTIN_PFRCPIT1,
16007 IX86_BUILTIN_PFRCPIT2,
16008 IX86_BUILTIN_PFRSQIT1,
16009 IX86_BUILTIN_PFRSQRT,
16010 IX86_BUILTIN_PFSUB,
16011 IX86_BUILTIN_PFSUBR,
16012 IX86_BUILTIN_PI2FD,
16013 IX86_BUILTIN_PMULHRW,
16015 /* 3DNow! Athlon Extensions */
16016 IX86_BUILTIN_PF2IW,
16017 IX86_BUILTIN_PFNACC,
16018 IX86_BUILTIN_PFPNACC,
16019 IX86_BUILTIN_PI2FW,
16020 IX86_BUILTIN_PSWAPDSI,
16021 IX86_BUILTIN_PSWAPDSF,
16023 /* SSE2 */
16024 IX86_BUILTIN_ADDPD,
16025 IX86_BUILTIN_ADDSD,
16026 IX86_BUILTIN_DIVPD,
16027 IX86_BUILTIN_DIVSD,
16028 IX86_BUILTIN_MULPD,
16029 IX86_BUILTIN_MULSD,
16030 IX86_BUILTIN_SUBPD,
16031 IX86_BUILTIN_SUBSD,
16033 IX86_BUILTIN_CMPEQPD,
16034 IX86_BUILTIN_CMPLTPD,
16035 IX86_BUILTIN_CMPLEPD,
16036 IX86_BUILTIN_CMPGTPD,
16037 IX86_BUILTIN_CMPGEPD,
16038 IX86_BUILTIN_CMPNEQPD,
16039 IX86_BUILTIN_CMPNLTPD,
16040 IX86_BUILTIN_CMPNLEPD,
16041 IX86_BUILTIN_CMPNGTPD,
16042 IX86_BUILTIN_CMPNGEPD,
16043 IX86_BUILTIN_CMPORDPD,
16044 IX86_BUILTIN_CMPUNORDPD,
16045 IX86_BUILTIN_CMPNEPD,
16046 IX86_BUILTIN_CMPEQSD,
16047 IX86_BUILTIN_CMPLTSD,
16048 IX86_BUILTIN_CMPLESD,
16049 IX86_BUILTIN_CMPNEQSD,
16050 IX86_BUILTIN_CMPNLTSD,
16051 IX86_BUILTIN_CMPNLESD,
16052 IX86_BUILTIN_CMPORDSD,
16053 IX86_BUILTIN_CMPUNORDSD,
16054 IX86_BUILTIN_CMPNESD,
16056 IX86_BUILTIN_COMIEQSD,
16057 IX86_BUILTIN_COMILTSD,
16058 IX86_BUILTIN_COMILESD,
16059 IX86_BUILTIN_COMIGTSD,
16060 IX86_BUILTIN_COMIGESD,
16061 IX86_BUILTIN_COMINEQSD,
16062 IX86_BUILTIN_UCOMIEQSD,
16063 IX86_BUILTIN_UCOMILTSD,
16064 IX86_BUILTIN_UCOMILESD,
16065 IX86_BUILTIN_UCOMIGTSD,
16066 IX86_BUILTIN_UCOMIGESD,
16067 IX86_BUILTIN_UCOMINEQSD,
16069 IX86_BUILTIN_MAXPD,
16070 IX86_BUILTIN_MAXSD,
16071 IX86_BUILTIN_MINPD,
16072 IX86_BUILTIN_MINSD,
16074 IX86_BUILTIN_ANDPD,
16075 IX86_BUILTIN_ANDNPD,
16076 IX86_BUILTIN_ORPD,
16077 IX86_BUILTIN_XORPD,
16079 IX86_BUILTIN_SQRTPD,
16080 IX86_BUILTIN_SQRTSD,
16082 IX86_BUILTIN_UNPCKHPD,
16083 IX86_BUILTIN_UNPCKLPD,
16085 IX86_BUILTIN_SHUFPD,
16087 IX86_BUILTIN_LOADUPD,
16088 IX86_BUILTIN_STOREUPD,
16089 IX86_BUILTIN_MOVSD,
16091 IX86_BUILTIN_LOADHPD,
16092 IX86_BUILTIN_LOADLPD,
16094 IX86_BUILTIN_CVTDQ2PD,
16095 IX86_BUILTIN_CVTDQ2PS,
16097 IX86_BUILTIN_CVTPD2DQ,
16098 IX86_BUILTIN_CVTPD2PI,
16099 IX86_BUILTIN_CVTPD2PS,
16100 IX86_BUILTIN_CVTTPD2DQ,
16101 IX86_BUILTIN_CVTTPD2PI,
16103 IX86_BUILTIN_CVTPI2PD,
16104 IX86_BUILTIN_CVTSI2SD,
16105 IX86_BUILTIN_CVTSI642SD,
16107 IX86_BUILTIN_CVTSD2SI,
16108 IX86_BUILTIN_CVTSD2SI64,
16109 IX86_BUILTIN_CVTSD2SS,
16110 IX86_BUILTIN_CVTSS2SD,
16111 IX86_BUILTIN_CVTTSD2SI,
16112 IX86_BUILTIN_CVTTSD2SI64,
16114 IX86_BUILTIN_CVTPS2DQ,
16115 IX86_BUILTIN_CVTPS2PD,
16116 IX86_BUILTIN_CVTTPS2DQ,
16118 IX86_BUILTIN_MOVNTI,
16119 IX86_BUILTIN_MOVNTPD,
16120 IX86_BUILTIN_MOVNTDQ,
16122 /* SSE2 MMX */
16123 IX86_BUILTIN_MASKMOVDQU,
16124 IX86_BUILTIN_MOVMSKPD,
16125 IX86_BUILTIN_PMOVMSKB128,
16127 IX86_BUILTIN_PACKSSWB128,
16128 IX86_BUILTIN_PACKSSDW128,
16129 IX86_BUILTIN_PACKUSWB128,
16131 IX86_BUILTIN_PADDB128,
16132 IX86_BUILTIN_PADDW128,
16133 IX86_BUILTIN_PADDD128,
16134 IX86_BUILTIN_PADDQ128,
16135 IX86_BUILTIN_PADDSB128,
16136 IX86_BUILTIN_PADDSW128,
16137 IX86_BUILTIN_PADDUSB128,
16138 IX86_BUILTIN_PADDUSW128,
16139 IX86_BUILTIN_PSUBB128,
16140 IX86_BUILTIN_PSUBW128,
16141 IX86_BUILTIN_PSUBD128,
16142 IX86_BUILTIN_PSUBQ128,
16143 IX86_BUILTIN_PSUBSB128,
16144 IX86_BUILTIN_PSUBSW128,
16145 IX86_BUILTIN_PSUBUSB128,
16146 IX86_BUILTIN_PSUBUSW128,
16148 IX86_BUILTIN_PAND128,
16149 IX86_BUILTIN_PANDN128,
16150 IX86_BUILTIN_POR128,
16151 IX86_BUILTIN_PXOR128,
16153 IX86_BUILTIN_PAVGB128,
16154 IX86_BUILTIN_PAVGW128,
16156 IX86_BUILTIN_PCMPEQB128,
16157 IX86_BUILTIN_PCMPEQW128,
16158 IX86_BUILTIN_PCMPEQD128,
16159 IX86_BUILTIN_PCMPGTB128,
16160 IX86_BUILTIN_PCMPGTW128,
16161 IX86_BUILTIN_PCMPGTD128,
16163 IX86_BUILTIN_PMADDWD128,
16165 IX86_BUILTIN_PMAXSW128,
16166 IX86_BUILTIN_PMAXUB128,
16167 IX86_BUILTIN_PMINSW128,
16168 IX86_BUILTIN_PMINUB128,
16170 IX86_BUILTIN_PMULUDQ,
16171 IX86_BUILTIN_PMULUDQ128,
16172 IX86_BUILTIN_PMULHUW128,
16173 IX86_BUILTIN_PMULHW128,
16174 IX86_BUILTIN_PMULLW128,
16176 IX86_BUILTIN_PSADBW128,
16177 IX86_BUILTIN_PSHUFHW,
16178 IX86_BUILTIN_PSHUFLW,
16179 IX86_BUILTIN_PSHUFD,
16181 IX86_BUILTIN_PSLLW128,
16182 IX86_BUILTIN_PSLLD128,
16183 IX86_BUILTIN_PSLLQ128,
16184 IX86_BUILTIN_PSRAW128,
16185 IX86_BUILTIN_PSRAD128,
16186 IX86_BUILTIN_PSRLW128,
16187 IX86_BUILTIN_PSRLD128,
16188 IX86_BUILTIN_PSRLQ128,
16189 IX86_BUILTIN_PSLLDQI128,
16190 IX86_BUILTIN_PSLLWI128,
16191 IX86_BUILTIN_PSLLDI128,
16192 IX86_BUILTIN_PSLLQI128,
16193 IX86_BUILTIN_PSRAWI128,
16194 IX86_BUILTIN_PSRADI128,
16195 IX86_BUILTIN_PSRLDQI128,
16196 IX86_BUILTIN_PSRLWI128,
16197 IX86_BUILTIN_PSRLDI128,
16198 IX86_BUILTIN_PSRLQI128,
16200 IX86_BUILTIN_PUNPCKHBW128,
16201 IX86_BUILTIN_PUNPCKHWD128,
16202 IX86_BUILTIN_PUNPCKHDQ128,
16203 IX86_BUILTIN_PUNPCKHQDQ128,
16204 IX86_BUILTIN_PUNPCKLBW128,
16205 IX86_BUILTIN_PUNPCKLWD128,
16206 IX86_BUILTIN_PUNPCKLDQ128,
16207 IX86_BUILTIN_PUNPCKLQDQ128,
16209 IX86_BUILTIN_CLFLUSH,
16210 IX86_BUILTIN_MFENCE,
16211 IX86_BUILTIN_LFENCE,
16213 /* Prescott New Instructions. */
16214 IX86_BUILTIN_ADDSUBPS,
16215 IX86_BUILTIN_HADDPS,
16216 IX86_BUILTIN_HSUBPS,
16217 IX86_BUILTIN_MOVSHDUP,
16218 IX86_BUILTIN_MOVSLDUP,
16219 IX86_BUILTIN_ADDSUBPD,
16220 IX86_BUILTIN_HADDPD,
16221 IX86_BUILTIN_HSUBPD,
16222 IX86_BUILTIN_LDDQU,
16224 IX86_BUILTIN_MONITOR,
16225 IX86_BUILTIN_MWAIT,
16227 /* SSSE3. */
16228 IX86_BUILTIN_PHADDW,
16229 IX86_BUILTIN_PHADDD,
16230 IX86_BUILTIN_PHADDSW,
16231 IX86_BUILTIN_PHSUBW,
16232 IX86_BUILTIN_PHSUBD,
16233 IX86_BUILTIN_PHSUBSW,
16234 IX86_BUILTIN_PMADDUBSW,
16235 IX86_BUILTIN_PMULHRSW,
16236 IX86_BUILTIN_PSHUFB,
16237 IX86_BUILTIN_PSIGNB,
16238 IX86_BUILTIN_PSIGNW,
16239 IX86_BUILTIN_PSIGND,
16240 IX86_BUILTIN_PALIGNR,
16241 IX86_BUILTIN_PABSB,
16242 IX86_BUILTIN_PABSW,
16243 IX86_BUILTIN_PABSD,
16245 IX86_BUILTIN_PHADDW128,
16246 IX86_BUILTIN_PHADDD128,
16247 IX86_BUILTIN_PHADDSW128,
16248 IX86_BUILTIN_PHSUBW128,
16249 IX86_BUILTIN_PHSUBD128,
16250 IX86_BUILTIN_PHSUBSW128,
16251 IX86_BUILTIN_PMADDUBSW128,
16252 IX86_BUILTIN_PMULHRSW128,
16253 IX86_BUILTIN_PSHUFB128,
16254 IX86_BUILTIN_PSIGNB128,
16255 IX86_BUILTIN_PSIGNW128,
16256 IX86_BUILTIN_PSIGND128,
16257 IX86_BUILTIN_PALIGNR128,
16258 IX86_BUILTIN_PABSB128,
16259 IX86_BUILTIN_PABSW128,
16260 IX86_BUILTIN_PABSD128,
16262 /* AMDFAM10 - SSE4A New Instructions. */
16263 IX86_BUILTIN_MOVNTSD,
16264 IX86_BUILTIN_MOVNTSS,
16265 IX86_BUILTIN_EXTRQI,
16266 IX86_BUILTIN_EXTRQ,
16267 IX86_BUILTIN_INSERTQI,
16268 IX86_BUILTIN_INSERTQ,
16270 IX86_BUILTIN_VEC_INIT_V2SI,
16271 IX86_BUILTIN_VEC_INIT_V4HI,
16272 IX86_BUILTIN_VEC_INIT_V8QI,
16273 IX86_BUILTIN_VEC_EXT_V2DF,
16274 IX86_BUILTIN_VEC_EXT_V2DI,
16275 IX86_BUILTIN_VEC_EXT_V4SF,
16276 IX86_BUILTIN_VEC_EXT_V4SI,
16277 IX86_BUILTIN_VEC_EXT_V8HI,
16278 IX86_BUILTIN_VEC_EXT_V2SI,
16279 IX86_BUILTIN_VEC_EXT_V4HI,
16280 IX86_BUILTIN_VEC_SET_V8HI,
16281 IX86_BUILTIN_VEC_SET_V4HI,
16283 IX86_BUILTIN_MAX
16286 /* Table for the ix86 builtin decls. */
16287 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16289 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16290 * only if target_flags includes one of the bits in MASK. Stores the
16291 * function decl in the ix86_builtins array.
16292 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16294 static inline tree
16295 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16297 tree decl = NULL_TREE;
16299 if (mask & target_flags
16300 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16302 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16303 NULL, NULL_TREE);
16304 ix86_builtins[(int) code] = decl;
16307 return decl;
16310 /* Like def_builtin, but also marks the function decl "const". */
16312 static inline tree
16313 def_builtin_const (int mask, const char *name, tree type,
16314 enum ix86_builtins code)
16316 tree decl = def_builtin (mask, name, type, code);
16317 if (decl)
16318 TREE_READONLY (decl) = 1;
16319 return decl;
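/* Illustrative use (the function type node named here is only an example
   and is assumed to be built later, in ix86_init_mmx_sse_builtins):

       def_builtin (MASK_SSE, "__builtin_ia32_addps",
                    v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   registers the builtin only when -msse is reflected in target_flags and
   records the decl in ix86_builtins[IX86_BUILTIN_ADDPS].  */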
16322 /* Bits for builtin_description.flag. */
16324 /* Set when we don't support the comparison natively, and should
16325 swap_comparison in order to support it. */
16326 #define BUILTIN_DESC_SWAP_OPERANDS 1
16328 struct builtin_description
16330 const unsigned int mask;
16331 const enum insn_code icode;
16332 const char *const name;
16333 const enum ix86_builtins code;
16334 const enum rtx_code comparison;
16335 const unsigned int flag;
16338 static const struct builtin_description bdesc_comi[] =
16340 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16341 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16342 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16343 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16344 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16345 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16346 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16347 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16348 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16349 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16350 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16351 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16352 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16353 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16354 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16355 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16356 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16357 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16358 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16359 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16360 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16362 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16363 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16366 static const struct builtin_description bdesc_2arg[] =
16368 /* SSE */
16369 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16370 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16371 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16372 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16373 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16374 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16375 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16376 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16378 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16379 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16380 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16381 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16382 BUILTIN_DESC_SWAP_OPERANDS },
16383 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16384 BUILTIN_DESC_SWAP_OPERANDS },
16385 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16386 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16387 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16388 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16389 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16390 BUILTIN_DESC_SWAP_OPERANDS },
16391 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16392 BUILTIN_DESC_SWAP_OPERANDS },
16393 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16394 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16395 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16396 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16397 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16398 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16399 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16400 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16401 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16402 BUILTIN_DESC_SWAP_OPERANDS },
16403 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16404 BUILTIN_DESC_SWAP_OPERANDS },
16405 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16407 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16408 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16409 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16410 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16412 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16413 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16414 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16415 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16417 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16418 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16419 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16420 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16421 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16423 /* MMX */
16424 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16425 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16426 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16427 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16428 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16429 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16430 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16431 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16433 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16434 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16435 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16436 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16437 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16438 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16439 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16440 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16442 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16443 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16444 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16446 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16447 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16448 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16449 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16451 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16452 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16454 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16455 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16456 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16457 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16458 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16459 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16461 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16462 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16463 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16464 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16466 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16467 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16468 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16469 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16470 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16471 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16473 /* Special. */
16474 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16475 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16476 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16478 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16479 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16480 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16482 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16483 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16484 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16485 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16486 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16487 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16489 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16490 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16491 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16492 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16493 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16494 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16496 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16497 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16498 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16499 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16501 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16502 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16504 /* SSE2 */
16505 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16506 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16507 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16508 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16509 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16510 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16511 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16512 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16514 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16515 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16516 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16517 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16518 BUILTIN_DESC_SWAP_OPERANDS },
16519 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16520 BUILTIN_DESC_SWAP_OPERANDS },
16521 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16522 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16523 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16524 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16525 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16526 BUILTIN_DESC_SWAP_OPERANDS },
16527 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16528 BUILTIN_DESC_SWAP_OPERANDS },
16529 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16530 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16531 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16532 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16533 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16534 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16535 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16536 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16537 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16539 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16540 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16541 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16542 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16544 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16545 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16546 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16547 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16549 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16550 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16551 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16553 /* SSE2 MMX */
16554 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16555 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16556 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16557 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16558 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16559 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16560 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16561 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16563 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16564 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16565 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16566 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16567 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16568 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16569 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16570 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16572 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16573 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16577 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16578 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16580 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16581 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16583 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16584 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16585 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16586 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16587 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16588 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16590 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16591 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16592 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16595 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16596 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16600 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16601 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16602 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16604 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16605 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16606 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16608 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16611 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16612 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16614 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16615 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16616 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16618 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16619 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16620 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16622 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16623 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16625 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16627 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16628 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16630 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16632 /* SSE3 MMX */
16633 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16634 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16635 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16636 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16637 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16638 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16640 /* SSSE3 */
16641 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16642 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16643 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16644 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16645 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16646 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16647 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16648 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16649 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16650 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16651 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16652 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16653 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16654 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16655 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16656 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16657 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16658 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16659 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16660 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16661 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16662 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16663 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16664 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16667 static const struct builtin_description bdesc_1arg[] =
16669 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16670 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16672 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16673 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16674 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16676 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16677 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16678 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16679 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16680 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16681 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16683 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16684 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16686 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16688 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16689 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16691 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16692 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16693 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16694 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16695 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16697 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16699 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16700 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16701 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16702 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16704 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16705 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16706 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16708 /* SSE3 */
16709 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16710 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16712 /* SSSE3 */
16713 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16714 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16715 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16716 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16717 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16718 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
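/* A minimal usage sketch for the one-operand table above, assuming a
   user-side __v4si typedef (the intrinsic headers provide their own
   vector types): an entry such as the pabsd128 one wires CODE_FOR_absv4si2
   to the builtin name, so user code -- normally through tmmintrin.h --
   can write roughly:

       typedef int __v4si __attribute__ ((__vector_size__ (16)));

       __v4si
       abs_example (__v4si x)
       {
         return __builtin_ia32_pabsd128 (x);
       }
*/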
16721 static void
16722 ix86_init_builtins (void)
16724 if (TARGET_MMX)
16725 ix86_init_mmx_sse_builtins ();
16728 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16729 is zero. Otherwise, if TARGET_SSE is not set, only define the MMX
16730 builtins. */
16731 static void
16732 ix86_init_mmx_sse_builtins (void)
16734 const struct builtin_description * d;
16735 size_t i;
16737 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16738 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16739 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16740 tree V2DI_type_node
16741 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16742 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16743 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16744 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16745 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16746 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16747 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16749 tree pchar_type_node = build_pointer_type (char_type_node);
16750 tree pcchar_type_node = build_pointer_type (
16751 build_type_variant (char_type_node, 1, 0));
16752 tree pfloat_type_node = build_pointer_type (float_type_node);
16753 tree pcfloat_type_node = build_pointer_type (
16754 build_type_variant (float_type_node, 1, 0));
16755 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16756 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16757 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16759 /* Comparisons. */
16760 tree int_ftype_v4sf_v4sf
16761 = build_function_type_list (integer_type_node,
16762 V4SF_type_node, V4SF_type_node, NULL_TREE);
16763 tree v4si_ftype_v4sf_v4sf
16764 = build_function_type_list (V4SI_type_node,
16765 V4SF_type_node, V4SF_type_node, NULL_TREE);
16766 /* MMX/SSE/integer conversions. */
16767 tree int_ftype_v4sf
16768 = build_function_type_list (integer_type_node,
16769 V4SF_type_node, NULL_TREE);
16770 tree int64_ftype_v4sf
16771 = build_function_type_list (long_long_integer_type_node,
16772 V4SF_type_node, NULL_TREE);
16773 tree int_ftype_v8qi
16774 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16775 tree v4sf_ftype_v4sf_int
16776 = build_function_type_list (V4SF_type_node,
16777 V4SF_type_node, integer_type_node, NULL_TREE);
16778 tree v4sf_ftype_v4sf_int64
16779 = build_function_type_list (V4SF_type_node,
16780 V4SF_type_node, long_long_integer_type_node,
16781 NULL_TREE);
16782 tree v4sf_ftype_v4sf_v2si
16783 = build_function_type_list (V4SF_type_node,
16784 V4SF_type_node, V2SI_type_node, NULL_TREE);
16786 /* Miscellaneous. */
16787 tree v8qi_ftype_v4hi_v4hi
16788 = build_function_type_list (V8QI_type_node,
16789 V4HI_type_node, V4HI_type_node, NULL_TREE);
16790 tree v4hi_ftype_v2si_v2si
16791 = build_function_type_list (V4HI_type_node,
16792 V2SI_type_node, V2SI_type_node, NULL_TREE);
16793 tree v4sf_ftype_v4sf_v4sf_int
16794 = build_function_type_list (V4SF_type_node,
16795 V4SF_type_node, V4SF_type_node,
16796 integer_type_node, NULL_TREE);
16797 tree v2si_ftype_v4hi_v4hi
16798 = build_function_type_list (V2SI_type_node,
16799 V4HI_type_node, V4HI_type_node, NULL_TREE);
16800 tree v4hi_ftype_v4hi_int
16801 = build_function_type_list (V4HI_type_node,
16802 V4HI_type_node, integer_type_node, NULL_TREE);
16803 tree v4hi_ftype_v4hi_di
16804 = build_function_type_list (V4HI_type_node,
16805 V4HI_type_node, long_long_unsigned_type_node,
16806 NULL_TREE);
16807 tree v2si_ftype_v2si_di
16808 = build_function_type_list (V2SI_type_node,
16809 V2SI_type_node, long_long_unsigned_type_node,
16810 NULL_TREE);
16811 tree void_ftype_void
16812 = build_function_type (void_type_node, void_list_node);
16813 tree void_ftype_unsigned
16814 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16815 tree void_ftype_unsigned_unsigned
16816 = build_function_type_list (void_type_node, unsigned_type_node,
16817 unsigned_type_node, NULL_TREE);
16818 tree void_ftype_pcvoid_unsigned_unsigned
16819 = build_function_type_list (void_type_node, const_ptr_type_node,
16820 unsigned_type_node, unsigned_type_node,
16821 NULL_TREE);
16822 tree unsigned_ftype_void
16823 = build_function_type (unsigned_type_node, void_list_node);
16824 tree v2si_ftype_v4sf
16825 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16826 /* Loads/stores. */
16827 tree void_ftype_v8qi_v8qi_pchar
16828 = build_function_type_list (void_type_node,
16829 V8QI_type_node, V8QI_type_node,
16830 pchar_type_node, NULL_TREE);
16831 tree v4sf_ftype_pcfloat
16832 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16833 /* @@@ the type is bogus */
16834 tree v4sf_ftype_v4sf_pv2si
16835 = build_function_type_list (V4SF_type_node,
16836 V4SF_type_node, pv2si_type_node, NULL_TREE);
16837 tree void_ftype_pv2si_v4sf
16838 = build_function_type_list (void_type_node,
16839 pv2si_type_node, V4SF_type_node, NULL_TREE);
16840 tree void_ftype_pfloat_v4sf
16841 = build_function_type_list (void_type_node,
16842 pfloat_type_node, V4SF_type_node, NULL_TREE);
16843 tree void_ftype_pdi_di
16844 = build_function_type_list (void_type_node,
16845 pdi_type_node, long_long_unsigned_type_node,
16846 NULL_TREE);
16847 tree void_ftype_pv2di_v2di
16848 = build_function_type_list (void_type_node,
16849 pv2di_type_node, V2DI_type_node, NULL_TREE);
16850 /* Normal vector unops. */
16851 tree v4sf_ftype_v4sf
16852 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16853 tree v16qi_ftype_v16qi
16854 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16855 tree v8hi_ftype_v8hi
16856 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16857 tree v4si_ftype_v4si
16858 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16859 tree v8qi_ftype_v8qi
16860 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16861 tree v4hi_ftype_v4hi
16862 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16864 /* Normal vector binops. */
16865 tree v4sf_ftype_v4sf_v4sf
16866 = build_function_type_list (V4SF_type_node,
16867 V4SF_type_node, V4SF_type_node, NULL_TREE);
16868 tree v8qi_ftype_v8qi_v8qi
16869 = build_function_type_list (V8QI_type_node,
16870 V8QI_type_node, V8QI_type_node, NULL_TREE);
16871 tree v4hi_ftype_v4hi_v4hi
16872 = build_function_type_list (V4HI_type_node,
16873 V4HI_type_node, V4HI_type_node, NULL_TREE);
16874 tree v2si_ftype_v2si_v2si
16875 = build_function_type_list (V2SI_type_node,
16876 V2SI_type_node, V2SI_type_node, NULL_TREE);
16877 tree di_ftype_di_di
16878 = build_function_type_list (long_long_unsigned_type_node,
16879 long_long_unsigned_type_node,
16880 long_long_unsigned_type_node, NULL_TREE);
16882 tree di_ftype_di_di_int
16883 = build_function_type_list (long_long_unsigned_type_node,
16884 long_long_unsigned_type_node,
16885 long_long_unsigned_type_node,
16886 integer_type_node, NULL_TREE);
16888 tree v2si_ftype_v2sf
16889 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16890 tree v2sf_ftype_v2si
16891 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16892 tree v2si_ftype_v2si
16893 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16894 tree v2sf_ftype_v2sf
16895 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16896 tree v2sf_ftype_v2sf_v2sf
16897 = build_function_type_list (V2SF_type_node,
16898 V2SF_type_node, V2SF_type_node, NULL_TREE);
16899 tree v2si_ftype_v2sf_v2sf
16900 = build_function_type_list (V2SI_type_node,
16901 V2SF_type_node, V2SF_type_node, NULL_TREE);
16902 tree pint_type_node = build_pointer_type (integer_type_node);
16903 tree pdouble_type_node = build_pointer_type (double_type_node);
16904 tree pcdouble_type_node = build_pointer_type (
16905 build_type_variant (double_type_node, 1, 0));
16906 tree int_ftype_v2df_v2df
16907 = build_function_type_list (integer_type_node,
16908 V2DF_type_node, V2DF_type_node, NULL_TREE);
16910 tree void_ftype_pcvoid
16911 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16912 tree v4sf_ftype_v4si
16913 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16914 tree v4si_ftype_v4sf
16915 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16916 tree v2df_ftype_v4si
16917 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16918 tree v4si_ftype_v2df
16919 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16920 tree v2si_ftype_v2df
16921 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16922 tree v4sf_ftype_v2df
16923 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16924 tree v2df_ftype_v2si
16925 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16926 tree v2df_ftype_v4sf
16927 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16928 tree int_ftype_v2df
16929 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16930 tree int64_ftype_v2df
16931 = build_function_type_list (long_long_integer_type_node,
16932 V2DF_type_node, NULL_TREE);
16933 tree v2df_ftype_v2df_int
16934 = build_function_type_list (V2DF_type_node,
16935 V2DF_type_node, integer_type_node, NULL_TREE);
16936 tree v2df_ftype_v2df_int64
16937 = build_function_type_list (V2DF_type_node,
16938 V2DF_type_node, long_long_integer_type_node,
16939 NULL_TREE);
16940 tree v4sf_ftype_v4sf_v2df
16941 = build_function_type_list (V4SF_type_node,
16942 V4SF_type_node, V2DF_type_node, NULL_TREE);
16943 tree v2df_ftype_v2df_v4sf
16944 = build_function_type_list (V2DF_type_node,
16945 V2DF_type_node, V4SF_type_node, NULL_TREE);
16946 tree v2df_ftype_v2df_v2df_int
16947 = build_function_type_list (V2DF_type_node,
16948 V2DF_type_node, V2DF_type_node,
16949 integer_type_node,
16950 NULL_TREE);
16951 tree v2df_ftype_v2df_pcdouble
16952 = build_function_type_list (V2DF_type_node,
16953 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16954 tree void_ftype_pdouble_v2df
16955 = build_function_type_list (void_type_node,
16956 pdouble_type_node, V2DF_type_node, NULL_TREE);
16957 tree void_ftype_pint_int
16958 = build_function_type_list (void_type_node,
16959 pint_type_node, integer_type_node, NULL_TREE);
16960 tree void_ftype_v16qi_v16qi_pchar
16961 = build_function_type_list (void_type_node,
16962 V16QI_type_node, V16QI_type_node,
16963 pchar_type_node, NULL_TREE);
16964 tree v2df_ftype_pcdouble
16965 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16966 tree v2df_ftype_v2df_v2df
16967 = build_function_type_list (V2DF_type_node,
16968 V2DF_type_node, V2DF_type_node, NULL_TREE);
16969 tree v16qi_ftype_v16qi_v16qi
16970 = build_function_type_list (V16QI_type_node,
16971 V16QI_type_node, V16QI_type_node, NULL_TREE);
16972 tree v8hi_ftype_v8hi_v8hi
16973 = build_function_type_list (V8HI_type_node,
16974 V8HI_type_node, V8HI_type_node, NULL_TREE);
16975 tree v4si_ftype_v4si_v4si
16976 = build_function_type_list (V4SI_type_node,
16977 V4SI_type_node, V4SI_type_node, NULL_TREE);
16978 tree v2di_ftype_v2di_v2di
16979 = build_function_type_list (V2DI_type_node,
16980 V2DI_type_node, V2DI_type_node, NULL_TREE);
16981 tree v2di_ftype_v2df_v2df
16982 = build_function_type_list (V2DI_type_node,
16983 V2DF_type_node, V2DF_type_node, NULL_TREE);
16984 tree v2df_ftype_v2df
16985 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16986 tree v2di_ftype_v2di_int
16987 = build_function_type_list (V2DI_type_node,
16988 V2DI_type_node, integer_type_node, NULL_TREE);
16989 tree v2di_ftype_v2di_v2di_int
16990 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16991 V2DI_type_node, integer_type_node, NULL_TREE);
16992 tree v4si_ftype_v4si_int
16993 = build_function_type_list (V4SI_type_node,
16994 V4SI_type_node, integer_type_node, NULL_TREE);
16995 tree v8hi_ftype_v8hi_int
16996 = build_function_type_list (V8HI_type_node,
16997 V8HI_type_node, integer_type_node, NULL_TREE);
16998 tree v8hi_ftype_v8hi_v2di
16999 = build_function_type_list (V8HI_type_node,
17000 V8HI_type_node, V2DI_type_node, NULL_TREE);
17001 tree v4si_ftype_v4si_v2di
17002 = build_function_type_list (V4SI_type_node,
17003 V4SI_type_node, V2DI_type_node, NULL_TREE);
17004 tree v4si_ftype_v8hi_v8hi
17005 = build_function_type_list (V4SI_type_node,
17006 V8HI_type_node, V8HI_type_node, NULL_TREE);
17007 tree di_ftype_v8qi_v8qi
17008 = build_function_type_list (long_long_unsigned_type_node,
17009 V8QI_type_node, V8QI_type_node, NULL_TREE);
17010 tree di_ftype_v2si_v2si
17011 = build_function_type_list (long_long_unsigned_type_node,
17012 V2SI_type_node, V2SI_type_node, NULL_TREE);
17013 tree v2di_ftype_v16qi_v16qi
17014 = build_function_type_list (V2DI_type_node,
17015 V16QI_type_node, V16QI_type_node, NULL_TREE);
17016 tree v2di_ftype_v4si_v4si
17017 = build_function_type_list (V2DI_type_node,
17018 V4SI_type_node, V4SI_type_node, NULL_TREE);
17019 tree int_ftype_v16qi
17020 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17021 tree v16qi_ftype_pcchar
17022 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17023 tree void_ftype_pchar_v16qi
17024 = build_function_type_list (void_type_node,
17025 pchar_type_node, V16QI_type_node, NULL_TREE);
17027 tree v2di_ftype_v2di_unsigned_unsigned
17028 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17029 unsigned_type_node, unsigned_type_node,
17030 NULL_TREE);
17031 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17032 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17033 unsigned_type_node, unsigned_type_node,
17034 NULL_TREE);
17035 tree v2di_ftype_v2di_v16qi
17036 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17037 NULL_TREE);
17039 tree float80_type;
17040 tree float128_type;
17041 tree ftype;
17043 /* The __float80 type. */
17044 if (TYPE_MODE (long_double_type_node) == XFmode)
17045 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17046 "__float80");
17047 else
17049 /* The __float80 type. */
17050 float80_type = make_node (REAL_TYPE);
17051 TYPE_PRECISION (float80_type) = 80;
17052 layout_type (float80_type);
17053 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17056 if (TARGET_64BIT)
17058 float128_type = make_node (REAL_TYPE);
17059 TYPE_PRECISION (float128_type) = 128;
17060 layout_type (float128_type);
17061 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
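/* A usage sketch, not something defined in this file: once registered
   here, the names are directly usable in user code, e.g.
   "__float80 x = 1.0L;" and, on 64-bit targets, "__float128 y = 2.0;".  */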
17064 /* Add all builtins that are more or less simple operations on two
17065 operands. */
17066 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17068 /* Use one of the operands; the target can have a different mode for
17069 mask-generating compares. */
17070 enum machine_mode mode;
17071 tree type;
17073 if (d->name == 0)
17074 continue;
17075 mode = insn_data[d->icode].operand[1].mode;
17077 switch (mode)
17079 case V16QImode:
17080 type = v16qi_ftype_v16qi_v16qi;
17081 break;
17082 case V8HImode:
17083 type = v8hi_ftype_v8hi_v8hi;
17084 break;
17085 case V4SImode:
17086 type = v4si_ftype_v4si_v4si;
17087 break;
17088 case V2DImode:
17089 type = v2di_ftype_v2di_v2di;
17090 break;
17091 case V2DFmode:
17092 type = v2df_ftype_v2df_v2df;
17093 break;
17094 case V4SFmode:
17095 type = v4sf_ftype_v4sf_v4sf;
17096 break;
17097 case V8QImode:
17098 type = v8qi_ftype_v8qi_v8qi;
17099 break;
17100 case V4HImode:
17101 type = v4hi_ftype_v4hi_v4hi;
17102 break;
17103 case V2SImode:
17104 type = v2si_ftype_v2si_v2si;
17105 break;
17106 case DImode:
17107 type = di_ftype_di_di;
17108 break;
17110 default:
17111 gcc_unreachable ();
17114 /* Override for comparisons. */
17115 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17116 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17117 type = v4si_ftype_v4sf_v4sf;
17119 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17120 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17121 type = v2di_ftype_v2df_v2df;
17123 def_builtin (d->mask, d->name, type, d->code);
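/* A worked example of the dispatch above: for the IX86_BUILTIN_ADDSUBPS
   entry near the top of bdesc_2arg, d->icode is CODE_FOR_sse3_addsubv4sf3,
   whose operand 1 is V4SFmode, so the switch selects v4sf_ftype_v4sf_v4sf
   and the builtin is registered as, in effect,

       __v4sf __builtin_ia32_addsubps (__v4sf, __v4sf);

   Only the SSE/SSE2 mask-compare patterns are overridden, since they
   produce an integer mask vector rather than a float vector.  */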
17126 /* Add all builtins that are more or less simple operations on 1 operand. */
17127 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17129 enum machine_mode mode;
17130 tree type;
17132 if (d->name == 0)
17133 continue;
17134 mode = insn_data[d->icode].operand[1].mode;
17136 switch (mode)
17138 case V16QImode:
17139 type = v16qi_ftype_v16qi;
17140 break;
17141 case V8HImode:
17142 type = v8hi_ftype_v8hi;
17143 break;
17144 case V4SImode:
17145 type = v4si_ftype_v4si;
17146 break;
17147 case V2DFmode:
17148 type = v2df_ftype_v2df;
17149 break;
17150 case V4SFmode:
17151 type = v4sf_ftype_v4sf;
17152 break;
17153 case V8QImode:
17154 type = v8qi_ftype_v8qi;
17155 break;
17156 case V4HImode:
17157 type = v4hi_ftype_v4hi;
17158 break;
17159 case V2SImode:
17160 type = v2si_ftype_v2si;
17161 break;
17163 default:
17164 gcc_unreachable ();
17167 def_builtin (d->mask, d->name, type, d->code);
17170 /* Add the remaining MMX insns with somewhat more complicated types. */
17171 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17172 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17173 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17174 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17176 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17177 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17178 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17180 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17181 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17183 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17184 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17186 /* comi/ucomi insns. */
17187 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17188 if (d->mask == MASK_SSE2)
17189 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17190 else
17191 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17193 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17194 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17195 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17197 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17198 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17199 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17200 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17201 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17202 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17203 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17204 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17205 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17206 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17207 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17209 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17211 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17212 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17214 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17215 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17216 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17217 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17219 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17220 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17221 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17222 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17224 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17226 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17228 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17229 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17230 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17231 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17232 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17233 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17235 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17237 /* Original 3DNow! */
17238 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17239 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17240 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17241 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17242 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17243 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17244 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17245 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17246 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17256 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17257 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17259 /* 3DNow! extension as used in the Athlon CPU. */
17260 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17261 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17262 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17263 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17264 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17265 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17267 /* SSE2 */
17268 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17270 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17271 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17273 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17274 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17276 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17277 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17278 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17279 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17280 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17282 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17283 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17284 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17285 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17287 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17288 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17290 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17292 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17293 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17295 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17296 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17297 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17298 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17299 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17301 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17303 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17304 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17305 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17306 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17308 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17309 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17310 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17312 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17313 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17314 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17315 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17317 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17318 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17319 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17321 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17322 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17324 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17325 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17327 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17328 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17329 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17331 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17332 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17333 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17335 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17336 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17338 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17339 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17340 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17341 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17343 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17344 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17345 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17346 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17348 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17349 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17351 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17353 /* Prescott New Instructions. */
17354 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17355 void_ftype_pcvoid_unsigned_unsigned,
17356 IX86_BUILTIN_MONITOR);
17357 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17358 void_ftype_unsigned_unsigned,
17359 IX86_BUILTIN_MWAIT);
17360 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17361 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
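/* A sketch of the SSE3 monitor/mwait pair registered above; the argument
   values are placeholders, and the _mm_monitor/_mm_mwait names are an
   assumption about pmmintrin.h, not something defined here:

       __builtin_ia32_monitor (addr, 0, 0);   -- const void *, unsigned, unsigned
       __builtin_ia32_mwait (0, 0);           -- unsigned, unsigned
*/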
17363 /* SSSE3. */
17364 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17365 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17366 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17367 IX86_BUILTIN_PALIGNR);
17369 /* AMDFAM10 SSE4A new built-ins.  */
17370 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17371 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17372 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17373 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17374 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17375 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17376 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17377 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17378 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17379 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17380 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17381 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17383 /* Access to the vec_init patterns. */
17384 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17385 integer_type_node, NULL_TREE);
17386 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17387 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17389 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17390 short_integer_type_node,
17391 short_integer_type_node,
17392 short_integer_type_node, NULL_TREE);
17393 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17394 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17396 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17397 char_type_node, char_type_node,
17398 char_type_node, char_type_node,
17399 char_type_node, char_type_node,
17400 char_type_node, NULL_TREE);
17401 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17402 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
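/* A minimal sketch, assuming the __v2si spelling from the intrinsic
   headers: the vec_init builtins registered above are what the MMX
   intrinsic header builds vectors with, e.g.

       __v2si v = __builtin_ia32_vec_init_v2si (a, b);

   taking two ints and producing a V2SImode vector, per the ftype above.  */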
17404 /* Access to the vec_extract patterns. */
17405 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17406 integer_type_node, NULL_TREE);
17407 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17408 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17410 ftype = build_function_type_list (long_long_integer_type_node,
17411 V2DI_type_node, integer_type_node,
17412 NULL_TREE);
17413 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17414 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17416 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17417 integer_type_node, NULL_TREE);
17418 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17419 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17421 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17422 integer_type_node, NULL_TREE);
17423 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17424 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17426 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17427 integer_type_node, NULL_TREE);
17428 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17429 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17431 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17432 integer_type_node, NULL_TREE);
17433 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17434 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17436 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17437 integer_type_node, NULL_TREE);
17438 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17439 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
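/* A sketch for the vec_ext builtins just registered: each takes the vector
   and a constant element selector, e.g.

       float f = __builtin_ia32_vec_ext_v4sf (v, 0);

   get_element_number (below) is what range-checks the selector.  */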
17441 /* Access to the vec_set patterns. */
17442 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17443 intHI_type_node,
17444 integer_type_node, NULL_TREE);
17445 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17446 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17448 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17449 intHI_type_node,
17450 integer_type_node, NULL_TREE);
17451 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17452 ftype, IX86_BUILTIN_VEC_SET_V4HI);
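/* A sketch for the vec_set builtins above: the result is a new vector with
   one element replaced, e.g.

       __v8hi r = __builtin_ia32_vec_set_v8hi (v, val, 3);

   where val is a short int and the index is a constant, per the ftype
   above.  */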
17455 /* Errors in the source file can cause expand_expr to return const0_rtx
17456 where we expect a vector. To avoid crashing, use one of the vector
17457 clear instructions. */
17458 static rtx
17459 safe_vector_operand (rtx x, enum machine_mode mode)
17461 if (x == const0_rtx)
17462 x = CONST0_RTX (mode);
17463 return x;
17466 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17468 static rtx
17469 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17471 rtx pat, xops[3];
17472 tree arg0 = CALL_EXPR_ARG (exp, 0);
17473 tree arg1 = CALL_EXPR_ARG (exp, 1);
17474 rtx op0 = expand_normal (arg0);
17475 rtx op1 = expand_normal (arg1);
17476 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17477 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17478 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17480 if (VECTOR_MODE_P (mode0))
17481 op0 = safe_vector_operand (op0, mode0);
17482 if (VECTOR_MODE_P (mode1))
17483 op1 = safe_vector_operand (op1, mode1);
17485 if (optimize || !target
17486 || GET_MODE (target) != tmode
17487 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17488 target = gen_reg_rtx (tmode);
17490 if (GET_MODE (op1) == SImode && mode1 == TImode)
17492 rtx x = gen_reg_rtx (V4SImode);
17493 emit_insn (gen_sse2_loadd (x, op1));
17494 op1 = gen_lowpart (TImode, x);
17497 /* The insn must want input operands in the same modes as the
17498 result. */
17499 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17500 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17502 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17503 op0 = copy_to_mode_reg (mode0, op0);
17504 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17505 op1 = copy_to_mode_reg (mode1, op1);
17507 /* ??? Using ix86_fixup_binary_operands is problematic when
17508 we've got mismatched modes. Fake it. */
17510 xops[0] = target;
17511 xops[1] = op0;
17512 xops[2] = op1;
17514 if (tmode == mode0 && tmode == mode1)
17516 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17517 op0 = xops[1];
17518 op1 = xops[2];
17520 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17522 op0 = force_reg (mode0, op0);
17523 op1 = force_reg (mode1, op1);
17524 target = gen_reg_rtx (tmode);
17527 pat = GEN_FCN (icode) (target, op0, op1);
17528 if (! pat)
17529 return 0;
17530 emit_insn (pat);
17531 return target;
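/* An illustration of the call shape; the caller is presumably the
   bdesc_2arg loop of ix86_expand_builtin further down, which is an
   assumption about code outside this excerpt.  A two-operand builtin such
   as addsubps would be handed off roughly as

       ix86_expand_binop_builtin (CODE_FOR_sse3_addsubv4sf3, exp, target);

   and the GEN_FCN call above then emits the corresponding insn pattern,
   in effect gen_sse3_addsubv4sf3 (target, op0, op1).  */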
17534 /* Subroutine of ix86_expand_builtin to take care of stores. */
17536 static rtx
17537 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17539 rtx pat;
17540 tree arg0 = CALL_EXPR_ARG (exp, 0);
17541 tree arg1 = CALL_EXPR_ARG (exp, 1);
17542 rtx op0 = expand_normal (arg0);
17543 rtx op1 = expand_normal (arg1);
17544 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17545 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17547 if (VECTOR_MODE_P (mode1))
17548 op1 = safe_vector_operand (op1, mode1);
17550 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17551 op1 = copy_to_mode_reg (mode1, op1);
17553 pat = GEN_FCN (icode) (op0, op1);
17554 if (pat)
17555 emit_insn (pat);
17556 return 0;
17559 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17561 static rtx
17562 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17563 rtx target, int do_load)
17565 rtx pat;
17566 tree arg0 = CALL_EXPR_ARG (exp, 0);
17567 rtx op0 = expand_normal (arg0);
17568 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17569 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17571 if (optimize || !target
17572 || GET_MODE (target) != tmode
17573 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17574 target = gen_reg_rtx (tmode);
17575 if (do_load)
17576 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17577 else
17579 if (VECTOR_MODE_P (mode0))
17580 op0 = safe_vector_operand (op0, mode0);
17582 if ((optimize && !register_operand (op0, mode0))
17583 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17584 op0 = copy_to_mode_reg (mode0, op0);
17587 pat = GEN_FCN (icode) (target, op0);
17588 if (! pat)
17589 return 0;
17590 emit_insn (pat);
17591 return target;
17594 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17595 sqrtss, rsqrtss, rcpss. */
17597 static rtx
17598 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17600 rtx pat;
17601 tree arg0 = CALL_EXPR_ARG (exp, 0);
17602 rtx op1, op0 = expand_normal (arg0);
17603 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17604 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17606 if (optimize || !target
17607 || GET_MODE (target) != tmode
17608 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17609 target = gen_reg_rtx (tmode);
17611 if (VECTOR_MODE_P (mode0))
17612 op0 = safe_vector_operand (op0, mode0);
17614 if ((optimize && !register_operand (op0, mode0))
17615 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17616 op0 = copy_to_mode_reg (mode0, op0);
17618 op1 = op0;
17619 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17620 op1 = copy_to_mode_reg (mode0, op1);
17622 pat = GEN_FCN (icode) (target, op0, op1);
17623 if (! pat)
17624 return 0;
17625 emit_insn (pat);
17626 return target;
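/* A note on the op1 = op0 step above: the scalar patterns receive the same
   source twice, so for IX86_BUILTIN_SQRTSS (handled further down) the
   expansion amounts to, in effect,

       gen_sse_vmsqrtv4sf2 (target, op0, op0);

   which also supplies the input vector as the merge operand for the upper
   elements of the result.  */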
17629 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17631 static rtx
17632 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17633 rtx target)
17635 rtx pat;
17636 tree arg0 = CALL_EXPR_ARG (exp, 0);
17637 tree arg1 = CALL_EXPR_ARG (exp, 1);
17638 rtx op0 = expand_normal (arg0);
17639 rtx op1 = expand_normal (arg1);
17640 rtx op2;
17641 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17642 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17643 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17644 enum rtx_code comparison = d->comparison;
17646 if (VECTOR_MODE_P (mode0))
17647 op0 = safe_vector_operand (op0, mode0);
17648 if (VECTOR_MODE_P (mode1))
17649 op1 = safe_vector_operand (op1, mode1);
17651 /* Swap operands if we have a comparison that isn't available in
17652 hardware. */
17653 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17655 rtx tmp = gen_reg_rtx (mode1);
17656 emit_move_insn (tmp, op1);
17657 op1 = op0;
17658 op0 = tmp;
17661 if (optimize || !target
17662 || GET_MODE (target) != tmode
17663 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17664 target = gen_reg_rtx (tmode);
17666 if ((optimize && !register_operand (op0, mode0))
17667 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17668 op0 = copy_to_mode_reg (mode0, op0);
17669 if ((optimize && !register_operand (op1, mode1))
17670 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17671 op1 = copy_to_mode_reg (mode1, op1);
17673 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17674 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17675 if (! pat)
17676 return 0;
17677 emit_insn (pat);
17678 return target;
17681 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17683 static rtx
17684 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17685 rtx target)
17687 rtx pat;
17688 tree arg0 = CALL_EXPR_ARG (exp, 0);
17689 tree arg1 = CALL_EXPR_ARG (exp, 1);
17690 rtx op0 = expand_normal (arg0);
17691 rtx op1 = expand_normal (arg1);
17692 rtx op2;
17693 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17694 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17695 enum rtx_code comparison = d->comparison;
17697 if (VECTOR_MODE_P (mode0))
17698 op0 = safe_vector_operand (op0, mode0);
17699 if (VECTOR_MODE_P (mode1))
17700 op1 = safe_vector_operand (op1, mode1);
17702 /* Swap operands if we have a comparison that isn't available in
17703 hardware. */
17704 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17706 rtx tmp = op1;
17707 op1 = op0;
17708 op0 = tmp;
17711 target = gen_reg_rtx (SImode);
17712 emit_move_insn (target, const0_rtx);
17713 target = gen_rtx_SUBREG (QImode, target, 0);
17715 if ((optimize && !register_operand (op0, mode0))
17716 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17717 op0 = copy_to_mode_reg (mode0, op0);
17718 if ((optimize && !register_operand (op1, mode1))
17719 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17720 op1 = copy_to_mode_reg (mode1, op1);
17722 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17723 pat = GEN_FCN (d->icode) (op0, op1);
17724 if (! pat)
17725 return 0;
17726 emit_insn (pat);
17727 emit_insn (gen_rtx_SET (VOIDmode,
17728 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17729 gen_rtx_fmt_ee (comparison, QImode,
17730 SET_DEST (pat),
17731 const0_rtx)));
17733 return SUBREG_REG (target);
17736 /* Return the integer constant in ARG. Constrain it to be in the range
17737 of the subparts of VEC_TYPE; issue an error if not. */
17739 static int
17740 get_element_number (tree vec_type, tree arg)
17742 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17744 if (!host_integerp (arg, 1)
17745 || (elt = tree_low_cst (arg, 1), elt > max))
17747 error ("selector must be an integer constant in the range 0..%wi", max);
17748 return 0;
17751 return elt;
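/* A worked example: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so max
   is 3; a call such as __builtin_ia32_vec_ext_v4sf (x, 7) trips the
   "selector must be an integer constant in the range 0..3" error and
   element 0 is used instead.  */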
17754 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17755 ix86_expand_vector_init. We DO have language-level syntax for this, in
17756 the form of (type){ init-list }. Except that since we can't place emms
17757 instructions from inside the compiler, we can't allow the use of MMX
17758 registers unless the user explicitly asks for it. So we do *not* define
17759 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17760 we have builtins invoked by mmintrin.h that give us license to emit
17761 these sorts of instructions. */
17763 static rtx
17764 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17766 enum machine_mode tmode = TYPE_MODE (type);
17767 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17768 int i, n_elt = GET_MODE_NUNITS (tmode);
17769 rtvec v = rtvec_alloc (n_elt);
17771 gcc_assert (VECTOR_MODE_P (tmode));
17772 gcc_assert (call_expr_nargs (exp) == n_elt);
17774 for (i = 0; i < n_elt; ++i)
17776 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17777 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17780 if (!target || !register_operand (target, tmode))
17781 target = gen_reg_rtx (tmode);
17783 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17784 return target;
17787 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17788 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17789 had a language-level syntax for referencing vector elements. */
17791 static rtx
17792 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17794 enum machine_mode tmode, mode0;
17795 tree arg0, arg1;
17796 int elt;
17797 rtx op0;
17799 arg0 = CALL_EXPR_ARG (exp, 0);
17800 arg1 = CALL_EXPR_ARG (exp, 1);
17802 op0 = expand_normal (arg0);
17803 elt = get_element_number (TREE_TYPE (arg0), arg1);
17805 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17806 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17807 gcc_assert (VECTOR_MODE_P (mode0));
17809 op0 = force_reg (mode0, op0);
17811 if (optimize || !target || !register_operand (target, tmode))
17812 target = gen_reg_rtx (tmode);
17814 ix86_expand_vector_extract (true, target, op0, elt);
17816 return target;
17819 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17820 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17821 a language-level syntax for referencing vector elements. */
17823 static rtx
17824 ix86_expand_vec_set_builtin (tree exp)
17826 enum machine_mode tmode, mode1;
17827 tree arg0, arg1, arg2;
17828 int elt;
17829 rtx op0, op1;
17831 arg0 = CALL_EXPR_ARG (exp, 0);
17832 arg1 = CALL_EXPR_ARG (exp, 1);
17833 arg2 = CALL_EXPR_ARG (exp, 2);
17835 tmode = TYPE_MODE (TREE_TYPE (arg0));
17836 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17837 gcc_assert (VECTOR_MODE_P (tmode));
17839 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17840 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17841 elt = get_element_number (TREE_TYPE (arg0), arg2);
17843 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17844 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17846 op0 = force_reg (tmode, op0);
17847 op1 = force_reg (mode1, op1);
17849 ix86_expand_vector_set (true, op0, op1, elt);
17851 return op0;
17854 /* Expand an expression EXP that calls a built-in function,
17855 with result going to TARGET if that's convenient
17856 (and in mode MODE if that's convenient).
17857 SUBTARGET may be used as the target for computing one of EXP's operands.
17858 IGNORE is nonzero if the value is to be ignored. */
17860 static rtx
17861 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17862 enum machine_mode mode ATTRIBUTE_UNUSED,
17863 int ignore ATTRIBUTE_UNUSED)
17865 const struct builtin_description *d;
17866 size_t i;
17867 enum insn_code icode;
17868 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17869 tree arg0, arg1, arg2, arg3;
17870 rtx op0, op1, op2, op3, pat;
17871 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17872 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17874 switch (fcode)
17876 case IX86_BUILTIN_EMMS:
17877 emit_insn (gen_mmx_emms ());
17878 return 0;
17880 case IX86_BUILTIN_SFENCE:
17881 emit_insn (gen_sse_sfence ());
17882 return 0;
17884 case IX86_BUILTIN_MASKMOVQ:
17885 case IX86_BUILTIN_MASKMOVDQU:
17886 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17887 ? CODE_FOR_mmx_maskmovq
17888 : CODE_FOR_sse2_maskmovdqu);
17889 /* Note the arg order is different from the operand order. */
17890 arg1 = CALL_EXPR_ARG (exp, 0);
17891 arg2 = CALL_EXPR_ARG (exp, 1);
17892 arg0 = CALL_EXPR_ARG (exp, 2);
17893 op0 = expand_normal (arg0);
17894 op1 = expand_normal (arg1);
17895 op2 = expand_normal (arg2);
17896 mode0 = insn_data[icode].operand[0].mode;
17897 mode1 = insn_data[icode].operand[1].mode;
17898 mode2 = insn_data[icode].operand[2].mode;
17900 op0 = force_reg (Pmode, op0);
17901 op0 = gen_rtx_MEM (mode1, op0);
17903 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17904 op0 = copy_to_mode_reg (mode0, op0);
17905 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17906 op1 = copy_to_mode_reg (mode1, op1);
17907 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17908 op2 = copy_to_mode_reg (mode2, op2);
17909 pat = GEN_FCN (icode) (op0, op1, op2);
17910 if (! pat)
17911 return 0;
17912 emit_insn (pat);
17913 return 0;
17915 case IX86_BUILTIN_SQRTSS:
17916 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17917 case IX86_BUILTIN_RSQRTSS:
17918 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17919 case IX86_BUILTIN_RCPSS:
17920 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17922 case IX86_BUILTIN_LOADUPS:
17923 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17925 case IX86_BUILTIN_STOREUPS:
17926 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17928 case IX86_BUILTIN_LOADHPS:
17929 case IX86_BUILTIN_LOADLPS:
17930 case IX86_BUILTIN_LOADHPD:
17931 case IX86_BUILTIN_LOADLPD:
17932 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17933 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17934 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17935 : CODE_FOR_sse2_loadlpd);
17936 arg0 = CALL_EXPR_ARG (exp, 0);
17937 arg1 = CALL_EXPR_ARG (exp, 1);
17938 op0 = expand_normal (arg0);
17939 op1 = expand_normal (arg1);
17940 tmode = insn_data[icode].operand[0].mode;
17941 mode0 = insn_data[icode].operand[1].mode;
17942 mode1 = insn_data[icode].operand[2].mode;
17944 op0 = force_reg (mode0, op0);
17945 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17946 if (optimize || target == 0
17947 || GET_MODE (target) != tmode
17948 || !register_operand (target, tmode))
17949 target = gen_reg_rtx (tmode);
17950 pat = GEN_FCN (icode) (target, op0, op1);
17951 if (! pat)
17952 return 0;
17953 emit_insn (pat);
17954 return target;
17956 case IX86_BUILTIN_STOREHPS:
17957 case IX86_BUILTIN_STORELPS:
17958 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17959 : CODE_FOR_sse_storelps);
17960 arg0 = CALL_EXPR_ARG (exp, 0);
17961 arg1 = CALL_EXPR_ARG (exp, 1);
17962 op0 = expand_normal (arg0);
17963 op1 = expand_normal (arg1);
17964 mode0 = insn_data[icode].operand[0].mode;
17965 mode1 = insn_data[icode].operand[1].mode;
17967 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17968 op1 = force_reg (mode1, op1);
17970 pat = GEN_FCN (icode) (op0, op1);
17971 if (! pat)
17972 return 0;
17973 emit_insn (pat);
17974 return const0_rtx;
17976 case IX86_BUILTIN_MOVNTPS:
17977 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17978 case IX86_BUILTIN_MOVNTQ:
17979 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17981 case IX86_BUILTIN_LDMXCSR:
17982 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17983 target = assign_386_stack_local (SImode, SLOT_TEMP);
17984 emit_move_insn (target, op0);
17985 emit_insn (gen_sse_ldmxcsr (target));
17986 return 0;
17988 case IX86_BUILTIN_STMXCSR:
17989 target = assign_386_stack_local (SImode, SLOT_TEMP);
17990 emit_insn (gen_sse_stmxcsr (target));
17991 return copy_to_mode_reg (SImode, target);
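/* A sketch of the round trip these two cases implement for user code; the
   flush-to-zero bit position (bit 15) is stated here as an assumption, not
   something this file defines:

       unsigned int mxcsr = __builtin_ia32_stmxcsr ();
       __builtin_ia32_ldmxcsr (mxcsr | 0x8000);   -- e.g. enable flush-to-zero
*/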
17993 case IX86_BUILTIN_SHUFPS:
17994 case IX86_BUILTIN_SHUFPD:
17995 icode = (fcode == IX86_BUILTIN_SHUFPS
17996 ? CODE_FOR_sse_shufps
17997 : CODE_FOR_sse2_shufpd);
17998 arg0 = CALL_EXPR_ARG (exp, 0);
17999 arg1 = CALL_EXPR_ARG (exp, 1);
18000 arg2 = CALL_EXPR_ARG (exp, 2);
18001 op0 = expand_normal (arg0);
18002 op1 = expand_normal (arg1);
18003 op2 = expand_normal (arg2);
18004 tmode = insn_data[icode].operand[0].mode;
18005 mode0 = insn_data[icode].operand[1].mode;
18006 mode1 = insn_data[icode].operand[2].mode;
18007 mode2 = insn_data[icode].operand[3].mode;
18009 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18010 op0 = copy_to_mode_reg (mode0, op0);
18011 if ((optimize && !register_operand (op1, mode1))
18012 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18013 op1 = copy_to_mode_reg (mode1, op1);
18014 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18016 /* @@@ better error message */
18017 error ("mask must be an immediate");
18018 return gen_reg_rtx (tmode);
18020 if (optimize || target == 0
18021 || GET_MODE (target) != tmode
18022 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18023 target = gen_reg_rtx (tmode);
18024 pat = GEN_FCN (icode) (target, op0, op1, op2);
18025 if (! pat)
18026 return 0;
18027 emit_insn (pat);
18028 return target;
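      /* Illustrative sketch, not part of the expander: the mask operand of
         the SHUFPS/SHUFPD builtins must fold to a constant, otherwise the
         "mask must be an immediate" error above triggers.  A typical use,
         assuming the usual <xmmintrin.h> definitions, is

             __m128 pick_elements (__m128 a, __m128 b)
             {
               return _mm_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0));
             }

         where _MM_SHUFFLE (3, 2, 1, 0) expands to the immediate 0xe4.  */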
18030 case IX86_BUILTIN_PSHUFW:
18031 case IX86_BUILTIN_PSHUFD:
18032 case IX86_BUILTIN_PSHUFHW:
18033 case IX86_BUILTIN_PSHUFLW:
18034 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18035 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18036 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18037 : CODE_FOR_mmx_pshufw);
18038 arg0 = CALL_EXPR_ARG (exp, 0);
18039 arg1 = CALL_EXPR_ARG (exp, 1);
18040 op0 = expand_normal (arg0);
18041 op1 = expand_normal (arg1);
18042 tmode = insn_data[icode].operand[0].mode;
18043 mode1 = insn_data[icode].operand[1].mode;
18044 mode2 = insn_data[icode].operand[2].mode;
18046 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18047 op0 = copy_to_mode_reg (mode1, op0);
18048 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18050 /* @@@ better error message */
18051 error ("mask must be an immediate");
18052 return const0_rtx;
18054 if (target == 0
18055 || GET_MODE (target) != tmode
18056 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18057 target = gen_reg_rtx (tmode);
18058 pat = GEN_FCN (icode) (target, op0, op1);
18059 if (! pat)
18060 return 0;
18061 emit_insn (pat);
18062 return target;
18064 case IX86_BUILTIN_PSLLDQI128:
18065 case IX86_BUILTIN_PSRLDQI128:
18066 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18067 : CODE_FOR_sse2_lshrti3);
18068 arg0 = CALL_EXPR_ARG (exp, 0);
18069 arg1 = CALL_EXPR_ARG (exp, 1);
18070 op0 = expand_normal (arg0);
18071 op1 = expand_normal (arg1);
18072 tmode = insn_data[icode].operand[0].mode;
18073 mode1 = insn_data[icode].operand[1].mode;
18074 mode2 = insn_data[icode].operand[2].mode;
18076 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18078 op0 = copy_to_reg (op0);
18079 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18081 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18083 error ("shift must be an immediate");
18084 return const0_rtx;
18086 target = gen_reg_rtx (V2DImode);
18087 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18088 if (! pat)
18089 return 0;
18090 emit_insn (pat);
18091 return target;
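      /* Illustrative sketch, not part of the expander: the PSLLDQI128 and
         PSRLDQI128 builtins shift a whole 128-bit value and take the count
         in bits, which is why the <emmintrin.h> byte-shift intrinsic is
         usually defined along the lines of

             #define _mm_slli_si128(A, N) \
               ((__m128i) __builtin_ia32_pslldqi128 ((__m128i)(A), (N) * 8))

         so a non-constant N ends up in the "shift must be an immediate"
         error above.  */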
18093 case IX86_BUILTIN_FEMMS:
18094 emit_insn (gen_mmx_femms ());
18095 return NULL_RTX;
18097 case IX86_BUILTIN_PAVGUSB:
18098 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18100 case IX86_BUILTIN_PF2ID:
18101 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18103 case IX86_BUILTIN_PFACC:
18104 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18106 case IX86_BUILTIN_PFADD:
18107 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18109 case IX86_BUILTIN_PFCMPEQ:
18110 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18112 case IX86_BUILTIN_PFCMPGE:
18113 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18115 case IX86_BUILTIN_PFCMPGT:
18116 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18118 case IX86_BUILTIN_PFMAX:
18119 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18121 case IX86_BUILTIN_PFMIN:
18122 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18124 case IX86_BUILTIN_PFMUL:
18125 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18127 case IX86_BUILTIN_PFRCP:
18128 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18130 case IX86_BUILTIN_PFRCPIT1:
18131 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18133 case IX86_BUILTIN_PFRCPIT2:
18134 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18136 case IX86_BUILTIN_PFRSQIT1:
18137 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18139 case IX86_BUILTIN_PFRSQRT:
18140 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18142 case IX86_BUILTIN_PFSUB:
18143 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18145 case IX86_BUILTIN_PFSUBR:
18146 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18148 case IX86_BUILTIN_PI2FD:
18149 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18151 case IX86_BUILTIN_PMULHRW:
18152 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18154 case IX86_BUILTIN_PF2IW:
18155 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18157 case IX86_BUILTIN_PFNACC:
18158 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18160 case IX86_BUILTIN_PFPNACC:
18161 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18163 case IX86_BUILTIN_PI2FW:
18164 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18166 case IX86_BUILTIN_PSWAPDSI:
18167 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18169 case IX86_BUILTIN_PSWAPDSF:
18170 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18172 case IX86_BUILTIN_SQRTSD:
18173 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18174 case IX86_BUILTIN_LOADUPD:
18175 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18176 case IX86_BUILTIN_STOREUPD:
18177 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18179 case IX86_BUILTIN_MFENCE:
18180 emit_insn (gen_sse2_mfence ());
18181 return 0;
18182 case IX86_BUILTIN_LFENCE:
18183 emit_insn (gen_sse2_lfence ());
18184 return 0;
18186 case IX86_BUILTIN_CLFLUSH:
18187 arg0 = CALL_EXPR_ARG (exp, 0);
18188 op0 = expand_normal (arg0);
18189 icode = CODE_FOR_sse2_clflush;
18190 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18191 op0 = copy_to_mode_reg (Pmode, op0);
18193 emit_insn (gen_sse2_clflush (op0));
18194 return 0;
18196 case IX86_BUILTIN_MOVNTPD:
18197 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18198 case IX86_BUILTIN_MOVNTDQ:
18199 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18200 case IX86_BUILTIN_MOVNTI:
18201 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18203 case IX86_BUILTIN_LOADDQU:
18204 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18205 case IX86_BUILTIN_STOREDQU:
18206 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18208 case IX86_BUILTIN_MONITOR:
18209 arg0 = CALL_EXPR_ARG (exp, 0);
18210 arg1 = CALL_EXPR_ARG (exp, 1);
18211 arg2 = CALL_EXPR_ARG (exp, 2);
18212 op0 = expand_normal (arg0);
18213 op1 = expand_normal (arg1);
18214 op2 = expand_normal (arg2);
18215 if (!REG_P (op0))
18216 op0 = copy_to_mode_reg (Pmode, op0);
18217 if (!REG_P (op1))
18218 op1 = copy_to_mode_reg (SImode, op1);
18219 if (!REG_P (op2))
18220 op2 = copy_to_mode_reg (SImode, op2);
18221 if (!TARGET_64BIT)
18222 emit_insn (gen_sse3_monitor (op0, op1, op2));
18223 else
18224 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18225 return 0;
18227 case IX86_BUILTIN_MWAIT:
18228 arg0 = CALL_EXPR_ARG (exp, 0);
18229 arg1 = CALL_EXPR_ARG (exp, 1);
18230 op0 = expand_normal (arg0);
18231 op1 = expand_normal (arg1);
18232 if (!REG_P (op0))
18233 op0 = copy_to_mode_reg (SImode, op0);
18234 if (!REG_P (op1))
18235 op1 = copy_to_mode_reg (SImode, op1);
18236 emit_insn (gen_sse3_mwait (op0, op1));
18237 return 0;
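      /* Illustrative sketch, not part of the expander: MONITOR and MWAIT are
         normally used as a pair through the <pmmintrin.h> wrappers, roughly

             static __inline void
             _mm_monitor (void const *P, unsigned int E, unsigned int H)
             {
               __builtin_ia32_monitor (P, E, H);
             }

             static __inline void
             _mm_mwait (unsigned int E, unsigned int H)
             {
               __builtin_ia32_mwait (E, H);
             }

         The address operand is forced into a Pmode register (hence the
         gen_sse3_monitor64 variant above), while the extension and hint
         operands are SImode.  */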
18239 case IX86_BUILTIN_LDDQU:
18240 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18241 target, 1);
18243 case IX86_BUILTIN_PALIGNR:
18244 case IX86_BUILTIN_PALIGNR128:
18245 if (fcode == IX86_BUILTIN_PALIGNR)
18247 icode = CODE_FOR_ssse3_palignrdi;
18248 mode = DImode;
18250 else
18252 icode = CODE_FOR_ssse3_palignrti;
18253 mode = V2DImode;
18255 arg0 = CALL_EXPR_ARG (exp, 0);
18256 arg1 = CALL_EXPR_ARG (exp, 1);
18257 arg2 = CALL_EXPR_ARG (exp, 2);
18258 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18259 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18260 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18261 tmode = insn_data[icode].operand[0].mode;
18262 mode1 = insn_data[icode].operand[1].mode;
18263 mode2 = insn_data[icode].operand[2].mode;
18264 mode3 = insn_data[icode].operand[3].mode;
18266 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18268 op0 = copy_to_reg (op0);
18269 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18271 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18273 op1 = copy_to_reg (op1);
18274 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18276 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18278 error ("shift must be an immediate");
18279 return const0_rtx;
18281 target = gen_reg_rtx (mode);
18282 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18283 op0, op1, op2);
18284 if (! pat)
18285 return 0;
18286 emit_insn (pat);
18287 return target;
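      /* Illustrative sketch, not part of the expander: like the PSxLDQ
         builtins, PALIGNR takes its shift operand in bits, so the
         <tmmintrin.h> intrinsic is usually defined roughly as

             #define _mm_alignr_epi8(X, Y, N) \
               ((__m128i) __builtin_ia32_palignr128 ((__v2di)(X), \
                                                     (__v2di)(Y), (N) * 8))

         and a non-constant N hits the "shift must be an immediate" error
         above.  */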
18289 case IX86_BUILTIN_MOVNTSD:
18290 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18292 case IX86_BUILTIN_MOVNTSS:
18293 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18295 case IX86_BUILTIN_INSERTQ:
18296 case IX86_BUILTIN_EXTRQ:
18297 icode = (fcode == IX86_BUILTIN_EXTRQ
18298 ? CODE_FOR_sse4a_extrq
18299 : CODE_FOR_sse4a_insertq);
18300 arg0 = CALL_EXPR_ARG (exp, 0);
18301 arg1 = CALL_EXPR_ARG (exp, 1);
18302 op0 = expand_normal (arg0);
18303 op1 = expand_normal (arg1);
18304 tmode = insn_data[icode].operand[0].mode;
18305 mode1 = insn_data[icode].operand[1].mode;
18306 mode2 = insn_data[icode].operand[2].mode;
18307 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18308 op0 = copy_to_mode_reg (mode1, op0);
18309 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18310 op1 = copy_to_mode_reg (mode2, op1);
18311 if (optimize || target == 0
18312 || GET_MODE (target) != tmode
18313 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18314 target = gen_reg_rtx (tmode);
18315 pat = GEN_FCN (icode) (target, op0, op1);
18316 if (! pat)
18317 return NULL_RTX;
18318 emit_insn (pat);
18319 return target;
18321 case IX86_BUILTIN_EXTRQI:
18322 icode = CODE_FOR_sse4a_extrqi;
18323 arg0 = CALL_EXPR_ARG (exp, 0);
18324 arg1 = CALL_EXPR_ARG (exp, 1);
18325 arg2 = CALL_EXPR_ARG (exp, 2);
18326 op0 = expand_normal (arg0);
18327 op1 = expand_normal (arg1);
18328 op2 = expand_normal (arg2);
18329 tmode = insn_data[icode].operand[0].mode;
18330 mode1 = insn_data[icode].operand[1].mode;
18331 mode2 = insn_data[icode].operand[2].mode;
18332 mode3 = insn_data[icode].operand[3].mode;
18333 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18334 op0 = copy_to_mode_reg (mode1, op0);
18335 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18337 error ("index mask must be an immediate");
18338 return gen_reg_rtx (tmode);
18340 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18342 error ("length mask must be an immediate");
18343 return gen_reg_rtx (tmode);
18345 if (optimize || target == 0
18346 || GET_MODE (target) != tmode
18347 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18348 target = gen_reg_rtx (tmode);
18349 pat = GEN_FCN (icode) (target, op0, op1, op2);
18350 if (! pat)
18351 return NULL_RTX;
18352 emit_insn (pat);
18353 return target;
18355 case IX86_BUILTIN_INSERTQI:
18356 icode = CODE_FOR_sse4a_insertqi;
18357 arg0 = CALL_EXPR_ARG (exp, 0);
18358 arg1 = CALL_EXPR_ARG (exp, 1);
18359 arg2 = CALL_EXPR_ARG (exp, 2);
18360 arg3 = CALL_EXPR_ARG (exp, 3);
18361 op0 = expand_normal (arg0);
18362 op1 = expand_normal (arg1);
18363 op2 = expand_normal (arg2);
18364 op3 = expand_normal (arg3);
18365 tmode = insn_data[icode].operand[0].mode;
18366 mode1 = insn_data[icode].operand[1].mode;
18367 mode2 = insn_data[icode].operand[2].mode;
18368 mode3 = insn_data[icode].operand[3].mode;
18369 mode4 = insn_data[icode].operand[4].mode;
18371 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18372 op0 = copy_to_mode_reg (mode1, op0);
18374 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18375 op1 = copy_to_mode_reg (mode2, op1);
18377 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18379 error ("index mask must be an immediate");
18380 return gen_reg_rtx (tmode);
18382 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18384 error ("length mask must be an immediate");
18385 return gen_reg_rtx (tmode);
18387 if (optimize || target == 0
18388 || GET_MODE (target) != tmode
18389 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18390 target = gen_reg_rtx (tmode);
18391 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18392 if (! pat)
18393 return NULL_RTX;
18394 emit_insn (pat);
18395 return target;
18397 case IX86_BUILTIN_VEC_INIT_V2SI:
18398 case IX86_BUILTIN_VEC_INIT_V4HI:
18399 case IX86_BUILTIN_VEC_INIT_V8QI:
18400 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18402 case IX86_BUILTIN_VEC_EXT_V2DF:
18403 case IX86_BUILTIN_VEC_EXT_V2DI:
18404 case IX86_BUILTIN_VEC_EXT_V4SF:
18405 case IX86_BUILTIN_VEC_EXT_V4SI:
18406 case IX86_BUILTIN_VEC_EXT_V8HI:
18407 case IX86_BUILTIN_VEC_EXT_V2SI:
18408 case IX86_BUILTIN_VEC_EXT_V4HI:
18409 return ix86_expand_vec_ext_builtin (exp, target);
18411 case IX86_BUILTIN_VEC_SET_V8HI:
18412 case IX86_BUILTIN_VEC_SET_V4HI:
18413 return ix86_expand_vec_set_builtin (exp);
18415 default:
18416 break;
18419 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18420 if (d->code == fcode)
18422 /* Compares are treated specially. */
18423 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18424 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18425 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18426 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18427 return ix86_expand_sse_compare (d, exp, target);
18429 return ix86_expand_binop_builtin (d->icode, exp, target);
18432 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18433 if (d->code == fcode)
18434 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18436 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18437 if (d->code == fcode)
18438 return ix86_expand_sse_comi (d, exp, target);
18440 gcc_unreachable ();
18443 /* Returns a function decl for a vectorized version of the builtin function
18444 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18445 if it is not available. */
18447 static tree
18448 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18449 tree type_in)
18451 enum machine_mode in_mode, out_mode;
18452 int in_n, out_n;
18454 if (TREE_CODE (type_out) != VECTOR_TYPE
18455 || TREE_CODE (type_in) != VECTOR_TYPE)
18456 return NULL_TREE;
18458 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18459 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18460 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18461 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18463 switch (fn)
18465 case BUILT_IN_SQRT:
18466 if (out_mode == DFmode && out_n == 2
18467 && in_mode == DFmode && in_n == 2)
18468 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18469 return NULL_TREE;
18471 case BUILT_IN_SQRTF:
18472 if (out_mode == SFmode && out_n == 4
18473 && in_mode == SFmode && in_n == 4)
18474 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18475 return NULL_TREE;
18477 case BUILT_IN_LRINTF:
18478 if (out_mode == SImode && out_n == 4
18479 && in_mode == SFmode && in_n == 4)
18480 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18481 return NULL_TREE;
18483 default:
18487 return NULL_TREE;
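   /* Illustrative sketch, not part of this hook: given a loop such as

          void f (double *a, const double *b, int n)
          {
            int i;
            for (i = 0; i < n; i++)
              a[i] = __builtin_sqrt (b[i]);
          }

      compiled with the vectorizer enabled and SSE2 math, the vectorizer is
      expected to query this hook with FN == BUILT_IN_SQRT and V2DF types
      and to receive the IX86_BUILTIN_SQRTPD decl in return, so the scalar
      sqrt calls become sqrtpd.  */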
18490 /* Returns a decl of a function that implements conversion of the
18491 input vector of type TYPE, or NULL_TREE if it is not available. */
18493 static tree
18494 ix86_builtin_conversion (enum tree_code code, tree type)
18496 if (TREE_CODE (type) != VECTOR_TYPE)
18497 return NULL_TREE;
18499 switch (code)
18501 case FLOAT_EXPR:
18502 switch (TYPE_MODE (type))
18504 case V4SImode:
18505 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18506 default:
18507 return NULL_TREE;
18510 case FIX_TRUNC_EXPR:
18511 switch (TYPE_MODE (type))
18513 case V4SFmode:
18514 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18515 default:
18516 return NULL_TREE;
18518 default:
18519 return NULL_TREE;
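   /* Illustrative sketch, not part of this hook: an int-to-float conversion
      loop such as

          void f (float *a, const int *b, int n)
          {
            int i;
            for (i = 0; i < n; i++)
              a[i] = b[i];
          }

      gives the vectorizer a FLOAT_EXPR on a V4SI operand, for which this
      hook returns the IX86_BUILTIN_CVTDQ2PS decl (cvtdq2ps); the reverse
      float-to-int truncation is handled by IX86_BUILTIN_CVTTPS2DQ.  */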
18524 /* Store OPERAND to the memory after reload is completed. This means
18525 that we can't easily use assign_stack_local. */
18527 rtx ix86_force_to_memory (enum machine_mode mode, rtx operand)
18529 rtx result;
18531 gcc_assert (reload_completed);
18532 if (TARGET_RED_ZONE)
18534 result = gen_rtx_MEM (mode,
18535 gen_rtx_PLUS (Pmode,
18536 stack_pointer_rtx,
18537 GEN_INT (-RED_ZONE_SIZE)));
18538 emit_move_insn (result, operand);
18540 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18542 switch (mode)
18544 case HImode:
18545 case SImode:
18546 operand = gen_lowpart (DImode, operand);
18547 /* FALLTHRU */
18548 case DImode:
18549 emit_insn (
18550 gen_rtx_SET (VOIDmode,
18551 gen_rtx_MEM (DImode,
18552 gen_rtx_PRE_DEC (DImode,
18553 stack_pointer_rtx)),
18554 operand));
18555 break;
18556 default:
18557 gcc_unreachable ();
18559 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18561 else
18563 switch (mode)
18565 case DImode:
18567 rtx operands[2];
18568 split_di (&operand, 1, operands, operands + 1);
18569 emit_insn (
18570 gen_rtx_SET (VOIDmode,
18571 gen_rtx_MEM (SImode,
18572 gen_rtx_PRE_DEC (Pmode,
18573 stack_pointer_rtx)),
18574 operands[1]));
18575 emit_insn (
18576 gen_rtx_SET (VOIDmode,
18577 gen_rtx_MEM (SImode,
18578 gen_rtx_PRE_DEC (Pmode,
18579 stack_pointer_rtx)),
18580 operands[0]));
18582 break;
18583 case HImode:
18584 /* Store HImodes as SImodes. */
18585 operand = gen_lowpart (SImode, operand);
18586 /* FALLTHRU */
18587 case SImode:
18588 emit_insn (
18589 gen_rtx_SET (VOIDmode,
18590 gen_rtx_MEM (GET_MODE (operand),
18591 gen_rtx_PRE_DEC (SImode,
18592 stack_pointer_rtx)),
18593 operand));
18594 break;
18595 default:
18596 gcc_unreachable ();
18598 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18600 return result;
18603 /* Free operand from the memory. */
18604 void
18605 ix86_free_from_memory (enum machine_mode mode)
18607 if (!TARGET_RED_ZONE)
18609 int size;
18611 if (mode == DImode || TARGET_64BIT)
18612 size = 8;
18613 else
18614 size = 4;
18615 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18616 to a pop or add instruction if registers are available. */
18617 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18618 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18619 GEN_INT (size))));
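   /* Illustrative sketch, not part of these helpers: a post-reload splitter
      is expected to use the two functions as a pair, for example (gen_foo is
      a hypothetical pattern that wants a memory operand):

          rtx mem = ix86_force_to_memory (SImode, operands[1]);
          emit_insn (gen_foo (operands[0], mem));
          ix86_free_from_memory (SImode);

      ix86_force_to_memory either reuses the red zone or pushes the value,
      and ix86_free_from_memory undoes the push by adjusting the stack
      pointer.  */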
18623 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18624 QImode must go into class Q_REGS.
18625 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18626 movdf to do mem-to-mem moves through integer regs. */
18627 enum reg_class
18628 ix86_preferred_reload_class (rtx x, enum reg_class class)
18630 enum machine_mode mode = GET_MODE (x);
18632 /* We're only allowed to return a subclass of CLASS. Many of the
18633 following checks fail for NO_REGS, so eliminate that early. */
18634 if (class == NO_REGS)
18635 return NO_REGS;
18637 /* All classes can load zeros. */
18638 if (x == CONST0_RTX (mode))
18639 return class;
18641 /* Force constants into memory if we are loading a (nonzero) constant into
18642 an MMX or SSE register. This is because there are no MMX/SSE instructions
18643 to load from a constant. */
18644 if (CONSTANT_P (x)
18645 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18646 return NO_REGS;
18648 /* Prefer SSE regs only, if we can use them for math. */
18649 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18650 return SSE_CLASS_P (class) ? class : NO_REGS;
18652 /* Floating-point constants need more complex checks. */
18653 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18655 /* General regs can load everything. */
18656 if (reg_class_subset_p (class, GENERAL_REGS))
18657 return class;
18659 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18660 zero above. We only want to wind up preferring 80387 registers if
18661 we plan on doing computation with them. */
18662 if (TARGET_80387
18663 && standard_80387_constant_p (x))
18665 /* Limit class to non-sse. */
18666 if (class == FLOAT_SSE_REGS)
18667 return FLOAT_REGS;
18668 if (class == FP_TOP_SSE_REGS)
18669 return FP_TOP_REG;
18670 if (class == FP_SECOND_SSE_REGS)
18671 return FP_SECOND_REG;
18672 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18673 return class;
18676 return NO_REGS;
18679 /* Generally when we see PLUS here, it's the function invariant
18680 (plus soft-fp const_int), which can only be computed into general
18681 regs. */
18682 if (GET_CODE (x) == PLUS)
18683 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18685 /* QImode constants are easy to load, but non-constant QImode data
18686 must go into Q_REGS. */
18687 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18689 if (reg_class_subset_p (class, Q_REGS))
18690 return class;
18691 if (reg_class_subset_p (Q_REGS, class))
18692 return Q_REGS;
18693 return NO_REGS;
18696 return class;
18699 /* Discourage putting floating-point values in SSE registers unless
18700 SSE math is being used, and likewise for the 387 registers. */
18701 enum reg_class
18702 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18704 enum machine_mode mode = GET_MODE (x);
18706 /* Restrict the output reload class to the register bank that we are doing
18707 math on. If we would like not to return a subset of CLASS, reject this
18708 alternative: if reload cannot do this, it will still use its choice. */
18709 mode = GET_MODE (x);
18710 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18711 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18713 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18715 if (class == FP_TOP_SSE_REGS)
18716 return FP_TOP_REG;
18717 else if (class == FP_SECOND_SSE_REGS)
18718 return FP_SECOND_REG;
18719 else
18720 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18723 return class;
18726 /* If we are copying between general and FP registers, we need a memory
18727 location. The same is true for SSE and MMX registers.
18729 The macro can't work reliably when one of the CLASSES is a class containing
18730 registers from multiple units (SSE, MMX, integer). We avoid this by never
18731 combining those units in a single alternative in the machine description.
18732 Ensure that this constraint holds to avoid unexpected surprises.
18734 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18735 enforce these sanity checks. */
18738 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18739 enum machine_mode mode, int strict)
18741 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18742 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18743 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18744 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18745 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18746 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18748 gcc_assert (!strict);
18749 return true;
18752 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18753 return true;
18755 /* ??? This is a lie. We do have moves between mmx/general, and for
18756 mmx/sse2. But by saying we need secondary memory we discourage the
18757 register allocator from using the mmx registers unless needed. */
18758 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18759 return true;
18761 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18763 /* SSE1 doesn't have any direct moves from other classes. */
18764 if (!TARGET_SSE2)
18765 return true;
18767 /* If the target says that inter-unit moves are more expensive
18768 than moving through memory, then don't generate them. */
18769 if (!TARGET_INTER_UNIT_MOVES)
18770 return true;
18772 /* Between SSE and general, we have moves no larger than word size. */
18773 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18774 return true;
18777 return false;
18780 /* Return true if the registers in CLASS cannot represent the change from
18781 modes FROM to TO. */
18783 bool
18784 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18785 enum reg_class class)
18787 if (from == to)
18788 return false;
18790 /* x87 registers can't do subreg at all, as all values are reformatted
18791 to extended precision. */
18792 if (MAYBE_FLOAT_CLASS_P (class))
18793 return true;
18795 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18797 /* Vector registers do not support QI or HImode loads. If we don't
18798 disallow a change to these modes, reload will assume it's ok to
18799 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18800 the vec_dupv4hi pattern. */
18801 if (GET_MODE_SIZE (from) < 4)
18802 return true;
18804 /* Vector registers do not support subreg with nonzero offsets, which
18805 are otherwise valid for integer registers. Since we can't see
18806 whether we have a nonzero offset from here, prohibit all
18807 nonparadoxical subregs changing size. */
18808 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18809 return true;
18812 return false;
18815 /* Return the cost of moving data from a register in class CLASS1 to
18816 one in class CLASS2.
18818 It is not required that the cost always equal 2 when FROM is the same as TO;
18819 on some machines it is expensive to move between registers if they are not
18820 general registers. */
18823 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18824 enum reg_class class2)
18826 /* In case we require secondary memory, compute the cost of the store followed
18827 by the load. In order to avoid bad register allocation choices, we need
18828 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18830 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18832 int cost = 1;
18834 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18835 MEMORY_MOVE_COST (mode, class1, 1));
18836 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18837 MEMORY_MOVE_COST (mode, class2, 1));
18839 /* When copying from a general purpose register we may emit multiple
18840 stores followed by a single load, causing a memory size mismatch stall.
18841 Count this as an arbitrarily high cost of 20. */
18842 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18843 cost += 20;
18845 /* In the case of FP/MMX moves, the registers actually overlap, and we
18846 have to switch modes in order to treat them differently. */
18847 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18848 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18849 cost += 20;
18851 return cost;
18854 /* Moves between SSE/MMX and integer unit are expensive. */
18855 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18856 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18857 return ix86_cost->mmxsse_to_integer;
18858 if (MAYBE_FLOAT_CLASS_P (class1))
18859 return ix86_cost->fp_move;
18860 if (MAYBE_SSE_CLASS_P (class1))
18861 return ix86_cost->sse_move;
18862 if (MAYBE_MMX_CLASS_P (class1))
18863 return ix86_cost->mmx_move;
18864 return 2;
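   /* Worked example with made-up numbers: moving an SImode value between
      GENERAL_REGS and SSE_REGS on a target where
      ix86_secondary_memory_needed is true takes the store-plus-load path
      above.  If MEMORY_MOVE_COST were 4 for both load and store in each
      class, the result would be 1 + 4 + 4 = 9, and the extra penalties
      (size-mismatch stalls, FP/MMX overlap) would each add 20 when they
      apply.  */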
18867 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18869 bool
18870 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18872 /* Flags, and only flags, can hold CCmode values. */
18873 if (CC_REGNO_P (regno))
18874 return GET_MODE_CLASS (mode) == MODE_CC;
18875 if (GET_MODE_CLASS (mode) == MODE_CC
18876 || GET_MODE_CLASS (mode) == MODE_RANDOM
18877 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18878 return 0;
18879 if (FP_REGNO_P (regno))
18880 return VALID_FP_MODE_P (mode);
18881 if (SSE_REGNO_P (regno))
18883 /* We implement the move patterns for all vector modes into and
18884 out of SSE registers, even when no operation instructions
18885 are available. */
18886 return (VALID_SSE_REG_MODE (mode)
18887 || VALID_SSE2_REG_MODE (mode)
18888 || VALID_MMX_REG_MODE (mode)
18889 || VALID_MMX_REG_MODE_3DNOW (mode));
18891 if (MMX_REGNO_P (regno))
18893 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18894 so if the register is available at all, then we can move data of
18895 the given mode into or out of it. */
18896 return (VALID_MMX_REG_MODE (mode)
18897 || VALID_MMX_REG_MODE_3DNOW (mode));
18900 if (mode == QImode)
18902 /* Take care for QImode values - they can be in non-QI regs,
18903 but then they do cause partial register stalls. */
18904 if (regno < 4 || TARGET_64BIT)
18905 return 1;
18906 if (!TARGET_PARTIAL_REG_STALL)
18907 return 1;
18908 return reload_in_progress || reload_completed;
18910 /* We handle both integer and floats in the general purpose registers. */
18911 else if (VALID_INT_MODE_P (mode))
18912 return 1;
18913 else if (VALID_FP_MODE_P (mode))
18914 return 1;
18915 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18916 on to use that value in smaller contexts, this can easily force a
18917 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18918 supporting DImode, allow it. */
18919 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18920 return 1;
18922 return 0;
18925 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18926 tieable integer mode. */
18928 static bool
18929 ix86_tieable_integer_mode_p (enum machine_mode mode)
18931 switch (mode)
18933 case HImode:
18934 case SImode:
18935 return true;
18937 case QImode:
18938 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18940 case DImode:
18941 return TARGET_64BIT;
18943 default:
18944 return false;
18948 /* Return true if MODE1 is accessible in a register that can hold MODE2
18949 without copying. That is, all register classes that can hold MODE2
18950 can also hold MODE1. */
18952 bool
18953 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18955 if (mode1 == mode2)
18956 return true;
18958 if (ix86_tieable_integer_mode_p (mode1)
18959 && ix86_tieable_integer_mode_p (mode2))
18960 return true;
18962 /* MODE2 being XFmode implies fp stack or general regs, which means we
18963 can tie any smaller floating point modes to it. Note that we do not
18964 tie this with TFmode. */
18965 if (mode2 == XFmode)
18966 return mode1 == SFmode || mode1 == DFmode;
18968 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18969 that we can tie it with SFmode. */
18970 if (mode2 == DFmode)
18971 return mode1 == SFmode;
18973 /* If MODE2 is only appropriate for an SSE register, then tie with
18974 any other mode acceptable to SSE registers. */
18975 if (GET_MODE_SIZE (mode2) == 16
18976 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18977 return (GET_MODE_SIZE (mode1) == 16
18978 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18980 /* If MODE2 is appropriate for an MMX register, then tie
18981 with any other mode acceptable to MMX registers. */
18982 if (GET_MODE_SIZE (mode2) == 8
18983 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18984 return (GET_MODE_SIZE (mode1) == 8
18985 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18987 return false;
18990 /* Return the cost of moving data of mode M between a
18991 register and memory. A value of 2 is the default; this cost is
18992 relative to those in `REGISTER_MOVE_COST'.
18994 If moving between registers and memory is more expensive than
18995 between two registers, you should define this macro to express the
18996 relative cost.
18998 Also model the increased cost of moving QImode registers in non
18999 Q_REGS classes. */
19002 int ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19004 if (FLOAT_CLASS_P (class))
19006 int index;
19007 switch (mode)
19009 case SFmode:
19010 index = 0;
19011 break;
19012 case DFmode:
19013 index = 1;
19014 break;
19015 case XFmode:
19016 index = 2;
19017 break;
19018 default:
19019 return 100;
19021 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19023 if (SSE_CLASS_P (class))
19025 int index;
19026 switch (GET_MODE_SIZE (mode))
19028 case 4:
19029 index = 0;
19030 break;
19031 case 8:
19032 index = 1;
19033 break;
19034 case 16:
19035 index = 2;
19036 break;
19037 default:
19038 return 100;
19040 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19042 if (MMX_CLASS_P (class))
19044 int index;
19045 switch (GET_MODE_SIZE (mode))
19047 case 4:
19048 index = 0;
19049 break;
19050 case 8:
19051 index = 1;
19052 break;
19053 default:
19054 return 100;
19056 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19058 switch (GET_MODE_SIZE (mode))
19060 case 1:
19061 if (in)
19062 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19063 : ix86_cost->movzbl_load);
19064 else
19065 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19066 : ix86_cost->int_store[0] + 4);
19067 break;
19068 case 2:
19069 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19070 default:
19071 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19072 if (mode == TFmode)
19073 mode = XFmode;
19074 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19075 * (((int) GET_MODE_SIZE (mode)
19076 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
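   /* Worked example: on a 32-bit target (UNITS_PER_WORD == 4) a DImode
      access to GENERAL_REGS falls into the default case above and costs
      int_load[2] (or int_store[2]) times (8 + 4 - 1) / 4 == 2, i.e. two
      word-sized moves.  */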
19080 /* Compute a (partial) cost for rtx X. Return true if the complete
19081 cost has been computed, and false if subexpressions should be
19082 scanned. In either case, *TOTAL contains the cost result. */
19084 static bool
19085 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19087 enum machine_mode mode = GET_MODE (x);
19089 switch (code)
19091 case CONST_INT:
19092 case CONST:
19093 case LABEL_REF:
19094 case SYMBOL_REF:
19095 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19096 *total = 3;
19097 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19098 *total = 2;
19099 else if (flag_pic && SYMBOLIC_CONST (x)
19100 && (!TARGET_64BIT
19101 || (GET_CODE (x) != LABEL_REF
19102 && (GET_CODE (x) != SYMBOL_REF
19103 || !SYMBOL_REF_LOCAL_P (x)))))
19104 *total = 1;
19105 else
19106 *total = 0;
19107 return true;
19109 case CONST_DOUBLE:
19110 if (mode == VOIDmode)
19111 *total = 0;
19112 else
19113 switch (standard_80387_constant_p (x))
19115 case 1: /* 0.0 */
19116 *total = 1;
19117 break;
19118 default: /* Other constants */
19119 *total = 2;
19120 break;
19121 case 0:
19122 case -1:
19123 /* Start with (MEM (SYMBOL_REF)), since that's where
19124 it'll probably end up. Add a penalty for size. */
19125 *total = (COSTS_N_INSNS (1)
19126 + (flag_pic != 0 && !TARGET_64BIT)
19127 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19128 break;
19130 return true;
19132 case ZERO_EXTEND:
19133 /* The zero extension is often completely free on x86_64, so make
19134 it as cheap as possible. */
19135 if (TARGET_64BIT && mode == DImode
19136 && GET_MODE (XEXP (x, 0)) == SImode)
19137 *total = 1;
19138 else if (TARGET_ZERO_EXTEND_WITH_AND)
19139 *total = ix86_cost->add;
19140 else
19141 *total = ix86_cost->movzx;
19142 return false;
19144 case SIGN_EXTEND:
19145 *total = ix86_cost->movsx;
19146 return false;
19148 case ASHIFT:
19149 if (CONST_INT_P (XEXP (x, 1))
19150 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19152 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19153 if (value == 1)
19155 *total = ix86_cost->add;
19156 return false;
19158 if ((value == 2 || value == 3)
19159 && ix86_cost->lea <= ix86_cost->shift_const)
19161 *total = ix86_cost->lea;
19162 return false;
19165 /* FALLTHRU */
19167 case ROTATE:
19168 case ASHIFTRT:
19169 case LSHIFTRT:
19170 case ROTATERT:
19171 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19173 if (CONST_INT_P (XEXP (x, 1)))
19175 if (INTVAL (XEXP (x, 1)) > 32)
19176 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19177 else
19178 *total = ix86_cost->shift_const * 2;
19180 else
19182 if (GET_CODE (XEXP (x, 1)) == AND)
19183 *total = ix86_cost->shift_var * 2;
19184 else
19185 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19188 else
19190 if (CONST_INT_P (XEXP (x, 1)))
19191 *total = ix86_cost->shift_const;
19192 else
19193 *total = ix86_cost->shift_var;
19195 return false;
19197 case MULT:
19198 if (FLOAT_MODE_P (mode))
19200 *total = ix86_cost->fmul;
19201 return false;
19203 else
19205 rtx op0 = XEXP (x, 0);
19206 rtx op1 = XEXP (x, 1);
19207 int nbits;
19208 if (CONST_INT_P (XEXP (x, 1)))
19210 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19211 for (nbits = 0; value != 0; value &= value - 1)
19212 nbits++;
19214 else
19215 /* This is arbitrary. */
19216 nbits = 7;
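          /* The loop above counts set bits the Kernighan way: value &= value - 1
             clears the lowest set bit on each iteration, so e.g. value == 40
             (binary 101000) takes two iterations and yields nbits == 2.  */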
19218 /* Compute costs correctly for widening multiplication. */
19219 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19220 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19221 == GET_MODE_SIZE (mode))
19223 int is_mulwiden = 0;
19224 enum machine_mode inner_mode = GET_MODE (op0);
19226 if (GET_CODE (op0) == GET_CODE (op1))
19227 is_mulwiden = 1, op1 = XEXP (op1, 0);
19228 else if (CONST_INT_P (op1))
19230 if (GET_CODE (op0) == SIGN_EXTEND)
19231 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19232 == INTVAL (op1);
19233 else
19234 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19237 if (is_mulwiden)
19238 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19241 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19242 + nbits * ix86_cost->mult_bit
19243 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19245 return true;
19248 case DIV:
19249 case UDIV:
19250 case MOD:
19251 case UMOD:
19252 if (FLOAT_MODE_P (mode))
19253 *total = ix86_cost->fdiv;
19254 else
19255 *total = ix86_cost->divide[MODE_INDEX (mode)];
19256 return false;
19258 case PLUS:
19259 if (FLOAT_MODE_P (mode))
19260 *total = ix86_cost->fadd;
19261 else if (GET_MODE_CLASS (mode) == MODE_INT
19262 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19264 if (GET_CODE (XEXP (x, 0)) == PLUS
19265 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19266 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19267 && CONSTANT_P (XEXP (x, 1)))
19269 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19270 if (val == 2 || val == 4 || val == 8)
19272 *total = ix86_cost->lea;
19273 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19274 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19275 outer_code);
19276 *total += rtx_cost (XEXP (x, 1), outer_code);
19277 return true;
19280 else if (GET_CODE (XEXP (x, 0)) == MULT
19281 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19283 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19284 if (val == 2 || val == 4 || val == 8)
19286 *total = ix86_cost->lea;
19287 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19288 *total += rtx_cost (XEXP (x, 1), outer_code);
19289 return true;
19292 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19294 *total = ix86_cost->lea;
19295 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19296 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19297 *total += rtx_cost (XEXP (x, 1), outer_code);
19298 return true;
19301 /* FALLTHRU */
19303 case MINUS:
19304 if (FLOAT_MODE_P (mode))
19306 *total = ix86_cost->fadd;
19307 return false;
19309 /* FALLTHRU */
19311 case AND:
19312 case IOR:
19313 case XOR:
19314 if (!TARGET_64BIT && mode == DImode)
19316 *total = (ix86_cost->add * 2
19317 + (rtx_cost (XEXP (x, 0), outer_code)
19318 << (GET_MODE (XEXP (x, 0)) != DImode))
19319 + (rtx_cost (XEXP (x, 1), outer_code)
19320 << (GET_MODE (XEXP (x, 1)) != DImode)));
19321 return true;
19323 /* FALLTHRU */
19325 case NEG:
19326 if (FLOAT_MODE_P (mode))
19328 *total = ix86_cost->fchs;
19329 return false;
19331 /* FALLTHRU */
19333 case NOT:
19334 if (!TARGET_64BIT && mode == DImode)
19335 *total = ix86_cost->add * 2;
19336 else
19337 *total = ix86_cost->add;
19338 return false;
19340 case COMPARE:
19341 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19342 && XEXP (XEXP (x, 0), 1) == const1_rtx
19343 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19344 && XEXP (x, 1) == const0_rtx)
19346 /* This kind of construct is implemented using test[bwl].
19347 Treat it as if we had an AND. */
19348 *total = (ix86_cost->add
19349 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19350 + rtx_cost (const1_rtx, outer_code));
19351 return true;
19353 return false;
19355 case FLOAT_EXTEND:
19356 if (!TARGET_SSE_MATH
19357 || mode == XFmode
19358 || (mode == DFmode && !TARGET_SSE2))
19359 *total = 0;
19360 return false;
19362 case ABS:
19363 if (FLOAT_MODE_P (mode))
19364 *total = ix86_cost->fabs;
19365 return false;
19367 case SQRT:
19368 if (FLOAT_MODE_P (mode))
19369 *total = ix86_cost->fsqrt;
19370 return false;
19372 case UNSPEC:
19373 if (XINT (x, 1) == UNSPEC_TP)
19374 *total = 0;
19375 return false;
19377 default:
19378 return false;
19382 #if TARGET_MACHO
19384 static int current_machopic_label_num;
19386 /* Given a symbol name and its associated stub, write out the
19387 definition of the stub. */
19389 void
19390 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19392 unsigned int length;
19393 char *binder_name, *symbol_name, lazy_ptr_name[32];
19394 int label = ++current_machopic_label_num;
19396 /* For 64-bit we shouldn't get here. */
19397 gcc_assert (!TARGET_64BIT);
19399 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19400 symb = (*targetm.strip_name_encoding) (symb);
19402 length = strlen (stub);
19403 binder_name = alloca (length + 32);
19404 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19406 length = strlen (symb);
19407 symbol_name = alloca (length + 32);
19408 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19410 sprintf (lazy_ptr_name, "L%d$lz", label);
19412 if (MACHOPIC_PURE)
19413 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19414 else
19415 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19417 fprintf (file, "%s:\n", stub);
19418 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19420 if (MACHOPIC_PURE)
19422 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19423 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19424 fprintf (file, "\tjmp\t*%%edx\n");
19426 else
19427 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19429 fprintf (file, "%s:\n", binder_name);
19431 if (MACHOPIC_PURE)
19433 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19434 fprintf (file, "\tpushl\t%%eax\n");
19436 else
19437 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19439 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19441 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19442 fprintf (file, "%s:\n", lazy_ptr_name);
19443 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19444 fprintf (file, "\t.long %s\n", binder_name);
19447 void
19448 darwin_x86_file_end (void)
19450 darwin_file_end ();
19451 ix86_file_end ();
19453 #endif /* TARGET_MACHO */
19455 /* Order the registers for register allocator. */
19457 void
19458 x86_order_regs_for_local_alloc (void)
19460 int pos = 0;
19461 int i;
19463 /* First allocate the local general purpose registers. */
19464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19465 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19466 reg_alloc_order [pos++] = i;
19468 /* Global general purpose registers. */
19469 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19470 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19471 reg_alloc_order [pos++] = i;
19473 /* x87 registers come first in case we are doing FP math
19474 using them. */
19475 if (!TARGET_SSE_MATH)
19476 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19477 reg_alloc_order [pos++] = i;
19479 /* SSE registers. */
19480 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19481 reg_alloc_order [pos++] = i;
19482 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19483 reg_alloc_order [pos++] = i;
19485 /* x87 registers. */
19486 if (TARGET_SSE_MATH)
19487 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19488 reg_alloc_order [pos++] = i;
19490 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19491 reg_alloc_order [pos++] = i;
19493 /* Initialize the rest of array as we do not allocate some registers
19494 at all. */
19495 while (pos < FIRST_PSEUDO_REGISTER)
19496 reg_alloc_order [pos++] = 0;
19499 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19500 struct attribute_spec.handler. */
19501 static tree
19502 ix86_handle_struct_attribute (tree *node, tree name,
19503 tree args ATTRIBUTE_UNUSED,
19504 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19506 tree *type = NULL;
19507 if (DECL_P (*node))
19509 if (TREE_CODE (*node) == TYPE_DECL)
19510 type = &TREE_TYPE (*node);
19512 else
19513 type = node;
19515 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19516 || TREE_CODE (*type) == UNION_TYPE)))
19518 warning (OPT_Wattributes, "%qs attribute ignored",
19519 IDENTIFIER_POINTER (name));
19520 *no_add_attrs = true;
19523 else if ((is_attribute_p ("ms_struct", name)
19524 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19525 || ((is_attribute_p ("gcc_struct", name)
19526 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19528 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19529 IDENTIFIER_POINTER (name));
19530 *no_add_attrs = true;
19533 return NULL_TREE;
19536 static bool
19537 ix86_ms_bitfield_layout_p (tree record_type)
19539 return (TARGET_MS_BITFIELD_LAYOUT &&
19540 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19541 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19544 /* Returns an expression indicating where the this parameter is
19545 located on entry to the FUNCTION. */
19547 static rtx
19548 x86_this_parameter (tree function)
19550 tree type = TREE_TYPE (function);
19552 if (TARGET_64BIT)
19554 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19555 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19558 if (ix86_function_regparm (type, function) > 0)
19560 tree parm;
19562 parm = TYPE_ARG_TYPES (type);
19563 /* Figure out whether or not the function has a variable number of
19564 arguments. */
19565 for (; parm; parm = TREE_CHAIN (parm))
19566 if (TREE_VALUE (parm) == void_type_node)
19567 break;
19568 /* If not, the this parameter is in the first argument. */
19569 if (parm)
19571 int regno = 0;
19572 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19573 regno = 2;
19574 return gen_rtx_REG (SImode, regno);
19578 if (aggregate_value_p (TREE_TYPE (type), type))
19579 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19580 else
19581 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19584 /* Determine whether x86_output_mi_thunk can succeed. */
19586 static bool
19587 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19588 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19589 HOST_WIDE_INT vcall_offset, tree function)
19591 /* 64-bit can handle anything. */
19592 if (TARGET_64BIT)
19593 return true;
19595 /* For 32-bit, everything's fine if we have one free register. */
19596 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19597 return true;
19599 /* Need a free register for vcall_offset. */
19600 if (vcall_offset)
19601 return false;
19603 /* Need a free register for GOT references. */
19604 if (flag_pic && !(*targetm.binds_local_p) (function))
19605 return false;
19607 /* Otherwise ok. */
19608 return true;
19611 /* Output the assembler code for a thunk function. THUNK_DECL is the
19612 declaration for the thunk function itself, FUNCTION is the decl for
19613 the target function. DELTA is an immediate constant offset to be
19614 added to THIS. If VCALL_OFFSET is nonzero, the word at
19615 *(*this + vcall_offset) should be added to THIS. */
19617 static void
19618 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19619 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19620 HOST_WIDE_INT vcall_offset, tree function)
19622 rtx xops[3];
19623 rtx this = x86_this_parameter (function);
19624 rtx this_reg, tmp;
19626 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19627 pull it in now and let DELTA benefit. */
19628 if (REG_P (this))
19629 this_reg = this;
19630 else if (vcall_offset)
19632 /* Put the this parameter into %eax. */
19633 xops[0] = this;
19634 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19635 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19637 else
19638 this_reg = NULL_RTX;
19640 /* Adjust the this parameter by a fixed constant. */
19641 if (delta)
19643 xops[0] = GEN_INT (delta);
19644 xops[1] = this_reg ? this_reg : this;
19645 if (TARGET_64BIT)
19647 if (!x86_64_general_operand (xops[0], DImode))
19649 tmp = gen_rtx_REG (DImode, R10_REG);
19650 xops[1] = tmp;
19651 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19652 xops[0] = tmp;
19653 xops[1] = this;
19655 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19657 else
19658 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19661 /* Adjust the this parameter by a value stored in the vtable. */
19662 if (vcall_offset)
19664 if (TARGET_64BIT)
19665 tmp = gen_rtx_REG (DImode, R10_REG);
19666 else
19668 int tmp_regno = 2 /* ECX */;
19669 if (lookup_attribute ("fastcall",
19670 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19671 tmp_regno = 0 /* EAX */;
19672 tmp = gen_rtx_REG (SImode, tmp_regno);
19675 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19676 xops[1] = tmp;
19677 if (TARGET_64BIT)
19678 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19679 else
19680 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19682 /* Adjust the this parameter. */
19683 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19684 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19686 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19687 xops[0] = GEN_INT (vcall_offset);
19688 xops[1] = tmp2;
19689 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19690 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19692 xops[1] = this_reg;
19693 if (TARGET_64BIT)
19694 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19695 else
19696 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19699 /* If necessary, drop THIS back to its stack slot. */
19700 if (this_reg && this_reg != this)
19702 xops[0] = this_reg;
19703 xops[1] = this;
19704 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19707 xops[0] = XEXP (DECL_RTL (function), 0);
19708 if (TARGET_64BIT)
19710 if (!flag_pic || (*targetm.binds_local_p) (function))
19711 output_asm_insn ("jmp\t%P0", xops);
19712 else
19714 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19715 tmp = gen_rtx_CONST (Pmode, tmp);
19716 tmp = gen_rtx_MEM (QImode, tmp);
19717 xops[0] = tmp;
19718 output_asm_insn ("jmp\t%A0", xops);
19721 else
19723 if (!flag_pic || (*targetm.binds_local_p) (function))
19724 output_asm_insn ("jmp\t%P0", xops);
19725 else
19726 #if TARGET_MACHO
19727 if (TARGET_MACHO)
19729 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19730 tmp = (gen_rtx_SYMBOL_REF
19731 (Pmode,
19732 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19733 tmp = gen_rtx_MEM (QImode, tmp);
19734 xops[0] = tmp;
19735 output_asm_insn ("jmp\t%0", xops);
19737 else
19738 #endif /* TARGET_MACHO */
19740 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19741 output_set_got (tmp, NULL_RTX);
19743 xops[1] = tmp;
19744 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19745 output_asm_insn ("jmp\t{*}%1", xops);
19750 static void
19751 x86_file_start (void)
19753 default_file_start ();
19754 #if TARGET_MACHO
19755 darwin_file_start ();
19756 #endif
19757 if (X86_FILE_START_VERSION_DIRECTIVE)
19758 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19759 if (X86_FILE_START_FLTUSED)
19760 fputs ("\t.global\t__fltused\n", asm_out_file);
19761 if (ix86_asm_dialect == ASM_INTEL)
19762 fputs ("\t.intel_syntax\n", asm_out_file);
19766 x86_field_alignment (tree field, int computed)
19768 enum machine_mode mode;
19769 tree type = TREE_TYPE (field);
19771 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19772 return computed;
19773 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19774 ? get_inner_array_type (type) : type);
19775 if (mode == DFmode || mode == DCmode
19776 || GET_MODE_CLASS (mode) == MODE_INT
19777 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19778 return MIN (32, computed);
19779 return computed;
19782 /* Output assembler code to FILE to increment profiler label # LABELNO
19783 for profiling a function entry. */
19784 void
19785 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19787 if (TARGET_64BIT)
19788 if (flag_pic)
19790 #ifndef NO_PROFILE_COUNTERS
19791 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19792 #endif
19793 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19795 else
19797 #ifndef NO_PROFILE_COUNTERS
19798 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19799 #endif
19800 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19802 else if (flag_pic)
19804 #ifndef NO_PROFILE_COUNTERS
19805 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19806 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19807 #endif
19808 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19810 else
19812 #ifndef NO_PROFILE_COUNTERS
19813 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19814 PROFILE_COUNT_REGISTER);
19815 #endif
19816 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19820 /* We don't have exact information about the insn sizes, but we may assume
19821 quite safely that we are informed about all 1 byte insns and memory
19822 address sizes. This is enough to eliminate unnecessary padding in
19823 99% of cases. */
19825 static int
19826 min_insn_size (rtx insn)
19828 int l = 0;
19830 if (!INSN_P (insn) || !active_insn_p (insn))
19831 return 0;
19833 /* Discard the alignments we have emitted, and jump table data. */
19834 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19835 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19836 return 0;
19837 if (JUMP_P (insn)
19838 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19839 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19840 return 0;
19842 /* Important case - calls are always 5 bytes.
19843 It is common to have many calls in a row. */
19844 if (CALL_P (insn)
19845 && symbolic_reference_mentioned_p (PATTERN (insn))
19846 && !SIBLING_CALL_P (insn))
19847 return 5;
19848 if (get_attr_length (insn) <= 1)
19849 return 1;
19851 /* For normal instructions we may rely on the sizes of addresses
19852 and the presence of a symbol to require 4 bytes of encoding.
19853 This is not the case for jumps, where references are PC relative. */
19854 if (!JUMP_P (insn))
19856 l = get_attr_length_address (insn);
19857 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19858 l = 4;
19860 if (l)
19861 return 1+l;
19862 else
19863 return 2;
19866 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
19867 16 byte window. */
19869 static void
19870 ix86_avoid_jump_misspredicts (void)
19872 rtx insn, start = get_insns ();
19873 int nbytes = 0, njumps = 0;
19874 int isjump = 0;
19876 /* Look for all minimal intervals of instructions containing 4 jumps.
19877 The intervals are bounded by START and INSN. NBYTES is the total
19878 size of instructions in the interval including INSN and not including
19879 START. When NBYTES is smaller than 16 bytes, it is possible
19880 that the ends of START and INSN land in the same 16 byte page.
19882 The smallest offset in the page at which INSN can start is the case where
19883 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19884 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
19886 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19889 nbytes += min_insn_size (insn);
19890 if (dump_file)
19891 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19892 INSN_UID (insn), min_insn_size (insn));
19893 if ((JUMP_P (insn)
19894 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19895 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19896 || CALL_P (insn))
19897 njumps++;
19898 else
19899 continue;
19901 while (njumps > 3)
19903 start = NEXT_INSN (start);
19904 if ((JUMP_P (start)
19905 && GET_CODE (PATTERN (start)) != ADDR_VEC
19906 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19907 || CALL_P (start))
19908 njumps--, isjump = 1;
19909 else
19910 isjump = 0;
19911 nbytes -= min_insn_size (start);
19913 gcc_assert (njumps >= 0);
19914 if (dump_file)
19915 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19916 INSN_UID (start), INSN_UID (insn), nbytes);
19918 if (njumps == 3 && isjump && nbytes < 16)
19920 int padsize = 15 - nbytes + min_insn_size (insn);
19922 if (dump_file)
19923 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19924 INSN_UID (insn), padsize);
19925 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19930 /* AMD Athlon works faster
19931 when RET is not the destination of a conditional jump or directly preceded
19932 by another jump instruction. We avoid the penalty by replacing such
19933 returns with the return_internal_long pattern. */
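/* Sketch of the transformation below (descriptive only): a return in a hot
   block that directly follows a call or a conditional jump, that is a
   visible branch target, or that forms an otherwise empty function body is
   deleted and re-emitted as the return_internal_long pattern, so the
   penalized plain return does not appear in such a position.  */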
19934 static void
19935 ix86_pad_returns (void)
19937 edge e;
19938 edge_iterator ei;
19940 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19942 basic_block bb = e->src;
19943 rtx ret = BB_END (bb);
19944 rtx prev;
19945 bool replace = false;
19947 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19948 || !maybe_hot_bb_p (bb))
19949 continue;
19950 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19951 if (active_insn_p (prev) || LABEL_P (prev))
19952 break;
19953 if (prev && LABEL_P (prev))
19955 edge e;
19956 edge_iterator ei;
19958 FOR_EACH_EDGE (e, ei, bb->preds)
19959 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19960 && !(e->flags & EDGE_FALLTHRU))
19961 replace = true;
19963 if (!replace)
19965 prev = prev_active_insn (ret);
19966 if (prev
19967 && ((JUMP_P (prev) && any_condjump_p (prev))
19968 || CALL_P (prev)))
19969 replace = true;
19970 /* Empty functions get a branch mispredict even when the jump destination
19971 is not visible to us. */
19972 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19973 replace = true;
19975 if (replace)
19977 emit_insn_before (gen_return_internal_long (), ret);
19978 delete_insn (ret);
19983 /* Implement machine specific optimizations. We implement padding of returns
19984 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19985 static void
19986 ix86_reorg (void)
19988 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19989 ix86_pad_returns ();
19990 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19991 ix86_avoid_jump_misspredicts ();
19994 /* Return nonzero when a QImode register that must be represented via a REX
19995 prefix is used. */
19996 bool
19997 x86_extended_QIreg_mentioned_p (rtx insn)
19999 int i;
20000 extract_insn_cached (insn);
20001 for (i = 0; i < recog_data.n_operands; i++)
20002 if (REG_P (recog_data.operand[i])
20003 && REGNO (recog_data.operand[i]) >= 4)
20004 return true;
20005 return false;
20008 /* Return nonzero when P points to a register encoded via a REX prefix.
20009 Called via for_each_rtx. */
20010 static int
20011 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20013 unsigned int regno;
20014 if (!REG_P (*p))
20015 return 0;
20016 regno = REGNO (*p);
20017 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20020 /* Return true when INSN mentions a register that must be encoded using a
20021 REX prefix. */
20022 bool
20023 x86_extended_reg_mentioned_p (rtx insn)
20025 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20028 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20029 optabs would emit if we didn't have TFmode patterns. */
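/* Roughly equivalent C for the sequence emitted below (a sketch only;
   "x" stands for operands[1] viewed as an unsigned value).  For negative
   "x" we halve it with the low bit folded back in, convert the halved
   value as a signed number and double the result, so the final rounding
   matches a direct unsigned conversion:

     if ((signed) x >= 0)
       result = (double) (signed) x;
     else
       {
         i0 = (x >> 1) | (x & 1);
         result = (double) (signed) i0;
         result = result + result;
       }
*/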
20031 void
20032 x86_emit_floatuns (rtx operands[2])
20034 rtx neglab, donelab, i0, i1, f0, in, out;
20035 enum machine_mode mode, inmode;
20037 inmode = GET_MODE (operands[1]);
20038 gcc_assert (inmode == SImode || inmode == DImode);
20040 out = operands[0];
20041 in = force_reg (inmode, operands[1]);
20042 mode = GET_MODE (out);
20043 neglab = gen_label_rtx ();
20044 donelab = gen_label_rtx ();
20045 f0 = gen_reg_rtx (mode);
20047 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20049 expand_float (out, in, 0);
20051 emit_jump_insn (gen_jump (donelab));
20052 emit_barrier ();
20054 emit_label (neglab);
20056 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20057 1, OPTAB_DIRECT);
20058 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20059 1, OPTAB_DIRECT);
20060 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20062 expand_float (f0, i0, 0);
20064 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20066 emit_label (donelab);
20069 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20070 with all elements equal to VAR. Return true if successful. */
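/* Illustrative sketch of the "widen" path used below for the narrow
   integer element modes (pseudo-C of the intent, not the emitted RTL):
   the scalar is replicated into the next wider scalar mode, e.g. for a
   QImode value v

     wide = (v << 8) | v;

   giving two copies of the byte in one HImode value, and the function
   then recurses with the wider vector mode (V8QImode via V4HImode to
   V2SImode, V16QImode via V8HImode) until a mode with a direct
   vec_duplicate or an SSE2 shuffle sequence is reached.  */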
20072 static bool
20073 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20074 rtx target, rtx val)
20076 enum machine_mode smode, wsmode, wvmode;
20077 rtx x;
20079 switch (mode)
20081 case V2SImode:
20082 case V2SFmode:
20083 if (!mmx_ok)
20084 return false;
20085 /* FALLTHRU */
20087 case V2DFmode:
20088 case V2DImode:
20089 case V4SFmode:
20090 case V4SImode:
20091 val = force_reg (GET_MODE_INNER (mode), val);
20092 x = gen_rtx_VEC_DUPLICATE (mode, val);
20093 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20094 return true;
20096 case V4HImode:
20097 if (!mmx_ok)
20098 return false;
20099 if (TARGET_SSE || TARGET_3DNOW_A)
20101 val = gen_lowpart (SImode, val);
20102 x = gen_rtx_TRUNCATE (HImode, val);
20103 x = gen_rtx_VEC_DUPLICATE (mode, x);
20104 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20105 return true;
20107 else
20109 smode = HImode;
20110 wsmode = SImode;
20111 wvmode = V2SImode;
20112 goto widen;
20115 case V8QImode:
20116 if (!mmx_ok)
20117 return false;
20118 smode = QImode;
20119 wsmode = HImode;
20120 wvmode = V4HImode;
20121 goto widen;
20122 case V8HImode:
20123 if (TARGET_SSE2)
20125 rtx tmp1, tmp2;
20126 /* Extend HImode to SImode using a paradoxical SUBREG. */
20127 tmp1 = gen_reg_rtx (SImode);
20128 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20129 /* Insert the SImode value as low element of V4SImode vector. */
20130 tmp2 = gen_reg_rtx (V4SImode);
20131 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20132 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20133 CONST0_RTX (V4SImode),
20134 const1_rtx);
20135 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20136 /* Cast the V4SImode vector back to a V8HImode vector. */
20137 tmp1 = gen_reg_rtx (V8HImode);
20138 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20139 /* Duplicate the low short through the whole low SImode word. */
20140 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20141 /* Cast the V8HImode vector back to a V4SImode vector. */
20142 tmp2 = gen_reg_rtx (V4SImode);
20143 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20144 /* Replicate the low element of the V4SImode vector. */
20145 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20146 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20147 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20148 return true;
20150 smode = HImode;
20151 wsmode = SImode;
20152 wvmode = V4SImode;
20153 goto widen;
20154 case V16QImode:
20155 if (TARGET_SSE2)
20157 rtx tmp1, tmp2;
20158 /* Extend QImode to SImode using a paradoxical SUBREG. */
20159 tmp1 = gen_reg_rtx (SImode);
20160 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20161 /* Insert the SImode value as low element of V4SImode vector. */
20162 tmp2 = gen_reg_rtx (V4SImode);
20163 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20164 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20165 CONST0_RTX (V4SImode),
20166 const1_rtx);
20167 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20168 /* Cast the V4SImode vector back to a V16QImode vector. */
20169 tmp1 = gen_reg_rtx (V16QImode);
20170 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20171 /* Duplicate the low byte through the whole low SImode word. */
20172 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20173 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20174 /* Cast the V16QImode vector back to a V4SImode vector. */
20175 tmp2 = gen_reg_rtx (V4SImode);
20176 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20177 /* Replicate the low element of the V4SImode vector. */
20178 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20180 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20180 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20181 return true;
20183 smode = QImode;
20184 wsmode = HImode;
20185 wvmode = V8HImode;
20186 goto widen;
20187 widen:
20188 /* Replicate the value once into the next wider mode and recurse. */
20189 val = convert_modes (wsmode, smode, val, true);
20190 x = expand_simple_binop (wsmode, ASHIFT, val,
20191 GEN_INT (GET_MODE_BITSIZE (smode)),
20192 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20193 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20195 x = gen_reg_rtx (wvmode);
20196 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20197 gcc_unreachable ();
20198 emit_move_insn (target, gen_lowpart (mode, x));
20199 return true;
20201 default:
20202 return false;
20206 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20207 whose ONE_VAR element is VAR, and other elements are zero. Return true
20208 if successful. */
20210 static bool
20211 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20212 rtx target, rtx var, int one_var)
20214 enum machine_mode vsimode;
20215 rtx new_target;
20216 rtx x, tmp;
20218 switch (mode)
20220 case V2SFmode:
20221 case V2SImode:
20222 if (!mmx_ok)
20223 return false;
20224 /* FALLTHRU */
20226 case V2DFmode:
20227 case V2DImode:
20228 if (one_var != 0)
20229 return false;
20230 var = force_reg (GET_MODE_INNER (mode), var);
20231 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20232 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20233 return true;
20235 case V4SFmode:
20236 case V4SImode:
20237 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20238 new_target = gen_reg_rtx (mode);
20239 else
20240 new_target = target;
20241 var = force_reg (GET_MODE_INNER (mode), var);
20242 x = gen_rtx_VEC_DUPLICATE (mode, var);
20243 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20244 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20245 if (one_var != 0)
20247 /* We need to shuffle the value to the correct position, so
20248 create a new pseudo to store the intermediate result. */
20250 /* With SSE2, we can use the integer shuffle insns. */
20251 if (mode != V4SFmode && TARGET_SSE2)
20253 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20254 GEN_INT (1),
20255 GEN_INT (one_var == 1 ? 0 : 1),
20256 GEN_INT (one_var == 2 ? 0 : 1),
20257 GEN_INT (one_var == 3 ? 0 : 1)));
20258 if (target != new_target)
20259 emit_move_insn (target, new_target);
20260 return true;
20263 /* Otherwise convert the intermediate result to V4SFmode and
20264 use the SSE1 shuffle instructions. */
20265 if (mode != V4SFmode)
20267 tmp = gen_reg_rtx (V4SFmode);
20268 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20270 else
20271 tmp = new_target;
20273 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20274 GEN_INT (1),
20275 GEN_INT (one_var == 1 ? 0 : 1),
20276 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20277 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20279 if (mode != V4SFmode)
20280 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20281 else if (tmp != target)
20282 emit_move_insn (target, tmp);
20284 else if (target != new_target)
20285 emit_move_insn (target, new_target);
20286 return true;
20288 case V8HImode:
20289 case V16QImode:
20290 vsimode = V4SImode;
20291 goto widen;
20292 case V4HImode:
20293 case V8QImode:
20294 if (!mmx_ok)
20295 return false;
20296 vsimode = V2SImode;
20297 goto widen;
20298 widen:
20299 if (one_var != 0)
20300 return false;
20302 /* Zero extend the variable element to SImode and recurse. */
20303 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20305 x = gen_reg_rtx (vsimode);
20306 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20307 var, one_var))
20308 gcc_unreachable ();
20310 emit_move_insn (target, gen_lowpart (mode, x));
20311 return true;
20313 default:
20314 return false;
20318 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20319 consisting of the values in VALS. It is known that all elements
20320 except ONE_VAR are constants. Return true if successful. */
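/* Rough outline of the strategy below (a sketch of the intent, not the
   emitted RTL): the constants are loaded from the constant pool with the
   variable slot zeroed, and the variable element is then inserted with
   ix86_expand_vector_set, conceptually

     target = { c0, 0, c2, c3 };
     target[one_var] = var;

   For the QImode vector modes, where no single-byte insert exists, the
   variable byte is first combined with its neighbouring constant byte
   into one HImode value and the insert is done at HImode granularity.  */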
20322 static bool
20323 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20324 rtx target, rtx vals, int one_var)
20326 rtx var = XVECEXP (vals, 0, one_var);
20327 enum machine_mode wmode;
20328 rtx const_vec, x;
20330 const_vec = copy_rtx (vals);
20331 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20332 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20334 switch (mode)
20336 case V2DFmode:
20337 case V2DImode:
20338 case V2SFmode:
20339 case V2SImode:
20340 /* For the two element vectors, it's just as easy to use
20341 the general case. */
20342 return false;
20344 case V4SFmode:
20345 case V4SImode:
20346 case V8HImode:
20347 case V4HImode:
20348 break;
20350 case V16QImode:
20351 wmode = V8HImode;
20352 goto widen;
20353 case V8QImode:
20354 wmode = V4HImode;
20355 goto widen;
20356 widen:
20357 /* There's no way to set one QImode entry easily. Combine
20358 the variable value with its adjacent constant value, and
20359 promote to an HImode set. */
20360 x = XVECEXP (vals, 0, one_var ^ 1);
20361 if (one_var & 1)
20363 var = convert_modes (HImode, QImode, var, true);
20364 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20365 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20366 x = GEN_INT (INTVAL (x) & 0xff);
20368 else
20370 var = convert_modes (HImode, QImode, var, true);
20371 x = gen_int_mode (INTVAL (x) << 8, HImode);
20373 if (x != const0_rtx)
20374 var = expand_simple_binop (HImode, IOR, var, x, var,
20375 1, OPTAB_LIB_WIDEN);
20377 x = gen_reg_rtx (wmode);
20378 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20379 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20381 emit_move_insn (target, gen_lowpart (mode, x));
20382 return true;
20384 default:
20385 return false;
20388 emit_move_insn (target, const_vec);
20389 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20390 return true;
20393 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20394 all values variable, and none identical. */
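/* For the element-wise fallback at the end of this function the elements
   of each word-sized chunk are first packed into an integer register.
   Roughly equivalent C (illustrative only), for elements "elt_bits" wide:

     word = elt[last];
     for (j = last - 1; j >= first; j--)
       word = (word << elt_bits) | elt[j];

   so the first element of the chunk ends up in the low bits.  The
   resulting word(s) are then moved into the vector register directly,
   via low/high part moves, or by recursing with V4SImode.  */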
20396 static void
20397 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20398 rtx target, rtx vals)
20400 enum machine_mode half_mode = GET_MODE_INNER (mode);
20401 rtx op0 = NULL, op1 = NULL;
20402 bool use_vec_concat = false;
20404 switch (mode)
20406 case V2SFmode:
20407 case V2SImode:
20408 if (!mmx_ok && !TARGET_SSE)
20409 break;
20410 /* FALLTHRU */
20412 case V2DFmode:
20413 case V2DImode:
20414 /* For the two element vectors, we always implement VEC_CONCAT. */
20415 op0 = XVECEXP (vals, 0, 0);
20416 op1 = XVECEXP (vals, 0, 1);
20417 use_vec_concat = true;
20418 break;
20420 case V4SFmode:
20421 half_mode = V2SFmode;
20422 goto half;
20423 case V4SImode:
20424 half_mode = V2SImode;
20425 goto half;
20426 half:
20428 rtvec v;
20430 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20431 Recurse to load the two halves. */
20433 op0 = gen_reg_rtx (half_mode);
20434 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20435 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20437 op1 = gen_reg_rtx (half_mode);
20438 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20439 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20441 use_vec_concat = true;
20443 break;
20445 case V8HImode:
20446 case V16QImode:
20447 case V4HImode:
20448 case V8QImode:
20449 break;
20451 default:
20452 gcc_unreachable ();
20455 if (use_vec_concat)
20457 if (!register_operand (op0, half_mode))
20458 op0 = force_reg (half_mode, op0);
20459 if (!register_operand (op1, half_mode))
20460 op1 = force_reg (half_mode, op1);
20462 emit_insn (gen_rtx_SET (VOIDmode, target,
20463 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20465 else
20467 int i, j, n_elts, n_words, n_elt_per_word;
20468 enum machine_mode inner_mode;
20469 rtx words[4], shift;
20471 inner_mode = GET_MODE_INNER (mode);
20472 n_elts = GET_MODE_NUNITS (mode);
20473 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20474 n_elt_per_word = n_elts / n_words;
20475 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20477 for (i = 0; i < n_words; ++i)
20479 rtx word = NULL_RTX;
20481 for (j = 0; j < n_elt_per_word; ++j)
20483 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20484 elt = convert_modes (word_mode, inner_mode, elt, true);
20486 if (j == 0)
20487 word = elt;
20488 else
20490 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20491 word, 1, OPTAB_LIB_WIDEN);
20492 word = expand_simple_binop (word_mode, IOR, word, elt,
20493 word, 1, OPTAB_LIB_WIDEN);
20497 words[i] = word;
20500 if (n_words == 1)
20501 emit_move_insn (target, gen_lowpart (mode, words[0]));
20502 else if (n_words == 2)
20504 rtx tmp = gen_reg_rtx (mode);
20505 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20506 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20507 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20508 emit_move_insn (target, tmp);
20510 else if (n_words == 4)
20512 rtx tmp = gen_reg_rtx (V4SImode);
20513 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20514 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20515 emit_move_insn (target, gen_lowpart (mode, tmp));
20517 else
20518 gcc_unreachable ();
20522 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20523 instructions unless MMX_OK is true. */
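/* Outline of the dispatch below (descriptive only): all-constant vectors
   are loaded from the constant pool; vectors with all elements identical
   go through ix86_expand_vector_init_duplicate; vectors with exactly one
   non-constant element try ix86_expand_vector_init_one_nonzero (when the
   remaining constants are all zero) and then
   ix86_expand_vector_init_one_var; everything else falls back to
   ix86_expand_vector_init_general.  */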
20525 void
20526 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20528 enum machine_mode mode = GET_MODE (target);
20529 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20530 int n_elts = GET_MODE_NUNITS (mode);
20531 int n_var = 0, one_var = -1;
20532 bool all_same = true, all_const_zero = true;
20533 int i;
20534 rtx x;
20536 for (i = 0; i < n_elts; ++i)
20538 x = XVECEXP (vals, 0, i);
20539 if (!CONSTANT_P (x))
20540 n_var++, one_var = i;
20541 else if (x != CONST0_RTX (inner_mode))
20542 all_const_zero = false;
20543 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20544 all_same = false;
20547 /* Constants are best loaded from the constant pool. */
20548 if (n_var == 0)
20550 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20551 return;
20554 /* If all values are identical, broadcast the value. */
20555 if (all_same
20556 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20557 XVECEXP (vals, 0, 0)))
20558 return;
20560 /* Values where only one field is non-constant are best loaded from
20561 the pool and overwritten via move later. */
20562 if (n_var == 1)
20564 if (all_const_zero
20565 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20566 XVECEXP (vals, 0, one_var),
20567 one_var))
20568 return;
20570 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20571 return;
20574 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
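/* Insert VAL into element ELT of vector TARGET.  MMX instructions are used
   only when MMX_OK is true; when no suitable insert sequence exists the
   vector is spilled to a stack temporary, the element is stored there and
   the whole vector is reloaded.  */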
20577 void
20578 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20580 enum machine_mode mode = GET_MODE (target);
20581 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20582 bool use_vec_merge = false;
20583 rtx tmp;
20585 switch (mode)
20587 case V2SFmode:
20588 case V2SImode:
20589 if (mmx_ok)
20591 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20592 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20593 if (elt == 0)
20594 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20595 else
20596 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20597 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20598 return;
20600 break;
20602 case V2DFmode:
20603 case V2DImode:
20605 rtx op0, op1;
20607 /* For the two element vectors, we implement a VEC_CONCAT with
20608 the extraction of the other element. */
20610 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20611 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20613 if (elt == 0)
20614 op0 = val, op1 = tmp;
20615 else
20616 op0 = tmp, op1 = val;
20618 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20619 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20621 return;
20623 case V4SFmode:
20624 switch (elt)
20626 case 0:
20627 use_vec_merge = true;
20628 break;
20630 case 1:
20631 /* tmp = target = A B C D */
20632 tmp = copy_to_reg (target);
20633 /* target = A A B B */
20634 emit_insn (gen_sse_unpcklps (target, target, target));
20635 /* target = X A B B */
20636 ix86_expand_vector_set (false, target, val, 0);
20637 /* target = A X C D */
20638 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20639 GEN_INT (1), GEN_INT (0),
20640 GEN_INT (2+4), GEN_INT (3+4)));
20641 return;
20643 case 2:
20644 /* tmp = target = A B C D */
20645 tmp = copy_to_reg (target);
20646 /* tmp = X B C D */
20647 ix86_expand_vector_set (false, tmp, val, 0);
20648 /* target = A B X D */
20649 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20650 GEN_INT (0), GEN_INT (1),
20651 GEN_INT (0+4), GEN_INT (3+4)));
20652 return;
20654 case 3:
20655 /* tmp = target = A B C D */
20656 tmp = copy_to_reg (target);
20657 /* tmp = X B C D */
20658 ix86_expand_vector_set (false, tmp, val, 0);
20659 /* target = A B C X */
20660 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20661 GEN_INT (0), GEN_INT (1),
20662 GEN_INT (2+4), GEN_INT (0+4)));
20663 return;
20665 default:
20666 gcc_unreachable ();
20668 break;
20670 case V4SImode:
20671 /* Element 0 handled by vec_merge below. */
20672 if (elt == 0)
20674 use_vec_merge = true;
20675 break;
20678 if (TARGET_SSE2)
20680 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20681 store into element 0, then shuffle them back. */
20683 rtx order[4];
20685 order[0] = GEN_INT (elt);
20686 order[1] = const1_rtx;
20687 order[2] = const2_rtx;
20688 order[3] = GEN_INT (3);
20689 order[elt] = const0_rtx;
20691 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20692 order[1], order[2], order[3]));
20694 ix86_expand_vector_set (false, target, val, 0);
20696 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20697 order[1], order[2], order[3]));
20699 else
20701 /* For SSE1, we have to reuse the V4SF code. */
20702 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20703 gen_lowpart (SFmode, val), elt);
20705 return;
20707 case V8HImode:
20708 use_vec_merge = TARGET_SSE2;
20709 break;
20710 case V4HImode:
20711 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20712 break;
20714 case V16QImode:
20715 case V8QImode:
20716 default:
20717 break;
20720 if (use_vec_merge)
20722 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20723 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20724 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20726 else
20728 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20730 emit_move_insn (mem, target);
20732 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20733 emit_move_insn (tmp, val);
20735 emit_move_insn (target, mem);
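/* Extract element ELT of vector VEC into TARGET.  MMX instructions are
   used only when MMX_OK is true; when no direct extract sequence exists
   the vector is spilled to a stack temporary and the element is loaded
   back from memory.  */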
20739 void
20740 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20742 enum machine_mode mode = GET_MODE (vec);
20743 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20744 bool use_vec_extr = false;
20745 rtx tmp;
20747 switch (mode)
20749 case V2SImode:
20750 case V2SFmode:
20751 if (!mmx_ok)
20752 break;
20753 /* FALLTHRU */
20755 case V2DFmode:
20756 case V2DImode:
20757 use_vec_extr = true;
20758 break;
20760 case V4SFmode:
20761 switch (elt)
20763 case 0:
20764 tmp = vec;
20765 break;
20767 case 1:
20768 case 3:
20769 tmp = gen_reg_rtx (mode);
20770 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20771 GEN_INT (elt), GEN_INT (elt),
20772 GEN_INT (elt+4), GEN_INT (elt+4)));
20773 break;
20775 case 2:
20776 tmp = gen_reg_rtx (mode);
20777 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20778 break;
20780 default:
20781 gcc_unreachable ();
20783 vec = tmp;
20784 use_vec_extr = true;
20785 elt = 0;
20786 break;
20788 case V4SImode:
20789 if (TARGET_SSE2)
20791 switch (elt)
20793 case 0:
20794 tmp = vec;
20795 break;
20797 case 1:
20798 case 3:
20799 tmp = gen_reg_rtx (mode);
20800 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20801 GEN_INT (elt), GEN_INT (elt),
20802 GEN_INT (elt), GEN_INT (elt)));
20803 break;
20805 case 2:
20806 tmp = gen_reg_rtx (mode);
20807 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20808 break;
20810 default:
20811 gcc_unreachable ();
20813 vec = tmp;
20814 use_vec_extr = true;
20815 elt = 0;
20817 else
20819 /* For SSE1, we have to reuse the V4SF code. */
20820 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20821 gen_lowpart (V4SFmode, vec), elt);
20822 return;
20824 break;
20826 case V8HImode:
20827 use_vec_extr = TARGET_SSE2;
20828 break;
20829 case V4HImode:
20830 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20831 break;
20833 case V16QImode:
20834 case V8QImode:
20835 /* ??? Could extract the appropriate HImode element and shift. */
20836 default:
20837 break;
20840 if (use_vec_extr)
20842 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20843 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20845 /* Let the rtl optimizers know about the zero extension performed. */
20846 if (inner_mode == HImode)
20848 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20849 target = gen_lowpart (SImode, target);
20852 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20854 else
20856 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20858 emit_move_insn (mem, vec);
20860 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20861 emit_move_insn (target, tmp);
20865 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20866 pattern to reduce; DEST is the destination; IN is the input vector. */
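/* Data flow of the sequence below, written out for clarity (FN stands for
   any commutative binary operation such as smin, smax or plus):

     tmp1 = { in[2], in[3], in[2], in[3] };
     tmp2 = FN (tmp1, in);
     tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] };
     dest = FN (tmp2, tmp3);

   so element 0 of DEST holds FN (FN (in[0], in[2]), FN (in[1], in[3])),
   the reduction of all four input elements.  */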
20868 void
20869 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20871 rtx tmp1, tmp2, tmp3;
20873 tmp1 = gen_reg_rtx (V4SFmode);
20874 tmp2 = gen_reg_rtx (V4SFmode);
20875 tmp3 = gen_reg_rtx (V4SFmode);
20877 emit_insn (gen_sse_movhlps (tmp1, in, in));
20878 emit_insn (fn (tmp2, tmp1, in));
20880 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20881 GEN_INT (1), GEN_INT (1),
20882 GEN_INT (1+4), GEN_INT (1+4)));
20883 emit_insn (fn (dest, tmp2, tmp3));
20886 /* Target hook for scalar_mode_supported_p. */
20887 static bool
20888 ix86_scalar_mode_supported_p (enum machine_mode mode)
20890 if (DECIMAL_FLOAT_MODE_P (mode))
20891 return true;
20892 else
20893 return default_scalar_mode_supported_p (mode);
20896 /* Implements target hook vector_mode_supported_p. */
20897 static bool
20898 ix86_vector_mode_supported_p (enum machine_mode mode)
20900 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20901 return true;
20902 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20903 return true;
20904 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20905 return true;
20906 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20907 return true;
20908 return false;
20911 /* Worker function for TARGET_MD_ASM_CLOBBERS. Add "flags" and "fpsr" to
20913 the asm clobber list; we do this in the new i386 backend to maintain
20914 source compatibility with the old cc0-based compiler. */
20916 static tree
20917 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20918 tree inputs ATTRIBUTE_UNUSED,
20919 tree clobbers)
20921 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20922 clobbers);
20923 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20924 clobbers);
20925 return clobbers;
20928 /* Return true if this goes in large data/bss. */
20930 static bool
20931 ix86_in_large_data_p (tree exp)
20933 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20934 return false;
20936 /* Functions are never large data. */
20937 if (TREE_CODE (exp) == FUNCTION_DECL)
20938 return false;
20940 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20942 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20943 if (strcmp (section, ".ldata") == 0
20944 || strcmp (section, ".lbss") == 0)
20945 return true;
20946 return false;
20948 else
20950 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20952 /* If this is an incomplete type with size 0, then we can't put it
20953 in data because it might be too big when completed. */
20954 if (!size || size > ix86_section_threshold)
20955 return true;
20958 return false;
20960 static void
20961 ix86_encode_section_info (tree decl, rtx rtl, int first)
20963 default_encode_section_info (decl, rtl, first);
20965 if (TREE_CODE (decl) == VAR_DECL
20966 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20967 && ix86_in_large_data_p (decl))
20968 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20971 /* Worker function for REVERSE_CONDITION. */
20973 enum rtx_code
20974 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20976 return (mode != CCFPmode && mode != CCFPUmode
20977 ? reverse_condition (code)
20978 : reverse_condition_maybe_unordered (code));
20981 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20982 to OPERANDS[0]. */
20984 const char *
20985 output_387_reg_move (rtx insn, rtx *operands)
20987 if (REG_P (operands[1])
20988 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20990 if (REGNO (operands[0]) == FIRST_STACK_REG)
20991 return output_387_ffreep (operands, 0);
20992 return "fstp\t%y0";
20994 if (STACK_TOP_P (operands[0]))
20995 return "fld%z1\t%y1";
20996 return "fst\t%y0";
20999 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
21000 the FP status register is set. */
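/* Sketch of the two shapes emitted below (pseudo-C, illustrative only):
   the x87 status word is fetched with fnstsw into a register; with
   TARGET_USE_SAHF its high byte is copied into EFLAGS (C2 lands in the
   parity flag) and the branch tests the unordered condition, otherwise
   the C2 bit is tested directly:

     unsigned short sw = fnstsw ();
     if (sw & 0x0400)
       goto label;
*/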
21002 void
21003 ix86_emit_fp_unordered_jump (rtx label)
21005 rtx reg = gen_reg_rtx (HImode);
21006 rtx temp;
21008 emit_insn (gen_x86_fnstsw_1 (reg));
21010 if (TARGET_USE_SAHF)
21012 emit_insn (gen_x86_sahf_1 (reg));
21014 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21015 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21017 else
21019 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21021 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21022 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21025 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21026 gen_rtx_LABEL_REF (VOIDmode, label),
21027 pc_rtx);
21028 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21029 emit_jump_insn (temp);
21032 /* Output code to perform a log1p XFmode calculation. */
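/* The magic constant below is 1 - sqrt(2)/2, the largest |x| for which the
   x87 fyl2xp1 instruction is specified.  Sketch of the emitted sequence in
   pseudo-C (ln2 is loaded with fldln2; log2_1p is only a name for what
   fyl2xp1 computes):

     if (fabs (x) < 1 - sqrt (2) / 2)
       result = ln2 * log2_1p (x);
     else
       result = ln2 * log2 (1.0 + x);

   Either way the result is the natural logarithm ln (1 + x); the first
   branch avoids the cancellation error of forming 1 + x for small x.  */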
21034 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21036 rtx label1 = gen_label_rtx ();
21037 rtx label2 = gen_label_rtx ();
21039 rtx tmp = gen_reg_rtx (XFmode);
21040 rtx tmp2 = gen_reg_rtx (XFmode);
21042 emit_insn (gen_absxf2 (tmp, op1));
21043 emit_insn (gen_cmpxf (tmp,
21044 CONST_DOUBLE_FROM_REAL_VALUE (
21045 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21046 XFmode)));
21047 emit_jump_insn (gen_bge (label1));
21049 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21050 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21051 emit_jump (label2);
21053 emit_label (label1);
21054 emit_move_insn (tmp, CONST1_RTX (XFmode));
21055 emit_insn (gen_addxf3 (tmp, op1, tmp));
21056 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21057 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21059 emit_label (label2);
21062 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21064 static void
21065 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21066 tree decl)
21068 /* With Binutils 2.15, the "@unwind" marker must be specified on
21069 every occurrence of the ".eh_frame" section, not just the first
21070 one. */
21071 if (TARGET_64BIT
21072 && strcmp (name, ".eh_frame") == 0)
21074 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21075 flags & SECTION_WRITE ? "aw" : "a");
21076 return;
21078 default_elf_asm_named_section (name, flags, decl);
21081 /* Return the mangling of TYPE if it is an extended fundamental type. */
21083 static const char *
21084 ix86_mangle_fundamental_type (tree type)
21086 switch (TYPE_MODE (type))
21088 case TFmode:
21089 /* __float128 is "g". */
21090 return "g";
21091 case XFmode:
21092 /* "long double" or __float80 is "e". */
21093 return "e";
21094 default:
21095 return NULL;
21099 /* For 32-bit code we can save the PIC register setup by using the
21100 __stack_chk_fail_local hidden function instead of calling
21101 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21102 register, so it is better to call __stack_chk_fail directly. */
21104 static tree
21105 ix86_stack_protect_fail (void)
21107 return TARGET_64BIT
21108 ? default_external_stack_protect_fail ()
21109 : default_hidden_stack_protect_fail ();
21112 /* Select a format to encode pointers in exception handling data. CODE
21113 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21114 true if the symbol may be affected by dynamic relocations.
21116 ??? All x86 object file formats are capable of representing this.
21117 After all, the relocation needed is the same as for the call insn.
21118 Whether or not a particular assembler allows us to enter such, I
21119 guess we'll have to see. */
21120 int
21121 asm_preferred_eh_data_format (int code, int global)
21123 if (flag_pic)
21125 int type = DW_EH_PE_sdata8;
21126 if (!TARGET_64BIT
21127 || ix86_cmodel == CM_SMALL_PIC
21128 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21129 type = DW_EH_PE_sdata4;
21130 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21132 if (ix86_cmodel == CM_SMALL
21133 || (ix86_cmodel == CM_MEDIUM && code))
21134 return DW_EH_PE_udata4;
21135 return DW_EH_PE_absptr;
21138 /* Expand copysign from SIGN to the positive value ABS_VALUE
21139 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21140 the sign-bit. */
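/* Bit-level sketch of what the expansion below computes, assuming
   ABS_VALUE already has its sign bit clear (illustrative C only):

     result = abs_value | (sign & SIGNBIT_MASK);

   i.e. the sign bit of SIGN is copied onto the positive ABS_VALUE.  When
   no MASK is supplied a fresh sign-bit mask is built here; the inverted
   mask produced by ix86_expand_sse_fabs is complemented back before
   use.  */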
21141 static void
21142 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21144 enum machine_mode mode = GET_MODE (sign);
21145 rtx sgn = gen_reg_rtx (mode);
21146 if (mask == NULL_RTX)
21148 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21149 if (!VECTOR_MODE_P (mode))
21151 /* We need to generate a scalar mode mask in this case. */
21152 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21153 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21154 mask = gen_reg_rtx (mode);
21155 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21158 else
21159 mask = gen_rtx_NOT (mode, mask);
21160 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21161 gen_rtx_AND (mode, mask, sign)));
21162 emit_insn (gen_rtx_SET (VOIDmode, result,
21163 gen_rtx_IOR (mode, abs_value, sgn)));
21166 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21167 mask for masking out the sign-bit is stored in *SMASK, if that is
21168 non-null. */
21169 static rtx
21170 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21172 enum machine_mode mode = GET_MODE (op0);
21173 rtx xa, mask;
21175 xa = gen_reg_rtx (mode);
21176 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21177 if (!VECTOR_MODE_P (mode))
21179 /* We need to generate a scalar mode mask in this case. */
21180 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21181 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21182 mask = gen_reg_rtx (mode);
21183 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21185 emit_insn (gen_rtx_SET (VOIDmode, xa,
21186 gen_rtx_AND (mode, op0, mask)));
21188 if (smask)
21189 *smask = mask;
21191 return xa;
21194 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21195 swapping the operands if SWAP_OPERANDS is true. The expanded
21196 code is a forward jump to a newly created label in case the
21197 comparison is true. The generated label rtx is returned. */
21198 static rtx
21199 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21200 bool swap_operands)
21202 rtx label, tmp;
21204 if (swap_operands)
21206 tmp = op0;
21207 op0 = op1;
21208 op1 = tmp;
21211 label = gen_label_rtx ();
21212 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21213 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21214 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21215 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21216 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21217 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21218 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21219 JUMP_LABEL (tmp) = label;
21221 return label;
21224 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21225 using comparison code CODE. Operands are swapped for the comparison if
21226 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21227 static rtx
21228 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21229 bool swap_operands)
21231 enum machine_mode mode = GET_MODE (op0);
21232 rtx mask = gen_reg_rtx (mode);
21234 if (swap_operands)
21236 rtx tmp = op0;
21237 op0 = op1;
21238 op1 = tmp;
21241 if (mode == DFmode)
21242 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21243 gen_rtx_fmt_ee (code, mode, op0, op1)));
21244 else
21245 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21246 gen_rtx_fmt_ee (code, mode, op0, op1)));
21248 return mask;
21251 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21252 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
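/* TWO52 is 2^52 for DFmode (52 mantissa bits) and 2^23 for SFmode.  The
   rounding helpers below all rely on the same trick, sketched here in C
   for 0 <= xa < TWO52:

     xa = (xa + TWO52) - TWO52;

   which leaves xa rounded to an integer in the current rounding mode
   (normally round-to-nearest), because adding TWO52 pushes the fractional
   bits out of the mantissa.  Values with |x| >= TWO52 are already integral
   and are handled by the early-exit comparison against TWO52.  */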
21253 static rtx
21254 ix86_gen_TWO52 (enum machine_mode mode)
21256 REAL_VALUE_TYPE TWO52r;
21257 rtx TWO52;
21259 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21260 TWO52 = const_double_from_real_value (TWO52r, mode);
21261 TWO52 = force_reg (mode, TWO52);
21263 return TWO52;
21266 /* Expand SSE sequence for computing lround from OP1 storing
21267 into OP0. */
21268 void
21269 ix86_expand_lround (rtx op0, rtx op1)
21271 /* C code for the stuff we're doing below:
21272 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21273 return (long)tmp;
21275 enum machine_mode mode = GET_MODE (op1);
21276 const struct real_format *fmt;
21277 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21278 rtx adj;
21280 /* load nextafter (0.5, 0.0) */
21281 fmt = REAL_MODE_FORMAT (mode);
21282 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21283 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21285 /* adj = copysign (0.5, op1) */
21286 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21287 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21289 /* adj = op1 + adj */
21290 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21292 /* op0 = (imode)adj */
21293 expand_fix (op0, adj, 0);
21296 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
21297 into OP0. */
21298 void
21299 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21301 /* C code for the stuff we're doing below (for do_floor):
21302 xi = (long)op1;
21303 xi -= (double)xi > op1 ? 1 : 0;
21304 return xi;
21306 enum machine_mode fmode = GET_MODE (op1);
21307 enum machine_mode imode = GET_MODE (op0);
21308 rtx ireg, freg, label, tmp;
21310 /* reg = (long)op1 */
21311 ireg = gen_reg_rtx (imode);
21312 expand_fix (ireg, op1, 0);
21314 /* freg = (double)reg */
21315 freg = gen_reg_rtx (fmode);
21316 expand_float (freg, ireg, 0);
21318 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21319 label = ix86_expand_sse_compare_and_jump (UNLE,
21320 freg, op1, !do_floor);
21321 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21322 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21323 emit_move_insn (ireg, tmp);
21325 emit_label (label);
21326 LABEL_NUSES (label) = 1;
21328 emit_move_insn (op0, ireg);
21331 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21332 result in OPERAND0. */
21333 void
21334 ix86_expand_rint (rtx operand0, rtx operand1)
21336 /* C code for the stuff we're doing below:
21337 xa = fabs (operand1);
21338 if (!isless (xa, 2**52))
21339 return operand1;
21340 xa = xa + 2**52 - 2**52;
21341 return copysign (xa, operand1);
21343 enum machine_mode mode = GET_MODE (operand0);
21344 rtx res, xa, label, TWO52, mask;
21346 res = gen_reg_rtx (mode);
21347 emit_move_insn (res, operand1);
21349 /* xa = abs (operand1) */
21350 xa = ix86_expand_sse_fabs (res, &mask);
21352 /* if (!isless (xa, TWO52)) goto label; */
21353 TWO52 = ix86_gen_TWO52 (mode);
21354 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21356 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21357 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21359 ix86_sse_copysign_to_positive (res, xa, res, mask);
21361 emit_label (label);
21362 LABEL_NUSES (label) = 1;
21364 emit_move_insn (operand0, res);
21367 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21368 into OPERAND0. */
21369 void
21370 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21372 /* C code for the stuff we expand below.
21373 double xa = fabs (x), x2;
21374 if (!isless (xa, TWO52))
21375 return x;
21376 xa = xa + TWO52 - TWO52;
21377 x2 = copysign (xa, x);
21378 Compensate. Floor:
21379 if (x2 > x)
21380 x2 -= 1;
21381 Compensate. Ceil:
21382 if (x2 < x)
21383 x2 -= -1;
21384 return x2;
21386 enum machine_mode mode = GET_MODE (operand0);
21387 rtx xa, TWO52, tmp, label, one, res, mask;
21389 TWO52 = ix86_gen_TWO52 (mode);
21391 /* Temporary for holding the result, initialized to the input
21392 operand to ease control flow. */
21393 res = gen_reg_rtx (mode);
21394 emit_move_insn (res, operand1);
21396 /* xa = abs (operand1) */
21397 xa = ix86_expand_sse_fabs (res, &mask);
21399 /* if (!isless (xa, TWO52)) goto label; */
21400 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21402 /* xa = xa + TWO52 - TWO52; */
21403 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21404 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21406 /* xa = copysign (xa, operand1) */
21407 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21409 /* generate 1.0 or -1.0 */
21410 one = force_reg (mode,
21411 const_double_from_real_value (do_floor
21412 ? dconst1 : dconstm1, mode));
21414 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21415 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21416 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21417 gen_rtx_AND (mode, one, tmp)));
21418 /* We always need to subtract here to preserve signed zero. */
21419 tmp = expand_simple_binop (mode, MINUS,
21420 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21421 emit_move_insn (res, tmp);
21423 emit_label (label);
21424 LABEL_NUSES (label) = 1;
21426 emit_move_insn (operand0, res);
21429 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21430 into OPERAND0. */
21431 void
21432 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21434 /* C code for the stuff we expand below.
21435 double xa = fabs (x), x2;
21436 if (!isless (xa, TWO52))
21437 return x;
21438 x2 = (double)(long)x;
21439 Compensate. Floor:
21440 if (x2 > x)
21441 x2 -= 1;
21442 Compensate. Ceil:
21443 if (x2 < x)
21444 x2 += 1;
21445 if (HONOR_SIGNED_ZEROS (mode))
21446 return copysign (x2, x);
21447 return x2;
21449 enum machine_mode mode = GET_MODE (operand0);
21450 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21452 TWO52 = ix86_gen_TWO52 (mode);
21454 /* Temporary for holding the result, initialized to the input
21455 operand to ease control flow. */
21456 res = gen_reg_rtx (mode);
21457 emit_move_insn (res, operand1);
21459 /* xa = abs (operand1) */
21460 xa = ix86_expand_sse_fabs (res, &mask);
21462 /* if (!isless (xa, TWO52)) goto label; */
21463 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21465 /* xa = (double)(long)x */
21466 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21467 expand_fix (xi, res, 0);
21468 expand_float (xa, xi, 0);
21470 /* generate 1.0 */
21471 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21473 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21474 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21475 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21476 gen_rtx_AND (mode, one, tmp)));
21477 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21478 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21479 emit_move_insn (res, tmp);
21481 if (HONOR_SIGNED_ZEROS (mode))
21482 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21484 emit_label (label);
21485 LABEL_NUSES (label) = 1;
21487 emit_move_insn (operand0, res);
21490 /* Expand SSE sequence for computing round from OPERAND1 storing
21491 into OPERAND0. Sequence that works without relying on DImode truncation
21492 via cvttsd2siq, which is only available on 64-bit targets. */
21493 void
21494 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21496 /* C code for the stuff we expand below.
21497 double xa = fabs (x), xa2, x2;
21498 if (!isless (xa, TWO52))
21499 return x;
21500 Using the absolute value and copying back sign makes
21501 -0.0 -> -0.0 correct.
21502 xa2 = xa + TWO52 - TWO52;
21503 Compensate.
21504 dxa = xa2 - xa;
21505 if (dxa <= -0.5)
21506 xa2 += 1;
21507 else if (dxa > 0.5)
21508 xa2 -= 1;
21509 x2 = copysign (xa2, x);
21510 return x2;
21512 enum machine_mode mode = GET_MODE (operand0);
21513 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21515 TWO52 = ix86_gen_TWO52 (mode);
21517 /* Temporary for holding the result, initialized to the input
21518 operand to ease control flow. */
21519 res = gen_reg_rtx (mode);
21520 emit_move_insn (res, operand1);
21522 /* xa = abs (operand1) */
21523 xa = ix86_expand_sse_fabs (res, &mask);
21525 /* if (!isless (xa, TWO52)) goto label; */
21526 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21528 /* xa2 = xa + TWO52 - TWO52; */
21529 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21530 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21532 /* dxa = xa2 - xa; */
21533 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21535 /* generate 0.5, 1.0 and -0.5 */
21536 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21537 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21538 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21539 0, OPTAB_DIRECT);
21541 /* Compensate. */
21542 tmp = gen_reg_rtx (mode);
21543 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21544 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21545 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21546 gen_rtx_AND (mode, one, tmp)));
21547 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21548 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21549 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21550 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21551 gen_rtx_AND (mode, one, tmp)));
21552 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21554 /* res = copysign (xa2, operand1) */
21555 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21557 emit_label (label);
21558 LABEL_NUSES (label) = 1;
21560 emit_move_insn (operand0, res);
21563 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21564 into OPERAND0. */
21565 void
21566 ix86_expand_trunc (rtx operand0, rtx operand1)
21568 /* C code for SSE variant we expand below.
21569 double xa = fabs (x), x2;
21570 if (!isless (xa, TWO52))
21571 return x;
21572 x2 = (double)(long)x;
21573 if (HONOR_SIGNED_ZEROS (mode))
21574 return copysign (x2, x);
21575 return x2;
21577 enum machine_mode mode = GET_MODE (operand0);
21578 rtx xa, xi, TWO52, label, res, mask;
21580 TWO52 = ix86_gen_TWO52 (mode);
21582 /* Temporary for holding the result, initialized to the input
21583 operand to ease control flow. */
21584 res = gen_reg_rtx (mode);
21585 emit_move_insn (res, operand1);
21587 /* xa = abs (operand1) */
21588 xa = ix86_expand_sse_fabs (res, &mask);
21590 /* if (!isless (xa, TWO52)) goto label; */
21591 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21593 /* x = (double)(long)x */
21594 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21595 expand_fix (xi, res, 0);
21596 expand_float (res, xi, 0);
21598 if (HONOR_SIGNED_ZEROS (mode))
21599 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21601 emit_label (label);
21602 LABEL_NUSES (label) = 1;
21604 emit_move_insn (operand0, res);
21607 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21608 into OPERAND0. */
21609 void
21610 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21612 enum machine_mode mode = GET_MODE (operand0);
21613 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21615 /* C code for SSE variant we expand below.
21616 double xa = fabs (x), x2;
21617 if (!isless (xa, TWO52))
21618 return x;
21619 xa2 = xa + TWO52 - TWO52;
21620 Compensate:
21621 if (xa2 > xa)
21622 xa2 -= 1.0;
21623 x2 = copysign (xa2, x);
21624 return x2;
21627 TWO52 = ix86_gen_TWO52 (mode);
21629 /* Temporary for holding the result, initialized to the input
21630 operand to ease control flow. */
21631 res = gen_reg_rtx (mode);
21632 emit_move_insn (res, operand1);
21634 /* xa = abs (operand1) */
21635 xa = ix86_expand_sse_fabs (res, &smask);
21637 /* if (!isless (xa, TWO52)) goto label; */
21638 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21640 /* res = xa + TWO52 - TWO52; */
21641 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21642 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21643 emit_move_insn (res, tmp);
21645 /* generate 1.0 */
21646 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21648 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21649 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21650 emit_insn (gen_rtx_SET (VOIDmode, mask,
21651 gen_rtx_AND (mode, mask, one)));
21652 tmp = expand_simple_binop (mode, MINUS,
21653 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21654 emit_move_insn (res, tmp);
21656 /* res = copysign (res, operand1) */
21657 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21659 emit_label (label);
21660 LABEL_NUSES (label) = 1;
21662 emit_move_insn (operand0, res);
21665 /* Expand SSE sequence for computing round from OPERAND1 storing
21666 into OPERAND0. */
21667 void
21668 ix86_expand_round (rtx operand0, rtx operand1)
21670 /* C code for the stuff we're doing below:
21671 double xa = fabs (x);
21672 if (!isless (xa, TWO52))
21673 return x;
21674 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21675 return copysign (xa, x);
21677 enum machine_mode mode = GET_MODE (operand0);
21678 rtx res, TWO52, xa, label, xi, half, mask;
21679 const struct real_format *fmt;
21680 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21682 /* Temporary for holding the result, initialized to the input
21683 operand to ease control flow. */
21684 res = gen_reg_rtx (mode);
21685 emit_move_insn (res, operand1);
21687 TWO52 = ix86_gen_TWO52 (mode);
21688 xa = ix86_expand_sse_fabs (res, &mask);
21689 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21691 /* load nextafter (0.5, 0.0) */
21692 fmt = REAL_MODE_FORMAT (mode);
21693 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21694 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21696 /* xa = xa + 0.5 */
21697 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21698 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21700 /* xa = (double)(int64_t)xa */
21701 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21702 expand_fix (xi, xa, 0);
21703 expand_float (xa, xi, 0);
21705 /* res = copysign (xa, operand1) */
21706 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21708 emit_label (label);
21709 LABEL_NUSES (label) = 1;
21711 emit_move_insn (operand0, res);
21714 #include "gt-i386.h"