re PR target/25377 (weakref sibcalled with -fPIC)
[official-gcc.git] / gcc / config / i386 / i386.c
blob 27b42ea7d9c89519aec06e17f98ab17b543e43e8
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
55 #endif
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
63 : 4)
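/* Editorial sketch (not part of the original file): MODE_INDEX picks the
   per-mode slot in the five-entry multiply/divide cost arrays of the active
   cost table pointed to by ix86_cost below.  The field names used here
   (mult_init, divide) are assumptions based on the array comments; the
   authoritative definition of struct processor_costs is in i386.h.

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];  // SI slot, index 2
     int div_cost = ix86_cost->divide[MODE_INDEX (DImode)];     // DI slot, index 3
*/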
65 /* Processor costs (relative to an add) */
66 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
67 #define COSTS_N_BYTES(N) ((N) * 2)
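/* Editorial worked example: with COSTS_N_INSNS (N) == (N) * 4, a one-insn
   operation costs 4 units on the speed scale.  On the size scale an add is
   assumed to be 2 bytes, so COSTS_N_BYTES (2) == 4 and the two scales agree
   on the cost of an add:

     COSTS_N_INSNS (1) == 4   // one average insn, speed tuning
     COSTS_N_BYTES (2) == 4   // a 2-byte add, size tuning (-Os)
*/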
69 static const
70 struct processor_costs size_cost = { /* costs for tuning for size */
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
88 0, /* "large" insn */
89 2, /* MOVE_RATIO */
90 2, /* cost for loading QImode using movzbl */
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {2, 2, 2}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {2, 2, 2}, /* cost of storing fp registers */
99 3, /* cost of moving MMX register */
100 {3, 3}, /* cost of loading MMX registers
101 in SImode and DImode */
102 {3, 3}, /* cost of storing MMX registers
103 in SImode and DImode */
104 3, /* cost of moving SSE register */
105 {3, 3, 3}, /* cost of loading SSE registers
106 in SImode, DImode and TImode */
107 {3, 3, 3}, /* cost of storing SSE registers
108 in SImode, DImode and TImode */
109 3, /* MMX or SSE register to integer */
110 0, /* size of prefetch block */
111 0, /* number of parallel prefetches */
112 2, /* Branch cost */
113 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
114 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
115 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
116 COSTS_N_BYTES (2), /* cost of FABS instruction. */
117 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
118 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
121 /* Processor costs (relative to an add) */
122 static const
123 struct processor_costs i386_cost = { /* 386 specific costs */
124 COSTS_N_INSNS (1), /* cost of an add instruction */
125 COSTS_N_INSNS (1), /* cost of a lea instruction */
126 COSTS_N_INSNS (3), /* variable shift costs */
127 COSTS_N_INSNS (2), /* constant shift costs */
128 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
129 COSTS_N_INSNS (6), /* HI */
130 COSTS_N_INSNS (6), /* SI */
131 COSTS_N_INSNS (6), /* DI */
132 COSTS_N_INSNS (6)}, /* other */
133 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
134 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
135 COSTS_N_INSNS (23), /* HI */
136 COSTS_N_INSNS (23), /* SI */
137 COSTS_N_INSNS (23), /* DI */
138 COSTS_N_INSNS (23)}, /* other */
139 COSTS_N_INSNS (3), /* cost of movsx */
140 COSTS_N_INSNS (2), /* cost of movzx */
141 15, /* "large" insn */
142 3, /* MOVE_RATIO */
143 4, /* cost for loading QImode using movzbl */
144 {2, 4, 2}, /* cost of loading integer registers
145 in QImode, HImode and SImode.
146 Relative to reg-reg move (2). */
147 {2, 4, 2}, /* cost of storing integer registers */
148 2, /* cost of reg,reg fld/fst */
149 {8, 8, 8}, /* cost of loading fp registers
150 in SFmode, DFmode and XFmode */
151 {8, 8, 8}, /* cost of storing fp registers */
152 2, /* cost of moving MMX register */
153 {4, 8}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {4, 8}, /* cost of storing MMX registers
156 in SImode and DImode */
157 2, /* cost of moving SSE register */
158 {4, 8, 16}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {4, 8, 16}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of prefetch block */
164 0, /* number of parallel prefetches */
165 1, /* Branch cost */
166 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
167 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
168 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
169 COSTS_N_INSNS (22), /* cost of FABS instruction. */
170 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
171 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
174 static const
175 struct processor_costs i486_cost = { /* 486 specific costs */
176 COSTS_N_INSNS (1), /* cost of an add instruction */
177 COSTS_N_INSNS (1), /* cost of a lea instruction */
178 COSTS_N_INSNS (3), /* variable shift costs */
179 COSTS_N_INSNS (2), /* constant shift costs */
180 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
181 COSTS_N_INSNS (12), /* HI */
182 COSTS_N_INSNS (12), /* SI */
183 COSTS_N_INSNS (12), /* DI */
184 COSTS_N_INSNS (12)}, /* other */
185 1, /* cost of multiply per each bit set */
186 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
187 COSTS_N_INSNS (40), /* HI */
188 COSTS_N_INSNS (40), /* SI */
189 COSTS_N_INSNS (40), /* DI */
190 COSTS_N_INSNS (40)}, /* other */
191 COSTS_N_INSNS (3), /* cost of movsx */
192 COSTS_N_INSNS (2), /* cost of movzx */
193 15, /* "large" insn */
194 3, /* MOVE_RATIO */
195 4, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {8, 8, 8}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {8, 8, 8}, /* cost of storing fp registers */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 1, /* Branch cost */
218 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
219 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
220 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
221 COSTS_N_INSNS (3), /* cost of FABS instruction. */
222 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
223 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
226 static const
227 struct processor_costs pentium_cost = {
228 COSTS_N_INSNS (1), /* cost of an add instruction */
229 COSTS_N_INSNS (1), /* cost of a lea instruction */
230 COSTS_N_INSNS (4), /* variable shift costs */
231 COSTS_N_INSNS (1), /* constant shift costs */
232 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (11), /* HI */
234 COSTS_N_INSNS (11), /* SI */
235 COSTS_N_INSNS (11), /* DI */
236 COSTS_N_INSNS (11)}, /* other */
237 0, /* cost of multiply per each bit set */
238 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
239 COSTS_N_INSNS (25), /* HI */
240 COSTS_N_INSNS (25), /* SI */
241 COSTS_N_INSNS (25), /* DI */
242 COSTS_N_INSNS (25)}, /* other */
243 COSTS_N_INSNS (3), /* cost of movsx */
244 COSTS_N_INSNS (2), /* cost of movzx */
245 8, /* "large" insn */
246 6, /* MOVE_RATIO */
247 6, /* cost for loading QImode using movzbl */
248 {2, 4, 2}, /* cost of loading integer registers
249 in QImode, HImode and SImode.
250 Relative to reg-reg move (2). */
251 {2, 4, 2}, /* cost of storing integer registers */
252 2, /* cost of reg,reg fld/fst */
253 {2, 2, 6}, /* cost of loading fp registers
254 in SFmode, DFmode and XFmode */
255 {4, 4, 6}, /* cost of storing fp registers */
256 8, /* cost of moving MMX register */
257 {8, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {8, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of prefetch block */
268 0, /* number of parallel prefetches */
269 2, /* Branch cost */
270 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
271 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
272 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
273 COSTS_N_INSNS (1), /* cost of FABS instruction. */
274 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
275 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
278 static const
279 struct processor_costs pentiumpro_cost = {
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (1), /* variable shift costs */
283 COSTS_N_INSNS (1), /* constant shift costs */
284 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (4), /* HI */
286 COSTS_N_INSNS (4), /* SI */
287 COSTS_N_INSNS (4), /* DI */
288 COSTS_N_INSNS (4)}, /* other */
289 0, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (17), /* HI */
292 COSTS_N_INSNS (17), /* SI */
293 COSTS_N_INSNS (17), /* DI */
294 COSTS_N_INSNS (17)}, /* other */
295 COSTS_N_INSNS (1), /* cost of movsx */
296 COSTS_N_INSNS (1), /* cost of movzx */
297 8, /* "large" insn */
298 6, /* MOVE_RATIO */
299 2, /* cost for loading QImode using movzbl */
300 {4, 4, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 2, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {2, 2, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 6}, /* cost of storing fp registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 3, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 6, /* number of parallel prefetches */
321 2, /* Branch cost */
322 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
323 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
324 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
327 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
330 static const
331 struct processor_costs k6_cost = {
332 COSTS_N_INSNS (1), /* cost of an add instruction */
333 COSTS_N_INSNS (2), /* cost of a lea instruction */
334 COSTS_N_INSNS (1), /* variable shift costs */
335 COSTS_N_INSNS (1), /* constant shift costs */
336 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
337 COSTS_N_INSNS (3), /* HI */
338 COSTS_N_INSNS (3), /* SI */
339 COSTS_N_INSNS (3), /* DI */
340 COSTS_N_INSNS (3)}, /* other */
341 0, /* cost of multiply per each bit set */
342 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
343 COSTS_N_INSNS (18), /* HI */
344 COSTS_N_INSNS (18), /* SI */
345 COSTS_N_INSNS (18), /* DI */
346 COSTS_N_INSNS (18)}, /* other */
347 COSTS_N_INSNS (2), /* cost of movsx */
348 COSTS_N_INSNS (2), /* cost of movzx */
349 8, /* "large" insn */
350 4, /* MOVE_RATIO */
351 3, /* cost for loading QImode using movzbl */
352 {4, 5, 4}, /* cost of loading integer registers
353 in QImode, HImode and SImode.
354 Relative to reg-reg move (2). */
355 {2, 3, 2}, /* cost of storing integer registers */
356 4, /* cost of reg,reg fld/fst */
357 {6, 6, 6}, /* cost of loading fp registers
358 in SFmode, DFmode and XFmode */
359 {4, 4, 4}, /* cost of storing fp registers */
360 2, /* cost of moving MMX register */
361 {2, 2}, /* cost of loading MMX registers
362 in SImode and DImode */
363 {2, 2}, /* cost of storing MMX registers
364 in SImode and DImode */
365 2, /* cost of moving SSE register */
366 {2, 2, 8}, /* cost of loading SSE registers
367 in SImode, DImode and TImode */
368 {2, 2, 8}, /* cost of storing SSE registers
369 in SImode, DImode and TImode */
370 6, /* MMX or SSE register to integer */
371 32, /* size of prefetch block */
372 1, /* number of parallel prefetches */
373 1, /* Branch cost */
374 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
375 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
376 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
377 COSTS_N_INSNS (2), /* cost of FABS instruction. */
378 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
379 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
382 static const
383 struct processor_costs athlon_cost = {
384 COSTS_N_INSNS (1), /* cost of an add instruction */
385 COSTS_N_INSNS (2), /* cost of a lea instruction */
386 COSTS_N_INSNS (1), /* variable shift costs */
387 COSTS_N_INSNS (1), /* constant shift costs */
388 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
389 COSTS_N_INSNS (5), /* HI */
390 COSTS_N_INSNS (5), /* SI */
391 COSTS_N_INSNS (5), /* DI */
392 COSTS_N_INSNS (5)}, /* other */
393 0, /* cost of multiply per each bit set */
394 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
395 COSTS_N_INSNS (26), /* HI */
396 COSTS_N_INSNS (42), /* SI */
397 COSTS_N_INSNS (74), /* DI */
398 COSTS_N_INSNS (74)}, /* other */
399 COSTS_N_INSNS (1), /* cost of movsx */
400 COSTS_N_INSNS (1), /* cost of movzx */
401 8, /* "large" insn */
402 9, /* MOVE_RATIO */
403 4, /* cost for loading QImode using movzbl */
404 {3, 4, 3}, /* cost of loading integer registers
405 in QImode, HImode and SImode.
406 Relative to reg-reg move (2). */
407 {3, 4, 3}, /* cost of storing integer registers */
408 4, /* cost of reg,reg fld/fst */
409 {4, 4, 12}, /* cost of loading fp registers
410 in SFmode, DFmode and XFmode */
411 {6, 6, 8}, /* cost of storing fp registers */
412 2, /* cost of moving MMX register */
413 {4, 4}, /* cost of loading MMX registers
414 in SImode and DImode */
415 {4, 4}, /* cost of storing MMX registers
416 in SImode and DImode */
417 2, /* cost of moving SSE register */
418 {4, 4, 6}, /* cost of loading SSE registers
419 in SImode, DImode and TImode */
420 {4, 4, 5}, /* cost of storing SSE registers
421 in SImode, DImode and TImode */
422 5, /* MMX or SSE register to integer */
423 64, /* size of prefetch block */
424 6, /* number of parallel prefetches */
425 5, /* Branch cost */
426 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (2), /* cost of FABS instruction. */
430 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
434 static const
435 struct processor_costs k8_cost = {
436 COSTS_N_INSNS (1), /* cost of an add instruction */
437 COSTS_N_INSNS (2), /* cost of a lea instruction */
438 COSTS_N_INSNS (1), /* variable shift costs */
439 COSTS_N_INSNS (1), /* constant shift costs */
440 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
441 COSTS_N_INSNS (4), /* HI */
442 COSTS_N_INSNS (3), /* SI */
443 COSTS_N_INSNS (4), /* DI */
444 COSTS_N_INSNS (5)}, /* other */
445 0, /* cost of multiply per each bit set */
446 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
447 COSTS_N_INSNS (26), /* HI */
448 COSTS_N_INSNS (42), /* SI */
449 COSTS_N_INSNS (74), /* DI */
450 COSTS_N_INSNS (74)}, /* other */
451 COSTS_N_INSNS (1), /* cost of movsx */
452 COSTS_N_INSNS (1), /* cost of movzx */
453 8, /* "large" insn */
454 9, /* MOVE_RATIO */
455 4, /* cost for loading QImode using movzbl */
456 {3, 4, 3}, /* cost of loading integer registers
457 in QImode, HImode and SImode.
458 Relative to reg-reg move (2). */
459 {3, 4, 3}, /* cost of storing integer registers */
460 4, /* cost of reg,reg fld/fst */
461 {4, 4, 12}, /* cost of loading fp registers
462 in SFmode, DFmode and XFmode */
463 {6, 6, 8}, /* cost of storing fp registers */
464 2, /* cost of moving MMX register */
465 {3, 3}, /* cost of loading MMX registers
466 in SImode and DImode */
467 {4, 4}, /* cost of storing MMX registers
468 in SImode and DImode */
469 2, /* cost of moving SSE register */
470 {4, 3, 6}, /* cost of loading SSE registers
471 in SImode, DImode and TImode */
472 {4, 4, 5}, /* cost of storing SSE registers
473 in SImode, DImode and TImode */
474 5, /* MMX or SSE register to integer */
475 64, /* size of prefetch block */
476 6, /* number of parallel prefetches */
477 5, /* Branch cost */
478 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
479 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
480 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
481 COSTS_N_INSNS (2), /* cost of FABS instruction. */
482 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
483 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
486 static const
487 struct processor_costs pentium4_cost = {
488 COSTS_N_INSNS (1), /* cost of an add instruction */
489 COSTS_N_INSNS (3), /* cost of a lea instruction */
490 COSTS_N_INSNS (4), /* variable shift costs */
491 COSTS_N_INSNS (4), /* constant shift costs */
492 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
493 COSTS_N_INSNS (15), /* HI */
494 COSTS_N_INSNS (15), /* SI */
495 COSTS_N_INSNS (15), /* DI */
496 COSTS_N_INSNS (15)}, /* other */
497 0, /* cost of multiply per each bit set */
498 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
499 COSTS_N_INSNS (56), /* HI */
500 COSTS_N_INSNS (56), /* SI */
501 COSTS_N_INSNS (56), /* DI */
502 COSTS_N_INSNS (56)}, /* other */
503 COSTS_N_INSNS (1), /* cost of movsx */
504 COSTS_N_INSNS (1), /* cost of movzx */
505 16, /* "large" insn */
506 6, /* MOVE_RATIO */
507 2, /* cost for loading QImode using movzbl */
508 {4, 5, 4}, /* cost of loading integer registers
509 in QImode, HImode and SImode.
510 Relative to reg-reg move (2). */
511 {2, 3, 2}, /* cost of storing integer registers */
512 2, /* cost of reg,reg fld/fst */
513 {2, 2, 6}, /* cost of loading fp registers
514 in SFmode, DFmode and XFmode */
515 {4, 4, 6}, /* cost of storing fp registers */
516 2, /* cost of moving MMX register */
517 {2, 2}, /* cost of loading MMX registers
518 in SImode and DImode */
519 {2, 2}, /* cost of storing MMX registers
520 in SImode and DImode */
521 12, /* cost of moving SSE register */
522 {12, 12, 12}, /* cost of loading SSE registers
523 in SImode, DImode and TImode */
524 {2, 2, 8}, /* cost of storing SSE registers
525 in SImode, DImode and TImode */
526 10, /* MMX or SSE register to integer */
527 64, /* size of prefetch block */
528 6, /* number of parallel prefetches */
529 2, /* Branch cost */
530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
538 static const
539 struct processor_costs nocona_cost = {
540 COSTS_N_INSNS (1), /* cost of an add instruction */
541 COSTS_N_INSNS (1), /* cost of a lea instruction */
542 COSTS_N_INSNS (1), /* variable shift costs */
543 COSTS_N_INSNS (1), /* constant shift costs */
544 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
545 COSTS_N_INSNS (10), /* HI */
546 COSTS_N_INSNS (10), /* SI */
547 COSTS_N_INSNS (10), /* DI */
548 COSTS_N_INSNS (10)}, /* other */
549 0, /* cost of multiply per each bit set */
550 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
551 COSTS_N_INSNS (66), /* HI */
552 COSTS_N_INSNS (66), /* SI */
553 COSTS_N_INSNS (66), /* DI */
554 COSTS_N_INSNS (66)}, /* other */
555 COSTS_N_INSNS (1), /* cost of movsx */
556 COSTS_N_INSNS (1), /* cost of movzx */
557 16, /* "large" insn */
558 17, /* MOVE_RATIO */
559 4, /* cost for loading QImode using movzbl */
560 {4, 4, 4}, /* cost of loading integer registers
561 in QImode, HImode and SImode.
562 Relative to reg-reg move (2). */
563 {4, 4, 4}, /* cost of storing integer registers */
564 3, /* cost of reg,reg fld/fst */
565 {12, 12, 12}, /* cost of loading fp registers
566 in SFmode, DFmode and XFmode */
567 {4, 4, 4}, /* cost of storing fp registers */
568 6, /* cost of moving MMX register */
569 {12, 12}, /* cost of loading MMX registers
570 in SImode and DImode */
571 {12, 12}, /* cost of storing MMX registers
572 in SImode and DImode */
573 6, /* cost of moving SSE register */
574 {12, 12, 12}, /* cost of loading SSE registers
575 in SImode, DImode and TImode */
576 {12, 12, 12}, /* cost of storing SSE registers
577 in SImode, DImode and TImode */
578 8, /* MMX or SSE register to integer */
579 128, /* size of prefetch block */
580 8, /* number of parallel prefetches */
581 1, /* Branch cost */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
590 /* Generic64 should produce code tuned for Nocona and K8. */
591 static const
592 struct processor_costs generic64_cost = {
593 COSTS_N_INSNS (1), /* cost of an add instruction */
594 /* On all chips taken into consideration lea is 2 cycles or more. With
595 this cost, however, our current implementation of synth_mult results in
596 the use of unnecessary temporary registers, causing regressions on several
597 SPECfp benchmarks. */
598 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (4), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (4), /* DI */
605 COSTS_N_INSNS (2)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (26), /* HI */
609 COSTS_N_INSNS (42), /* SI */
610 COSTS_N_INSNS (74), /* DI */
611 COSTS_N_INSNS (74)}, /* other */
612 COSTS_N_INSNS (1), /* cost of movsx */
613 COSTS_N_INSNS (1), /* cost of movzx */
614 8, /* "large" insn */
615 17, /* MOVE_RATIO */
616 4, /* cost for loading QImode using movzbl */
617 {4, 4, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {4, 4, 4}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {12, 12, 12}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {6, 6, 8}, /* cost of storing fp registers */
625 2, /* cost of moving MMX register */
626 {8, 8}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {8, 8}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {8, 8, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {8, 8, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
638 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
639 is increased to the perhaps more appropriate value of 5. */
640 3, /* Branch cost */
641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
649 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
650 static const
651 struct processor_costs generic32_cost = {
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
654 COSTS_N_INSNS (1), /* variable shift costs */
655 COSTS_N_INSNS (1), /* constant shift costs */
656 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (4), /* HI */
658 COSTS_N_INSNS (3), /* SI */
659 COSTS_N_INSNS (4), /* DI */
660 COSTS_N_INSNS (2)}, /* other */
661 0, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (26), /* HI */
664 COSTS_N_INSNS (42), /* SI */
665 COSTS_N_INSNS (74), /* DI */
666 COSTS_N_INSNS (74)}, /* other */
667 COSTS_N_INSNS (1), /* cost of movsx */
668 COSTS_N_INSNS (1), /* cost of movzx */
669 8, /* "large" insn */
670 17, /* MOVE_RATIO */
671 4, /* cost for loading QImode using movzbl */
672 {4, 4, 4}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {4, 4, 4}, /* cost of storing integer registers */
676 4, /* cost of reg,reg fld/fst */
677 {12, 12, 12}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {6, 6, 8}, /* cost of storing fp registers */
680 2, /* cost of moving MMX register */
681 {8, 8}, /* cost of loading MMX registers
682 in SImode and DImode */
683 {8, 8}, /* cost of storing MMX registers
684 in SImode and DImode */
685 2, /* cost of moving SSE register */
686 {8, 8, 8}, /* cost of loading SSE registers
687 in SImode, DImode and TImode */
688 {8, 8, 8}, /* cost of storing SSE registers
689 in SImode, DImode and TImode */
690 5, /* MMX or SSE register to integer */
691 64, /* size of prefetch block */
692 6, /* number of parallel prefetches */
693 3, /* Branch cost */
694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
702 const struct processor_costs *ix86_cost = &pentium_cost;
704 /* Processor feature/optimization bitmasks. */
705 #define m_386 (1<<PROCESSOR_I386)
706 #define m_486 (1<<PROCESSOR_I486)
707 #define m_PENT (1<<PROCESSOR_PENTIUM)
708 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
709 #define m_K6 (1<<PROCESSOR_K6)
710 #define m_ATHLON (1<<PROCESSOR_ATHLON)
711 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
712 #define m_K8 (1<<PROCESSOR_K8)
713 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
714 #define m_NOCONA (1<<PROCESSOR_NOCONA)
715 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
716 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
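/* Editorial sketch of how these bitmasks are consumed (assumed from the
   corresponding TARGET_* macros in i386.h, e.g. TARGET_USE_LEAVE): each
   x86_* tuning flag below is a mask over processors and is tested against
   the bit of the processor currently being tuned for:

     if (x86_use_leave & (1 << ix86_tune))
       ...  // the tuning target prefers the "leave" instruction
*/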
719 /* Generic instruction choice should be a common subset of the supported CPUs
720 (PPro/PENT4/NOCONA/Athlon/K8). */
722 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
723 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
724 generic because it does not work well with PPro based chips. */
725 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
726 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
727 const int x86_zero_extend_with_and = m_486 | m_PENT;
728 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
729 const int x86_double_with_add = ~m_386;
730 const int x86_use_bit_test = m_386;
731 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
732 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
733 const int x86_fisttp = m_NOCONA;
734 const int x86_3dnow_a = m_ATHLON_K8;
735 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
736 /* Branch hints were put in P4 based on simulation results. But
737 after P4 was made, no performance benefit was observed with
738 branch hints; they also increase the code size. As a result,
739 icc never generates branch hints. */
740 const int x86_branch_hints = 0;
741 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
742 /* We probably ought to watch for partial register stalls on the Generic32
743 compilation setting as well. However, in the current implementation the
744 partial register stalls are not eliminated very well - they can
745 be introduced via subregs synthesized by combine and can happen
746 in caller/callee saving sequences.
747 Because this option pays back little on PPro based chips and is in conflict
748 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
749 to leave it off for generic32 for now. */
750 const int x86_partial_reg_stall = m_PPRO;
751 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
752 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
753 const int x86_use_mov0 = m_K6;
754 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
755 const int x86_read_modify_write = ~m_PENT;
756 const int x86_read_modify = ~(m_PENT | m_PPRO);
757 const int x86_split_long_moves = m_PPRO;
758 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
759 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
760 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
761 const int x86_qimode_math = ~(0);
762 const int x86_promote_qi_regs = 0;
763 /* On PPro this flag is meant to avoid partial register stalls. Just like
764 x86_partial_reg_stall, this option might be considered for Generic32
765 if our scheme for avoiding partial stalls were more effective. */
766 const int x86_himode_math = ~(m_PPRO);
767 const int x86_promote_hi_regs = m_PPRO;
768 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
769 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
770 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
771 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
772 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
773 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
774 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
775 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
776 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
777 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
778 const int x86_shift1 = ~m_486;
779 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
780 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
781 that treat 128bit SSE registers as single units and K8 based chips that
782 divide SSE registers into two 64bit halves.
783 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
784 to allow register renaming on 128bit SSE units, but usually results in one
785 extra microop on 64bit SSE units. Experimental results show that disabling
786 this option on P4 brings over a 20% SPECfp regression, while enabling it on
787 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
788 of moves. */
789 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 /* Set for machines where the type and dependencies are resolved on SSE
791 register parts instead of whole registers, so we may maintain just the
792 lower part of scalar values in the proper format, leaving the upper part
793 undefined. */
794 const int x86_sse_split_regs = m_ATHLON_K8;
795 const int x86_sse_typeless_stores = m_ATHLON_K8;
796 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
797 const int x86_use_ffreep = m_ATHLON_K8;
798 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
799 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
801 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
802 integer data in xmm registers, which results in pretty abysmal code. */
803 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
805 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
806 /* Some CPU cores are not able to predict more than 4 branch instructions in
807 the 16 byte window. */
808 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
809 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
810 const int x86_use_bt = m_ATHLON_K8;
811 /* Compare and exchange was added for 80486. */
812 const int x86_cmpxchg = ~m_386;
813 /* Compare and exchange 8 bytes was added for pentium. */
814 const int x86_cmpxchg8b = ~(m_386 | m_486);
815 /* Compare and exchange 16 bytes was added for nocona. */
816 const int x86_cmpxchg16b = m_NOCONA;
817 /* Exchange and add was added for 80486. */
818 const int x86_xadd = ~m_386;
819 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
821 /* In case the average insn count for a single function invocation is
822 lower than this constant, emit fast (but longer) prologue and
823 epilogue code. */
824 #define FAST_PROLOGUE_INSN_COUNT 20
826 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
827 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
828 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
829 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
831 /* Array of the smallest class containing reg number REGNO, indexed by
832 REGNO. Used by REGNO_REG_CLASS in i386.h. */
834 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
836 /* ax, dx, cx, bx */
837 AREG, DREG, CREG, BREG,
838 /* si, di, bp, sp */
839 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
840 /* FP registers */
841 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
842 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
843 /* arg pointer */
844 NON_Q_REGS,
845 /* flags, fpsr, dirflag, frame */
846 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
847 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
848 SSE_REGS, SSE_REGS,
849 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
850 MMX_REGS, MMX_REGS,
851 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
852 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
853 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
854 SSE_REGS, SSE_REGS,
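/* Editorial note: REGNO_REG_CLASS in i386.h indexes this array directly,
   roughly "#define REGNO_REG_CLASS(R) (regclass_map[(R)])", so for example
   regno 0 (%eax) maps to AREG and regno 7 (%esp) maps to NON_Q_REGS above. */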
857 /* The "default" register map used in 32bit mode. */
859 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
861 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
862 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
863 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
864 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
865 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
866 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
867 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
870 static int const x86_64_int_parameter_registers[6] =
872 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
873 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
876 static int const x86_64_int_return_registers[4] =
878 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
881 /* The "default" register map used in 64bit mode. */
882 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
884 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
885 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
886 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
887 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
888 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
889 8,9,10,11,12,13,14,15, /* extended integer registers */
890 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
893 /* Define the register numbers to be used in Dwarf debugging information.
894 The SVR4 reference port C compiler uses the following register numbers
895 in its Dwarf output code:
896 0 for %eax (gcc regno = 0)
897 1 for %ecx (gcc regno = 2)
898 2 for %edx (gcc regno = 1)
899 3 for %ebx (gcc regno = 3)
900 4 for %esp (gcc regno = 7)
901 5 for %ebp (gcc regno = 6)
902 6 for %esi (gcc regno = 4)
903 7 for %edi (gcc regno = 5)
904 The following three DWARF register numbers are never generated by
905 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
906 believes these numbers have these meanings.
907 8 for %eip (no gcc equivalent)
908 9 for %eflags (gcc regno = 17)
909 10 for %trapno (no gcc equivalent)
910 It is not at all clear how we should number the FP stack registers
911 for the x86 architecture. If the version of SDB on x86/svr4 were
912 a bit less brain dead with respect to floating-point then we would
913 have a precedent to follow with respect to DWARF register numbers
914 for x86 FP registers, but the SDB on x86/svr4 is so completely
915 broken with respect to FP registers that it is hardly worth thinking
916 of it as something to strive for compatibility with.
917 The version of x86/svr4 SDB I have at the moment does (partially)
918 seem to believe that DWARF register number 11 is associated with
919 the x86 register %st(0), but that's about all. Higher DWARF
920 register numbers don't seem to be associated with anything in
921 particular, and even for DWARF regno 11, SDB only seems to under-
922 stand that it should say that a variable lives in %st(0) (when
923 asked via an `=' command) if we said it was in DWARF regno 11,
924 but SDB still prints garbage when asked for the value of the
925 variable in question (via a `/' command).
926 (Also note that the labels SDB prints for various FP stack regs
927 when doing an `x' command are all wrong.)
928 Note that these problems generally don't affect the native SVR4
929 C compiler because it doesn't allow the use of -O with -g and
930 because when it is *not* optimizing, it allocates a memory
931 location for each floating-point variable, and the memory
932 location is what gets described in the DWARF AT_location
933 attribute for the variable in question.
934 Regardless of the severe mental illness of the x86/svr4 SDB, we
935 do something sensible here and we use the following DWARF
936 register numbers. Note that these are all stack-top-relative
937 numbers.
938 11 for %st(0) (gcc regno = 8)
939 12 for %st(1) (gcc regno = 9)
940 13 for %st(2) (gcc regno = 10)
941 14 for %st(3) (gcc regno = 11)
942 15 for %st(4) (gcc regno = 12)
943 16 for %st(5) (gcc regno = 13)
944 17 for %st(6) (gcc regno = 14)
945 18 for %st(7) (gcc regno = 15)
947 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
949 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
950 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
951 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
952 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
953 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
954 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
955 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
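/* Editorial worked example: the maps are indexed by gcc regno and yield the
   DWARF number, so svr4_dbx_register_map[7] == 4 (%esp) and
   svr4_dbx_register_map[8] == 11 (%st(0)), matching the numbering described
   in the comment above. */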
958 /* Test and compare insns in i386.md store the information needed to
959 generate branch and scc insns here. */
961 rtx ix86_compare_op0 = NULL_RTX;
962 rtx ix86_compare_op1 = NULL_RTX;
963 rtx ix86_compare_emitted = NULL_RTX;
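/* Editorial sketch (paraphrased, not verbatim from i386.md): a compare
   expander typically only records its operands in these globals, and a later
   branch or setcc expander consumes them:

     ix86_compare_op0 = operands[0];   // in the compare expander
     ix86_compare_op1 = operands[1];
     ...
     ix86_expand_branch (GT, label);   // later, in the conditional-branch expander
*/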
965 /* Size of the register save area. */
966 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
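/* Editorial worked example: in 64-bit mode REGPARM_MAX is 6 and
   SSE_REGPARM_MAX is 8 (values assumed from i386.h / the psABI), and
   UNITS_PER_WORD is 8, so the save area is 6*8 + 8*16 == 176 bytes -- the
   size of the va_list register save area in the x86-64 psABI. */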
968 /* Define the structure for the machine field in struct function. */
970 struct stack_local_entry GTY(())
972 unsigned short mode;
973 unsigned short n;
974 rtx rtl;
975 struct stack_local_entry *next;
978 /* Structure describing stack frame layout.
979 Stack grows downward:
981 [arguments]
982 <- ARG_POINTER
983 saved pc
985 saved frame pointer if frame_pointer_needed
986 <- HARD_FRAME_POINTER
987 [saved regs]
989 [padding1] \
991 [va_arg registers] (
992 > to_allocate <- FRAME_POINTER
993 [frame] (
995 [padding2] /
997 struct ix86_frame
999 int nregs;
1000 int padding1;
1001 int va_arg_size;
1002 HOST_WIDE_INT frame;
1003 int padding2;
1004 int outgoing_arguments_size;
1005 int red_zone_size;
1007 HOST_WIDE_INT to_allocate;
1008 /* The offsets relative to ARG_POINTER. */
1009 HOST_WIDE_INT frame_pointer_offset;
1010 HOST_WIDE_INT hard_frame_pointer_offset;
1011 HOST_WIDE_INT stack_pointer_offset;
1013 /* When save_regs_using_mov is set, emit prologue using
1014 move instead of push instructions. */
1015 bool save_regs_using_mov;
1018 /* Code model option. */
1019 enum cmodel ix86_cmodel;
1020 /* Asm dialect. */
1021 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1022 /* TLS dialects. */
1023 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1025 /* Which unit we are generating floating point math for. */
1026 enum fpmath_unit ix86_fpmath;
1028 /* Which cpu are we scheduling for. */
1029 enum processor_type ix86_tune;
1030 /* Which instruction set architecture to use. */
1031 enum processor_type ix86_arch;
1033 /* true if sse prefetch instruction is not NOOP. */
1034 int x86_prefetch_sse;
1036 /* ix86_regparm_string as a number */
1037 static int ix86_regparm;
1039 /* Preferred alignment for stack boundary in bits. */
1040 unsigned int ix86_preferred_stack_boundary;
1042 /* Values 1-5: see jump.c */
1043 int ix86_branch_cost;
1045 /* Variables which are this size or smaller are put in the data/bss
1046 or ldata/lbss sections. */
1048 int ix86_section_threshold = 65536;
1050 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1051 char internal_label_prefix[16];
1052 int internal_label_prefix_len;
1054 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1055 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1057 static bool ix86_handle_option (size_t, const char *, int);
1058 static void output_pic_addr_const (FILE *, rtx, int);
1059 static void put_condition_code (enum rtx_code, enum machine_mode,
1060 int, int, FILE *);
1061 static const char *get_some_local_dynamic_name (void);
1062 static int get_some_local_dynamic_name_1 (rtx *, void *);
1063 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1064 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1065 rtx *);
1066 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1067 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1068 enum machine_mode);
1069 static rtx get_thread_pointer (int);
1070 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1071 static void get_pc_thunk_name (char [32], unsigned int);
1072 static rtx gen_push (rtx);
1073 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1074 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1075 static struct machine_function * ix86_init_machine_status (void);
1076 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1077 static int ix86_nsaved_regs (void);
1078 static void ix86_emit_save_regs (void);
1079 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1080 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1081 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1082 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1083 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1084 static rtx ix86_expand_aligntest (rtx, int);
1085 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1086 static int ix86_issue_rate (void);
1087 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1088 static int ia32_multipass_dfa_lookahead (void);
1089 static void ix86_init_mmx_sse_builtins (void);
1090 static void ix86_init_sse_abi_builtins (void);
1091 static rtx x86_this_parameter (tree);
1092 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1093 HOST_WIDE_INT, tree);
1094 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1095 static void x86_file_start (void);
1096 static void ix86_reorg (void);
1097 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1098 static tree ix86_build_builtin_va_list (void);
1099 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1100 tree, int *, int);
1101 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1102 static bool ix86_vector_mode_supported_p (enum machine_mode);
1104 static int ix86_address_cost (rtx);
1105 static bool ix86_cannot_force_const_mem (rtx);
1106 static rtx ix86_delegitimize_address (rtx);
1108 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1110 struct builtin_description;
1111 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1112 tree, rtx);
1113 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1114 tree, rtx);
1115 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1116 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1117 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1118 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1119 static rtx safe_vector_operand (rtx, enum machine_mode);
1120 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1121 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1122 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1123 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1124 static int ix86_fp_comparison_cost (enum rtx_code code);
1125 static unsigned int ix86_select_alt_pic_regnum (void);
1126 static int ix86_save_reg (unsigned int, int);
1127 static void ix86_compute_frame_layout (struct ix86_frame *);
1128 static int ix86_comp_type_attributes (tree, tree);
1129 static int ix86_function_regparm (tree, tree);
1130 const struct attribute_spec ix86_attribute_table[];
1131 static bool ix86_function_ok_for_sibcall (tree, tree);
1132 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1133 static int ix86_value_regno (enum machine_mode, tree, tree);
1134 static bool contains_128bit_aligned_vector_p (tree);
1135 static rtx ix86_struct_value_rtx (tree, int);
1136 static bool ix86_ms_bitfield_layout_p (tree);
1137 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1138 static int extended_reg_mentioned_1 (rtx *, void *);
1139 static bool ix86_rtx_costs (rtx, int, int, int *);
1140 static int min_insn_size (rtx);
1141 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1142 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1143 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1144 tree, bool);
1145 static void ix86_init_builtins (void);
1146 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1147 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1148 static const char *ix86_mangle_fundamental_type (tree);
1149 static tree ix86_stack_protect_fail (void);
1150 static rtx ix86_internal_arg_pointer (void);
1151 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1153 /* This function is only used on Solaris. */
1154 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1155 ATTRIBUTE_UNUSED;
1157 /* Register class used for passing a given 64bit part of the argument.
1158 These represent classes as documented by the PS ABI, with the exception
1159 of the SSESF and SSEDF classes, which are basically the SSE class, except that
1160 gcc will use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1162 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1163 whenever possible (when the upper half contains only padding). */
1165 enum x86_64_reg_class
1167 X86_64_NO_CLASS,
1168 X86_64_INTEGER_CLASS,
1169 X86_64_INTEGERSI_CLASS,
1170 X86_64_SSE_CLASS,
1171 X86_64_SSESF_CLASS,
1172 X86_64_SSEDF_CLASS,
1173 X86_64_SSEUP_CLASS,
1174 X86_64_X87_CLASS,
1175 X86_64_X87UP_CLASS,
1176 X86_64_COMPLEX_X87_CLASS,
1177 X86_64_MEMORY_CLASS
1179 static const char * const x86_64_reg_class_name[] = {
1180 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1181 "sseup", "x87", "x87up", "cplx87", "no"
1184 #define MAX_CLASSES 4
1186 /* Table of constants used by fldpi, fldln2, etc.... */
1187 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1188 static bool ext_80387_constants_init = 0;
1189 static void init_ext_80387_constants (void);
1190 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1191 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1192 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1193 static section *x86_64_elf_select_section (tree decl, int reloc,
1194 unsigned HOST_WIDE_INT align)
1195 ATTRIBUTE_UNUSED;
1197 /* Initialize the GCC target structure. */
1198 #undef TARGET_ATTRIBUTE_TABLE
1199 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1200 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1201 # undef TARGET_MERGE_DECL_ATTRIBUTES
1202 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1203 #endif
1205 #undef TARGET_COMP_TYPE_ATTRIBUTES
1206 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1208 #undef TARGET_INIT_BUILTINS
1209 #define TARGET_INIT_BUILTINS ix86_init_builtins
1210 #undef TARGET_EXPAND_BUILTIN
1211 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1212 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1213 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1215 #undef TARGET_ASM_FUNCTION_EPILOGUE
1216 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1218 #undef TARGET_ENCODE_SECTION_INFO
1219 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1220 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1221 #else
1222 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1223 #endif
1225 #undef TARGET_ASM_OPEN_PAREN
1226 #define TARGET_ASM_OPEN_PAREN ""
1227 #undef TARGET_ASM_CLOSE_PAREN
1228 #define TARGET_ASM_CLOSE_PAREN ""
1230 #undef TARGET_ASM_ALIGNED_HI_OP
1231 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1232 #undef TARGET_ASM_ALIGNED_SI_OP
1233 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1234 #ifdef ASM_QUAD
1235 #undef TARGET_ASM_ALIGNED_DI_OP
1236 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1237 #endif
1239 #undef TARGET_ASM_UNALIGNED_HI_OP
1240 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1241 #undef TARGET_ASM_UNALIGNED_SI_OP
1242 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1243 #undef TARGET_ASM_UNALIGNED_DI_OP
1244 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1246 #undef TARGET_SCHED_ADJUST_COST
1247 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1248 #undef TARGET_SCHED_ISSUE_RATE
1249 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1250 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1251 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1252 ia32_multipass_dfa_lookahead
1254 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1255 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1257 #ifdef HAVE_AS_TLS
1258 #undef TARGET_HAVE_TLS
1259 #define TARGET_HAVE_TLS true
1260 #endif
1261 #undef TARGET_CANNOT_FORCE_CONST_MEM
1262 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1264 #undef TARGET_DELEGITIMIZE_ADDRESS
1265 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1267 #undef TARGET_MS_BITFIELD_LAYOUT_P
1268 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1270 #if TARGET_MACHO
1271 #undef TARGET_BINDS_LOCAL_P
1272 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1273 #endif
1275 #undef TARGET_ASM_OUTPUT_MI_THUNK
1276 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1277 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1278 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1280 #undef TARGET_ASM_FILE_START
1281 #define TARGET_ASM_FILE_START x86_file_start
1283 #undef TARGET_DEFAULT_TARGET_FLAGS
1284 #define TARGET_DEFAULT_TARGET_FLAGS \
1285 (TARGET_DEFAULT \
1286 | TARGET_64BIT_DEFAULT \
1287 | TARGET_SUBTARGET_DEFAULT \
1288 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1290 #undef TARGET_HANDLE_OPTION
1291 #define TARGET_HANDLE_OPTION ix86_handle_option
1293 #undef TARGET_RTX_COSTS
1294 #define TARGET_RTX_COSTS ix86_rtx_costs
1295 #undef TARGET_ADDRESS_COST
1296 #define TARGET_ADDRESS_COST ix86_address_cost
1298 #undef TARGET_FIXED_CONDITION_CODE_REGS
1299 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1300 #undef TARGET_CC_MODES_COMPATIBLE
1301 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1303 #undef TARGET_MACHINE_DEPENDENT_REORG
1304 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1306 #undef TARGET_BUILD_BUILTIN_VA_LIST
1307 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1309 #undef TARGET_MD_ASM_CLOBBERS
1310 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1312 #undef TARGET_PROMOTE_PROTOTYPES
1313 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1314 #undef TARGET_STRUCT_VALUE_RTX
1315 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1316 #undef TARGET_SETUP_INCOMING_VARARGS
1317 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1318 #undef TARGET_MUST_PASS_IN_STACK
1319 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1320 #undef TARGET_PASS_BY_REFERENCE
1321 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1322 #undef TARGET_INTERNAL_ARG_POINTER
1323 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1324 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1325 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1327 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1328 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1330 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1331 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1333 #ifdef HAVE_AS_TLS
1334 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1335 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1336 #endif
1338 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1339 #undef TARGET_INSERT_ATTRIBUTES
1340 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1341 #endif
1343 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1344 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1346 #undef TARGET_STACK_PROTECT_FAIL
1347 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1349 #undef TARGET_FUNCTION_VALUE
1350 #define TARGET_FUNCTION_VALUE ix86_function_value
1352 struct gcc_target targetm = TARGET_INITIALIZER;
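/* Editorial note: TARGET_INITIALIZER (from target-def.h) expands to an
   aggregate initializer built from the TARGET_* macros, so every
   "#undef/#define TARGET_FOO ix86_foo" pair above installs ix86_foo as the
   corresponding hook in this targetm structure. */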
1355 /* The svr4 ABI for the i386 says that records and unions are returned
1356 in memory. */
1357 #ifndef DEFAULT_PCC_STRUCT_RETURN
1358 #define DEFAULT_PCC_STRUCT_RETURN 1
1359 #endif
1361 /* Implement TARGET_HANDLE_OPTION. */
1363 static bool
1364 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1366 switch (code)
1368 case OPT_m3dnow:
1369 if (!value)
1371 target_flags &= ~MASK_3DNOW_A;
1372 target_flags_explicit |= MASK_3DNOW_A;
1374 return true;
1376 case OPT_mmmx:
1377 if (!value)
1379 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1380 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1382 return true;
1384 case OPT_msse:
1385 if (!value)
1387 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1388 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1390 return true;
1392 case OPT_msse2:
1393 if (!value)
1395 target_flags &= ~MASK_SSE3;
1396 target_flags_explicit |= MASK_SSE3;
1398 return true;
1400 default:
1401 return true;
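/* Editorial note (illustrative, not part of the original sources):
   disabling a base ISA here also disables the ISAs layered on it.
   For instance, with a hypothetical command line

       gcc -march=pentium4 -mno-sse foo.c

   the OPT_msse case above clears MASK_SSE2 and MASK_SSE3 and marks
   them explicit, so override_options will not silently re-enable
   them from the -march=pentium4 defaults.  */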
1405 /* Sometimes certain combinations of command options do not make
1406 sense on a particular target machine. You can define a macro
1407 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1408 defined, is executed once just after all the command options have
1409 been parsed.
1411 Don't use this macro to turn on various extra optimizations for
1412 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1414 void
1415 override_options (void)
1417 int i;
1418 int ix86_tune_defaulted = 0;
1420 /* Comes from final.c -- no real reason to change it. */
1421 #define MAX_CODE_ALIGN 16
1423 static struct ptt
1425 const struct processor_costs *cost; /* Processor costs */
1426 const int target_enable; /* Target flags to enable. */
1427 const int target_disable; /* Target flags to disable. */
1428 const int align_loop; /* Default alignments. */
1429 const int align_loop_max_skip;
1430 const int align_jump;
1431 const int align_jump_max_skip;
1432 const int align_func;
1434 const processor_target_table[PROCESSOR_max] =
1436 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1437 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1438 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1439 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1440 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1441 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1442 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1443 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1444 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1445 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1446 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1449 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1450 static struct pta
1452 const char *const name; /* processor name or nickname. */
1453 const enum processor_type processor;
1454 const enum pta_flags
1456 PTA_SSE = 1,
1457 PTA_SSE2 = 2,
1458 PTA_SSE3 = 4,
1459 PTA_MMX = 8,
1460 PTA_PREFETCH_SSE = 16,
1461 PTA_3DNOW = 32,
1462 PTA_3DNOW_A = 64,
1463 PTA_64BIT = 128
1464 } flags;
1466 const processor_alias_table[] =
1468 {"i386", PROCESSOR_I386, 0},
1469 {"i486", PROCESSOR_I486, 0},
1470 {"i586", PROCESSOR_PENTIUM, 0},
1471 {"pentium", PROCESSOR_PENTIUM, 0},
1472 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1473 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1474 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1475 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1476 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1477 {"i686", PROCESSOR_PENTIUMPRO, 0},
1478 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1479 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1480 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1481 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1482 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1483 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1484 | PTA_MMX | PTA_PREFETCH_SSE},
1485 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1486 | PTA_MMX | PTA_PREFETCH_SSE},
1487 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1488 | PTA_MMX | PTA_PREFETCH_SSE},
1489 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1490 | PTA_MMX | PTA_PREFETCH_SSE},
1491 {"k6", PROCESSOR_K6, PTA_MMX},
1492 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1493 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1494 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1495 | PTA_3DNOW_A},
1496 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1497 | PTA_3DNOW | PTA_3DNOW_A},
1498 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1499 | PTA_3DNOW_A | PTA_SSE},
1500 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1501 | PTA_3DNOW_A | PTA_SSE},
1502 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1503 | PTA_3DNOW_A | PTA_SSE},
1504 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1505 | PTA_SSE | PTA_SSE2 },
1506 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1507 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1508 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1509 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1510 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1511 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1512 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1513 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1514 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1515 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1518 int const pta_size = ARRAY_SIZE (processor_alias_table);
1520 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1521 SUBTARGET_OVERRIDE_OPTIONS;
1522 #endif
1524 /* Set the default values for switches whose default depends on TARGET_64BIT
1525 in case they weren't overwritten by command line options. */
1526 if (TARGET_64BIT)
1528 if (flag_omit_frame_pointer == 2)
1529 flag_omit_frame_pointer = 1;
1530 if (flag_asynchronous_unwind_tables == 2)
1531 flag_asynchronous_unwind_tables = 1;
1532 if (flag_pcc_struct_return == 2)
1533 flag_pcc_struct_return = 0;
1535 else
1537 if (flag_omit_frame_pointer == 2)
1538 flag_omit_frame_pointer = 0;
1539 if (flag_asynchronous_unwind_tables == 2)
1540 flag_asynchronous_unwind_tables = 0;
1541 if (flag_pcc_struct_return == 2)
1542 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1545 /* Need to check -mtune=generic first. */
1546 if (ix86_tune_string)
1548 if (!strcmp (ix86_tune_string, "generic")
1549 || !strcmp (ix86_tune_string, "i686"))
1551 if (TARGET_64BIT)
1552 ix86_tune_string = "generic64";
1553 else
1554 ix86_tune_string = "generic32";
1556 else if (!strncmp (ix86_tune_string, "generic", 7))
1557 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1559 else
1561 if (ix86_arch_string)
1562 ix86_tune_string = ix86_arch_string;
1563 if (!ix86_tune_string)
1565 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1566 ix86_tune_defaulted = 1;
1569 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1570 need to use a sensible tune option. */
1571 if (!strcmp (ix86_tune_string, "generic")
1572 || !strcmp (ix86_tune_string, "x86-64")
1573 || !strcmp (ix86_tune_string, "i686"))
1575 if (TARGET_64BIT)
1576 ix86_tune_string = "generic64";
1577 else
1578 ix86_tune_string = "generic32";
1581 if (!strcmp (ix86_tune_string, "x86-64"))
1582 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1583 "-mtune=generic instead as appropriate.");
1585 if (!ix86_arch_string)
1586 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1587 if (!strcmp (ix86_arch_string, "generic"))
1588 error ("generic CPU can be used only for -mtune= switch");
1589 if (!strncmp (ix86_arch_string, "generic", 7))
1590 error ("bad value (%s) for -march= switch", ix86_arch_string);
1592 if (ix86_cmodel_string != 0)
1594 if (!strcmp (ix86_cmodel_string, "small"))
1595 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1596 else if (!strcmp (ix86_cmodel_string, "medium"))
1597 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1598 else if (flag_pic)
1599 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1600 else if (!strcmp (ix86_cmodel_string, "32"))
1601 ix86_cmodel = CM_32;
1602 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1603 ix86_cmodel = CM_KERNEL;
1604 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1605 ix86_cmodel = CM_LARGE;
1606 else
1607 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1609 else
1611 ix86_cmodel = CM_32;
1612 if (TARGET_64BIT)
1613 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1615 if (ix86_asm_string != 0)
1617 if (! TARGET_MACHO
1618 && !strcmp (ix86_asm_string, "intel"))
1619 ix86_asm_dialect = ASM_INTEL;
1620 else if (!strcmp (ix86_asm_string, "att"))
1621 ix86_asm_dialect = ASM_ATT;
1622 else
1623 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1625 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1626 error ("code model %qs not supported in the %s bit mode",
1627 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1628 if (ix86_cmodel == CM_LARGE)
1629 sorry ("code model %<large%> not supported yet");
1630 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1631 sorry ("%i-bit mode not compiled in",
1632 (target_flags & MASK_64BIT) ? 64 : 32);
1634 for (i = 0; i < pta_size; i++)
1635 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1637 ix86_arch = processor_alias_table[i].processor;
1638 /* Default cpu tuning to the architecture. */
1639 ix86_tune = ix86_arch;
1640 if (processor_alias_table[i].flags & PTA_MMX
1641 && !(target_flags_explicit & MASK_MMX))
1642 target_flags |= MASK_MMX;
1643 if (processor_alias_table[i].flags & PTA_3DNOW
1644 && !(target_flags_explicit & MASK_3DNOW))
1645 target_flags |= MASK_3DNOW;
1646 if (processor_alias_table[i].flags & PTA_3DNOW_A
1647 && !(target_flags_explicit & MASK_3DNOW_A))
1648 target_flags |= MASK_3DNOW_A;
1649 if (processor_alias_table[i].flags & PTA_SSE
1650 && !(target_flags_explicit & MASK_SSE))
1651 target_flags |= MASK_SSE;
1652 if (processor_alias_table[i].flags & PTA_SSE2
1653 && !(target_flags_explicit & MASK_SSE2))
1654 target_flags |= MASK_SSE2;
1655 if (processor_alias_table[i].flags & PTA_SSE3
1656 && !(target_flags_explicit & MASK_SSE3))
1657 target_flags |= MASK_SSE3;
1658 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1659 x86_prefetch_sse = true;
1660 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1661 error ("CPU you selected does not support x86-64 "
1662 "instruction set");
1663 break;
1666 if (i == pta_size)
1667 error ("bad value (%s) for -march= switch", ix86_arch_string);
1669 for (i = 0; i < pta_size; i++)
1670 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1672 ix86_tune = processor_alias_table[i].processor;
1673 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1675 if (ix86_tune_defaulted)
1677 ix86_tune_string = "x86-64";
1678 for (i = 0; i < pta_size; i++)
1679 if (! strcmp (ix86_tune_string,
1680 processor_alias_table[i].name))
1681 break;
1682 ix86_tune = processor_alias_table[i].processor;
1684 else
1685 error ("CPU you selected does not support x86-64 "
1686 "instruction set");
1688 /* Intel CPUs have always interpreted SSE prefetch instructions as
1689 NOPs; so, we can enable SSE prefetch instructions even when
1690 -mtune (rather than -march) points us to a processor that has them.
1691 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1692 higher processors. */
1693 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1694 x86_prefetch_sse = true;
1695 break;
1697 if (i == pta_size)
1698 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1700 if (optimize_size)
1701 ix86_cost = &size_cost;
1702 else
1703 ix86_cost = processor_target_table[ix86_tune].cost;
1704 target_flags |= processor_target_table[ix86_tune].target_enable;
1705 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1707 /* Arrange to set up i386_stack_locals for all functions. */
1708 init_machine_status = ix86_init_machine_status;
1710 /* Validate -mregparm= value. */
1711 if (ix86_regparm_string)
1713 i = atoi (ix86_regparm_string);
1714 if (i < 0 || i > REGPARM_MAX)
1715 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1716 else
1717 ix86_regparm = i;
1719 else
1720 if (TARGET_64BIT)
1721 ix86_regparm = REGPARM_MAX;
1723 /* If the user has provided any of the -malign-* options,
1724 warn and use that value only if -falign-* is not set.
1725 Remove this code in GCC 3.2 or later. */
1726 if (ix86_align_loops_string)
1728 warning (0, "-malign-loops is obsolete, use -falign-loops");
1729 if (align_loops == 0)
1731 i = atoi (ix86_align_loops_string);
1732 if (i < 0 || i > MAX_CODE_ALIGN)
1733 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1734 else
1735 align_loops = 1 << i;
1739 if (ix86_align_jumps_string)
1741 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1742 if (align_jumps == 0)
1744 i = atoi (ix86_align_jumps_string);
1745 if (i < 0 || i > MAX_CODE_ALIGN)
1746 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1747 else
1748 align_jumps = 1 << i;
1752 if (ix86_align_funcs_string)
1754 warning (0, "-malign-functions is obsolete, use -falign-functions");
1755 if (align_functions == 0)
1757 i = atoi (ix86_align_funcs_string);
1758 if (i < 0 || i > MAX_CODE_ALIGN)
1759 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1760 else
1761 align_functions = 1 << i;
1765 /* Default align_* from the processor table. */
1766 if (align_loops == 0)
1768 align_loops = processor_target_table[ix86_tune].align_loop;
1769 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1771 if (align_jumps == 0)
1773 align_jumps = processor_target_table[ix86_tune].align_jump;
1774 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1776 if (align_functions == 0)
1778 align_functions = processor_target_table[ix86_tune].align_func;
1781 /* Validate -mpreferred-stack-boundary= value, or provide default.
1782 The default of 128 bits is for Pentium III's SSE __m128, but we
1783 don't want additional code to keep the stack aligned when
1784 optimizing for code size. */
1785 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1786 ? 128 : 32);
1787 if (ix86_preferred_stack_boundary_string)
1789 i = atoi (ix86_preferred_stack_boundary_string);
1790 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1791 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1792 TARGET_64BIT ? 4 : 2);
1793 else
1794 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1797 /* Validate -mbranch-cost= value, or provide default. */
1798 ix86_branch_cost = ix86_cost->branch_cost;
1799 if (ix86_branch_cost_string)
1801 i = atoi (ix86_branch_cost_string);
1802 if (i < 0 || i > 5)
1803 error ("-mbranch-cost=%d is not between 0 and 5", i);
1804 else
1805 ix86_branch_cost = i;
1807 if (ix86_section_threshold_string)
1809 i = atoi (ix86_section_threshold_string);
1810 if (i < 0)
1811 error ("-mlarge-data-threshold=%d is negative", i);
1812 else
1813 ix86_section_threshold = i;
1816 if (ix86_tls_dialect_string)
1818 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1819 ix86_tls_dialect = TLS_DIALECT_GNU;
1820 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1821 ix86_tls_dialect = TLS_DIALECT_GNU2;
1822 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1823 ix86_tls_dialect = TLS_DIALECT_SUN;
1824 else
1825 error ("bad value (%s) for -mtls-dialect= switch",
1826 ix86_tls_dialect_string);
1829 /* Keep nonleaf frame pointers. */
1830 if (flag_omit_frame_pointer)
1831 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1832 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1833 flag_omit_frame_pointer = 1;
1835 /* If we're doing fast math, we don't care about comparison order
1836 wrt NaNs. This lets us use a shorter comparison sequence. */
1837 if (flag_unsafe_math_optimizations)
1838 target_flags &= ~MASK_IEEE_FP;
1840 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1841 since the insns won't need emulation. */
1842 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1843 target_flags &= ~MASK_NO_FANCY_MATH_387;
1845 /* Likewise, if the target doesn't have a 387, or we've specified
1846 software floating point, don't use 387 inline intrinsics. */
1847 if (!TARGET_80387)
1848 target_flags |= MASK_NO_FANCY_MATH_387;
1850 /* Turn on SSE2 builtins for -msse3. */
1851 if (TARGET_SSE3)
1852 target_flags |= MASK_SSE2;
1854 /* Turn on SSE builtins for -msse2. */
1855 if (TARGET_SSE2)
1856 target_flags |= MASK_SSE;
1858 /* Turn on MMX builtins for -msse. */
1859 if (TARGET_SSE)
1861 target_flags |= MASK_MMX & ~target_flags_explicit;
1862 x86_prefetch_sse = true;
1865 /* Turn on MMX builtins for 3Dnow. */
1866 if (TARGET_3DNOW)
1867 target_flags |= MASK_MMX;
1869 if (TARGET_64BIT)
1871 if (TARGET_ALIGN_DOUBLE)
1872 error ("-malign-double makes no sense in the 64bit mode");
1873 if (TARGET_RTD)
1874 error ("-mrtd calling convention not supported in the 64bit mode");
1876 /* Enable by default the SSE and MMX builtins. Do allow the user to
1877 explicitly disable any of these. In particular, disabling SSE and
1878 MMX for kernel code is extremely useful. */
1879 target_flags
1880 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1881 & ~target_flags_explicit);
1883 else
1885 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
1886 when the programmer takes care to keep the stack from being destroyed.  */
1887 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1888 target_flags |= MASK_NO_RED_ZONE;
1891 /* Accept -msseregparm only if at least SSE support is enabled. */
1892 if (TARGET_SSEREGPARM
1893 && ! TARGET_SSE)
1894 error ("-msseregparm used without SSE enabled");
1896 /* Accept -msselibm only if at least SSE support is enabled. */
1897 if (TARGET_SSELIBM
1898 && ! TARGET_SSE2)
1899 error ("-msselibm used without SSE2 enabled");
1901 /* Ignore -msselibm on 64bit targets. */
1902 if (TARGET_SSELIBM
1903 && TARGET_64BIT)
1904 error ("-msselibm used on a 64bit target");
1906 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1908 if (ix86_fpmath_string != 0)
1910 if (! strcmp (ix86_fpmath_string, "387"))
1911 ix86_fpmath = FPMATH_387;
1912 else if (! strcmp (ix86_fpmath_string, "sse"))
1914 if (!TARGET_SSE)
1916 warning (0, "SSE instruction set disabled, using 387 arithmetic");
1917 ix86_fpmath = FPMATH_387;
1919 else
1920 ix86_fpmath = FPMATH_SSE;
1922 else if (! strcmp (ix86_fpmath_string, "387,sse")
1923 || ! strcmp (ix86_fpmath_string, "sse,387"))
1925 if (!TARGET_SSE)
1927 warning (0, "SSE instruction set disabled, using 387 arithmetic");
1928 ix86_fpmath = FPMATH_387;
1930 else if (!TARGET_80387)
1932 warning (0, "387 instruction set disabled, using SSE arithmetic");
1933 ix86_fpmath = FPMATH_SSE;
1935 else
1936 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1938 else
1939 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1942 /* If the i387 is disabled, then do not return values in it. */
1943 if (!TARGET_80387)
1944 target_flags &= ~MASK_FLOAT_RETURNS;
1946 if ((x86_accumulate_outgoing_args & TUNEMASK)
1947 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1948 && !optimize_size)
1949 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1951 /* ??? Unwind info is not correct around the CFG unless either a frame
1952 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1953 unwind info generation to be aware of the CFG and propagating states
1954 around edges. */
1955 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1956 || flag_exceptions || flag_non_call_exceptions)
1957 && flag_omit_frame_pointer
1958 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1960 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1961 warning (0, "unwind tables currently require either a frame pointer "
1962 "or -maccumulate-outgoing-args for correctness");
1963 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1966 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1968 char *p;
1969 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1970 p = strchr (internal_label_prefix, 'X');
1971 internal_label_prefix_len = p - internal_label_prefix;
1972 *p = '\0';
1975 /* When the scheduling description is not available, disable the scheduler pass
1976 so it won't slow down compilation and make x87 code slower.  */
1977 if (!TARGET_SCHEDULE)
1978 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
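/* Editorial example (illustrative sketch, not part of the original
   sources): with a bare `gcc -m64 foo.c' and no -march/-mtune,
   ix86_arch_string defaults to "x86-64" above, and a configured
   default tuning of "generic", "i686" or "x86-64" is rewritten to
   "generic64", so the effective combination is roughly
   -march=x86-64 -mtune=generic64.  */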
1981 /* Switch to the appropriate section for output of DECL.
1982 DECL is either a `VAR_DECL' node or a constant of some sort.
1983 RELOC indicates whether forming the initial value of DECL requires
1984 link-time relocations. */
1986 static section *
1987 x86_64_elf_select_section (tree decl, int reloc,
1988 unsigned HOST_WIDE_INT align)
1990 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1991 && ix86_in_large_data_p (decl))
1993 const char *sname = NULL;
1994 unsigned int flags = SECTION_WRITE;
1995 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1997 case SECCAT_DATA:
1998 sname = ".ldata";
1999 break;
2000 case SECCAT_DATA_REL:
2001 sname = ".ldata.rel";
2002 break;
2003 case SECCAT_DATA_REL_LOCAL:
2004 sname = ".ldata.rel.local";
2005 break;
2006 case SECCAT_DATA_REL_RO:
2007 sname = ".ldata.rel.ro";
2008 break;
2009 case SECCAT_DATA_REL_RO_LOCAL:
2010 sname = ".ldata.rel.ro.local";
2011 break;
2012 case SECCAT_BSS:
2013 sname = ".lbss";
2014 flags |= SECTION_BSS;
2015 break;
2016 case SECCAT_RODATA:
2017 case SECCAT_RODATA_MERGE_STR:
2018 case SECCAT_RODATA_MERGE_STR_INIT:
2019 case SECCAT_RODATA_MERGE_CONST:
2020 sname = ".lrodata";
2021 flags = 0;
2022 break;
2023 case SECCAT_SRODATA:
2024 case SECCAT_SDATA:
2025 case SECCAT_SBSS:
2026 gcc_unreachable ();
2027 case SECCAT_TEXT:
2028 case SECCAT_TDATA:
2029 case SECCAT_TBSS:
2030 /* We don't split these for the medium model.  Place them into
2031 default sections and hope for the best. */
2032 break;
2034 if (sname)
2036 /* We might get called with string constants, but get_named_section
2037 doesn't like them as they are not DECLs. Also, we need to set
2038 flags in that case. */
2039 if (!DECL_P (decl))
2040 return get_section (sname, flags, NULL);
2041 return get_named_section (decl, sname, reloc);
2044 return default_elf_select_section (decl, reloc, align);
2047 /* Build up a unique section name, expressed as a
2048 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2049 RELOC indicates whether the initial value of EXP requires
2050 link-time relocations. */
2052 static void
2053 x86_64_elf_unique_section (tree decl, int reloc)
2055 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2056 && ix86_in_large_data_p (decl))
2058 const char *prefix = NULL;
2059 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2060 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2062 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2064 case SECCAT_DATA:
2065 case SECCAT_DATA_REL:
2066 case SECCAT_DATA_REL_LOCAL:
2067 case SECCAT_DATA_REL_RO:
2068 case SECCAT_DATA_REL_RO_LOCAL:
2069 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2070 break;
2071 case SECCAT_BSS:
2072 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2073 break;
2074 case SECCAT_RODATA:
2075 case SECCAT_RODATA_MERGE_STR:
2076 case SECCAT_RODATA_MERGE_STR_INIT:
2077 case SECCAT_RODATA_MERGE_CONST:
2078 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2079 break;
2080 case SECCAT_SRODATA:
2081 case SECCAT_SDATA:
2082 case SECCAT_SBSS:
2083 gcc_unreachable ();
2084 case SECCAT_TEXT:
2085 case SECCAT_TDATA:
2086 case SECCAT_TBSS:
2087 /* We don't split these for the medium model.  Place them into
2088 default sections and hope for the best. */
2089 break;
2091 if (prefix)
2093 const char *name;
2094 size_t nlen, plen;
2095 char *string;
2096 plen = strlen (prefix);
2098 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2099 name = targetm.strip_name_encoding (name);
2100 nlen = strlen (name);
2102 string = alloca (nlen + plen + 1);
2103 memcpy (string, prefix, plen);
2104 memcpy (string + plen, name, nlen + 1);
2106 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2107 return;
2110 default_unique_section (decl, reloc);
2113 #ifdef COMMON_ASM_OP
2114 /* This says how to output assembler code to declare an
2115 uninitialized external linkage data object.
2117 For the x86-64 medium model we need to use the .largecomm directive for
2118 large objects. */
2119 void
2120 x86_elf_aligned_common (FILE *file,
2121 const char *name, unsigned HOST_WIDE_INT size,
2122 int align)
2124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2125 && size > (unsigned int)ix86_section_threshold)
2126 fprintf (file, ".largecomm\t");
2127 else
2128 fprintf (file, "%s", COMMON_ASM_OP);
2129 assemble_name (file, name);
2130 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2131 size, align / BITS_PER_UNIT);
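/* Editorial example (illustrative; the object name is hypothetical):
   with -mcmodel=medium -mlarge-data-threshold=4096, an 8192-byte
   uninitialized common object `big' aligned to 32 bytes is emitted as

       .largecomm	big,8192,32

   while smaller objects keep using COMMON_ASM_OP (typically ".comm"
   on ELF targets).  */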
2134 /* Utility function for targets to use in implementing
2135 ASM_OUTPUT_ALIGNED_BSS. */
2137 void
2138 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2139 const char *name, unsigned HOST_WIDE_INT size,
2140 int align)
2142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2143 && size > (unsigned int)ix86_section_threshold)
2144 switch_to_section (get_named_section (decl, ".lbss", 0));
2145 else
2146 switch_to_section (bss_section);
2147 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2148 #ifdef ASM_DECLARE_OBJECT_NAME
2149 last_assemble_variable_decl = decl;
2150 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2151 #else
2152 /* The standard thing is just to output a label for the object. */
2153 ASM_OUTPUT_LABEL (file, name);
2154 #endif /* ASM_DECLARE_OBJECT_NAME */
2155 ASM_OUTPUT_SKIP (file, size ? size : 1);
2157 #endif
2159 void
2160 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2162 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2163 make the problem with not enough registers even worse. */
2164 #ifdef INSN_SCHEDULING
2165 if (level > 1)
2166 flag_schedule_insns = 0;
2167 #endif
2169 if (TARGET_MACHO)
2170 /* The Darwin libraries never set errno, so we might as well
2171 avoid calling them when that's the only reason we would. */
2172 flag_errno_math = 0;
2174 /* The default values of these switches depend on TARGET_64BIT,
2175 which is not known at this moment.  Mark these values with 2 and
2176 let the user override them.  If no command line option
2177 specifies them, we will set the defaults in override_options. */
2178 if (optimize >= 1)
2179 flag_omit_frame_pointer = 2;
2180 flag_pcc_struct_return = 2;
2181 flag_asynchronous_unwind_tables = 2;
2182 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2183 SUBTARGET_OPTIMIZATION_OPTIONS;
2184 #endif
2187 /* Table of valid machine attributes. */
2188 const struct attribute_spec ix86_attribute_table[] =
2190 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2191 /* Stdcall attribute says callee is responsible for popping arguments
2192 if they are not variable. */
2193 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2194 /* Fastcall attribute says callee is responsible for popping arguments
2195 if they are not variable. */
2196 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2197 /* Cdecl attribute says the callee is a normal C declaration */
2198 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2199 /* Regparm attribute specifies how many integer arguments are to be
2200 passed in registers. */
2201 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2202 /* Sseregparm attribute says we are using x86_64 calling conventions
2203 for FP arguments. */
2204 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2205 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2206 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2207 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2208 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2209 #endif
2210 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2211 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2212 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2213 SUBTARGET_ATTRIBUTE_TABLE,
2214 #endif
2215 { NULL, 0, 0, false, false, false, NULL }
2218 /* Decide whether we can make a sibling call to a function. DECL is the
2219 declaration of the function being targeted by the call and EXP is the
2220 CALL_EXPR representing the call. */
2222 static bool
2223 ix86_function_ok_for_sibcall (tree decl, tree exp)
2225 tree func;
2226 rtx a, b;
2228 /* If we are generating position-independent code, we cannot sibcall
2229 optimize any indirect call, or a direct call to a global function,
2230 as the PLT requires %ebx be live. */
2231 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2232 return false;
2234 if (decl)
2235 func = decl;
2236 else
2238 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2239 if (POINTER_TYPE_P (func))
2240 func = TREE_TYPE (func);
2243 /* Check that the return value locations are the same.  For example,
2244 if we are returning floats on the 80387 register stack, we cannot
2245 make a sibcall from a function that doesn't return a float to a
2246 function that does or, conversely, from a function that does return
2247 a float to a function that doesn't; the necessary stack adjustment
2248 would not be executed. This is also the place we notice
2249 differences in the return value ABI. Note that it is ok for one
2250 of the functions to have void return type as long as the return
2251 value of the other is passed in a register. */
2252 a = ix86_function_value (TREE_TYPE (exp), func, false);
2253 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2254 cfun->decl, false);
2255 if (STACK_REG_P (a) || STACK_REG_P (b))
2257 if (!rtx_equal_p (a, b))
2258 return false;
2260 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2262 else if (!rtx_equal_p (a, b))
2263 return false;
2265 /* If this call is indirect, we'll need to be able to use a call-clobbered
2266 register for the address of the target function. Make sure that all
2267 such registers are not used for passing parameters. */
2268 if (!decl && !TARGET_64BIT)
2270 tree type;
2272 /* We're looking at the CALL_EXPR, we need the type of the function. */
2273 type = TREE_OPERAND (exp, 0); /* pointer expression */
2274 type = TREE_TYPE (type); /* pointer type */
2275 type = TREE_TYPE (type); /* function type */
2277 if (ix86_function_regparm (type, NULL) >= 3)
2279 /* ??? Need to count the actual number of registers to be used,
2280 not the possible number of registers. Fix later. */
2281 return false;
2285 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2286 /* Dllimport'd functions are also called indirectly. */
2287 if (decl && DECL_DLLIMPORT_P (decl)
2288 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2289 return false;
2290 #endif
2292 /* If we force-aligned the stack, then sibcalling would unalign the
2293 stack, which may break the called function.  */
2294 if (cfun->machine->force_align_arg_pointer)
2295 return false;
2297 /* Otherwise okay. That also includes certain types of indirect calls. */
2298 return true;
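/* Editorial example (illustrative; the declarations are hypothetical):
   an indirect tail call such as

       typedef void (*fn_t) (int, int, int) __attribute__((regparm(3)));
       void call_it (fn_t fp) { fp (1, 2, 3); }

   is rejected as a sibcall on 32-bit targets by the regparm check
   above: %eax, %edx and %ecx all carry arguments, leaving no
   call-clobbered register free to hold the target address.  */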
2301 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2302 calling convention attributes;
2303 arguments as in struct attribute_spec.handler. */
2305 static tree
2306 ix86_handle_cconv_attribute (tree *node, tree name,
2307 tree args,
2308 int flags ATTRIBUTE_UNUSED,
2309 bool *no_add_attrs)
2311 if (TREE_CODE (*node) != FUNCTION_TYPE
2312 && TREE_CODE (*node) != METHOD_TYPE
2313 && TREE_CODE (*node) != FIELD_DECL
2314 && TREE_CODE (*node) != TYPE_DECL)
2316 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2317 IDENTIFIER_POINTER (name));
2318 *no_add_attrs = true;
2319 return NULL_TREE;
2322 /* Can combine regparm with all attributes but fastcall. */
2323 if (is_attribute_p ("regparm", name))
2325 tree cst;
2327 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2329 error ("fastcall and regparm attributes are not compatible");
2332 cst = TREE_VALUE (args);
2333 if (TREE_CODE (cst) != INTEGER_CST)
2335 warning (OPT_Wattributes,
2336 "%qs attribute requires an integer constant argument",
2337 IDENTIFIER_POINTER (name));
2338 *no_add_attrs = true;
2340 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2342 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2343 IDENTIFIER_POINTER (name), REGPARM_MAX);
2344 *no_add_attrs = true;
2347 return NULL_TREE;
2350 if (TARGET_64BIT)
2352 warning (OPT_Wattributes, "%qs attribute ignored",
2353 IDENTIFIER_POINTER (name));
2354 *no_add_attrs = true;
2355 return NULL_TREE;
2358 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2359 if (is_attribute_p ("fastcall", name))
2361 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2363 error ("fastcall and cdecl attributes are not compatible");
2365 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2367 error ("fastcall and stdcall attributes are not compatible");
2369 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2371 error ("fastcall and regparm attributes are not compatible");
2375 /* Can combine stdcall with fastcall (redundant), regparm and
2376 sseregparm. */
2377 else if (is_attribute_p ("stdcall", name))
2379 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2381 error ("stdcall and cdecl attributes are not compatible");
2383 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2385 error ("stdcall and fastcall attributes are not compatible");
2389 /* Can combine cdecl with regparm and sseregparm. */
2390 else if (is_attribute_p ("cdecl", name))
2392 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2394 error ("stdcall and cdecl attributes are not compatible");
2396 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2398 error ("fastcall and cdecl attributes are not compatible");
2402 /* Can combine sseregparm with all attributes. */
2404 return NULL_TREE;
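/* Editorial example (illustrative; the declaration is hypothetical):

       void __attribute__((fastcall, regparm(2))) f (int, int);

   combines two conventions that both claim argument registers, so the
   checks above report "fastcall and regparm attributes are not
   compatible".  */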
2407 /* Return 0 if the attributes for two types are incompatible, 1 if they
2408 are compatible, and 2 if they are nearly compatible (which causes a
2409 warning to be generated). */
2411 static int
2412 ix86_comp_type_attributes (tree type1, tree type2)
2414 /* Check for mismatch of non-default calling convention. */
2415 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2417 if (TREE_CODE (type1) != FUNCTION_TYPE)
2418 return 1;
2420 /* Check for mismatched fastcall/regparm types. */
2421 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2422 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2423 || (ix86_function_regparm (type1, NULL)
2424 != ix86_function_regparm (type2, NULL)))
2425 return 0;
2427 /* Check for mismatched sseregparm types. */
2428 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2429 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2430 return 0;
2432 /* Check for mismatched return types (cdecl vs stdcall). */
2433 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2434 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2435 return 0;
2437 return 1;
2440 /* Return the regparm value for a function with the indicated TYPE and DECL.
2441 DECL may be NULL when calling function indirectly
2442 or considering a libcall. */
2444 static int
2445 ix86_function_regparm (tree type, tree decl)
2447 tree attr;
2448 int regparm = ix86_regparm;
2449 bool user_convention = false;
2451 if (!TARGET_64BIT)
2453 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2454 if (attr)
2456 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2457 user_convention = true;
2460 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2462 regparm = 2;
2463 user_convention = true;
2466 /* Use register calling convention for local functions when possible. */
2467 if (!TARGET_64BIT && !user_convention && decl
2468 && flag_unit_at_a_time && !profile_flag)
2470 struct cgraph_local_info *i = cgraph_local_info (decl);
2471 if (i && i->local)
2473 int local_regparm, globals = 0, regno;
2475 /* Make sure no regparm register is taken by a global register
2476 variable. */
2477 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2478 if (global_regs[local_regparm])
2479 break;
2480 /* We can't use regparm(3) for nested functions, as these use the
2481 static chain pointer in the third argument.  */
2482 if (local_regparm == 3
2483 && decl_function_context (decl)
2484 && !DECL_NO_STATIC_CHAIN (decl))
2485 local_regparm = 2;
2486 /* Each global register variable increases register pressure,
2487 so the more global register variables there are, the smaller the
2488 benefit of the regparm optimization, unless requested by the user explicitly. */
2489 for (regno = 0; regno < 6; regno++)
2490 if (global_regs[regno])
2491 globals++;
2492 local_regparm
2493 = globals < local_regparm ? local_regparm - globals : 0;
2495 if (local_regparm > regparm)
2496 regparm = local_regparm;
2500 return regparm;
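/* Editorial example (illustrative; the declaration is hypothetical):

       int __attribute__((regparm(3))) add3 (int a, int b, int c);

   makes ix86_function_regparm return 3, so a, b and c are passed in
   %eax, %edx and %ecx instead of on the stack.  Local (static)
   functions may get the same treatment automatically through the
   cgraph_local_info check above when compiling with -funit-at-a-time
   and without profiling.  */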
2503 /* Return 1 or 2, if we can pass up to 8 SFmode (1) and DFmode (2) arguments
2504 in SSE registers for a function with the indicated TYPE and DECL.
2505 DECL may be NULL when calling function indirectly
2506 or considering a libcall. Otherwise return 0. */
2508 static int
2509 ix86_function_sseregparm (tree type, tree decl)
2511 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2512 by the sseregparm attribute. */
2513 if (TARGET_SSEREGPARM
2514 || (type
2515 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2517 if (!TARGET_SSE)
2519 if (decl)
2520 error ("Calling %qD with attribute sseregparm without "
2521 "SSE/SSE2 enabled", decl);
2522 else
2523 error ("Calling %qT with attribute sseregparm without "
2524 "SSE/SSE2 enabled", type);
2525 return 0;
2528 return 2;
2531 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2532 in SSE registers even in 32-bit mode, and not just 3 but up to
2533 8 SSE arguments in registers. */
2534 if (!TARGET_64BIT && decl
2535 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2537 struct cgraph_local_info *i = cgraph_local_info (decl);
2538 if (i && i->local)
2539 return TARGET_SSE2 ? 2 : 1;
2542 return 0;
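/* Editorial example (illustrative; the declaration is hypothetical):
   with -m32 -msse2,

       double __attribute__((sseregparm)) scale (double x);

   makes ix86_function_sseregparm return 2, so x is passed in %xmm0
   rather than on the stack; without SSE enabled the attribute is
   diagnosed above instead.  */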
2545 /* Return true if EAX is live at the start of the function. Used by
2546 ix86_expand_prologue to determine if we need special help before
2547 calling allocate_stack_worker. */
2549 static bool
2550 ix86_eax_live_at_start_p (void)
2552 /* Cheat. Don't bother working forward from ix86_function_regparm
2553 to the function type to whether an actual argument is located in
2554 eax. Instead just look at cfg info, which is still close enough
2555 to correct at this point. This gives false positives for broken
2556 functions that might use uninitialized data that happens to be
2557 allocated in eax, but who cares? */
2558 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2561 /* Value is the number of bytes of arguments automatically
2562 popped when returning from a subroutine call.
2563 FUNDECL is the declaration node of the function (as a tree),
2564 FUNTYPE is the data type of the function (as a tree),
2565 or for a library call it is an identifier node for the subroutine name.
2566 SIZE is the number of bytes of arguments passed on the stack.
2568 On the 80386, the RTD insn may be used to pop them if the number
2569 of args is fixed, but if the number is variable then the caller
2570 must pop them all. RTD can't be used for library calls now
2571 because the library is compiled with the Unix compiler.
2572 Use of RTD is a selectable option, since it is incompatible with
2573 standard Unix calling sequences. If the option is not selected,
2574 the caller must always pop the args.
2576 The attribute stdcall is equivalent to RTD on a per module basis. */
2579 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2581 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2583 /* Cdecl functions override -mrtd, and never pop the stack. */
2584 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2586 /* Stdcall and fastcall functions will pop the stack if they do not
2587 take variable args. */
2588 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2589 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2590 rtd = 1;
2592 if (rtd
2593 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2594 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2595 == void_type_node)))
2596 return size;
2599 /* Lose any fake structure return argument if it is passed on the stack. */
2600 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2601 && !TARGET_64BIT
2602 && !KEEP_AGGREGATE_RETURN_POINTER)
2604 int nregs = ix86_function_regparm (funtype, fundecl);
2606 if (!nregs)
2607 return GET_MODE_SIZE (Pmode);
2610 return 0;
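/* Editorial example (illustrative; the declaration is hypothetical):

       int __attribute__((stdcall)) f (int a, int b);

   has a fixed argument list, so ix86_return_pops_args returns 8 and
   the callee pops its two arguments (e.g. with `ret $8'); a plain
   cdecl function yields 0 and the caller adjusts the stack.  */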
2613 /* Argument support functions. */
2615 /* Return true when register may be used to pass function parameters. */
2616 bool
2617 ix86_function_arg_regno_p (int regno)
2619 int i;
2620 if (!TARGET_64BIT)
2621 return (regno < REGPARM_MAX
2622 || (TARGET_MMX && MMX_REGNO_P (regno)
2623 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2624 || (TARGET_SSE && SSE_REGNO_P (regno)
2625 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2627 if (TARGET_SSE && SSE_REGNO_P (regno)
2628 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2629 return true;
2630 /* RAX is used as hidden argument to va_arg functions. */
2631 if (!regno)
2632 return true;
2633 for (i = 0; i < REGPARM_MAX; i++)
2634 if (regno == x86_64_int_parameter_registers[i])
2635 return true;
2636 return false;
2639 /* Return if we do not know how to pass TYPE solely in registers. */
2641 static bool
2642 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2644 if (must_pass_in_stack_var_size_or_pad (mode, type))
2645 return true;
2647 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2648 The layout_type routine is crafty and tries to trick us into passing
2649 currently unsupported vector types on the stack by using TImode. */
2650 return (!TARGET_64BIT && mode == TImode
2651 && type && TREE_CODE (type) != VECTOR_TYPE);
2654 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2655 for a call to a function whose data type is FNTYPE.
2656 For a library call, FNTYPE is 0. */
2658 void
2659 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2660 tree fntype, /* tree ptr for function decl */
2661 rtx libname, /* SYMBOL_REF of library name or 0 */
2662 tree fndecl)
2664 static CUMULATIVE_ARGS zero_cum;
2665 tree param, next_param;
2667 if (TARGET_DEBUG_ARG)
2669 fprintf (stderr, "\ninit_cumulative_args (");
2670 if (fntype)
2671 fprintf (stderr, "fntype code = %s, ret code = %s",
2672 tree_code_name[(int) TREE_CODE (fntype)],
2673 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2674 else
2675 fprintf (stderr, "no fntype");
2677 if (libname)
2678 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2681 *cum = zero_cum;
2683 /* Set up the number of registers to use for passing arguments. */
2684 cum->nregs = ix86_regparm;
2685 if (TARGET_SSE)
2686 cum->sse_nregs = SSE_REGPARM_MAX;
2687 if (TARGET_MMX)
2688 cum->mmx_nregs = MMX_REGPARM_MAX;
2689 cum->warn_sse = true;
2690 cum->warn_mmx = true;
2691 cum->maybe_vaarg = false;
2693 /* Use ecx and edx registers if function has fastcall attribute,
2694 else look for regparm information. */
2695 if (fntype && !TARGET_64BIT)
2697 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2699 cum->nregs = 2;
2700 cum->fastcall = 1;
2702 else
2703 cum->nregs = ix86_function_regparm (fntype, fndecl);
2706 /* Set up the number of SSE registers used for passing SFmode
2707 and DFmode arguments. Warn for mismatching ABI. */
2708 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2710 /* Determine if this function has variable arguments. This is
2711 indicated by the last argument being 'void_type_node' if there
2712 are no variable arguments. If there are variable arguments, then
2713 we won't pass anything in registers in 32-bit mode. */
2715 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2717 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2718 param != 0; param = next_param)
2720 next_param = TREE_CHAIN (param);
2721 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2723 if (!TARGET_64BIT)
2725 cum->nregs = 0;
2726 cum->sse_nregs = 0;
2727 cum->mmx_nregs = 0;
2728 cum->warn_sse = 0;
2729 cum->warn_mmx = 0;
2730 cum->fastcall = 0;
2731 cum->float_in_sse = 0;
2733 cum->maybe_vaarg = true;
2737 if ((!fntype && !libname)
2738 || (fntype && !TYPE_ARG_TYPES (fntype)))
2739 cum->maybe_vaarg = true;
2741 if (TARGET_DEBUG_ARG)
2742 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2744 return;
2747 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2748 But in the case of vector types, it is some vector mode.
2750 When we have only some of our vector isa extensions enabled, then there
2751 are some modes for which vector_mode_supported_p is false. For these
2752 modes, the generic vector support in gcc will choose some non-vector mode
2753 in order to implement the type. By computing the natural mode, we'll
2754 select the proper ABI location for the operand and not depend on whatever
2755 the middle-end decides to do with these vector types. */
2757 static enum machine_mode
2758 type_natural_mode (tree type)
2760 enum machine_mode mode = TYPE_MODE (type);
2762 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2764 HOST_WIDE_INT size = int_size_in_bytes (type);
2765 if ((size == 8 || size == 16)
2766 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2767 && TYPE_VECTOR_SUBPARTS (type) > 1)
2769 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2771 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2772 mode = MIN_MODE_VECTOR_FLOAT;
2773 else
2774 mode = MIN_MODE_VECTOR_INT;
2776 /* Get the mode which has this inner mode and number of units. */
2777 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2778 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2779 && GET_MODE_INNER (mode) == innermode)
2780 return mode;
2782 gcc_unreachable ();
2786 return mode;
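/* Editorial example (illustrative; the typedef is hypothetical):

       typedef int v4si __attribute__((vector_size (16)));

   When SSE is not enabled, V4SImode fails vector_mode_supported_p and
   TYPE_MODE may fall back to a non-vector mode; type_natural_mode
   still recovers V4SImode from the element type and the number of
   subparts, so the ABI location of such arguments does not depend on
   the enabled ISA.  */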
2789 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2790 this may not agree with the mode that the type system has chosen for the
2791 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2792 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2794 static rtx
2795 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2796 unsigned int regno)
2798 rtx tmp;
2800 if (orig_mode != BLKmode)
2801 tmp = gen_rtx_REG (orig_mode, regno);
2802 else
2804 tmp = gen_rtx_REG (mode, regno);
2805 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2806 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2809 return tmp;
2812 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
2813 of this code is to classify each eightbyte of the incoming argument by register
2814 class and assign registers accordingly. */
2816 /* Return the union class of CLASS1 and CLASS2.
2817 See the x86-64 PS ABI for details. */
2819 static enum x86_64_reg_class
2820 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2822 /* Rule #1: If both classes are equal, this is the resulting class. */
2823 if (class1 == class2)
2824 return class1;
2826 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2827 the other class. */
2828 if (class1 == X86_64_NO_CLASS)
2829 return class2;
2830 if (class2 == X86_64_NO_CLASS)
2831 return class1;
2833 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2834 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2835 return X86_64_MEMORY_CLASS;
2837 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2838 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2839 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2840 return X86_64_INTEGERSI_CLASS;
2841 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2842 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2843 return X86_64_INTEGER_CLASS;
2845 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2846 MEMORY is used. */
2847 if (class1 == X86_64_X87_CLASS
2848 || class1 == X86_64_X87UP_CLASS
2849 || class1 == X86_64_COMPLEX_X87_CLASS
2850 || class2 == X86_64_X87_CLASS
2851 || class2 == X86_64_X87UP_CLASS
2852 || class2 == X86_64_COMPLEX_X87_CLASS)
2853 return X86_64_MEMORY_CLASS;
2855 /* Rule #6: Otherwise class SSE is used. */
2856 return X86_64_SSE_CLASS;
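/* Editorial example (illustrative; the union is hypothetical): for

       union u { float f; int i; };

   the float member classifies the eightbyte as SSESF and the int
   member as INTEGERSI; rule #4 above merges them to INTEGERSI, so the
   union is passed in a general-purpose register.  */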
2859 /* Classify the argument of type TYPE and mode MODE.
2860 CLASSES will be filled by the register class used to pass each word
2861 of the operand. The number of words is returned. In case the parameter
2862 should be passed in memory, 0 is returned. As a special case for zero
2863 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2865 BIT_OFFSET is used internally for handling records and specifies the
2866 offset in bits modulo 256, to avoid overflow cases.
2868 See the x86-64 PS ABI for details.
2871 static int
2872 classify_argument (enum machine_mode mode, tree type,
2873 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2875 HOST_WIDE_INT bytes =
2876 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2877 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2879 /* Variable sized entities are always passed/returned in memory. */
2880 if (bytes < 0)
2881 return 0;
2883 if (mode != VOIDmode
2884 && targetm.calls.must_pass_in_stack (mode, type))
2885 return 0;
2887 if (type && AGGREGATE_TYPE_P (type))
2889 int i;
2890 tree field;
2891 enum x86_64_reg_class subclasses[MAX_CLASSES];
2893 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2894 if (bytes > 16)
2895 return 0;
2897 for (i = 0; i < words; i++)
2898 classes[i] = X86_64_NO_CLASS;
2900 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2901 signal the memory class, so handle this as a special case. */
2902 if (!words)
2904 classes[0] = X86_64_NO_CLASS;
2905 return 1;
2908 /* Classify each field of record and merge classes. */
2909 switch (TREE_CODE (type))
2911 case RECORD_TYPE:
2912 /* For classes first merge in the field of the subclasses. */
2913 if (TYPE_BINFO (type))
2915 tree binfo, base_binfo;
2916 int basenum;
2918 for (binfo = TYPE_BINFO (type), basenum = 0;
2919 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2921 int num;
2922 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2923 tree type = BINFO_TYPE (base_binfo);
2925 num = classify_argument (TYPE_MODE (type),
2926 type, subclasses,
2927 (offset + bit_offset) % 256);
2928 if (!num)
2929 return 0;
2930 for (i = 0; i < num; i++)
2932 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2933 classes[i + pos] =
2934 merge_classes (subclasses[i], classes[i + pos]);
2938 /* And now merge the fields of structure. */
2939 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2941 if (TREE_CODE (field) == FIELD_DECL)
2943 int num;
2945 /* Bitfields are always classified as integer. Handle them
2946 early, since later code would consider them to be
2947 misaligned integers. */
2948 if (DECL_BIT_FIELD (field))
2950 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2951 i < ((int_bit_position (field) + (bit_offset % 64))
2952 + tree_low_cst (DECL_SIZE (field), 0)
2953 + 63) / 8 / 8; i++)
2954 classes[i] =
2955 merge_classes (X86_64_INTEGER_CLASS,
2956 classes[i]);
2958 else
2960 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2961 TREE_TYPE (field), subclasses,
2962 (int_bit_position (field)
2963 + bit_offset) % 256);
2964 if (!num)
2965 return 0;
2966 for (i = 0; i < num; i++)
2968 int pos =
2969 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2970 classes[i + pos] =
2971 merge_classes (subclasses[i], classes[i + pos]);
2976 break;
2978 case ARRAY_TYPE:
2979 /* Arrays are handled as small records. */
2981 int num;
2982 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2983 TREE_TYPE (type), subclasses, bit_offset);
2984 if (!num)
2985 return 0;
2987 /* The partial classes are now full classes. */
2988 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2989 subclasses[0] = X86_64_SSE_CLASS;
2990 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2991 subclasses[0] = X86_64_INTEGER_CLASS;
2993 for (i = 0; i < words; i++)
2994 classes[i] = subclasses[i % num];
2996 break;
2998 case UNION_TYPE:
2999 case QUAL_UNION_TYPE:
3000 /* Unions are similar to RECORD_TYPE, but the offset is always 0. */
3003 /* Unions are not derived. */
3004 gcc_assert (!TYPE_BINFO (type)
3005 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3006 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3008 if (TREE_CODE (field) == FIELD_DECL)
3010 int num;
3011 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3012 TREE_TYPE (field), subclasses,
3013 bit_offset);
3014 if (!num)
3015 return 0;
3016 for (i = 0; i < num; i++)
3017 classes[i] = merge_classes (subclasses[i], classes[i]);
3020 break;
3022 default:
3023 gcc_unreachable ();
3026 /* Final merger cleanup. */
3027 for (i = 0; i < words; i++)
3029 /* If one class is MEMORY, everything should be passed in
3030 memory. */
3031 if (classes[i] == X86_64_MEMORY_CLASS)
3032 return 0;
3034 /* The X86_64_SSEUP_CLASS should always be preceded by
3035 X86_64_SSE_CLASS. */
3036 if (classes[i] == X86_64_SSEUP_CLASS
3037 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3038 classes[i] = X86_64_SSE_CLASS;
3040 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3041 if (classes[i] == X86_64_X87UP_CLASS
3042 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3043 classes[i] = X86_64_SSE_CLASS;
3045 return words;
3048 /* Compute the alignment needed.  We align all types to their natural boundaries,
3049 with the exception of XFmode, which is aligned to 64 bits. */
3050 if (mode != VOIDmode && mode != BLKmode)
3052 int mode_alignment = GET_MODE_BITSIZE (mode);
3054 if (mode == XFmode)
3055 mode_alignment = 128;
3056 else if (mode == XCmode)
3057 mode_alignment = 256;
3058 if (COMPLEX_MODE_P (mode))
3059 mode_alignment /= 2;
3060 /* Misaligned fields are always returned in memory. */
3061 if (bit_offset % mode_alignment)
3062 return 0;
3065 /* For V1xx modes, just use the base mode. */
3066 if (VECTOR_MODE_P (mode)
3067 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3068 mode = GET_MODE_INNER (mode);
3070 /* Classification of atomic types. */
3071 switch (mode)
3073 case DImode:
3074 case SImode:
3075 case HImode:
3076 case QImode:
3077 case CSImode:
3078 case CHImode:
3079 case CQImode:
3080 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3081 classes[0] = X86_64_INTEGERSI_CLASS;
3082 else
3083 classes[0] = X86_64_INTEGER_CLASS;
3084 return 1;
3085 case CDImode:
3086 case TImode:
3087 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3088 return 2;
3089 case CTImode:
3090 return 0;
3091 case SFmode:
3092 if (!(bit_offset % 64))
3093 classes[0] = X86_64_SSESF_CLASS;
3094 else
3095 classes[0] = X86_64_SSE_CLASS;
3096 return 1;
3097 case DFmode:
3098 classes[0] = X86_64_SSEDF_CLASS;
3099 return 1;
3100 case XFmode:
3101 classes[0] = X86_64_X87_CLASS;
3102 classes[1] = X86_64_X87UP_CLASS;
3103 return 2;
3104 case TFmode:
3105 classes[0] = X86_64_SSE_CLASS;
3106 classes[1] = X86_64_SSEUP_CLASS;
3107 return 2;
3108 case SCmode:
3109 classes[0] = X86_64_SSE_CLASS;
3110 return 1;
3111 case DCmode:
3112 classes[0] = X86_64_SSEDF_CLASS;
3113 classes[1] = X86_64_SSEDF_CLASS;
3114 return 2;
3115 case XCmode:
3116 classes[0] = X86_64_COMPLEX_X87_CLASS;
3117 return 1;
3118 case TCmode:
3119 /* This mode is larger than 16 bytes. */
3120 return 0;
3121 case V4SFmode:
3122 case V4SImode:
3123 case V16QImode:
3124 case V8HImode:
3125 case V2DFmode:
3126 case V2DImode:
3127 classes[0] = X86_64_SSE_CLASS;
3128 classes[1] = X86_64_SSEUP_CLASS;
3129 return 2;
3130 case V2SFmode:
3131 case V2SImode:
3132 case V4HImode:
3133 case V8QImode:
3134 classes[0] = X86_64_SSE_CLASS;
3135 return 1;
3136 case BLKmode:
3137 case VOIDmode:
3138 return 0;
3139 default:
3140 gcc_assert (VECTOR_MODE_P (mode));
3142 if (bytes > 16)
3143 return 0;
3145 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3147 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3148 classes[0] = X86_64_INTEGERSI_CLASS;
3149 else
3150 classes[0] = X86_64_INTEGER_CLASS;
3151 classes[1] = X86_64_INTEGER_CLASS;
3152 return 1 + (bytes > 8);
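/* A rough worked example of the classification above (the struct is
   hypothetical and not from this file): an argument such as

	struct s { double d; int i; };

   is 16 bytes, so it is classified eightbyte by eightbyte: the first
   eightbyte lands in an SSE class (the double) and the second in an
   integer class (the int), so the two pieces travel in one SSE and one
   general-purpose register.  Aggregates larger than 16 bytes, or ones
   containing a misaligned field, classify as memory (return 0) and are
   passed on the stack.  */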
3156 /* Examine the argument and return the number of registers required in each
3157 class. Return 0 iff the parameter should be passed in memory. */
3158 static int
3159 examine_argument (enum machine_mode mode, tree type, int in_return,
3160 int *int_nregs, int *sse_nregs)
3162 enum x86_64_reg_class class[MAX_CLASSES];
3163 int n = classify_argument (mode, type, class, 0);
3165 *int_nregs = 0;
3166 *sse_nregs = 0;
3167 if (!n)
3168 return 0;
3169 for (n--; n >= 0; n--)
3170 switch (class[n])
3172 case X86_64_INTEGER_CLASS:
3173 case X86_64_INTEGERSI_CLASS:
3174 (*int_nregs)++;
3175 break;
3176 case X86_64_SSE_CLASS:
3177 case X86_64_SSESF_CLASS:
3178 case X86_64_SSEDF_CLASS:
3179 (*sse_nregs)++;
3180 break;
3181 case X86_64_NO_CLASS:
3182 case X86_64_SSEUP_CLASS:
3183 break;
3184 case X86_64_X87_CLASS:
3185 case X86_64_X87UP_CLASS:
3186 if (!in_return)
3187 return 0;
3188 break;
3189 case X86_64_COMPLEX_X87_CLASS:
3190 return in_return ? 2 : 0;
3191 case X86_64_MEMORY_CLASS:
3192 gcc_unreachable ();
3194 return 1;
3197 /* Construct a container for the argument used by the GCC interface. See
3198 FUNCTION_ARG for the detailed description. */
3200 static rtx
3201 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3202 tree type, int in_return, int nintregs, int nsseregs,
3203 const int *intreg, int sse_regno)
3205 enum machine_mode tmpmode;
3206 int bytes =
3207 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3208 enum x86_64_reg_class class[MAX_CLASSES];
3209 int n;
3210 int i;
3211 int nexps = 0;
3212 int needed_sseregs, needed_intregs;
3213 rtx exp[MAX_CLASSES];
3214 rtx ret;
3216 n = classify_argument (mode, type, class, 0);
3217 if (TARGET_DEBUG_ARG)
3219 if (!n)
3220 fprintf (stderr, "Memory class\n");
3221 else
3223 fprintf (stderr, "Classes:");
3224 for (i = 0; i < n; i++)
3226 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3228 fprintf (stderr, "\n");
3231 if (!n)
3232 return NULL;
3233 if (!examine_argument (mode, type, in_return, &needed_intregs,
3234 &needed_sseregs))
3235 return NULL;
3236 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3237 return NULL;
3239 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3240 some less clueful developer tries to use floating-point anyway. */
3241 if (needed_sseregs && !TARGET_SSE)
3243 static bool issued_error;
3244 if (!issued_error)
3246 issued_error = true;
3247 if (in_return)
3248 error ("SSE register return with SSE disabled");
3249 else
3250 error ("SSE register argument with SSE disabled");
3252 return NULL;
3255 /* First construct the simple cases. Avoid SCmode, since we want to use a
3256 single register to pass this type. */
3257 if (n == 1 && mode != SCmode)
3258 switch (class[0])
3260 case X86_64_INTEGER_CLASS:
3261 case X86_64_INTEGERSI_CLASS:
3262 return gen_rtx_REG (mode, intreg[0]);
3263 case X86_64_SSE_CLASS:
3264 case X86_64_SSESF_CLASS:
3265 case X86_64_SSEDF_CLASS:
3266 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3267 case X86_64_X87_CLASS:
3268 case X86_64_COMPLEX_X87_CLASS:
3269 return gen_rtx_REG (mode, FIRST_STACK_REG);
3270 case X86_64_NO_CLASS:
3271 /* Zero sized array, struct or class. */
3272 return NULL;
3273 default:
3274 gcc_unreachable ();
3276 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3277 && mode != BLKmode)
3278 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3279 if (n == 2
3280 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3281 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3282 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3283 && class[1] == X86_64_INTEGER_CLASS
3284 && (mode == CDImode || mode == TImode || mode == TFmode)
3285 && intreg[0] + 1 == intreg[1])
3286 return gen_rtx_REG (mode, intreg[0]);
3288 /* Otherwise figure out the entries of the PARALLEL. */
3289 for (i = 0; i < n; i++)
3291 switch (class[i])
3293 case X86_64_NO_CLASS:
3294 break;
3295 case X86_64_INTEGER_CLASS:
3296 case X86_64_INTEGERSI_CLASS:
3297 /* Merge TImodes on aligned occasions here too. */
3298 if (i * 8 + 8 > bytes)
3299 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3300 else if (class[i] == X86_64_INTEGERSI_CLASS)
3301 tmpmode = SImode;
3302 else
3303 tmpmode = DImode;
3304 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3305 if (tmpmode == BLKmode)
3306 tmpmode = DImode;
3307 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3308 gen_rtx_REG (tmpmode, *intreg),
3309 GEN_INT (i*8));
3310 intreg++;
3311 break;
3312 case X86_64_SSESF_CLASS:
3313 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3314 gen_rtx_REG (SFmode,
3315 SSE_REGNO (sse_regno)),
3316 GEN_INT (i*8));
3317 sse_regno++;
3318 break;
3319 case X86_64_SSEDF_CLASS:
3320 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3321 gen_rtx_REG (DFmode,
3322 SSE_REGNO (sse_regno)),
3323 GEN_INT (i*8));
3324 sse_regno++;
3325 break;
3326 case X86_64_SSE_CLASS:
3327 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3328 tmpmode = TImode;
3329 else
3330 tmpmode = DImode;
3331 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3332 gen_rtx_REG (tmpmode,
3333 SSE_REGNO (sse_regno)),
3334 GEN_INT (i*8));
3335 if (tmpmode == TImode)
3336 i++;
3337 sse_regno++;
3338 break;
3339 default:
3340 gcc_unreachable ();
3344 /* Empty aligned struct, union or class. */
3345 if (nexps == 0)
3346 return NULL;
3348 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3349 for (i = 0; i < nexps; i++)
3350 XVECEXP (ret, 0, i) = exp [i];
3351 return ret;
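/* A rough sketch of the result for the hypothetical 16-byte { double; int }
   struct used as an example above (exact register numbers depend on the
   caller): the PARALLEL built here looks roughly like

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:DI di)   (const_int 8))])

   i.e. each EXPR_LIST pairs a hard register with the byte offset of the
   eightbyte it carries inside the argument.  */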
3354 /* Update the data in CUM to advance over an argument
3355 of mode MODE and data type TYPE.
3356 (TYPE is null for libcalls where that information may not be available.) */
3358 void
3359 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3360 tree type, int named)
3362 int bytes =
3363 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3364 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3366 if (type)
3367 mode = type_natural_mode (type);
3369 if (TARGET_DEBUG_ARG)
3370 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3371 "mode=%s, named=%d)\n\n",
3372 words, cum->words, cum->nregs, cum->sse_nregs,
3373 GET_MODE_NAME (mode), named);
3375 if (TARGET_64BIT)
3377 int int_nregs, sse_nregs;
3378 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3379 cum->words += words;
3380 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3382 cum->nregs -= int_nregs;
3383 cum->sse_nregs -= sse_nregs;
3384 cum->regno += int_nregs;
3385 cum->sse_regno += sse_nregs;
3387 else
3388 cum->words += words;
3390 else
3392 switch (mode)
3394 default:
3395 break;
3397 case BLKmode:
3398 if (bytes < 0)
3399 break;
3400 /* FALLTHRU */
3402 case DImode:
3403 case SImode:
3404 case HImode:
3405 case QImode:
3406 cum->words += words;
3407 cum->nregs -= words;
3408 cum->regno += words;
3410 if (cum->nregs <= 0)
3412 cum->nregs = 0;
3413 cum->regno = 0;
3415 break;
3417 case DFmode:
3418 if (cum->float_in_sse < 2)
3419 break;
3420 case SFmode:
3421 if (cum->float_in_sse < 1)
3422 break;
3423 /* FALLTHRU */
3425 case TImode:
3426 case V16QImode:
3427 case V8HImode:
3428 case V4SImode:
3429 case V2DImode:
3430 case V4SFmode:
3431 case V2DFmode:
3432 if (!type || !AGGREGATE_TYPE_P (type))
3434 cum->sse_words += words;
3435 cum->sse_nregs -= 1;
3436 cum->sse_regno += 1;
3437 if (cum->sse_nregs <= 0)
3439 cum->sse_nregs = 0;
3440 cum->sse_regno = 0;
3443 break;
3445 case V8QImode:
3446 case V4HImode:
3447 case V2SImode:
3448 case V2SFmode:
3449 if (!type || !AGGREGATE_TYPE_P (type))
3451 cum->mmx_words += words;
3452 cum->mmx_nregs -= 1;
3453 cum->mmx_regno += 1;
3454 if (cum->mmx_nregs <= 0)
3456 cum->mmx_nregs = 0;
3457 cum->mmx_regno = 0;
3460 break;
3465 /* Define where to put the arguments to a function.
3466 Value is zero to push the argument on the stack,
3467 or a hard register in which to store the argument.
3469 MODE is the argument's machine mode.
3470 TYPE is the data type of the argument (as a tree).
3471 This is null for libcalls where that information may
3472 not be available.
3473 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3474 the preceding args and about the function being called.
3475 NAMED is nonzero if this argument is a named parameter
3476 (otherwise it is an extra parameter matching an ellipsis). */
3479 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3480 tree type, int named)
3482 enum machine_mode mode = orig_mode;
3483 rtx ret = NULL_RTX;
3484 int bytes =
3485 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3486 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3487 static bool warnedsse, warnedmmx;
3489 /* To simplify the code below, represent vector types with a vector mode
3490 even if MMX/SSE are not active. */
3491 if (type && TREE_CODE (type) == VECTOR_TYPE)
3492 mode = type_natural_mode (type);
3494 /* Handle a hidden AL argument containing the number of registers used for
3495 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
3496 avoid any AL settings. */
3497 if (mode == VOIDmode)
3499 if (TARGET_64BIT)
3500 return GEN_INT (cum->maybe_vaarg
3501 ? (cum->sse_nregs < 0
3502 ? SSE_REGPARM_MAX
3503 : cum->sse_regno)
3504 : -1);
3505 else
3506 return constm1_rtx;
3508 if (TARGET_64BIT)
3509 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3510 cum->sse_nregs,
3511 &x86_64_int_parameter_registers [cum->regno],
3512 cum->sse_regno);
3513 else
3514 switch (mode)
3516 /* For now, pass fp/complex values on the stack. */
3517 default:
3518 break;
3520 case BLKmode:
3521 if (bytes < 0)
3522 break;
3523 /* FALLTHRU */
3524 case DImode:
3525 case SImode:
3526 case HImode:
3527 case QImode:
3528 if (words <= cum->nregs)
3530 int regno = cum->regno;
3532 /* Fastcall allocates the first two DWORD (SImode) or
3533 smaller arguments to ECX and EDX. */
3534 if (cum->fastcall)
3536 if (mode == BLKmode || mode == DImode)
3537 break;
3539 /* ECX, not EAX, is the first allocated register. */
3540 if (regno == 0)
3541 regno = 2;
3543 ret = gen_rtx_REG (mode, regno);
3545 break;
3546 case DFmode:
3547 if (cum->float_in_sse < 2)
3548 break;
3549 case SFmode:
3550 if (cum->float_in_sse < 1)
3551 break;
3552 /* FALLTHRU */
3553 case TImode:
3554 case V16QImode:
3555 case V8HImode:
3556 case V4SImode:
3557 case V2DImode:
3558 case V4SFmode:
3559 case V2DFmode:
3560 if (!type || !AGGREGATE_TYPE_P (type))
3562 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3564 warnedsse = true;
3565 warning (0, "SSE vector argument without SSE enabled "
3566 "changes the ABI");
3568 if (cum->sse_nregs)
3569 ret = gen_reg_or_parallel (mode, orig_mode,
3570 cum->sse_regno + FIRST_SSE_REG);
3572 break;
3573 case V8QImode:
3574 case V4HImode:
3575 case V2SImode:
3576 case V2SFmode:
3577 if (!type || !AGGREGATE_TYPE_P (type))
3579 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3581 warnedmmx = true;
3582 warning (0, "MMX vector argument without MMX enabled "
3583 "changes the ABI");
3585 if (cum->mmx_nregs)
3586 ret = gen_reg_or_parallel (mode, orig_mode,
3587 cum->mmx_regno + FIRST_MMX_REG);
3589 break;
3592 if (TARGET_DEBUG_ARG)
3594 fprintf (stderr,
3595 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3596 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3598 if (ret)
3599 print_simple_rtl (stderr, ret);
3600 else
3601 fprintf (stderr, ", stack");
3603 fprintf (stderr, " )\n");
3606 return ret;
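/* Illustrative note on the hidden AL argument handled above (the call below
   is only an example): for a varargs call such as

	printf ("%f\n", 1.0);

   the x86-64 caller passes the double in %xmm0 and loads %eax (hence %al)
   with 1, an upper bound on the number of SSE registers used, so that the
   callee's register-save prologue knows how many SSE registers to spill.  */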
3609 /* A C expression that indicates when an argument must be passed by
3610 reference. If nonzero for an argument, a copy of that argument is
3611 made in memory and a pointer to the argument is passed instead of
3612 the argument itself. The pointer is passed in whatever way is
3613 appropriate for passing a pointer to that type. */
3615 static bool
3616 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3617 enum machine_mode mode ATTRIBUTE_UNUSED,
3618 tree type, bool named ATTRIBUTE_UNUSED)
3620 if (!TARGET_64BIT)
3621 return 0;
3623 if (type && int_size_in_bytes (type) == -1)
3625 if (TARGET_DEBUG_ARG)
3626 fprintf (stderr, "function_arg_pass_by_reference\n");
3627 return 1;
3630 return 0;
3633 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3634 passing ABI. Only called if TARGET_SSE. */
3635 static bool
3636 contains_128bit_aligned_vector_p (tree type)
3638 enum machine_mode mode = TYPE_MODE (type);
3639 if (SSE_REG_MODE_P (mode)
3640 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3641 return true;
3642 if (TYPE_ALIGN (type) < 128)
3643 return false;
3645 if (AGGREGATE_TYPE_P (type))
3647 /* Walk the aggregates recursively. */
3648 switch (TREE_CODE (type))
3650 case RECORD_TYPE:
3651 case UNION_TYPE:
3652 case QUAL_UNION_TYPE:
3654 tree field;
3656 if (TYPE_BINFO (type))
3658 tree binfo, base_binfo;
3659 int i;
3661 for (binfo = TYPE_BINFO (type), i = 0;
3662 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3663 if (contains_128bit_aligned_vector_p
3664 (BINFO_TYPE (base_binfo)))
3665 return true;
3667 /* And now merge the fields of structure. */
3668 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3670 if (TREE_CODE (field) == FIELD_DECL
3671 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3672 return true;
3674 break;
3677 case ARRAY_TYPE:
3678 /* Just for use if some languages pass arrays by value. */
3679 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3680 return true;
3681 break;
3683 default:
3684 gcc_unreachable ();
3687 return false;
3690 /* Gives the alignment boundary, in bits, of an argument with the
3691 specified mode and type. */
3694 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3696 int align;
3697 if (type)
3698 align = TYPE_ALIGN (type);
3699 else
3700 align = GET_MODE_ALIGNMENT (mode);
3701 if (align < PARM_BOUNDARY)
3702 align = PARM_BOUNDARY;
3703 if (!TARGET_64BIT)
3705 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
3706 make an exception for SSE modes since these require 128-bit
3707 alignment.
3709 The handling here differs from field_alignment. ICC aligns MMX
3710 arguments to 4-byte boundaries, while structure fields are aligned
3711 to 8-byte boundaries. */
3712 if (!TARGET_SSE)
3713 align = PARM_BOUNDARY;
3714 else if (!type)
3716 if (!SSE_REG_MODE_P (mode))
3717 align = PARM_BOUNDARY;
3719 else
3721 if (!contains_128bit_aligned_vector_p (type))
3722 align = PARM_BOUNDARY;
3725 if (align > 128)
3726 align = 128;
3727 return align;
3730 /* Return true if N is a possible register number of function value. */
3731 bool
3732 ix86_function_value_regno_p (int regno)
3734 if (regno == 0
3735 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3736 || (regno == FIRST_SSE_REG && TARGET_SSE))
3737 return true;
3739 if (!TARGET_64BIT
3740 && (regno == FIRST_MMX_REG && TARGET_MMX))
3741 return true;
3743 return false;
3746 /* Define how to find the value returned by a function.
3747 VALTYPE is the data type of the value (as a tree).
3748 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3749 otherwise, FUNC is 0. */
3751 ix86_function_value (tree valtype, tree fntype_or_decl,
3752 bool outgoing ATTRIBUTE_UNUSED)
3754 enum machine_mode natmode = type_natural_mode (valtype);
3756 if (TARGET_64BIT)
3758 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3759 1, REGPARM_MAX, SSE_REGPARM_MAX,
3760 x86_64_int_return_registers, 0);
3761 /* For zero-sized structures, construct_container returns NULL, but we
3762 need to keep the rest of the compiler happy by returning a meaningful value. */
3763 if (!ret)
3764 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3765 return ret;
3767 else
3769 tree fn = NULL_TREE, fntype;
3770 if (fntype_or_decl
3771 && DECL_P (fntype_or_decl))
3772 fn = fntype_or_decl;
3773 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3774 return gen_rtx_REG (TYPE_MODE (valtype),
3775 ix86_value_regno (natmode, fn, fntype));
3779 /* Return true iff type is returned in memory. */
3781 ix86_return_in_memory (tree type)
3783 int needed_intregs, needed_sseregs, size;
3784 enum machine_mode mode = type_natural_mode (type);
3786 if (TARGET_64BIT)
3787 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3789 if (mode == BLKmode)
3790 return 1;
3792 size = int_size_in_bytes (type);
3794 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3795 return 0;
3797 if (VECTOR_MODE_P (mode) || mode == TImode)
3799 /* User-created vectors small enough to fit in EAX. */
3800 if (size < 8)
3801 return 0;
3803 /* MMX/3dNow values are returned in MM0,
3804 except when it doesn't exist. */
3805 if (size == 8)
3806 return (TARGET_MMX ? 0 : 1);
3808 /* SSE values are returned in XMM0, except when it doesn't exist. */
3809 if (size == 16)
3810 return (TARGET_SSE ? 0 : 1);
3813 if (mode == XFmode)
3814 return 0;
3816 if (size > 12)
3817 return 1;
3818 return 0;
3821 /* When returning SSE vector types, we have a choice of either
3822 (1) being abi incompatible with a -march switch, or
3823 (2) generating an error.
3824 Given no good solution, I think the safest thing is one warning.
3825 The user won't be able to use -Werror, but....
3827 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3828 called in response to actually generating a caller or callee that
3829 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3830 via aggregate_value_p for general type probing from tree-ssa. */
3832 static rtx
3833 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3835 static bool warnedsse, warnedmmx;
3837 if (type)
3839 /* Look at the return type of the function, not the function type. */
3840 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3842 if (!TARGET_SSE && !warnedsse)
3844 if (mode == TImode
3845 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3847 warnedsse = true;
3848 warning (0, "SSE vector return without SSE enabled "
3849 "changes the ABI");
3853 if (!TARGET_MMX && !warnedmmx)
3855 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3857 warnedmmx = true;
3858 warning (0, "MMX vector return without MMX enabled "
3859 "changes the ABI");
3864 return NULL;
3867 /* Define how to find the value returned by a library function
3868 assuming the value has mode MODE. */
3870 ix86_libcall_value (enum machine_mode mode)
3872 if (TARGET_64BIT)
3874 switch (mode)
3876 case SFmode:
3877 case SCmode:
3878 case DFmode:
3879 case DCmode:
3880 case TFmode:
3881 return gen_rtx_REG (mode, FIRST_SSE_REG);
3882 case XFmode:
3883 case XCmode:
3884 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3885 case TCmode:
3886 return NULL;
3887 default:
3888 return gen_rtx_REG (mode, 0);
3891 else
3892 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3895 /* Given a mode, return the register to use for a return value. */
3897 static int
3898 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3900 gcc_assert (!TARGET_64BIT);
3902 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3903 we prevent this case when mmx is not available. */
3904 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3905 return FIRST_MMX_REG;
3907 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3908 we prevent this case when sse is not available. */
3909 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3910 return FIRST_SSE_REG;
3912 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3913 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3914 return 0;
3916 /* Floating point return values in %st(0), except for local functions when
3917 SSE math is enabled or for functions with sseregparm attribute. */
3918 if ((func || fntype)
3919 && (mode == SFmode || mode == DFmode))
3921 int sse_level = ix86_function_sseregparm (fntype, func);
3922 if ((sse_level >= 1 && mode == SFmode)
3923 || (sse_level == 2 && mode == DFmode))
3924 return FIRST_SSE_REG;
3927 return FIRST_FLOAT_REG;
3930 /* Create the va_list data type. */
3932 static tree
3933 ix86_build_builtin_va_list (void)
3935 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3937 /* For i386 we use a plain pointer to the argument area. */
3938 if (!TARGET_64BIT)
3939 return build_pointer_type (char_type_node);
3941 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3942 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3944 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3945 unsigned_type_node);
3946 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3947 unsigned_type_node);
3948 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3949 ptr_type_node);
3950 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3951 ptr_type_node);
3953 va_list_gpr_counter_field = f_gpr;
3954 va_list_fpr_counter_field = f_fpr;
3956 DECL_FIELD_CONTEXT (f_gpr) = record;
3957 DECL_FIELD_CONTEXT (f_fpr) = record;
3958 DECL_FIELD_CONTEXT (f_ovf) = record;
3959 DECL_FIELD_CONTEXT (f_sav) = record;
3961 TREE_CHAIN (record) = type_decl;
3962 TYPE_NAME (record) = type_decl;
3963 TYPE_FIELDS (record) = f_gpr;
3964 TREE_CHAIN (f_gpr) = f_fpr;
3965 TREE_CHAIN (f_fpr) = f_ovf;
3966 TREE_CHAIN (f_ovf) = f_sav;
3968 layout_type (record);
3970 /* The correct type is an array type of one element. */
3971 return build_array_type (record, build_index_type (size_zero_node));
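/* For reference, the record built above corresponds (roughly) to the
   familiar x86-64 declaration

	typedef struct __va_list_tag {
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} __builtin_va_list[1];

   while the 32-bit va_list stays a plain character pointer.  */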
3974 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3976 static void
3977 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3978 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3979 int no_rtl)
3981 CUMULATIVE_ARGS next_cum;
3982 rtx save_area = NULL_RTX, mem;
3983 rtx label;
3984 rtx label_ref;
3985 rtx tmp_reg;
3986 rtx nsse_reg;
3987 int set;
3988 tree fntype;
3989 int stdarg_p;
3990 int i;
3992 if (!TARGET_64BIT)
3993 return;
3995 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3996 return;
3998 /* Indicate to allocate space on the stack for varargs save area. */
3999 ix86_save_varrargs_registers = 1;
4001 cfun->stack_alignment_needed = 128;
4003 fntype = TREE_TYPE (current_function_decl);
4004 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4005 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4006 != void_type_node));
4008 /* For varargs, we do not want to skip the dummy va_dcl argument.
4009 For stdargs, we do want to skip the last named argument. */
4010 next_cum = *cum;
4011 if (stdarg_p)
4012 function_arg_advance (&next_cum, mode, type, 1);
4014 if (!no_rtl)
4015 save_area = frame_pointer_rtx;
4017 set = get_varargs_alias_set ();
4019 for (i = next_cum.regno;
4020 i < ix86_regparm
4021 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4022 i++)
4024 mem = gen_rtx_MEM (Pmode,
4025 plus_constant (save_area, i * UNITS_PER_WORD));
4026 MEM_NOTRAP_P (mem) = 1;
4027 set_mem_alias_set (mem, set);
4028 emit_move_insn (mem, gen_rtx_REG (Pmode,
4029 x86_64_int_parameter_registers[i]));
4032 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4034 /* Now emit code to save SSE registers. The AX parameter contains the
4035 number of SSE parameter registers used to call this function. We use
4036 the sse_prologue_save insn template, which produces a computed jump
4037 across the SSE saves. We need some preparation work to get this working. */
4039 label = gen_label_rtx ();
4040 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4042 /* Compute address to jump to :
4043 label - 5*eax + nnamed_sse_arguments*5 */
4044 tmp_reg = gen_reg_rtx (Pmode);
4045 nsse_reg = gen_reg_rtx (Pmode);
4046 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4047 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4048 gen_rtx_MULT (Pmode, nsse_reg,
4049 GEN_INT (4))));
4050 if (next_cum.sse_regno)
4051 emit_move_insn
4052 (nsse_reg,
4053 gen_rtx_CONST (DImode,
4054 gen_rtx_PLUS (DImode,
4055 label_ref,
4056 GEN_INT (next_cum.sse_regno * 4))));
4057 else
4058 emit_move_insn (nsse_reg, label_ref);
4059 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4061 /* Compute the address of the memory block we save into. We always use a
4062 pointer pointing 127 bytes after the first byte to store; this is needed
4063 to keep the instruction size limited to 4 bytes. */
4064 tmp_reg = gen_reg_rtx (Pmode);
4065 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4066 plus_constant (save_area,
4067 8 * REGPARM_MAX + 127)));
4068 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4069 MEM_NOTRAP_P (mem) = 1;
4070 set_mem_alias_set (mem, set);
4071 set_mem_align (mem, BITS_PER_WORD);
4073 /* And finally do the dirty job! */
4074 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4075 GEN_INT (next_cum.sse_regno), label));
4080 /* Implement va_start. */
4082 void
4083 ix86_va_start (tree valist, rtx nextarg)
4085 HOST_WIDE_INT words, n_gpr, n_fpr;
4086 tree f_gpr, f_fpr, f_ovf, f_sav;
4087 tree gpr, fpr, ovf, sav, t;
4089 /* Only the 64-bit target needs something special. */
4090 if (!TARGET_64BIT)
4092 std_expand_builtin_va_start (valist, nextarg);
4093 return;
4096 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4097 f_fpr = TREE_CHAIN (f_gpr);
4098 f_ovf = TREE_CHAIN (f_fpr);
4099 f_sav = TREE_CHAIN (f_ovf);
4101 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4102 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4103 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4104 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4105 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4107 /* Count number of gp and fp argument registers used. */
4108 words = current_function_args_info.words;
4109 n_gpr = current_function_args_info.regno;
4110 n_fpr = current_function_args_info.sse_regno;
4112 if (TARGET_DEBUG_ARG)
4113 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4114 (int) words, (int) n_gpr, (int) n_fpr);
4116 if (cfun->va_list_gpr_size)
4118 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
4119 build_int_cst (NULL_TREE, n_gpr * 8));
4120 TREE_SIDE_EFFECTS (t) = 1;
4121 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4124 if (cfun->va_list_fpr_size)
4126 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
4127 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
4128 TREE_SIDE_EFFECTS (t) = 1;
4129 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4132 /* Find the overflow area. */
4133 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
4134 if (words != 0)
4135 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
4136 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
4137 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4138 TREE_SIDE_EFFECTS (t) = 1;
4139 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4141 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4143 /* Find the register save area.
4144 The prologue of the function saves it right above the stack frame. */
4145 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
4146 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
4147 TREE_SIDE_EFFECTS (t) = 1;
4148 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
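/* Worked example of the initialization above (register counts are
   hypothetical): in a function whose named arguments consumed two integer
   registers and one SSE register, va_start leaves roughly

	gp_offset         = 2 * 8           = 16
	fp_offset         = 8 * 6 + 1 * 16  = 64
	overflow_arg_area = address of the first stack-passed argument
	reg_save_area     = the block set up by ix86_setup_incoming_varargs

   reflecting the 8-byte integer slots and 16-byte SSE slots of the save
   area.  */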
4152 /* Implement va_arg. */
4154 tree
4155 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4157 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4158 tree f_gpr, f_fpr, f_ovf, f_sav;
4159 tree gpr, fpr, ovf, sav, t;
4160 int size, rsize;
4161 tree lab_false, lab_over = NULL_TREE;
4162 tree addr, t2;
4163 rtx container;
4164 int indirect_p = 0;
4165 tree ptrtype;
4166 enum machine_mode nat_mode;
4168 /* Only the 64-bit target needs something special. */
4169 if (!TARGET_64BIT)
4170 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4172 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4173 f_fpr = TREE_CHAIN (f_gpr);
4174 f_ovf = TREE_CHAIN (f_fpr);
4175 f_sav = TREE_CHAIN (f_ovf);
4177 valist = build_va_arg_indirect_ref (valist);
4178 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4179 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4180 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4181 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4183 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4184 if (indirect_p)
4185 type = build_pointer_type (type);
4186 size = int_size_in_bytes (type);
4187 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4189 nat_mode = type_natural_mode (type);
4190 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4191 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4193 /* Pull the value out of the saved registers. */
4195 addr = create_tmp_var (ptr_type_node, "addr");
4196 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4198 if (container)
4200 int needed_intregs, needed_sseregs;
4201 bool need_temp;
4202 tree int_addr, sse_addr;
4204 lab_false = create_artificial_label ();
4205 lab_over = create_artificial_label ();
4207 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4209 need_temp = (!REG_P (container)
4210 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4211 || TYPE_ALIGN (type) > 128));
4213 /* In case we are passing a structure, verify that it is a consecutive
4214 block in the register save area. If not, we need to do moves. */
4215 if (!need_temp && !REG_P (container))
4217 /* Verify that all registers are strictly consecutive */
4218 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4220 int i;
4222 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4224 rtx slot = XVECEXP (container, 0, i);
4225 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4226 || INTVAL (XEXP (slot, 1)) != i * 16)
4227 need_temp = 1;
4230 else
4232 int i;
4234 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4236 rtx slot = XVECEXP (container, 0, i);
4237 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4238 || INTVAL (XEXP (slot, 1)) != i * 8)
4239 need_temp = 1;
4243 if (!need_temp)
4245 int_addr = addr;
4246 sse_addr = addr;
4248 else
4250 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4251 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4252 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4253 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4256 /* First ensure that we fit completely in registers. */
4257 if (needed_intregs)
4259 t = build_int_cst (TREE_TYPE (gpr),
4260 (REGPARM_MAX - needed_intregs + 1) * 8);
4261 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4262 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4263 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4264 gimplify_and_add (t, pre_p);
4266 if (needed_sseregs)
4268 t = build_int_cst (TREE_TYPE (fpr),
4269 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4270 + REGPARM_MAX * 8);
4271 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4272 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4273 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4274 gimplify_and_add (t, pre_p);
4277 /* Compute index to start of area used for integer regs. */
4278 if (needed_intregs)
4280 /* int_addr = gpr + sav; */
4281 t = fold_convert (ptr_type_node, gpr);
4282 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4283 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4284 gimplify_and_add (t, pre_p);
4286 if (needed_sseregs)
4288 /* sse_addr = fpr + sav; */
4289 t = fold_convert (ptr_type_node, fpr);
4290 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4291 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4292 gimplify_and_add (t, pre_p);
4294 if (need_temp)
4296 int i;
4297 tree temp = create_tmp_var (type, "va_arg_tmp");
4299 /* addr = &temp; */
4300 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4301 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4302 gimplify_and_add (t, pre_p);
4304 for (i = 0; i < XVECLEN (container, 0); i++)
4306 rtx slot = XVECEXP (container, 0, i);
4307 rtx reg = XEXP (slot, 0);
4308 enum machine_mode mode = GET_MODE (reg);
4309 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4310 tree addr_type = build_pointer_type (piece_type);
4311 tree src_addr, src;
4312 int src_offset;
4313 tree dest_addr, dest;
4315 if (SSE_REGNO_P (REGNO (reg)))
4317 src_addr = sse_addr;
4318 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4320 else
4322 src_addr = int_addr;
4323 src_offset = REGNO (reg) * 8;
4325 src_addr = fold_convert (addr_type, src_addr);
4326 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4327 size_int (src_offset)));
4328 src = build_va_arg_indirect_ref (src_addr);
4330 dest_addr = fold_convert (addr_type, addr);
4331 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4332 size_int (INTVAL (XEXP (slot, 1)))));
4333 dest = build_va_arg_indirect_ref (dest_addr);
4335 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4336 gimplify_and_add (t, pre_p);
4340 if (needed_intregs)
4342 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4343 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4344 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4345 gimplify_and_add (t, pre_p);
4347 if (needed_sseregs)
4349 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4350 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4351 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4352 gimplify_and_add (t, pre_p);
4355 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4356 gimplify_and_add (t, pre_p);
4358 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4359 append_to_statement_list (t, pre_p);
4362 /* ... otherwise out of the overflow area. */
4364 /* Care for on-stack alignment if needed. */
4365 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4366 || integer_zerop (TYPE_SIZE (type)))
4367 t = ovf;
4368 else
4370 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4371 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4372 build_int_cst (TREE_TYPE (ovf), align - 1));
4373 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4374 build_int_cst (TREE_TYPE (t), -align));
4376 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4378 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4379 gimplify_and_add (t2, pre_p);
4381 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4382 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4383 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4384 gimplify_and_add (t, pre_p);
4386 if (container)
4388 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4389 append_to_statement_list (t, pre_p);
4392 ptrtype = build_pointer_type (type);
4393 addr = fold_convert (ptrtype, addr);
4395 if (indirect_p)
4396 addr = build_va_arg_indirect_ref (addr);
4397 return build_va_arg_indirect_ref (addr);
4400 /* Return nonzero if OPNUM's MEM should be matched
4401 in movabs* patterns. */
4404 ix86_check_movabs (rtx insn, int opnum)
4406 rtx set, mem;
4408 set = PATTERN (insn);
4409 if (GET_CODE (set) == PARALLEL)
4410 set = XVECEXP (set, 0, 0);
4411 gcc_assert (GET_CODE (set) == SET);
4412 mem = XEXP (set, opnum);
4413 while (GET_CODE (mem) == SUBREG)
4414 mem = SUBREG_REG (mem);
4415 gcc_assert (GET_CODE (mem) == MEM);
4416 return (volatile_ok || !MEM_VOLATILE_P (mem));
4419 /* Initialize the table of extra 80387 mathematical constants. */
4421 static void
4422 init_ext_80387_constants (void)
4424 static const char * cst[5] =
4426 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4427 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4428 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4429 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4430 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4432 int i;
4434 for (i = 0; i < 5; i++)
4436 real_from_string (&ext_80387_constants_table[i], cst[i]);
4437 /* Ensure each constant is rounded to XFmode precision. */
4438 real_convert (&ext_80387_constants_table[i],
4439 XFmode, &ext_80387_constants_table[i]);
4442 ext_80387_constants_init = 1;
4445 /* Return true if the constant is something that can be loaded with
4446 a special instruction. */
4449 standard_80387_constant_p (rtx x)
4451 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4452 return -1;
4454 if (x == CONST0_RTX (GET_MODE (x)))
4455 return 1;
4456 if (x == CONST1_RTX (GET_MODE (x)))
4457 return 2;
4459 /* For XFmode constants, try to find a special 80387 instruction when
4460 optimizing for size or on those CPUs that benefit from them. */
4461 if (GET_MODE (x) == XFmode
4462 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4464 REAL_VALUE_TYPE r;
4465 int i;
4467 if (! ext_80387_constants_init)
4468 init_ext_80387_constants ();
4470 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4471 for (i = 0; i < 5; i++)
4472 if (real_identical (&r, &ext_80387_constants_table[i]))
4473 return i + 3;
4476 return 0;
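/* Example of the mapping (the values below are just illustrations): 0.0
   yields 1 ("fldz"), 1.0 yields 2 ("fld1"), and an XFmode pi that matches
   the table above yields 7 ("fldpi"), while something like 2.0 yields 0
   because no special load instruction exists for it.  */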
4479 /* Return the opcode of the special instruction to be used to load
4480 the constant X. */
4482 const char *
4483 standard_80387_constant_opcode (rtx x)
4485 switch (standard_80387_constant_p (x))
4487 case 1:
4488 return "fldz";
4489 case 2:
4490 return "fld1";
4491 case 3:
4492 return "fldlg2";
4493 case 4:
4494 return "fldln2";
4495 case 5:
4496 return "fldl2e";
4497 case 6:
4498 return "fldl2t";
4499 case 7:
4500 return "fldpi";
4501 default:
4502 gcc_unreachable ();
4506 /* Return the CONST_DOUBLE representing the 80387 constant that is
4507 loaded by the specified special instruction. The argument IDX
4508 matches the return value from standard_80387_constant_p. */
4511 standard_80387_constant_rtx (int idx)
4513 int i;
4515 if (! ext_80387_constants_init)
4516 init_ext_80387_constants ();
4518 switch (idx)
4520 case 3:
4521 case 4:
4522 case 5:
4523 case 6:
4524 case 7:
4525 i = idx - 3;
4526 break;
4528 default:
4529 gcc_unreachable ();
4532 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4533 XFmode);
4536 /* Return 1 if X is an FP constant we can load into an SSE register w/o using memory.
4539 standard_sse_constant_p (rtx x)
4541 if (x == const0_rtx)
4542 return 1;
4543 return (x == CONST0_RTX (GET_MODE (x)));
4546 /* Returns 1 if OP contains a symbol reference */
4549 symbolic_reference_mentioned_p (rtx op)
4551 const char *fmt;
4552 int i;
4554 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4555 return 1;
4557 fmt = GET_RTX_FORMAT (GET_CODE (op));
4558 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4560 if (fmt[i] == 'E')
4562 int j;
4564 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4565 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4566 return 1;
4569 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4570 return 1;
4573 return 0;
4576 /* Return 1 if it is appropriate to emit `ret' instructions in the
4577 body of a function. Do this only if the epilogue is simple, needing a
4578 couple of insns. Prior to reloading, we can't tell how many registers
4579 must be saved, so return 0 then. Return 0 if there is no frame
4580 marker to de-allocate. */
4583 ix86_can_use_return_insn_p (void)
4585 struct ix86_frame frame;
4587 if (! reload_completed || frame_pointer_needed)
4588 return 0;
4590 /* Don't allow more than 32768 bytes of args to be popped, since that's
4591 all we can do with one instruction. */
4592 if (current_function_pops_args
4593 && current_function_args_size >= 32768)
4594 return 0;
4596 ix86_compute_frame_layout (&frame);
4597 return frame.to_allocate == 0 && frame.nregs == 0;
4600 /* Value should be nonzero if functions must have frame pointers.
4601 Zero means the frame pointer need not be set up (and parms may
4602 be accessed via the stack pointer) in functions that seem suitable. */
4605 ix86_frame_pointer_required (void)
4607 /* If we accessed previous frames, then the generated code expects
4608 to be able to access the saved ebp value in our frame. */
4609 if (cfun->machine->accesses_prev_frame)
4610 return 1;
4612 /* Several x86 OSes need a frame pointer for other reasons,
4613 usually pertaining to setjmp. */
4614 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4615 return 1;
4617 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4618 the frame pointer by default. Turn it back on now if we've not
4619 got a leaf function. */
4620 if (TARGET_OMIT_LEAF_FRAME_POINTER
4621 && (!current_function_is_leaf
4622 || ix86_current_function_calls_tls_descriptor))
4623 return 1;
4625 if (current_function_profile)
4626 return 1;
4628 return 0;
4631 /* Record that the current function accesses previous call frames. */
4633 void
4634 ix86_setup_frame_addresses (void)
4636 cfun->machine->accesses_prev_frame = 1;
4639 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4640 # define USE_HIDDEN_LINKONCE 1
4641 #else
4642 # define USE_HIDDEN_LINKONCE 0
4643 #endif
4645 static int pic_labels_used;
4647 /* Fills in the label name that should be used for a pc thunk for
4648 the given register. */
4650 static void
4651 get_pc_thunk_name (char name[32], unsigned int regno)
4653 if (USE_HIDDEN_LINKONCE)
4654 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4655 else
4656 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4660 /* At file end, emit the pc thunks used for -fpic: each one loads its
4661 register with the return address of the caller and then returns. */
4663 void
4664 ix86_file_end (void)
4666 rtx xops[2];
4667 int regno;
4669 for (regno = 0; regno < 8; ++regno)
4671 char name[32];
4673 if (! ((pic_labels_used >> regno) & 1))
4674 continue;
4676 get_pc_thunk_name (name, regno);
4678 #if TARGET_MACHO
4679 if (TARGET_MACHO)
4681 switch_to_section (darwin_sections[text_coal_section]);
4682 fputs ("\t.weak_definition\t", asm_out_file);
4683 assemble_name (asm_out_file, name);
4684 fputs ("\n\t.private_extern\t", asm_out_file);
4685 assemble_name (asm_out_file, name);
4686 fputs ("\n", asm_out_file);
4687 ASM_OUTPUT_LABEL (asm_out_file, name);
4689 else
4690 #endif
4691 if (USE_HIDDEN_LINKONCE)
4693 tree decl;
4695 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4696 error_mark_node);
4697 TREE_PUBLIC (decl) = 1;
4698 TREE_STATIC (decl) = 1;
4699 DECL_ONE_ONLY (decl) = 1;
4701 (*targetm.asm_out.unique_section) (decl, 0);
4702 switch_to_section (get_named_section (decl, NULL, 0));
4704 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4705 fputs ("\t.hidden\t", asm_out_file);
4706 assemble_name (asm_out_file, name);
4707 fputc ('\n', asm_out_file);
4708 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4710 else
4712 switch_to_section (text_section);
4713 ASM_OUTPUT_LABEL (asm_out_file, name);
4716 xops[0] = gen_rtx_REG (SImode, regno);
4717 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4718 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4719 output_asm_insn ("ret", xops);
4722 if (NEED_INDICATE_EXEC_STACK)
4723 file_end_indicate_exec_stack ();
4726 /* Emit code for the SET_GOT patterns. */
4728 const char *
4729 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4731 rtx xops[3];
4733 xops[0] = dest;
4734 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4736 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4738 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4740 if (!flag_pic)
4741 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4742 else
4743 output_asm_insn ("call\t%a2", xops);
4745 #if TARGET_MACHO
4746 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4747 is what will be referenced by the Mach-O PIC subsystem. */
4748 if (!label)
4749 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4750 #endif
4752 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4753 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4755 if (flag_pic)
4756 output_asm_insn ("pop{l}\t%0", xops);
4758 else
4760 char name[32];
4761 get_pc_thunk_name (name, REGNO (dest));
4762 pic_labels_used |= 1 << REGNO (dest);
4764 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4765 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4766 output_asm_insn ("call\t%X2", xops);
4767 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4768 is what will be referenced by the Mach-O PIC subsystem. */
4769 #if TARGET_MACHO
4770 if (!label)
4771 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4772 else
4773 targetm.asm_out.internal_label (asm_out_file, "L",
4774 CODE_LABEL_NUMBER (label));
4775 #endif
4778 if (TARGET_MACHO)
4779 return "";
4781 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4782 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4783 else
4784 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4786 return "";
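/* Roughly, the sequence emitted above is either (with deep branch
   prediction, using the thunk emitted by ix86_file_end)

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   or the classic inline form

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   (register and label names here are only examples; any of the eight
   general registers can serve as the PIC register).  */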
4789 /* Generate a "push" pattern for input ARG. */
4791 static rtx
4792 gen_push (rtx arg)
4794 return gen_rtx_SET (VOIDmode,
4795 gen_rtx_MEM (Pmode,
4796 gen_rtx_PRE_DEC (Pmode,
4797 stack_pointer_rtx)),
4798 arg);
4801 /* Return >= 0 if there is an unused call-clobbered register available
4802 for the entire function. */
4804 static unsigned int
4805 ix86_select_alt_pic_regnum (void)
4807 if (current_function_is_leaf && !current_function_profile
4808 && !ix86_current_function_calls_tls_descriptor)
4810 int i;
4811 for (i = 2; i >= 0; --i)
4812 if (!regs_ever_live[i])
4813 return i;
4816 return INVALID_REGNUM;
4819 /* Return 1 if we need to save REGNO. */
4820 static int
4821 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4823 if (pic_offset_table_rtx
4824 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4825 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4826 || current_function_profile
4827 || current_function_calls_eh_return
4828 || current_function_uses_const_pool))
4830 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4831 return 0;
4832 return 1;
4835 if (current_function_calls_eh_return && maybe_eh_return)
4837 unsigned i;
4838 for (i = 0; ; i++)
4840 unsigned test = EH_RETURN_DATA_REGNO (i);
4841 if (test == INVALID_REGNUM)
4842 break;
4843 if (test == regno)
4844 return 1;
4848 if (cfun->machine->force_align_arg_pointer
4849 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4850 return 1;
4852 return (regs_ever_live[regno]
4853 && !call_used_regs[regno]
4854 && !fixed_regs[regno]
4855 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4858 /* Return the number of registers to be saved on the stack. */
4860 static int
4861 ix86_nsaved_regs (void)
4863 int nregs = 0;
4864 int regno;
4866 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4867 if (ix86_save_reg (regno, true))
4868 nregs++;
4869 return nregs;
4872 /* Return the offset between two registers, one to be eliminated, and the other
4873 its replacement, at the start of a routine. */
4875 HOST_WIDE_INT
4876 ix86_initial_elimination_offset (int from, int to)
4878 struct ix86_frame frame;
4879 ix86_compute_frame_layout (&frame);
4881 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4882 return frame.hard_frame_pointer_offset;
4883 else if (from == FRAME_POINTER_REGNUM
4884 && to == HARD_FRAME_POINTER_REGNUM)
4885 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4886 else
4888 gcc_assert (to == STACK_POINTER_REGNUM);
4890 if (from == ARG_POINTER_REGNUM)
4891 return frame.stack_pointer_offset;
4893 gcc_assert (from == FRAME_POINTER_REGNUM);
4894 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4898 /* Fill the ix86_frame structure with data about the frame of the currently compiled function. */
4900 static void
4901 ix86_compute_frame_layout (struct ix86_frame *frame)
4903 HOST_WIDE_INT total_size;
4904 unsigned int stack_alignment_needed;
4905 HOST_WIDE_INT offset;
4906 unsigned int preferred_alignment;
4907 HOST_WIDE_INT size = get_frame_size ();
4909 frame->nregs = ix86_nsaved_regs ();
4910 total_size = size;
4912 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4913 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4915 /* During reload iteration the number of registers saved can change.
4916 Recompute the value as needed. Do not recompute when the number of
4917 registers didn't change, as reload does multiple calls to this function
4918 and does not expect the decision to change within a single iteration. */
4919 if (!optimize_size
4920 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4922 int count = frame->nregs;
4924 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4925 /* The fast prologue uses moves instead of pushes to save registers. This
4926 is significantly longer, but also executes faster, as modern hardware
4927 can execute the moves in parallel but can't do that for push/pop.
4929 Be careful about choosing which prologue to emit: when the function takes
4930 many instructions to execute we may use the slow version, as well as when
4931 the function is known to be outside a hot spot (this is known only with
4932 feedback). Weight the size of the function by the number of registers
4933 to save, as it is cheap to use one or two push instructions but very
4934 slow to use many of them. */
4935 if (count)
4936 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4937 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4938 || (flag_branch_probabilities
4939 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4940 cfun->machine->use_fast_prologue_epilogue = false;
4941 else
4942 cfun->machine->use_fast_prologue_epilogue
4943 = !expensive_function_p (count);
4945 if (TARGET_PROLOGUE_USING_MOVE
4946 && cfun->machine->use_fast_prologue_epilogue)
4947 frame->save_regs_using_mov = true;
4948 else
4949 frame->save_regs_using_mov = false;
4952 /* Skip return address and saved base pointer. */
4953 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4955 frame->hard_frame_pointer_offset = offset;
4957 /* Do some sanity checking of stack_alignment_needed and
4958 preferred_alignment, since the i386 port is the only one using these
4959 features, and they may break easily. */
4961 gcc_assert (!size || stack_alignment_needed);
4962 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4963 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4964 gcc_assert (stack_alignment_needed
4965 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4967 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4968 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4970 /* Register save area */
4971 offset += frame->nregs * UNITS_PER_WORD;
4973 /* Va-arg area */
4974 if (ix86_save_varrargs_registers)
4976 offset += X86_64_VARARGS_SIZE;
4977 frame->va_arg_size = X86_64_VARARGS_SIZE;
4979 else
4980 frame->va_arg_size = 0;
4982 /* Align start of frame for local function. */
4983 frame->padding1 = ((offset + stack_alignment_needed - 1)
4984 & -stack_alignment_needed) - offset;
4986 offset += frame->padding1;
4988 /* Frame pointer points here. */
4989 frame->frame_pointer_offset = offset;
4991 offset += size;
4993 /* Add the outgoing arguments area. It can be skipped if we eliminated
4994 all the function calls as dead code.
4995 Skipping is however impossible when the function calls alloca: the
4996 alloca expander assumes that the last current_function_outgoing_args_size
4997 bytes of the stack frame are unused. */
4998 if (ACCUMULATE_OUTGOING_ARGS
4999 && (!current_function_is_leaf || current_function_calls_alloca
5000 || ix86_current_function_calls_tls_descriptor))
5002 offset += current_function_outgoing_args_size;
5003 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5005 else
5006 frame->outgoing_arguments_size = 0;
5008 /* Align stack boundary. Only needed if we're calling another function
5009 or using alloca. */
5010 if (!current_function_is_leaf || current_function_calls_alloca
5011 || ix86_current_function_calls_tls_descriptor)
5012 frame->padding2 = ((offset + preferred_alignment - 1)
5013 & -preferred_alignment) - offset;
5014 else
5015 frame->padding2 = 0;
5017 offset += frame->padding2;
5019 /* We've reached end of stack frame. */
5020 frame->stack_pointer_offset = offset;
5022 /* Size prologue needs to allocate. */
5023 frame->to_allocate =
5024 (size + frame->padding1 + frame->padding2
5025 + frame->outgoing_arguments_size + frame->va_arg_size);
5027 if ((!frame->to_allocate && frame->nregs <= 1)
5028 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5029 frame->save_regs_using_mov = false;
5031 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5032 && current_function_is_leaf
5033 && !ix86_current_function_calls_tls_descriptor)
5035 frame->red_zone_size = frame->to_allocate;
5036 if (frame->save_regs_using_mov)
5037 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5038 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5039 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5041 else
5042 frame->red_zone_size = 0;
5043 frame->to_allocate -= frame->red_zone_size;
5044 frame->stack_pointer_offset -= frame->red_zone_size;
5045 #if 0
5046 fprintf (stderr, "nregs: %i\n", frame->nregs);
5047 fprintf (stderr, "size: %i\n", size);
5048 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5049 fprintf (stderr, "padding1: %i\n", frame->padding1);
5050 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5051 fprintf (stderr, "padding2: %i\n", frame->padding2);
5052 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5053 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5054 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5055 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5056 frame->hard_frame_pointer_offset);
5057 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5058 #endif
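/* For orientation, the offsets computed above describe a frame laid out
   (from higher to lower addresses, red zone handling aside) roughly as

	return address
	saved frame pointer (if used)	<- hard_frame_pointer_offset
	saved registers
	va-arg register save area
	padding1
	local variables			<- frame_pointer_offset
	outgoing argument area
	padding2			<- stack_pointer_offset

   with to_allocate being what the prologue still has to subtract from the
   stack pointer once the pushes are done.  */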
5061 /* Emit code to save registers in the prologue. */
5063 static void
5064 ix86_emit_save_regs (void)
5066 unsigned int regno;
5067 rtx insn;
5069 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5070 if (ix86_save_reg (regno, true))
5072 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5073 RTX_FRAME_RELATED_P (insn) = 1;
5077 /* Emit code to save registers using MOV insns. The first register
5078 is saved at POINTER + OFFSET. */
5079 static void
5080 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5082 unsigned int regno;
5083 rtx insn;
5085 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5086 if (ix86_save_reg (regno, true))
5088 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5089 Pmode, offset),
5090 gen_rtx_REG (Pmode, regno));
5091 RTX_FRAME_RELATED_P (insn) = 1;
5092 offset += UNITS_PER_WORD;
5096 /* Expand a prologue or epilogue stack adjustment.
5097 The pattern exists to put a dependency on all ebp-based memory accesses.
5098 STYLE should be negative if instructions should be marked as frame related,
5099 zero if the %r11 register is live and cannot be freely used, and positive
5100 otherwise. */
5102 static void
5103 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5105 rtx insn;
5107 if (! TARGET_64BIT)
5108 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5109 else if (x86_64_immediate_operand (offset, DImode))
5110 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5111 else
5113 rtx r11;
5114 /* r11 is used by indirect sibcall return as well, set before the
5115 epilogue and used after the epilogue. ATM indirect sibcall
5116 shouldn't be used together with huge frame sizes in one
5117 function because of the frame_size check in sibcall.c. */
5118 gcc_assert (style);
5119 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5120 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5121 if (style < 0)
5122 RTX_FRAME_RELATED_P (insn) = 1;
5123 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5124 offset));
5126 if (style < 0)
5127 RTX_FRAME_RELATED_P (insn) = 1;
5130 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5132 static rtx
5133 ix86_internal_arg_pointer (void)
5135 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5136 && DECL_NAME (current_function_decl)
5137 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5138 && DECL_FILE_SCOPE_P (current_function_decl))
5140 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5141 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5143 else
5144 return virtual_incoming_args_rtx;
5147 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5148 This is called from dwarf2out.c to emit call frame instructions
5149 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5150 static void
5151 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5153 rtx unspec = SET_SRC (pattern);
5154 gcc_assert (GET_CODE (unspec) == UNSPEC);
5156 switch (index)
5158 case UNSPEC_REG_SAVE:
5159 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5160 SET_DEST (pattern));
5161 break;
5162 case UNSPEC_DEF_CFA:
5163 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5164 INTVAL (XVECEXP (unspec, 0, 0)));
5165 break;
5166 default:
5167 gcc_unreachable ();
5171 /* Expand the prologue into a bunch of separate insns. */
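/* Illustrative only: for a simple 32-bit function with a frame pointer
   this typically amounts to
       push %ebp
       mov  %esp, %ebp
       sub  $N, %esp          # N = frame.to_allocate
   followed by the register saves, though the exact sequence depends on
   the save_regs_using_mov, red-zone and stack-probing cases below.  */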
5173 void
5174 ix86_expand_prologue (void)
5176 rtx insn;
5177 bool pic_reg_used;
5178 struct ix86_frame frame;
5179 HOST_WIDE_INT allocate;
5181 ix86_compute_frame_layout (&frame);
5183 if (cfun->machine->force_align_arg_pointer)
5185 rtx x, y;
5187 /* Grab the argument pointer. */
5188 x = plus_constant (stack_pointer_rtx, 4);
5189 y = cfun->machine->force_align_arg_pointer;
5190 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5191 RTX_FRAME_RELATED_P (insn) = 1;
5193 /* The unwind info consists of two parts: install the fafp as the cfa,
5194 and record the fafp as the "save register" of the stack pointer.
5195 The latter is there in order that the unwinder can see where it
5196 should restore the stack pointer across the and insn. */
5197 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5198 x = gen_rtx_SET (VOIDmode, y, x);
5199 RTX_FRAME_RELATED_P (x) = 1;
5200 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5201 UNSPEC_REG_SAVE);
5202 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5203 RTX_FRAME_RELATED_P (y) = 1;
5204 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5205 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5206 REG_NOTES (insn) = x;
5208 /* Align the stack. */
5209 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5210 GEN_INT (-16)));
5212 /* And here we cheat like madmen with the unwind info. We force the
5213 cfa register back to sp+4, which is exactly what it was at the
5214 start of the function. Re-pushing the return address results in
5215 the return at the same spot relative to the cfa, and thus is
5216 correct wrt the unwind info. */
5217 x = cfun->machine->force_align_arg_pointer;
5218 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5219 insn = emit_insn (gen_push (x));
5220 RTX_FRAME_RELATED_P (insn) = 1;
5222 x = GEN_INT (4);
5223 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5224 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5225 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5226 REG_NOTES (insn) = x;
5229 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5230 slower on all targets. Also sdb doesn't like it. */
5232 if (frame_pointer_needed)
5234 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5235 RTX_FRAME_RELATED_P (insn) = 1;
5237 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5238 RTX_FRAME_RELATED_P (insn) = 1;
5241 allocate = frame.to_allocate;
5243 if (!frame.save_regs_using_mov)
5244 ix86_emit_save_regs ();
5245 else
5246 allocate += frame.nregs * UNITS_PER_WORD;
5248 /* When using the red zone, we may start register saving before allocating
5249 the stack frame, saving one cycle of the prologue. */
5250 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5251 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5252 : stack_pointer_rtx,
5253 -frame.nregs * UNITS_PER_WORD);
5255 if (allocate == 0)
5257 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5258 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5259 GEN_INT (-allocate), -1);
5260 else
5262 /* Only valid for Win32. */
5263 rtx eax = gen_rtx_REG (SImode, 0);
5264 bool eax_live = ix86_eax_live_at_start_p ();
5265 rtx t;
5267 gcc_assert (!TARGET_64BIT);
5269 if (eax_live)
5271 emit_insn (gen_push (eax));
5272 allocate -= 4;
5275 emit_move_insn (eax, GEN_INT (allocate));
5277 insn = emit_insn (gen_allocate_stack_worker (eax));
5278 RTX_FRAME_RELATED_P (insn) = 1;
5279 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5280 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5281 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5282 t, REG_NOTES (insn));
5284 if (eax_live)
5286 if (frame_pointer_needed)
5287 t = plus_constant (hard_frame_pointer_rtx,
5288 allocate
5289 - frame.to_allocate
5290 - frame.nregs * UNITS_PER_WORD);
5291 else
5292 t = plus_constant (stack_pointer_rtx, allocate);
5293 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5297 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5299 if (!frame_pointer_needed || !frame.to_allocate)
5300 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5301 else
5302 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5303 -frame.nregs * UNITS_PER_WORD);
5306 pic_reg_used = false;
5307 if (pic_offset_table_rtx
5308 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5309 || current_function_profile))
5311 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5313 if (alt_pic_reg_used != INVALID_REGNUM)
5314 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5316 pic_reg_used = true;
5319 if (pic_reg_used)
5321 if (TARGET_64BIT)
5322 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5323 else
5324 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5326 /* Even with accurate pre-reload life analysis, we can wind up
5327 deleting all references to the pic register after reload.
5328 Consider the case where cross-jumping unifies two sides of a branch
5329 controlled by a comparison against the only read from a global.
5330 In that case, allow the set_got to be deleted, though we're
5331 too late to do anything about the ebx save in the prologue. */
5332 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5335 /* Prevent function calls from being scheduled before the call to mcount.
5336 In the pic_reg_used case, make sure that the got load isn't deleted. */
5337 if (current_function_profile)
5338 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5341 /* Emit code to restore saved registers using MOV insns. First register
5342 is restored from POINTER + OFFSET. */
5343 static void
5344 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5345 int maybe_eh_return)
5347 int regno;
5348 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5350 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5351 if (ix86_save_reg (regno, maybe_eh_return))
5353 /* Ensure that adjust_address won't be forced to produce a pointer
5354 out of the range allowed by the x86-64 instruction set. */
5355 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5357 rtx r11;
5359 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5360 emit_move_insn (r11, GEN_INT (offset));
5361 emit_insn (gen_adddi3 (r11, r11, pointer));
5362 base_address = gen_rtx_MEM (Pmode, r11);
5363 offset = 0;
5365 emit_move_insn (gen_rtx_REG (Pmode, regno),
5366 adjust_address (base_address, Pmode, offset));
5367 offset += UNITS_PER_WORD;
5371 /* Restore function stack, frame, and registers. */
5373 void
5374 ix86_expand_epilogue (int style)
5376 int regno;
5377 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5378 struct ix86_frame frame;
5379 HOST_WIDE_INT offset;
5381 ix86_compute_frame_layout (&frame);
5383 /* Calculate start of saved registers relative to ebp. Special care
5384 must be taken for the normal return case of a function using
5385 eh_return: the eax and edx registers are marked as saved, but not
5386 restored along this path. */
5387 offset = frame.nregs;
5388 if (current_function_calls_eh_return && style != 2)
5389 offset -= 2;
5390 offset *= -UNITS_PER_WORD;
5392 /* If we're only restoring one register and sp is not valid then
5393 use a move instruction to restore the register since it's
5394 less work than reloading sp and popping the register.
5396 The default code results in a stack adjustment using an add/lea instruction,
5397 while this code results in a LEAVE instruction (or discrete equivalent),
5398 so it is profitable in some other cases as well. Especially when there
5399 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5400 and there is exactly one register to pop. This heuristic may need some
5401 tuning in the future. */
5402 if ((!sp_valid && frame.nregs <= 1)
5403 || (TARGET_EPILOGUE_USING_MOVE
5404 && cfun->machine->use_fast_prologue_epilogue
5405 && (frame.nregs > 1 || frame.to_allocate))
5406 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5407 || (frame_pointer_needed && TARGET_USE_LEAVE
5408 && cfun->machine->use_fast_prologue_epilogue
5409 && frame.nregs == 1)
5410 || current_function_calls_eh_return)
5412 /* Restore registers. We can use ebp or esp to address the memory
5413 locations. If both are available, default to ebp, since offsets
5414 are known to be small. The only exception is esp pointing directly to the
5415 end of the block of saved registers, where we may simplify the addressing
5416 mode. */
5418 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5419 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5420 frame.to_allocate, style == 2);
5421 else
5422 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5423 offset, style == 2);
5425 /* eh_return epilogues need %ecx added to the stack pointer. */
5426 if (style == 2)
5428 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5430 if (frame_pointer_needed)
5432 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5433 tmp = plus_constant (tmp, UNITS_PER_WORD);
5434 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5436 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5437 emit_move_insn (hard_frame_pointer_rtx, tmp);
5439 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5440 const0_rtx, style);
5442 else
5444 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5445 tmp = plus_constant (tmp, (frame.to_allocate
5446 + frame.nregs * UNITS_PER_WORD));
5447 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5450 else if (!frame_pointer_needed)
5451 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5452 GEN_INT (frame.to_allocate
5453 + frame.nregs * UNITS_PER_WORD),
5454 style);
5455 /* If not an i386, mov & pop is faster than "leave". */
5456 else if (TARGET_USE_LEAVE || optimize_size
5457 || !cfun->machine->use_fast_prologue_epilogue)
5458 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5459 else
5461 pro_epilogue_adjust_stack (stack_pointer_rtx,
5462 hard_frame_pointer_rtx,
5463 const0_rtx, style);
5464 if (TARGET_64BIT)
5465 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5466 else
5467 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5470 else
5472 /* First step is to deallocate the stack frame so that we can
5473 pop the registers. */
5474 if (!sp_valid)
5476 gcc_assert (frame_pointer_needed);
5477 pro_epilogue_adjust_stack (stack_pointer_rtx,
5478 hard_frame_pointer_rtx,
5479 GEN_INT (offset), style);
5481 else if (frame.to_allocate)
5482 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5483 GEN_INT (frame.to_allocate), style);
5485 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5486 if (ix86_save_reg (regno, false))
5488 if (TARGET_64BIT)
5489 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5490 else
5491 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5493 if (frame_pointer_needed)
5495 /* Using leave results in shorter dependency chains on CPUs that are
5496 able to grok it fast. */
5497 if (TARGET_USE_LEAVE)
5498 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5499 else if (TARGET_64BIT)
5500 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5501 else
5502 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5506 if (cfun->machine->force_align_arg_pointer)
5508 emit_insn (gen_addsi3 (stack_pointer_rtx,
5509 cfun->machine->force_align_arg_pointer,
5510 GEN_INT (-4)));
5513 /* Sibcall epilogues don't want a return instruction. */
5514 if (style == 0)
5515 return;
5517 if (current_function_pops_args && current_function_args_size)
5519 rtx popc = GEN_INT (current_function_pops_args);
5521 /* i386 can only pop 64K bytes. If asked to pop more, pop
5522 return address, do explicit add, and jump indirectly to the
5523 caller. */
5525 if (current_function_pops_args >= 65536)
5527 rtx ecx = gen_rtx_REG (SImode, 2);
5529 /* There is no "pascal" calling convention in the 64bit ABI. */
5530 gcc_assert (!TARGET_64BIT);
5532 emit_insn (gen_popsi1 (ecx));
5533 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5534 emit_jump_insn (gen_return_indirect_internal (ecx));
5536 else
5537 emit_jump_insn (gen_return_pop_internal (popc));
5539 else
5540 emit_jump_insn (gen_return_internal ());
5543 /* Reset from the function's potential modifications. */
5545 static void
5546 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5547 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5549 if (pic_offset_table_rtx)
5550 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5553 /* Extract the parts of an RTL expression that is a valid memory address
5554 for an instruction. Return 0 if the structure of the address is
5555 grossly off. Return -1 if the address contains ASHIFT, so it is not
5556 strictly valid, but still useful for computing the length of a lea instruction. */
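/* For illustration, an address such as
       (plus:SI (reg:SI %ebx) (mult:SI (reg:SI %ecx) (const_int 4)))
   would be expected to decompose into base = %ebx, index = %ecx,
   scale = 4 and a null displacement, i.e. the AT&T operand
   (%ebx,%ecx,4).  */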
5559 ix86_decompose_address (rtx addr, struct ix86_address *out)
5561 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5562 rtx base_reg, index_reg;
5563 HOST_WIDE_INT scale = 1;
5564 rtx scale_rtx = NULL_RTX;
5565 int retval = 1;
5566 enum ix86_address_seg seg = SEG_DEFAULT;
5568 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5569 base = addr;
5570 else if (GET_CODE (addr) == PLUS)
5572 rtx addends[4], op;
5573 int n = 0, i;
5575 op = addr;
5578 if (n >= 4)
5579 return 0;
5580 addends[n++] = XEXP (op, 1);
5581 op = XEXP (op, 0);
5583 while (GET_CODE (op) == PLUS);
5584 if (n >= 4)
5585 return 0;
5586 addends[n] = op;
5588 for (i = n; i >= 0; --i)
5590 op = addends[i];
5591 switch (GET_CODE (op))
5593 case MULT:
5594 if (index)
5595 return 0;
5596 index = XEXP (op, 0);
5597 scale_rtx = XEXP (op, 1);
5598 break;
5600 case UNSPEC:
5601 if (XINT (op, 1) == UNSPEC_TP
5602 && TARGET_TLS_DIRECT_SEG_REFS
5603 && seg == SEG_DEFAULT)
5604 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5605 else
5606 return 0;
5607 break;
5609 case REG:
5610 case SUBREG:
5611 if (!base)
5612 base = op;
5613 else if (!index)
5614 index = op;
5615 else
5616 return 0;
5617 break;
5619 case CONST:
5620 case CONST_INT:
5621 case SYMBOL_REF:
5622 case LABEL_REF:
5623 if (disp)
5624 return 0;
5625 disp = op;
5626 break;
5628 default:
5629 return 0;
5633 else if (GET_CODE (addr) == MULT)
5635 index = XEXP (addr, 0); /* index*scale */
5636 scale_rtx = XEXP (addr, 1);
5638 else if (GET_CODE (addr) == ASHIFT)
5640 rtx tmp;
5642 /* We're called for lea too, which implements ashift on occasion. */
5643 index = XEXP (addr, 0);
5644 tmp = XEXP (addr, 1);
5645 if (GET_CODE (tmp) != CONST_INT)
5646 return 0;
5647 scale = INTVAL (tmp);
5648 if ((unsigned HOST_WIDE_INT) scale > 3)
5649 return 0;
5650 scale = 1 << scale;
5651 retval = -1;
5653 else
5654 disp = addr; /* displacement */
5656 /* Extract the integral value of scale. */
5657 if (scale_rtx)
5659 if (GET_CODE (scale_rtx) != CONST_INT)
5660 return 0;
5661 scale = INTVAL (scale_rtx);
5664 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5665 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5667 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5668 if (base_reg && index_reg && scale == 1
5669 && (index_reg == arg_pointer_rtx
5670 || index_reg == frame_pointer_rtx
5671 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5673 rtx tmp;
5674 tmp = base, base = index, index = tmp;
5675 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5678 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5679 if ((base_reg == hard_frame_pointer_rtx
5680 || base_reg == frame_pointer_rtx
5681 || base_reg == arg_pointer_rtx) && !disp)
5682 disp = const0_rtx;
5684 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5685 Avoid this by transforming to [%esi+0]. */
5686 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5687 && base_reg && !index_reg && !disp
5688 && REG_P (base_reg)
5689 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5690 disp = const0_rtx;
5692 /* Special case: encode reg+reg instead of reg*2. */
5693 if (!base && index && scale && scale == 2)
5694 base = index, base_reg = index_reg, scale = 1;
5696 /* Special case: scaling cannot be encoded without base or displacement. */
5697 if (!base && !disp && index && scale != 1)
5698 disp = const0_rtx;
5700 out->base = base;
5701 out->index = index;
5702 out->disp = disp;
5703 out->scale = scale;
5704 out->seg = seg;
5706 return retval;
5709 /* Return cost of the memory address x.
5710 For i386, it is better to use a complex address than let gcc copy
5711 the address into a reg and make a new pseudo. But not if the address
5712 requires two regs - that would mean more pseudos with longer
5713 lifetimes. */
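/* As a rough illustration of the heuristic below: a bare (reg) address
   keeps the base cost of 1, a nonzero displacement or a segment override
   makes the address look slightly cheaper, and an address whose base or
   index is still a pseudo register is penalized, more so when base and
   index are two distinct pseudos.  */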
5714 static int
5715 ix86_address_cost (rtx x)
5717 struct ix86_address parts;
5718 int cost = 1;
5719 int ok = ix86_decompose_address (x, &parts);
5721 gcc_assert (ok);
5723 if (parts.base && GET_CODE (parts.base) == SUBREG)
5724 parts.base = SUBREG_REG (parts.base);
5725 if (parts.index && GET_CODE (parts.index) == SUBREG)
5726 parts.index = SUBREG_REG (parts.index);
5728 /* More complex memory references are better. */
5729 if (parts.disp && parts.disp != const0_rtx)
5730 cost--;
5731 if (parts.seg != SEG_DEFAULT)
5732 cost--;
5734 /* Attempt to minimize number of registers in the address. */
5735 if ((parts.base
5736 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5737 || (parts.index
5738 && (!REG_P (parts.index)
5739 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5740 cost++;
5742 if (parts.base
5743 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5744 && parts.index
5745 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5746 && parts.base != parts.index)
5747 cost++;
5749 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5750 since its predecode logic can't detect the length of instructions
5751 and the instruction degenerates to vector decoded. Increase cost of such
5752 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5753 to split such addresses or even refuse such addresses at all.
5755 The following addressing modes are affected:
5756 [base+scale*index]
5757 [scale*index+disp]
5758 [base+index]
5760 The first and last case may be avoidable by explicitly coding the zero
5761 displacement in the memory address, but I don't have an AMD-K6 machine
5762 handy to check this theory. */
5764 if (TARGET_K6
5765 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5766 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5767 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5768 cost += 10;
5770 return cost;
5773 /* If X is a machine specific address (i.e. a symbol or label being
5774 referenced as a displacement from the GOT implemented using an
5775 UNSPEC), then return the base term. Otherwise return X. */
5778 ix86_find_base_term (rtx x)
5780 rtx term;
5782 if (TARGET_64BIT)
5784 if (GET_CODE (x) != CONST)
5785 return x;
5786 term = XEXP (x, 0);
5787 if (GET_CODE (term) == PLUS
5788 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5789 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5790 term = XEXP (term, 0);
5791 if (GET_CODE (term) != UNSPEC
5792 || XINT (term, 1) != UNSPEC_GOTPCREL)
5793 return x;
5795 term = XVECEXP (term, 0, 0);
5797 if (GET_CODE (term) != SYMBOL_REF
5798 && GET_CODE (term) != LABEL_REF)
5799 return x;
5801 return term;
5804 term = ix86_delegitimize_address (x);
5806 if (GET_CODE (term) != SYMBOL_REF
5807 && GET_CODE (term) != LABEL_REF)
5808 return x;
5810 return term;
5813 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5814 this is used to form addresses to local data when -fPIC is in
5815 use. */
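/* That is, an expression of the shape
       (minus (symbol_ref "_foo") (symbol_ref "<pic base>"))
   where "<pic base>" is the literal name compared against below and
   "_foo" stands for any local label or symbol.  */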
5817 static bool
5818 darwin_local_data_pic (rtx disp)
5820 if (GET_CODE (disp) == MINUS)
5822 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5823 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5824 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5826 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5827 if (! strcmp (sym_name, "<pic base>"))
5828 return true;
5832 return false;
5835 /* Determine if a given RTX is a valid constant. We already know this
5836 satisfies CONSTANT_P. */
5838 bool
5839 legitimate_constant_p (rtx x)
5841 switch (GET_CODE (x))
5843 case CONST:
5844 x = XEXP (x, 0);
5846 if (GET_CODE (x) == PLUS)
5848 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5849 return false;
5850 x = XEXP (x, 0);
5853 if (TARGET_MACHO && darwin_local_data_pic (x))
5854 return true;
5856 /* Only some unspecs are valid as "constants". */
5857 if (GET_CODE (x) == UNSPEC)
5858 switch (XINT (x, 1))
5860 case UNSPEC_GOTOFF:
5861 return TARGET_64BIT;
5862 case UNSPEC_TPOFF:
5863 case UNSPEC_NTPOFF:
5864 x = XVECEXP (x, 0, 0);
5865 return (GET_CODE (x) == SYMBOL_REF
5866 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5867 case UNSPEC_DTPOFF:
5868 x = XVECEXP (x, 0, 0);
5869 return (GET_CODE (x) == SYMBOL_REF
5870 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5871 default:
5872 return false;
5875 /* We must have drilled down to a symbol. */
5876 if (GET_CODE (x) == LABEL_REF)
5877 return true;
5878 if (GET_CODE (x) != SYMBOL_REF)
5879 return false;
5880 /* FALLTHRU */
5882 case SYMBOL_REF:
5883 /* TLS symbols are never valid. */
5884 if (SYMBOL_REF_TLS_MODEL (x))
5885 return false;
5886 break;
5888 default:
5889 break;
5892 /* Otherwise we handle everything else in the move patterns. */
5893 return true;
5896 /* Determine if it's legal to put X into the constant pool. This
5897 is not possible for the address of thread-local symbols, which
5898 is checked above. */
5900 static bool
5901 ix86_cannot_force_const_mem (rtx x)
5903 return !legitimate_constant_p (x);
5906 /* Determine if a given RTX is a valid constant address. */
5908 bool
5909 constant_address_p (rtx x)
5911 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5914 /* Nonzero if the constant value X is a legitimate general operand
5915 when generating PIC code. It is given that flag_pic is on and
5916 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5918 bool
5919 legitimate_pic_operand_p (rtx x)
5921 rtx inner;
5923 switch (GET_CODE (x))
5925 case CONST:
5926 inner = XEXP (x, 0);
5927 if (GET_CODE (inner) == PLUS
5928 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5929 inner = XEXP (inner, 0);
5931 /* Only some unspecs are valid as "constants". */
5932 if (GET_CODE (inner) == UNSPEC)
5933 switch (XINT (inner, 1))
5935 case UNSPEC_GOTOFF:
5936 return TARGET_64BIT;
5937 case UNSPEC_TPOFF:
5938 x = XVECEXP (inner, 0, 0);
5939 return (GET_CODE (x) == SYMBOL_REF
5940 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5941 default:
5942 return false;
5944 /* FALLTHRU */
5946 case SYMBOL_REF:
5947 case LABEL_REF:
5948 return legitimate_pic_address_disp_p (x);
5950 default:
5951 return true;
5955 /* Determine if a given CONST RTX is a valid memory displacement
5956 in PIC mode. */
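/* For example, in 32-bit PIC code a displacement of the form
       (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   is acceptable for a locally bound "foo", while TLS symbols are only
   accepted when wrapped in the matching @GOTTPOFF/@NTPOFF/@DTPOFF
   unspecs checked below.  */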
5959 legitimate_pic_address_disp_p (rtx disp)
5961 bool saw_plus;
5963 /* In 64bit mode we can allow direct addresses of symbols and labels
5964 when they are not dynamic symbols. */
5965 if (TARGET_64BIT)
5967 rtx op0 = disp, op1;
5969 switch (GET_CODE (disp))
5971 case LABEL_REF:
5972 return true;
5974 case CONST:
5975 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5976 break;
5977 op0 = XEXP (XEXP (disp, 0), 0);
5978 op1 = XEXP (XEXP (disp, 0), 1);
5979 if (GET_CODE (op1) != CONST_INT
5980 || INTVAL (op1) >= 16*1024*1024
5981 || INTVAL (op1) < -16*1024*1024)
5982 break;
5983 if (GET_CODE (op0) == LABEL_REF)
5984 return true;
5985 if (GET_CODE (op0) != SYMBOL_REF)
5986 break;
5987 /* FALLTHRU */
5989 case SYMBOL_REF:
5990 /* TLS references should always be enclosed in UNSPEC. */
5991 if (SYMBOL_REF_TLS_MODEL (op0))
5992 return false;
5993 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
5994 return true;
5995 break;
5997 default:
5998 break;
6001 if (GET_CODE (disp) != CONST)
6002 return 0;
6003 disp = XEXP (disp, 0);
6005 if (TARGET_64BIT)
6007 /* It is not safe to allow PLUS expressions; this limits the allowed
6008 distance of GOT table references. We should not need these anyway. */
6009 if (GET_CODE (disp) != UNSPEC
6010 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6011 && XINT (disp, 1) != UNSPEC_GOTOFF))
6012 return 0;
6014 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6015 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6016 return 0;
6017 return 1;
6020 saw_plus = false;
6021 if (GET_CODE (disp) == PLUS)
6023 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6024 return 0;
6025 disp = XEXP (disp, 0);
6026 saw_plus = true;
6029 if (TARGET_MACHO && darwin_local_data_pic (disp))
6030 return 1;
6032 if (GET_CODE (disp) != UNSPEC)
6033 return 0;
6035 switch (XINT (disp, 1))
6037 case UNSPEC_GOT:
6038 if (saw_plus)
6039 return false;
6040 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6041 case UNSPEC_GOTOFF:
6042 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6043 While the ABI also specifies a 32bit relocation, we don't produce it in
6044 the small PIC model at all. */
6045 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6046 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6047 && !TARGET_64BIT)
6048 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6049 return false;
6050 case UNSPEC_GOTTPOFF:
6051 case UNSPEC_GOTNTPOFF:
6052 case UNSPEC_INDNTPOFF:
6053 if (saw_plus)
6054 return false;
6055 disp = XVECEXP (disp, 0, 0);
6056 return (GET_CODE (disp) == SYMBOL_REF
6057 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6058 case UNSPEC_NTPOFF:
6059 disp = XVECEXP (disp, 0, 0);
6060 return (GET_CODE (disp) == SYMBOL_REF
6061 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6062 case UNSPEC_DTPOFF:
6063 disp = XVECEXP (disp, 0, 0);
6064 return (GET_CODE (disp) == SYMBOL_REF
6065 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6068 return 0;
6071 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6072 memory address for an instruction. The MODE argument is the machine mode
6073 for the MEM expression that wants to use this address.
6075 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6076 convert common non-canonical forms to canonical form so that they will
6077 be recognized. */
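/* A quick sketch of what canonical form means here: an address like
       (plus (reg %ebx) (mult (reg %ecx) (const_int 4)))
   passes the checks below, while the equivalent shift form
       (ashift (reg %ecx) (const_int 2))
   decomposes with a -1 result and is rejected; LEGITIMIZE_ADDRESS is
   expected to rewrite such shifts into multiplies first.  */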
6080 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6082 struct ix86_address parts;
6083 rtx base, index, disp;
6084 HOST_WIDE_INT scale;
6085 const char *reason = NULL;
6086 rtx reason_rtx = NULL_RTX;
6088 if (TARGET_DEBUG_ADDR)
6090 fprintf (stderr,
6091 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6092 GET_MODE_NAME (mode), strict);
6093 debug_rtx (addr);
6096 if (ix86_decompose_address (addr, &parts) <= 0)
6098 reason = "decomposition failed";
6099 goto report_error;
6102 base = parts.base;
6103 index = parts.index;
6104 disp = parts.disp;
6105 scale = parts.scale;
6107 /* Validate base register.
6109 Don't allow SUBREG's that span more than a word here. It can lead to spill
6110 failures when the base is one word out of a two word structure, which is
6111 represented internally as a DImode int. */
6113 if (base)
6115 rtx reg;
6116 reason_rtx = base;
6118 if (REG_P (base))
6119 reg = base;
6120 else if (GET_CODE (base) == SUBREG
6121 && REG_P (SUBREG_REG (base))
6122 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6123 <= UNITS_PER_WORD)
6124 reg = SUBREG_REG (base);
6125 else
6127 reason = "base is not a register";
6128 goto report_error;
6131 if (GET_MODE (base) != Pmode)
6133 reason = "base is not in Pmode";
6134 goto report_error;
6137 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6138 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6140 reason = "base is not valid";
6141 goto report_error;
6145 /* Validate index register.
6147 Don't allow SUBREG's that span more than a word here -- same as above. */
6149 if (index)
6151 rtx reg;
6152 reason_rtx = index;
6154 if (REG_P (index))
6155 reg = index;
6156 else if (GET_CODE (index) == SUBREG
6157 && REG_P (SUBREG_REG (index))
6158 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6159 <= UNITS_PER_WORD)
6160 reg = SUBREG_REG (index);
6161 else
6163 reason = "index is not a register";
6164 goto report_error;
6167 if (GET_MODE (index) != Pmode)
6169 reason = "index is not in Pmode";
6170 goto report_error;
6173 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6174 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6176 reason = "index is not valid";
6177 goto report_error;
6181 /* Validate scale factor. */
6182 if (scale != 1)
6184 reason_rtx = GEN_INT (scale);
6185 if (!index)
6187 reason = "scale without index";
6188 goto report_error;
6191 if (scale != 2 && scale != 4 && scale != 8)
6193 reason = "scale is not a valid multiplier";
6194 goto report_error;
6198 /* Validate displacement. */
6199 if (disp)
6201 reason_rtx = disp;
6203 if (GET_CODE (disp) == CONST
6204 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6205 switch (XINT (XEXP (disp, 0), 1))
6207 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6208 used. While the ABI also specifies 32bit relocations, we don't produce
6209 them at all and use IP relative instead. */
6210 case UNSPEC_GOT:
6211 case UNSPEC_GOTOFF:
6212 gcc_assert (flag_pic);
6213 if (!TARGET_64BIT)
6214 goto is_legitimate_pic;
6215 reason = "64bit address unspec";
6216 goto report_error;
6218 case UNSPEC_GOTPCREL:
6219 gcc_assert (flag_pic);
6220 goto is_legitimate_pic;
6222 case UNSPEC_GOTTPOFF:
6223 case UNSPEC_GOTNTPOFF:
6224 case UNSPEC_INDNTPOFF:
6225 case UNSPEC_NTPOFF:
6226 case UNSPEC_DTPOFF:
6227 break;
6229 default:
6230 reason = "invalid address unspec";
6231 goto report_error;
6234 else if (flag_pic && (SYMBOLIC_CONST (disp)
6235 #if TARGET_MACHO
6236 && !machopic_operand_p (disp)
6237 #endif
6240 is_legitimate_pic:
6241 if (TARGET_64BIT && (index || base))
6243 /* foo@dtpoff(%rX) is ok. */
6244 if (GET_CODE (disp) != CONST
6245 || GET_CODE (XEXP (disp, 0)) != PLUS
6246 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6247 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6248 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6249 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6251 reason = "non-constant pic memory reference";
6252 goto report_error;
6255 else if (! legitimate_pic_address_disp_p (disp))
6257 reason = "displacement is an invalid pic construct";
6258 goto report_error;
6261 /* This code used to verify that a symbolic pic displacement
6262 includes the pic_offset_table_rtx register.
6264 While this is a good idea, unfortunately these constructs may
6265 be created by the "adds using lea" optimization for incorrect
6266 code like:
6268 int a;
6269 int foo(int i)
6271 return *(&a+i);
6274 This code is nonsensical, but results in addressing the
6275 GOT table with pic_offset_table_rtx as the base. We can't
6276 just refuse it easily, since it gets matched by the
6277 "addsi3" pattern, which later gets split to lea in the
6278 case the output register differs from the input. While this
6279 could be handled by a separate addsi pattern for this case
6280 that never results in lea, disabling this test seems to be
6281 the easier and correct fix for the crash. */
6283 else if (GET_CODE (disp) != LABEL_REF
6284 && GET_CODE (disp) != CONST_INT
6285 && (GET_CODE (disp) != CONST
6286 || !legitimate_constant_p (disp))
6287 && (GET_CODE (disp) != SYMBOL_REF
6288 || !legitimate_constant_p (disp)))
6290 reason = "displacement is not constant";
6291 goto report_error;
6293 else if (TARGET_64BIT
6294 && !x86_64_immediate_operand (disp, VOIDmode))
6296 reason = "displacement is out of range";
6297 goto report_error;
6301 /* Everything looks valid. */
6302 if (TARGET_DEBUG_ADDR)
6303 fprintf (stderr, "Success.\n");
6304 return TRUE;
6306 report_error:
6307 if (TARGET_DEBUG_ADDR)
6309 fprintf (stderr, "Error: %s\n", reason);
6310 debug_rtx (reason_rtx);
6312 return FALSE;
6315 /* Return a unique alias set for the GOT. */
6317 static HOST_WIDE_INT
6318 ix86_GOT_alias_set (void)
6320 static HOST_WIDE_INT set = -1;
6321 if (set == -1)
6322 set = new_alias_set ();
6323 return set;
6326 /* Return a legitimate reference for ORIG (an address) using the
6327 register REG. If REG is 0, a new pseudo is generated.
6329 There are two types of references that must be handled:
6331 1. Global data references must load the address from the GOT, via
6332 the PIC reg. An insn is emitted to do this load, and the reg is
6333 returned.
6335 2. Static data references, constant pool addresses, and code labels
6336 compute the address as an offset from the GOT, whose base is in
6337 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6338 differentiate them from global data objects. The returned
6339 address is the PIC reg + an unspec constant.
6341 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6342 reg also appears in the address. */
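/* For instance, in 32-bit PIC code a reference to a local symbol "foo"
   is expected to come back as
       (plus pic_offset_table_rtx
             (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   while a global symbol instead becomes a load from
       (mem (plus pic_offset_table_rtx
                  (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))
   whose result is the symbol's address.  */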
6344 static rtx
6345 legitimize_pic_address (rtx orig, rtx reg)
6347 rtx addr = orig;
6348 rtx new = orig;
6349 rtx base;
6351 #if TARGET_MACHO
6352 if (reg == 0)
6353 reg = gen_reg_rtx (Pmode);
6354 /* Use the generic Mach-O PIC machinery. */
6355 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6356 #endif
6358 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6359 new = addr;
6360 else if (TARGET_64BIT
6361 && ix86_cmodel != CM_SMALL_PIC
6362 && local_symbolic_operand (addr, Pmode))
6364 rtx tmpreg;
6365 /* This symbol may be referenced via a displacement from the PIC
6366 base address (@GOTOFF). */
6368 if (reload_in_progress)
6369 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6370 if (GET_CODE (addr) == CONST)
6371 addr = XEXP (addr, 0);
6372 if (GET_CODE (addr) == PLUS)
6374 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6375 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6377 else
6378 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6379 new = gen_rtx_CONST (Pmode, new);
6380 if (!reg)
6381 tmpreg = gen_reg_rtx (Pmode);
6382 else
6383 tmpreg = reg;
6384 emit_move_insn (tmpreg, new);
6386 if (reg != 0)
6388 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6389 tmpreg, 1, OPTAB_DIRECT);
6390 new = reg;
6392 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6394 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6396 /* This symbol may be referenced via a displacement from the PIC
6397 base address (@GOTOFF). */
6399 if (reload_in_progress)
6400 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6401 if (GET_CODE (addr) == CONST)
6402 addr = XEXP (addr, 0);
6403 if (GET_CODE (addr) == PLUS)
6405 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6406 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6408 else
6409 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6410 new = gen_rtx_CONST (Pmode, new);
6411 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6413 if (reg != 0)
6415 emit_move_insn (reg, new);
6416 new = reg;
6419 else if (GET_CODE (addr) == SYMBOL_REF)
6421 if (TARGET_64BIT)
6423 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6424 new = gen_rtx_CONST (Pmode, new);
6425 new = gen_const_mem (Pmode, new);
6426 set_mem_alias_set (new, ix86_GOT_alias_set ());
6428 if (reg == 0)
6429 reg = gen_reg_rtx (Pmode);
6430 /* Use gen_movsi directly; otherwise the address is loaded
6431 into a register for CSE. We don't want to CSE these addresses,
6432 instead we CSE addresses from the GOT table, so skip this. */
6433 emit_insn (gen_movsi (reg, new));
6434 new = reg;
6436 else
6438 /* This symbol must be referenced via a load from the
6439 Global Offset Table (@GOT). */
6441 if (reload_in_progress)
6442 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6443 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6444 new = gen_rtx_CONST (Pmode, new);
6445 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6446 new = gen_const_mem (Pmode, new);
6447 set_mem_alias_set (new, ix86_GOT_alias_set ());
6449 if (reg == 0)
6450 reg = gen_reg_rtx (Pmode);
6451 emit_move_insn (reg, new);
6452 new = reg;
6455 else
6457 if (GET_CODE (addr) == CONST_INT
6458 && !x86_64_immediate_operand (addr, VOIDmode))
6460 if (reg)
6462 emit_move_insn (reg, addr);
6463 new = reg;
6465 else
6466 new = force_reg (Pmode, addr);
6468 else if (GET_CODE (addr) == CONST)
6470 addr = XEXP (addr, 0);
6472 /* We must match stuff we generate before. Assume the only
6473 unspecs that can get here are ours. Not that we could do
6474 anything with them anyway.... */
6475 if (GET_CODE (addr) == UNSPEC
6476 || (GET_CODE (addr) == PLUS
6477 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6478 return orig;
6479 gcc_assert (GET_CODE (addr) == PLUS);
6481 if (GET_CODE (addr) == PLUS)
6483 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6485 /* Check first to see if this is a constant offset from a @GOTOFF
6486 symbol reference. */
6487 if (local_symbolic_operand (op0, Pmode)
6488 && GET_CODE (op1) == CONST_INT)
6490 if (!TARGET_64BIT)
6492 if (reload_in_progress)
6493 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6494 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6495 UNSPEC_GOTOFF);
6496 new = gen_rtx_PLUS (Pmode, new, op1);
6497 new = gen_rtx_CONST (Pmode, new);
6498 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6500 if (reg != 0)
6502 emit_move_insn (reg, new);
6503 new = reg;
6506 else
6508 if (INTVAL (op1) < -16*1024*1024
6509 || INTVAL (op1) >= 16*1024*1024)
6511 if (!x86_64_immediate_operand (op1, Pmode))
6512 op1 = force_reg (Pmode, op1);
6513 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6517 else
6519 base = legitimize_pic_address (XEXP (addr, 0), reg);
6520 new = legitimize_pic_address (XEXP (addr, 1),
6521 base == reg ? NULL_RTX : reg);
6523 if (GET_CODE (new) == CONST_INT)
6524 new = plus_constant (base, INTVAL (new));
6525 else
6527 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6529 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6530 new = XEXP (new, 1);
6532 new = gen_rtx_PLUS (Pmode, base, new);
6537 return new;
6540 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6542 static rtx
6543 get_thread_pointer (int to_reg)
6545 rtx tp, reg, insn;
6547 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6548 if (!to_reg)
6549 return tp;
6551 reg = gen_reg_rtx (Pmode);
6552 insn = gen_rtx_SET (VOIDmode, reg, tp);
6553 insn = emit_insn (insn);
6555 return reg;
6558 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6559 false if we expect this to be used for a memory address and true if
6560 we expect to load the address into a register. */
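/* As an illustrative example, the local-exec model on 32-bit GNU/Linux
   ends up as a thread-pointer-relative access: the UNSPEC_NTPOFF (or
   UNSPEC_TPOFF) constant built below is printed as foo@NTPOFF or
   foo@TPOFF and is added to (or subtracted from) the thread pointer
   obtained from get_thread_pointer.  */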
6562 static rtx
6563 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6565 rtx dest, base, off, pic, tp;
6566 int type;
6568 switch (model)
6570 case TLS_MODEL_GLOBAL_DYNAMIC:
6571 dest = gen_reg_rtx (Pmode);
6572 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6574 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6576 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6578 start_sequence ();
6579 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6580 insns = get_insns ();
6581 end_sequence ();
6583 emit_libcall_block (insns, dest, rax, x);
6585 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6586 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6587 else
6588 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6590 if (TARGET_GNU2_TLS)
6592 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6594 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6596 break;
6598 case TLS_MODEL_LOCAL_DYNAMIC:
6599 base = gen_reg_rtx (Pmode);
6600 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6602 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6604 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6606 start_sequence ();
6607 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6608 insns = get_insns ();
6609 end_sequence ();
6611 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6612 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6613 emit_libcall_block (insns, base, rax, note);
6615 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6616 emit_insn (gen_tls_local_dynamic_base_64 (base));
6617 else
6618 emit_insn (gen_tls_local_dynamic_base_32 (base));
6620 if (TARGET_GNU2_TLS)
6622 rtx x = ix86_tls_module_base ();
6624 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
6626 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6629 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6630 off = gen_rtx_CONST (Pmode, off);
6632 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6633 break;
6635 case TLS_MODEL_INITIAL_EXEC:
6636 if (TARGET_64BIT)
6638 pic = NULL;
6639 type = UNSPEC_GOTNTPOFF;
6641 else if (flag_pic)
6643 if (reload_in_progress)
6644 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6645 pic = pic_offset_table_rtx;
6646 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6648 else if (!TARGET_ANY_GNU_TLS)
6650 pic = gen_reg_rtx (Pmode);
6651 emit_insn (gen_set_got (pic));
6652 type = UNSPEC_GOTTPOFF;
6654 else
6656 pic = NULL;
6657 type = UNSPEC_INDNTPOFF;
6660 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6661 off = gen_rtx_CONST (Pmode, off);
6662 if (pic)
6663 off = gen_rtx_PLUS (Pmode, pic, off);
6664 off = gen_const_mem (Pmode, off);
6665 set_mem_alias_set (off, ix86_GOT_alias_set ());
6667 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6669 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6670 off = force_reg (Pmode, off);
6671 return gen_rtx_PLUS (Pmode, base, off);
6673 else
6675 base = get_thread_pointer (true);
6676 dest = gen_reg_rtx (Pmode);
6677 emit_insn (gen_subsi3 (dest, base, off));
6679 break;
6681 case TLS_MODEL_LOCAL_EXEC:
6682 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6683 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6684 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6685 off = gen_rtx_CONST (Pmode, off);
6687 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6689 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6690 return gen_rtx_PLUS (Pmode, base, off);
6692 else
6694 base = get_thread_pointer (true);
6695 dest = gen_reg_rtx (Pmode);
6696 emit_insn (gen_subsi3 (dest, base, off));
6698 break;
6700 default:
6701 gcc_unreachable ();
6704 return dest;
6707 /* Try machine-dependent ways of modifying an illegitimate address
6708 to be legitimate. If we find one, return the new, valid address.
6709 This macro is used in only one place: `memory_address' in explow.c.
6711 OLDX is the address as it was before break_out_memory_refs was called.
6712 In some cases it is useful to look at this to decide what needs to be done.
6714 MODE and WIN are passed so that this macro can use
6715 GO_IF_LEGITIMATE_ADDRESS.
6717 It is always safe for this macro to do nothing. It exists to recognize
6718 opportunities to optimize the output.
6720 For the 80386, we handle X+REG by loading X into a register R and
6721 using R+REG. R will go in a general reg and indexing will be used.
6722 However, if REG is a broken-out memory address or multiplication,
6723 nothing needs to be done because REG can certainly go in a general reg.
6725 When -fpic is used, special handling is needed for symbolic references.
6726 See comments by legitimize_pic_address in i386.c for details. */
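/* A small example of the canonicalizations performed below, assuming R
   is a pseudo register and PIC is off: the address
       (plus (ashift R (const_int 2)) (symbol_ref "a"))
   is rewritten into
       (plus (mult R (const_int 4)) (symbol_ref "a"))
   which GO_IF_LEGITIMATE_ADDRESS can then accept as a scaled index plus
   displacement.  */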
6729 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6731 int changed = 0;
6732 unsigned log;
6734 if (TARGET_DEBUG_ADDR)
6736 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6737 GET_MODE_NAME (mode));
6738 debug_rtx (x);
6741 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6742 if (log)
6743 return legitimize_tls_address (x, log, false);
6744 if (GET_CODE (x) == CONST
6745 && GET_CODE (XEXP (x, 0)) == PLUS
6746 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6747 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6749 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6750 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6753 if (flag_pic && SYMBOLIC_CONST (x))
6754 return legitimize_pic_address (x, 0);
6756 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6757 if (GET_CODE (x) == ASHIFT
6758 && GET_CODE (XEXP (x, 1)) == CONST_INT
6759 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6761 changed = 1;
6762 log = INTVAL (XEXP (x, 1));
6763 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6764 GEN_INT (1 << log));
6767 if (GET_CODE (x) == PLUS)
6769 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6771 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6772 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6773 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6775 changed = 1;
6776 log = INTVAL (XEXP (XEXP (x, 0), 1));
6777 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6778 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6779 GEN_INT (1 << log));
6782 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6783 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6784 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6786 changed = 1;
6787 log = INTVAL (XEXP (XEXP (x, 1), 1));
6788 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6789 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6790 GEN_INT (1 << log));
6793 /* Put multiply first if it isn't already. */
6794 if (GET_CODE (XEXP (x, 1)) == MULT)
6796 rtx tmp = XEXP (x, 0);
6797 XEXP (x, 0) = XEXP (x, 1);
6798 XEXP (x, 1) = tmp;
6799 changed = 1;
6802 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6803 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6804 created by virtual register instantiation, register elimination, and
6805 similar optimizations. */
6806 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6808 changed = 1;
6809 x = gen_rtx_PLUS (Pmode,
6810 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6811 XEXP (XEXP (x, 1), 0)),
6812 XEXP (XEXP (x, 1), 1));
6815 /* Canonicalize
6816 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6817 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6818 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6819 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6820 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6821 && CONSTANT_P (XEXP (x, 1)))
6823 rtx constant;
6824 rtx other = NULL_RTX;
6826 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6828 constant = XEXP (x, 1);
6829 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6831 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6833 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6834 other = XEXP (x, 1);
6836 else
6837 constant = 0;
6839 if (constant)
6841 changed = 1;
6842 x = gen_rtx_PLUS (Pmode,
6843 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6844 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6845 plus_constant (other, INTVAL (constant)));
6849 if (changed && legitimate_address_p (mode, x, FALSE))
6850 return x;
6852 if (GET_CODE (XEXP (x, 0)) == MULT)
6854 changed = 1;
6855 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6858 if (GET_CODE (XEXP (x, 1)) == MULT)
6860 changed = 1;
6861 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6864 if (changed
6865 && GET_CODE (XEXP (x, 1)) == REG
6866 && GET_CODE (XEXP (x, 0)) == REG)
6867 return x;
6869 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6871 changed = 1;
6872 x = legitimize_pic_address (x, 0);
6875 if (changed && legitimate_address_p (mode, x, FALSE))
6876 return x;
6878 if (GET_CODE (XEXP (x, 0)) == REG)
6880 rtx temp = gen_reg_rtx (Pmode);
6881 rtx val = force_operand (XEXP (x, 1), temp);
6882 if (val != temp)
6883 emit_move_insn (temp, val);
6885 XEXP (x, 1) = temp;
6886 return x;
6889 else if (GET_CODE (XEXP (x, 1)) == REG)
6891 rtx temp = gen_reg_rtx (Pmode);
6892 rtx val = force_operand (XEXP (x, 0), temp);
6893 if (val != temp)
6894 emit_move_insn (temp, val);
6896 XEXP (x, 0) = temp;
6897 return x;
6901 return x;
6904 /* Print an integer constant expression in assembler syntax. Addition
6905 and subtraction are the only arithmetic that may appear in these
6906 expressions. FILE is the stdio stream to write to, X is the rtx, and
6907 CODE is the operand print code from the output string. */
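/* For example, given (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
   this prints "foo@GOTOFF", and an UNSPEC_GOTPCREL operand comes out as
   "foo@GOTPCREL(%rip)", matching the cases in the switch below.  */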
6909 static void
6910 output_pic_addr_const (FILE *file, rtx x, int code)
6912 char buf[256];
6914 switch (GET_CODE (x))
6916 case PC:
6917 gcc_assert (flag_pic);
6918 putc ('.', file);
6919 break;
6921 case SYMBOL_REF:
6922 output_addr_const (file, x);
6923 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6924 fputs ("@PLT", file);
6925 break;
6927 case LABEL_REF:
6928 x = XEXP (x, 0);
6929 /* FALLTHRU */
6930 case CODE_LABEL:
6931 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6932 assemble_name (asm_out_file, buf);
6933 break;
6935 case CONST_INT:
6936 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6937 break;
6939 case CONST:
6940 /* This used to output parentheses around the expression,
6941 but that does not work on the 386 (either ATT or BSD assembler). */
6942 output_pic_addr_const (file, XEXP (x, 0), code);
6943 break;
6945 case CONST_DOUBLE:
6946 if (GET_MODE (x) == VOIDmode)
6948 /* We can use %d if the number is <32 bits and positive. */
6949 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6950 fprintf (file, "0x%lx%08lx",
6951 (unsigned long) CONST_DOUBLE_HIGH (x),
6952 (unsigned long) CONST_DOUBLE_LOW (x));
6953 else
6954 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6956 else
6957 /* We can't handle floating point constants;
6958 PRINT_OPERAND must handle them. */
6959 output_operand_lossage ("floating constant misused");
6960 break;
6962 case PLUS:
6963 /* Some assemblers need integer constants to appear first. */
6964 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6966 output_pic_addr_const (file, XEXP (x, 0), code);
6967 putc ('+', file);
6968 output_pic_addr_const (file, XEXP (x, 1), code);
6970 else
6972 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6973 output_pic_addr_const (file, XEXP (x, 1), code);
6974 putc ('+', file);
6975 output_pic_addr_const (file, XEXP (x, 0), code);
6977 break;
6979 case MINUS:
6980 if (!TARGET_MACHO)
6981 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6982 output_pic_addr_const (file, XEXP (x, 0), code);
6983 putc ('-', file);
6984 output_pic_addr_const (file, XEXP (x, 1), code);
6985 if (!TARGET_MACHO)
6986 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6987 break;
6989 case UNSPEC:
6990 gcc_assert (XVECLEN (x, 0) == 1);
6991 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6992 switch (XINT (x, 1))
6994 case UNSPEC_GOT:
6995 fputs ("@GOT", file);
6996 break;
6997 case UNSPEC_GOTOFF:
6998 fputs ("@GOTOFF", file);
6999 break;
7000 case UNSPEC_GOTPCREL:
7001 fputs ("@GOTPCREL(%rip)", file);
7002 break;
7003 case UNSPEC_GOTTPOFF:
7004 /* FIXME: This might be @TPOFF in Sun ld too. */
7005 fputs ("@GOTTPOFF", file);
7006 break;
7007 case UNSPEC_TPOFF:
7008 fputs ("@TPOFF", file);
7009 break;
7010 case UNSPEC_NTPOFF:
7011 if (TARGET_64BIT)
7012 fputs ("@TPOFF", file);
7013 else
7014 fputs ("@NTPOFF", file);
7015 break;
7016 case UNSPEC_DTPOFF:
7017 fputs ("@DTPOFF", file);
7018 break;
7019 case UNSPEC_GOTNTPOFF:
7020 if (TARGET_64BIT)
7021 fputs ("@GOTTPOFF(%rip)", file);
7022 else
7023 fputs ("@GOTNTPOFF", file);
7024 break;
7025 case UNSPEC_INDNTPOFF:
7026 fputs ("@INDNTPOFF", file);
7027 break;
7028 default:
7029 output_operand_lossage ("invalid UNSPEC as operand");
7030 break;
7032 break;
7034 default:
7035 output_operand_lossage ("invalid expression as operand");
7039 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7040 We need to emit DTP-relative relocations. */
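/* Assuming ASM_LONG expands to ".long", a 4-byte reference to "foo"
   comes out as ".long foo@DTPOFF", and the 8-byte case appends ", 0"
   for the upper half.  */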
7042 static void
7043 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7045 fputs (ASM_LONG, file);
7046 output_addr_const (file, x);
7047 fputs ("@DTPOFF", file);
7048 switch (size)
7050 case 4:
7051 break;
7052 case 8:
7053 fputs (", 0", file);
7054 break;
7055 default:
7056 gcc_unreachable ();
7060 /* In the name of slightly smaller debug output, and to cater to
7061 general assembler lossage, recognize PIC+GOTOFF and turn it back
7062 into a direct symbol reference. */
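/* For example, a non-memory reference of the form
       (plus pic_offset_table_rtx
             (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "foo"), so debug output and
   lossy assemblers see the direct symbol again.  */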
7064 static rtx
7065 ix86_delegitimize_address (rtx orig_x)
7067 rtx x = orig_x, y;
7069 if (GET_CODE (x) == MEM)
7070 x = XEXP (x, 0);
7072 if (TARGET_64BIT)
7074 if (GET_CODE (x) != CONST
7075 || GET_CODE (XEXP (x, 0)) != UNSPEC
7076 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7077 || GET_CODE (orig_x) != MEM)
7078 return orig_x;
7079 return XVECEXP (XEXP (x, 0), 0, 0);
7082 if (GET_CODE (x) != PLUS
7083 || GET_CODE (XEXP (x, 1)) != CONST)
7084 return orig_x;
7086 if (GET_CODE (XEXP (x, 0)) == REG
7087 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7088 /* %ebx + GOT/GOTOFF */
7089 y = NULL;
7090 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7092 /* %ebx + %reg * scale + GOT/GOTOFF */
7093 y = XEXP (x, 0);
7094 if (GET_CODE (XEXP (y, 0)) == REG
7095 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7096 y = XEXP (y, 1);
7097 else if (GET_CODE (XEXP (y, 1)) == REG
7098 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7099 y = XEXP (y, 0);
7100 else
7101 return orig_x;
7102 if (GET_CODE (y) != REG
7103 && GET_CODE (y) != MULT
7104 && GET_CODE (y) != ASHIFT)
7105 return orig_x;
7107 else
7108 return orig_x;
7110 x = XEXP (XEXP (x, 1), 0);
7111 if (GET_CODE (x) == UNSPEC
7112 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7113 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7115 if (y)
7116 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7117 return XVECEXP (x, 0, 0);
7120 if (GET_CODE (x) == PLUS
7121 && GET_CODE (XEXP (x, 0)) == UNSPEC
7122 && GET_CODE (XEXP (x, 1)) == CONST_INT
7123 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7124 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7125 && GET_CODE (orig_x) != MEM)))
7127 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7128 if (y)
7129 return gen_rtx_PLUS (Pmode, y, x);
7130 return x;
7133 return orig_x;
7136 static void
7137 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7138 int fp, FILE *file)
7140 const char *suffix;
7142 if (mode == CCFPmode || mode == CCFPUmode)
7144 enum rtx_code second_code, bypass_code;
7145 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7146 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7147 code = ix86_fp_compare_code_to_integer (code);
7148 mode = CCmode;
7150 if (reverse)
7151 code = reverse_condition (code);
7153 switch (code)
7155 case EQ:
7156 suffix = "e";
7157 break;
7158 case NE:
7159 suffix = "ne";
7160 break;
7161 case GT:
7162 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7163 suffix = "g";
7164 break;
7165 case GTU:
7166 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7167 Those same assemblers have the same but opposite lossage on cmov. */
7168 gcc_assert (mode == CCmode);
7169 suffix = fp ? "nbe" : "a";
7170 break;
7171 case LT:
7172 switch (mode)
7174 case CCNOmode:
7175 case CCGOCmode:
7176 suffix = "s";
7177 break;
7179 case CCmode:
7180 case CCGCmode:
7181 suffix = "l";
7182 break;
7184 default:
7185 gcc_unreachable ();
7187 break;
7188 case LTU:
7189 gcc_assert (mode == CCmode);
7190 suffix = "b";
7191 break;
7192 case GE:
7193 switch (mode)
7195 case CCNOmode:
7196 case CCGOCmode:
7197 suffix = "ns";
7198 break;
7200 case CCmode:
7201 case CCGCmode:
7202 suffix = "ge";
7203 break;
7205 default:
7206 gcc_unreachable ();
7208 break;
7209 case GEU:
7210 /* ??? As above. */
7211 gcc_assert (mode == CCmode);
7212 suffix = fp ? "nb" : "ae";
7213 break;
7214 case LE:
7215 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7216 suffix = "le";
7217 break;
7218 case LEU:
7219 gcc_assert (mode == CCmode);
7220 suffix = "be";
7221 break;
7222 case UNORDERED:
7223 suffix = fp ? "u" : "p";
7224 break;
7225 case ORDERED:
7226 suffix = fp ? "nu" : "np";
7227 break;
7228 default:
7229 gcc_unreachable ();
7231 fputs (suffix, file);
7234 /* Print the name of register X to FILE based on its machine mode and number.
7235 If CODE is 'w', pretend the mode is HImode.
7236 If CODE is 'b', pretend the mode is QImode.
7237 If CODE is 'k', pretend the mode is SImode.
7238 If CODE is 'q', pretend the mode is DImode.
7239 If CODE is 'h', pretend the reg is the 'high' byte register.
7240 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
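/* For instance, if operands[0] is %eax then in ATT syntax %b0 prints
   "%al", %w0 prints "%ax" and %k0 prints "%eax", per the size handling
   below.  */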
7242 void
7243 print_reg (rtx x, int code, FILE *file)
7245 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7246 && REGNO (x) != FRAME_POINTER_REGNUM
7247 && REGNO (x) != FLAGS_REG
7248 && REGNO (x) != FPSR_REG);
7250 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7251 putc ('%', file);
7253 if (code == 'w' || MMX_REG_P (x))
7254 code = 2;
7255 else if (code == 'b')
7256 code = 1;
7257 else if (code == 'k')
7258 code = 4;
7259 else if (code == 'q')
7260 code = 8;
7261 else if (code == 'y')
7262 code = 3;
7263 else if (code == 'h')
7264 code = 0;
7265 else
7266 code = GET_MODE_SIZE (GET_MODE (x));
7268 /* Irritatingly, AMD extended registers use a different naming convention
7269 from the normal registers. */
7270 if (REX_INT_REG_P (x))
7272 gcc_assert (TARGET_64BIT);
7273 switch (code)
7275 case 0:
7276 error ("extended registers have no high halves");
7277 break;
7278 case 1:
7279 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7280 break;
7281 case 2:
7282 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7283 break;
7284 case 4:
7285 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7286 break;
7287 case 8:
7288 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7289 break;
7290 default:
7291 error ("unsupported operand size for extended register");
7292 break;
7294 return;
7296 switch (code)
7298 case 3:
7299 if (STACK_TOP_P (x))
7301 fputs ("st(0)", file);
7302 break;
7304 /* FALLTHRU */
7305 case 8:
7306 case 4:
7307 case 12:
7308 if (! ANY_FP_REG_P (x))
7309 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7310 /* FALLTHRU */
7311 case 16:
7312 case 2:
7313 normal:
7314 fputs (hi_reg_name[REGNO (x)], file);
7315 break;
7316 case 1:
7317 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7318 goto normal;
7319 fputs (qi_reg_name[REGNO (x)], file);
7320 break;
7321 case 0:
7322 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7323 goto normal;
7324 fputs (qi_high_reg_name[REGNO (x)], file);
7325 break;
7326 default:
7327 gcc_unreachable ();
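/* Worked example (illustrative only): for hard register 0 in AT&T syntax,
   code 'k' selects size 4 and prints "%eax", code 'w' prints "%ax" and
   code 'b' prints "%al"; an extended register such as r8 instead goes
   through the REX_INT_REG_P branch above and prints "%r8d", "%r8w" or
   "%r8b" for the same codes.  */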
7331 /* Locate some local-dynamic symbol still in use by this function
7332 so that we can print its name in some tls_local_dynamic_base
7333 pattern. */
7335 static const char *
7336 get_some_local_dynamic_name (void)
7338 rtx insn;
7340 if (cfun->machine->some_ld_name)
7341 return cfun->machine->some_ld_name;
7343 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7344 if (INSN_P (insn)
7345 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7346 return cfun->machine->some_ld_name;
7348 gcc_unreachable ();
7351 static int
7352 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7354 rtx x = *px;
7356 if (GET_CODE (x) == SYMBOL_REF
7357 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7359 cfun->machine->some_ld_name = XSTR (x, 0);
7360 return 1;
7363 return 0;
7366 /* Meaning of CODE:
7367 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7368 C -- print opcode suffix for set/cmov insn.
7369 c -- like C, but print reversed condition
7370 F,f -- likewise, but for floating-point.
7371 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7372 otherwise nothing
7373 R -- print the prefix for register names.
7374 z -- print the opcode suffix for the size of the current operand.
7375 * -- print a star (in certain assembler syntax)
7376 A -- print an absolute memory reference.
7377 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7378 s -- print a shift double count, followed by the assembler's argument
7379 delimiter.
7380 b -- print the QImode name of the register for the indicated operand.
7381 %b0 would print %al if operands[0] is reg 0.
7382 w -- likewise, print the HImode name of the register.
7383 k -- likewise, print the SImode name of the register.
7384 q -- likewise, print the DImode name of the register.
7385 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7386 y -- print "st(0)" instead of "st" as a register.
7387 D -- print condition for SSE cmp instruction.
7388 P -- if PIC, print an @PLT suffix.
7389 X -- don't print any sort of PIC '@' suffix for a symbol.
7390 & -- print some in-use local-dynamic symbol name.
7391 H -- print a memory address offset by 8; used for sse high-parts
7394 void
7395 print_operand (FILE *file, rtx x, int code)
7397 if (code)
7399 switch (code)
7401 case '*':
7402 if (ASSEMBLER_DIALECT == ASM_ATT)
7403 putc ('*', file);
7404 return;
7406 case '&':
7407 assemble_name (file, get_some_local_dynamic_name ());
7408 return;
7410 case 'A':
7411 switch (ASSEMBLER_DIALECT)
7413 case ASM_ATT:
7414 putc ('*', file);
7415 break;
7417 case ASM_INTEL:
7418 /* Intel syntax. For absolute addresses, registers should not
7419 be surrounded by braces. */
7420 if (GET_CODE (x) != REG)
7422 putc ('[', file);
7423 PRINT_OPERAND (file, x, 0);
7424 putc (']', file);
7425 return;
7427 break;
7429 default:
7430 gcc_unreachable ();
7433 PRINT_OPERAND (file, x, 0);
7434 return;
7437 case 'L':
7438 if (ASSEMBLER_DIALECT == ASM_ATT)
7439 putc ('l', file);
7440 return;
7442 case 'W':
7443 if (ASSEMBLER_DIALECT == ASM_ATT)
7444 putc ('w', file);
7445 return;
7447 case 'B':
7448 if (ASSEMBLER_DIALECT == ASM_ATT)
7449 putc ('b', file);
7450 return;
7452 case 'Q':
7453 if (ASSEMBLER_DIALECT == ASM_ATT)
7454 putc ('l', file);
7455 return;
7457 case 'S':
7458 if (ASSEMBLER_DIALECT == ASM_ATT)
7459 putc ('s', file);
7460 return;
7462 case 'T':
7463 if (ASSEMBLER_DIALECT == ASM_ATT)
7464 putc ('t', file);
7465 return;
7467 case 'z':
7468 /* 387 opcodes don't get size suffixes if the operands are
7469 registers. */
7470 if (STACK_REG_P (x))
7471 return;
7473 /* Likewise if using Intel opcodes. */
7474 if (ASSEMBLER_DIALECT == ASM_INTEL)
7475 return;
7477 /* This is the size of op from size of operand. */
7478 switch (GET_MODE_SIZE (GET_MODE (x)))
7480 case 2:
7481 #ifdef HAVE_GAS_FILDS_FISTS
7482 putc ('s', file);
7483 #endif
7484 return;
7486 case 4:
7487 if (GET_MODE (x) == SFmode)
7489 putc ('s', file);
7490 return;
7492 else
7493 putc ('l', file);
7494 return;
7496 case 12:
7497 case 16:
7498 putc ('t', file);
7499 return;
7501 case 8:
7502 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7504 #ifdef GAS_MNEMONICS
7505 putc ('q', file);
7506 #else
7507 putc ('l', file);
7508 putc ('l', file);
7509 #endif
7511 else
7512 putc ('l', file);
7513 return;
7515 default:
7516 gcc_unreachable ();
7519 case 'b':
7520 case 'w':
7521 case 'k':
7522 case 'q':
7523 case 'h':
7524 case 'y':
7525 case 'X':
7526 case 'P':
7527 break;
7529 case 's':
7530 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7532 PRINT_OPERAND (file, x, 0);
7533 putc (',', file);
7535 return;
7537 case 'D':
7538 /* Little bit of brain damage here. The SSE compare instructions
7539 use completely different names for the comparisons than the
7540 fp conditional moves do. */
7541 switch (GET_CODE (x))
7543 case EQ:
7544 case UNEQ:
7545 fputs ("eq", file);
7546 break;
7547 case LT:
7548 case UNLT:
7549 fputs ("lt", file);
7550 break;
7551 case LE:
7552 case UNLE:
7553 fputs ("le", file);
7554 break;
7555 case UNORDERED:
7556 fputs ("unord", file);
7557 break;
7558 case NE:
7559 case LTGT:
7560 fputs ("neq", file);
7561 break;
7562 case UNGE:
7563 case GE:
7564 fputs ("nlt", file);
7565 break;
7566 case UNGT:
7567 case GT:
7568 fputs ("nle", file);
7569 break;
7570 case ORDERED:
7571 fputs ("ord", file);
7572 break;
7573 default:
7574 gcc_unreachable ();
7576 return;
7577 case 'O':
7578 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7579 if (ASSEMBLER_DIALECT == ASM_ATT)
7581 switch (GET_MODE (x))
7583 case HImode: putc ('w', file); break;
7584 case SImode:
7585 case SFmode: putc ('l', file); break;
7586 case DImode:
7587 case DFmode: putc ('q', file); break;
7588 default: gcc_unreachable ();
7590 putc ('.', file);
7592 #endif
7593 return;
7594 case 'C':
7595 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7596 return;
7597 case 'F':
7598 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7599 if (ASSEMBLER_DIALECT == ASM_ATT)
7600 putc ('.', file);
7601 #endif
7602 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7603 return;
7605 /* Like above, but reverse condition */
7606 case 'c':
7607 /* Check to see if argument to %c is really a constant
7608 and not a condition code which needs to be reversed. */
7609 if (!COMPARISON_P (x))
7611 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7612 return;
7614 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7615 return;
7616 case 'f':
7617 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7618 if (ASSEMBLER_DIALECT == ASM_ATT)
7619 putc ('.', file);
7620 #endif
7621 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7622 return;
7624 case 'H':
7625 /* It doesn't actually matter what mode we use here, as we're
7626 only going to use this for printing. */
7627 x = adjust_address_nv (x, DImode, 8);
7628 break;
7630 case '+':
7632 rtx x;
7634 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7635 return;
7637 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7638 if (x)
7640 int pred_val = INTVAL (XEXP (x, 0));
7642 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7643 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7645 int taken = pred_val > REG_BR_PROB_BASE / 2;
7646 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7648 /* Emit hints only in the case where the default branch prediction
7649 heuristics would fail. */
7650 if (taken != cputaken)
7652 /* We use 3e (DS) prefix for taken branches and
7653 2e (CS) prefix for not taken branches. */
7654 if (taken)
7655 fputs ("ds ; ", file);
7656 else
7657 fputs ("cs ; ", file);
7661 return;
7663 default:
7664 output_operand_lossage ("invalid operand code '%c'", code);
7668 if (GET_CODE (x) == REG)
7669 print_reg (x, code, file);
7671 else if (GET_CODE (x) == MEM)
7673 /* No `byte ptr' prefix for call instructions. */
7674 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7676 const char * size;
7677 switch (GET_MODE_SIZE (GET_MODE (x)))
7679 case 1: size = "BYTE"; break;
7680 case 2: size = "WORD"; break;
7681 case 4: size = "DWORD"; break;
7682 case 8: size = "QWORD"; break;
7683 case 12: size = "XWORD"; break;
7684 case 16: size = "XMMWORD"; break;
7685 default:
7686 gcc_unreachable ();
7689 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7690 if (code == 'b')
7691 size = "BYTE";
7692 else if (code == 'w')
7693 size = "WORD";
7694 else if (code == 'k')
7695 size = "DWORD";
7697 fputs (size, file);
7698 fputs (" PTR ", file);
7701 x = XEXP (x, 0);
7702 /* Avoid (%rip) for call operands. */
7703 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7704 && GET_CODE (x) != CONST_INT)
7705 output_addr_const (file, x);
7706 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7707 output_operand_lossage ("invalid constraints for operand");
7708 else
7709 output_address (x);
7712 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7714 REAL_VALUE_TYPE r;
7715 long l;
7717 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7718 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7720 if (ASSEMBLER_DIALECT == ASM_ATT)
7721 putc ('$', file);
7722 fprintf (file, "0x%08lx", l);
7725 /* These float cases don't actually occur as immediate operands. */
7726 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7728 char dstr[30];
7730 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7731 fprintf (file, "%s", dstr);
7734 else if (GET_CODE (x) == CONST_DOUBLE
7735 && GET_MODE (x) == XFmode)
7737 char dstr[30];
7739 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7740 fprintf (file, "%s", dstr);
7743 else
7745 /* We have patterns that allow zero sets of memory, for instance.
7746 In 64-bit mode, we should probably support all 8-byte vectors,
7747 since we can in fact encode that into an immediate. */
7748 if (GET_CODE (x) == CONST_VECTOR)
7750 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7751 x = const0_rtx;
7754 if (code != 'P')
7756 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7758 if (ASSEMBLER_DIALECT == ASM_ATT)
7759 putc ('$', file);
7761 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7762 || GET_CODE (x) == LABEL_REF)
7764 if (ASSEMBLER_DIALECT == ASM_ATT)
7765 putc ('$', file);
7766 else
7767 fputs ("OFFSET FLAT:", file);
7770 if (GET_CODE (x) == CONST_INT)
7771 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7772 else if (flag_pic)
7773 output_pic_addr_const (file, x, code);
7774 else
7775 output_addr_const (file, x);
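/* Worked example (illustrative only): printing (const_int 42) with no
   letter code yields "$42" in AT&T syntax and plain "42" in Intel syntax,
   while an SImode MEM in Intel syntax is preceded by "DWORD PTR " before
   its address, as handled above.  */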
7779 /* Print a memory operand whose address is ADDR. */
7781 void
7782 print_operand_address (FILE *file, rtx addr)
7784 struct ix86_address parts;
7785 rtx base, index, disp;
7786 int scale;
7787 int ok = ix86_decompose_address (addr, &parts);
7789 gcc_assert (ok);
7791 base = parts.base;
7792 index = parts.index;
7793 disp = parts.disp;
7794 scale = parts.scale;
7796 switch (parts.seg)
7798 case SEG_DEFAULT:
7799 break;
7800 case SEG_FS:
7801 case SEG_GS:
7802 if (USER_LABEL_PREFIX[0] == 0)
7803 putc ('%', file);
7804 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7805 break;
7806 default:
7807 gcc_unreachable ();
7810 if (!base && !index)
7812 /* A displacement-only address requires special attention. */
7814 if (GET_CODE (disp) == CONST_INT)
7816 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7818 if (USER_LABEL_PREFIX[0] == 0)
7819 putc ('%', file);
7820 fputs ("ds:", file);
7822 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7824 else if (flag_pic)
7825 output_pic_addr_const (file, disp, 0);
7826 else
7827 output_addr_const (file, disp);
7829 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7830 if (TARGET_64BIT)
7832 if (GET_CODE (disp) == CONST
7833 && GET_CODE (XEXP (disp, 0)) == PLUS
7834 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7835 disp = XEXP (XEXP (disp, 0), 0);
7836 if (GET_CODE (disp) == LABEL_REF
7837 || (GET_CODE (disp) == SYMBOL_REF
7838 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7839 fputs ("(%rip)", file);
7842 else
7844 if (ASSEMBLER_DIALECT == ASM_ATT)
7846 if (disp)
7848 if (flag_pic)
7849 output_pic_addr_const (file, disp, 0);
7850 else if (GET_CODE (disp) == LABEL_REF)
7851 output_asm_label (disp);
7852 else
7853 output_addr_const (file, disp);
7856 putc ('(', file);
7857 if (base)
7858 print_reg (base, 0, file);
7859 if (index)
7861 putc (',', file);
7862 print_reg (index, 0, file);
7863 if (scale != 1)
7864 fprintf (file, ",%d", scale);
7866 putc (')', file);
7868 else
7870 rtx offset = NULL_RTX;
7872 if (disp)
7874 /* Pull out the offset of a symbol; print any symbol itself. */
7875 if (GET_CODE (disp) == CONST
7876 && GET_CODE (XEXP (disp, 0)) == PLUS
7877 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7879 offset = XEXP (XEXP (disp, 0), 1);
7880 disp = gen_rtx_CONST (VOIDmode,
7881 XEXP (XEXP (disp, 0), 0));
7884 if (flag_pic)
7885 output_pic_addr_const (file, disp, 0);
7886 else if (GET_CODE (disp) == LABEL_REF)
7887 output_asm_label (disp);
7888 else if (GET_CODE (disp) == CONST_INT)
7889 offset = disp;
7890 else
7891 output_addr_const (file, disp);
7894 putc ('[', file);
7895 if (base)
7897 print_reg (base, 0, file);
7898 if (offset)
7900 if (INTVAL (offset) >= 0)
7901 putc ('+', file);
7902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7905 else if (offset)
7906 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7907 else
7908 putc ('0', file);
7910 if (index)
7912 putc ('+', file);
7913 print_reg (index, 0, file);
7914 if (scale != 1)
7915 fprintf (file, "*%d", scale);
7917 putc (']', file);
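/* Worked example (illustrative only): the address
   (plus (reg bx) (const_int 4)) prints as "4(%ebx)" in AT&T syntax and as
   "[ebx+4]" in Intel syntax; adding an index register and a scale gives
   forms like "(%ebx,%esi,2)" and "[ebx+esi*2]".  */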
7922 bool
7923 output_addr_const_extra (FILE *file, rtx x)
7925 rtx op;
7927 if (GET_CODE (x) != UNSPEC)
7928 return false;
7930 op = XVECEXP (x, 0, 0);
7931 switch (XINT (x, 1))
7933 case UNSPEC_GOTTPOFF:
7934 output_addr_const (file, op);
7935 /* FIXME: This might be @TPOFF in Sun ld. */
7936 fputs ("@GOTTPOFF", file);
7937 break;
7938 case UNSPEC_TPOFF:
7939 output_addr_const (file, op);
7940 fputs ("@TPOFF", file);
7941 break;
7942 case UNSPEC_NTPOFF:
7943 output_addr_const (file, op);
7944 if (TARGET_64BIT)
7945 fputs ("@TPOFF", file);
7946 else
7947 fputs ("@NTPOFF", file);
7948 break;
7949 case UNSPEC_DTPOFF:
7950 output_addr_const (file, op);
7951 fputs ("@DTPOFF", file);
7952 break;
7953 case UNSPEC_GOTNTPOFF:
7954 output_addr_const (file, op);
7955 if (TARGET_64BIT)
7956 fputs ("@GOTTPOFF(%rip)", file);
7957 else
7958 fputs ("@GOTNTPOFF", file);
7959 break;
7960 case UNSPEC_INDNTPOFF:
7961 output_addr_const (file, op);
7962 fputs ("@INDNTPOFF", file);
7963 break;
7965 default:
7966 return false;
7969 return true;
7972 /* Split one or more DImode RTL references into pairs of SImode
7973 references. The RTL can be REG, offsettable MEM, integer constant, or
7974 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7975 split and "num" is its length. lo_half and hi_half are output arrays
7976 that parallel "operands". */
7978 void
7979 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7981 while (num--)
7983 rtx op = operands[num];
7985 /* simplify_subreg refuses to split volatile memory addresses,
7986 but we still have to handle them. */
7987 if (GET_CODE (op) == MEM)
7989 lo_half[num] = adjust_address (op, SImode, 0);
7990 hi_half[num] = adjust_address (op, SImode, 4);
7992 else
7994 lo_half[num] = simplify_gen_subreg (SImode, op,
7995 GET_MODE (op) == VOIDmode
7996 ? DImode : GET_MODE (op), 0);
7997 hi_half[num] = simplify_gen_subreg (SImode, op,
7998 GET_MODE (op) == VOIDmode
7999 ? DImode : GET_MODE (op), 4);
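/* Worked example (illustrative only): splitting (reg:DI 100) produces the
   SImode subregs at byte offsets 0 and 4 (the low and high words on this
   little-endian target), while an offsettable MEM is split with
   adjust_address so that both halves remain valid memory references,
   volatile or not.  */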
8003 /* Split one or more TImode RTL references into pairs of DImode
8004 references. The RTL can be REG, offsettable MEM, integer constant, or
8006 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8006 split and "num" is its length. lo_half and hi_half are output arrays
8007 that parallel "operands". */
8009 void
8010 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8012 while (num--)
8014 rtx op = operands[num];
8016 /* simplify_subreg refuses to split volatile memory addresses, but we
8017 still have to handle them. */
8018 if (GET_CODE (op) == MEM)
8020 lo_half[num] = adjust_address (op, DImode, 0);
8021 hi_half[num] = adjust_address (op, DImode, 8);
8023 else
8025 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8026 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8031 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8032 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8033 is the expression of the binary operation. The output may either be
8034 emitted here, or returned to the caller, like all output_* functions.
8036 There is no guarantee that the operands are the same mode, as they
8037 might be within FLOAT or FLOAT_EXTEND expressions. */
8039 #ifndef SYSV386_COMPAT
8040 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8041 wants to fix the assemblers because that causes incompatibility
8042 with gcc. No-one wants to fix gcc because that causes
8043 incompatibility with assemblers... You can use the option of
8044 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8045 #define SYSV386_COMPAT 1
8046 #endif
8048 const char *
8049 output_387_binary_op (rtx insn, rtx *operands)
8051 static char buf[30];
8052 const char *p;
8053 const char *ssep;
8054 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8056 #ifdef ENABLE_CHECKING
8057 /* Even if we do not want to check the inputs, this documents the input
8058 constraints, which helps in understanding the following code. */
8059 if (STACK_REG_P (operands[0])
8060 && ((REG_P (operands[1])
8061 && REGNO (operands[0]) == REGNO (operands[1])
8062 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8063 || (REG_P (operands[2])
8064 && REGNO (operands[0]) == REGNO (operands[2])
8065 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8066 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8067 ; /* ok */
8068 else
8069 gcc_assert (is_sse);
8070 #endif
8072 switch (GET_CODE (operands[3]))
8074 case PLUS:
8075 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8076 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8077 p = "fiadd";
8078 else
8079 p = "fadd";
8080 ssep = "add";
8081 break;
8083 case MINUS:
8084 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8085 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8086 p = "fisub";
8087 else
8088 p = "fsub";
8089 ssep = "sub";
8090 break;
8092 case MULT:
8093 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8094 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8095 p = "fimul";
8096 else
8097 p = "fmul";
8098 ssep = "mul";
8099 break;
8101 case DIV:
8102 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8103 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8104 p = "fidiv";
8105 else
8106 p = "fdiv";
8107 ssep = "div";
8108 break;
8110 default:
8111 gcc_unreachable ();
8114 if (is_sse)
8116 strcpy (buf, ssep);
8117 if (GET_MODE (operands[0]) == SFmode)
8118 strcat (buf, "ss\t{%2, %0|%0, %2}");
8119 else
8120 strcat (buf, "sd\t{%2, %0|%0, %2}");
8121 return buf;
8123 strcpy (buf, p);
8125 switch (GET_CODE (operands[3]))
8127 case MULT:
8128 case PLUS:
8129 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8131 rtx temp = operands[2];
8132 operands[2] = operands[1];
8133 operands[1] = temp;
8136 /* We know operands[0] == operands[1]. */
8138 if (GET_CODE (operands[2]) == MEM)
8140 p = "%z2\t%2";
8141 break;
8144 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8146 if (STACK_TOP_P (operands[0]))
8147 /* How is it that we are storing to a dead operand[2]?
8148 Well, presumably operands[1] is dead too. We can't
8149 store the result to st(0) as st(0) gets popped on this
8150 instruction. Instead store to operands[2] (which I
8151 think has to be st(1)). st(1) will be popped later.
8152 gcc <= 2.8.1 didn't have this check and generated
8153 assembly code that the Unixware assembler rejected. */
8154 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8155 else
8156 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8157 break;
8160 if (STACK_TOP_P (operands[0]))
8161 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8162 else
8163 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8164 break;
8166 case MINUS:
8167 case DIV:
8168 if (GET_CODE (operands[1]) == MEM)
8170 p = "r%z1\t%1";
8171 break;
8174 if (GET_CODE (operands[2]) == MEM)
8176 p = "%z2\t%2";
8177 break;
8180 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8182 #if SYSV386_COMPAT
8183 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8184 derived assemblers, confusingly reverse the direction of
8185 the operation for fsub{r} and fdiv{r} when the
8186 destination register is not st(0). The Intel assembler
8187 doesn't have this brain damage. Read !SYSV386_COMPAT to
8188 figure out what the hardware really does. */
8189 if (STACK_TOP_P (operands[0]))
8190 p = "{p\t%0, %2|rp\t%2, %0}";
8191 else
8192 p = "{rp\t%2, %0|p\t%0, %2}";
8193 #else
8194 if (STACK_TOP_P (operands[0]))
8195 /* As above for fmul/fadd, we can't store to st(0). */
8196 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8197 else
8198 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8199 #endif
8200 break;
8203 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8205 #if SYSV386_COMPAT
8206 if (STACK_TOP_P (operands[0]))
8207 p = "{rp\t%0, %1|p\t%1, %0}";
8208 else
8209 p = "{p\t%1, %0|rp\t%0, %1}";
8210 #else
8211 if (STACK_TOP_P (operands[0]))
8212 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8213 else
8214 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8215 #endif
8216 break;
8219 if (STACK_TOP_P (operands[0]))
8221 if (STACK_TOP_P (operands[1]))
8222 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8223 else
8224 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8225 break;
8227 else if (STACK_TOP_P (operands[1]))
8229 #if SYSV386_COMPAT
8230 p = "{\t%1, %0|r\t%0, %1}";
8231 #else
8232 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8233 #endif
8235 else
8237 #if SYSV386_COMPAT
8238 p = "{r\t%2, %0|\t%0, %2}";
8239 #else
8240 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8241 #endif
8243 break;
8245 default:
8246 gcc_unreachable ();
8249 strcat (buf, p);
8250 return buf;
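/* Worked example (illustrative only): an SFmode SSE addition returns
   "addss\t{%2, %0|%0, %2}" from the code above, while a 387 addition whose
   second operand is memory returns "fadd%z2\t%2", where %z later expands to
   the size suffix implied by the operand's mode.  */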
8253 /* Return needed mode for entity in optimize_mode_switching pass. */
8256 ix86_mode_needed (int entity, rtx insn)
8258 enum attr_i387_cw mode;
8260 /* The mode UNINITIALIZED is used to store the control word after a
8261 function call or ASM pattern. The mode ANY specifies that the function
8262 has no requirements on the control word and makes no changes in the
8263 bits we are interested in. */
8265 if (CALL_P (insn)
8266 || (NONJUMP_INSN_P (insn)
8267 && (asm_noperands (PATTERN (insn)) >= 0
8268 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8269 return I387_CW_UNINITIALIZED;
8271 if (recog_memoized (insn) < 0)
8272 return I387_CW_ANY;
8274 mode = get_attr_i387_cw (insn);
8276 switch (entity)
8278 case I387_TRUNC:
8279 if (mode == I387_CW_TRUNC)
8280 return mode;
8281 break;
8283 case I387_FLOOR:
8284 if (mode == I387_CW_FLOOR)
8285 return mode;
8286 break;
8288 case I387_CEIL:
8289 if (mode == I387_CW_CEIL)
8290 return mode;
8291 break;
8293 case I387_MASK_PM:
8294 if (mode == I387_CW_MASK_PM)
8295 return mode;
8296 break;
8298 default:
8299 gcc_unreachable ();
8302 return I387_CW_ANY;
8305 /* Output code to initialize control word copies used by trunc?f?i and
8306 rounding patterns. CURRENT_MODE is set to the current control word,
8307 while NEW_MODE is set to the new control word. */
8309 void
8310 emit_i387_cw_initialization (int mode)
8312 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8313 rtx new_mode;
8315 int slot;
8317 rtx reg = gen_reg_rtx (HImode);
8319 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8320 emit_move_insn (reg, stored_mode);
8322 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8324 switch (mode)
8326 case I387_CW_TRUNC:
8327 /* round toward zero (truncate) */
8328 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8329 slot = SLOT_CW_TRUNC;
8330 break;
8332 case I387_CW_FLOOR:
8333 /* round down toward -oo */
8334 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8335 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8336 slot = SLOT_CW_FLOOR;
8337 break;
8339 case I387_CW_CEIL:
8340 /* round up toward +oo */
8341 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8342 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8343 slot = SLOT_CW_CEIL;
8344 break;
8346 case I387_CW_MASK_PM:
8347 /* mask precision exception for nearbyint() */
8348 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8349 slot = SLOT_CW_MASK_PM;
8350 break;
8352 default:
8353 gcc_unreachable ();
8356 else
8358 switch (mode)
8360 case I387_CW_TRUNC:
8361 /* round toward zero (truncate) */
8362 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8363 slot = SLOT_CW_TRUNC;
8364 break;
8366 case I387_CW_FLOOR:
8367 /* round down toward -oo */
8368 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8369 slot = SLOT_CW_FLOOR;
8370 break;
8372 case I387_CW_CEIL:
8373 /* round up toward +oo */
8374 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8375 slot = SLOT_CW_CEIL;
8376 break;
8378 case I387_CW_MASK_PM:
8379 /* mask precision exception for nearbyint() */
8380 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8381 slot = SLOT_CW_MASK_PM;
8382 break;
8384 default:
8385 gcc_unreachable ();
8389 gcc_assert (slot < MAX_386_STACK_LOCALS);
8391 new_mode = assign_386_stack_local (HImode, slot);
8392 emit_move_insn (new_mode, reg);
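/* Background for the magic numbers above (assumed from the 387 architecture,
   not from this file): bits 10-11 of the control word select the rounding
   mode -- 00 nearest, 01 down, 10 up, 11 truncate -- hence the 0x0400,
   0x0800 and 0x0c00 masks, and bit 5 (0x0020) masks the precision exception
   used for nearbyint.  */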
8395 /* Output code for INSN to convert a float to a signed int. OPERANDS
8396 are the insn operands. The output may be [HSD]Imode and the input
8397 operand may be [SDX]Fmode. */
8399 const char *
8400 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8402 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8403 int dimode_p = GET_MODE (operands[0]) == DImode;
8404 int round_mode = get_attr_i387_cw (insn);
8406 /* Jump through a hoop or two for DImode, since the hardware has no
8407 non-popping instruction. We used to do this a different way, but
8408 that was somewhat fragile and broke with post-reload splitters. */
8409 if ((dimode_p || fisttp) && !stack_top_dies)
8410 output_asm_insn ("fld\t%y1", operands);
8412 gcc_assert (STACK_TOP_P (operands[1]));
8413 gcc_assert (GET_CODE (operands[0]) == MEM);
8415 if (fisttp)
8416 output_asm_insn ("fisttp%z0\t%0", operands);
8417 else
8419 if (round_mode != I387_CW_ANY)
8420 output_asm_insn ("fldcw\t%3", operands);
8421 if (stack_top_dies || dimode_p)
8422 output_asm_insn ("fistp%z0\t%0", operands);
8423 else
8424 output_asm_insn ("fist%z0\t%0", operands);
8425 if (round_mode != I387_CW_ANY)
8426 output_asm_insn ("fldcw\t%2", operands);
8429 return "";
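/* Worked example (illustrative only): for an SImode result with a dying
   stack top and a non-ANY rounding mode, the code above emits roughly

	fldcw	%3
	fistpl	%0
	fldcw	%2

   i.e. switch to the special control word, store-and-pop, then restore the
   caller's control word.  */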
8432 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8433 should be used. UNORDERED_P is true when fucom should be used. */
8435 const char *
8436 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8438 int stack_top_dies;
8439 rtx cmp_op0, cmp_op1;
8440 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8442 if (eflags_p)
8444 cmp_op0 = operands[0];
8445 cmp_op1 = operands[1];
8447 else
8449 cmp_op0 = operands[1];
8450 cmp_op1 = operands[2];
8453 if (is_sse)
8455 if (GET_MODE (operands[0]) == SFmode)
8456 if (unordered_p)
8457 return "ucomiss\t{%1, %0|%0, %1}";
8458 else
8459 return "comiss\t{%1, %0|%0, %1}";
8460 else
8461 if (unordered_p)
8462 return "ucomisd\t{%1, %0|%0, %1}";
8463 else
8464 return "comisd\t{%1, %0|%0, %1}";
8467 gcc_assert (STACK_TOP_P (cmp_op0));
8469 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8471 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8473 if (stack_top_dies)
8475 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8476 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8478 else
8479 return "ftst\n\tfnstsw\t%0";
8482 if (STACK_REG_P (cmp_op1)
8483 && stack_top_dies
8484 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8485 && REGNO (cmp_op1) != FIRST_STACK_REG)
8487 /* If both the top of the 387 stack and the other operand (itself
8488 a stack register) die, then this must be a
8489 `fcompp' float compare. */
8491 if (eflags_p)
8493 /* There is no double popping fcomi variant. Fortunately,
8494 eflags is immune from the fstp's cc clobbering. */
8495 if (unordered_p)
8496 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8497 else
8498 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8499 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8501 else
8503 if (unordered_p)
8504 return "fucompp\n\tfnstsw\t%0";
8505 else
8506 return "fcompp\n\tfnstsw\t%0";
8509 else
8511 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8513 static const char * const alt[16] =
8515 "fcom%z2\t%y2\n\tfnstsw\t%0",
8516 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8517 "fucom%z2\t%y2\n\tfnstsw\t%0",
8518 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8520 "ficom%z2\t%y2\n\tfnstsw\t%0",
8521 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8522 NULL,
8523 NULL,
8525 "fcomi\t{%y1, %0|%0, %y1}",
8526 "fcomip\t{%y1, %0|%0, %y1}",
8527 "fucomi\t{%y1, %0|%0, %y1}",
8528 "fucomip\t{%y1, %0|%0, %y1}",
8530 NULL,
8531 NULL,
8532 NULL,
8533 NULL
8536 int mask;
8537 const char *ret;
8539 mask = eflags_p << 3;
8540 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8541 mask |= unordered_p << 1;
8542 mask |= stack_top_dies;
8544 gcc_assert (mask < 16);
8545 ret = alt[mask];
8546 gcc_assert (ret);
8548 return ret;
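/* Worked example (illustrative only): with EFLAGS_P set, a floating-point
   (non-integer) second operand, UNORDERED_P set and a dying stack top, the
   mask computed above is 8|0|2|1 == 11, which selects
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */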
8552 void
8553 ix86_output_addr_vec_elt (FILE *file, int value)
8555 const char *directive = ASM_LONG;
8557 #ifdef ASM_QUAD
8558 if (TARGET_64BIT)
8559 directive = ASM_QUAD;
8560 #else
8561 gcc_assert (!TARGET_64BIT);
8562 #endif
8564 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8567 void
8568 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8570 if (TARGET_64BIT)
8571 fprintf (file, "%s%s%d-%s%d\n",
8572 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8573 else if (HAVE_AS_GOTOFF_IN_DATA)
8574 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8575 #if TARGET_MACHO
8576 else if (TARGET_MACHO)
8578 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8579 machopic_output_function_base_name (file);
8580 fprintf(file, "\n");
8582 #endif
8583 else
8584 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8585 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8588 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8589 for the target. */
8591 void
8592 ix86_expand_clear (rtx dest)
8594 rtx tmp;
8596 /* We play register width games, which are only valid after reload. */
8597 gcc_assert (reload_completed);
8599 /* Avoid HImode and its attendant prefix byte. */
8600 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8601 dest = gen_rtx_REG (SImode, REGNO (dest));
8603 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8605 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8606 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8608 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8609 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8612 emit_insn (tmp);
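/* Usage sketch (illustrative only): after reload this is typically called on
   an integer hard register; with the flags clobber attached the insn matches
   the movsi_xor pattern and assembles as, e.g., "xorl %eax, %eax" rather
   than "movl $0, %eax" whenever TARGET_USE_MOV0 is off or we optimize for
   size.  */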
8615 /* X is an unchanging MEM. If it is a constant pool reference, return
8616 the constant pool rtx, else NULL. */
8619 maybe_get_pool_constant (rtx x)
8621 x = ix86_delegitimize_address (XEXP (x, 0));
8623 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8624 return get_pool_constant (x);
8626 return NULL_RTX;
8629 void
8630 ix86_expand_move (enum machine_mode mode, rtx operands[])
8632 int strict = (reload_in_progress || reload_completed);
8633 rtx op0, op1;
8634 enum tls_model model;
8636 op0 = operands[0];
8637 op1 = operands[1];
8639 if (GET_CODE (op1) == SYMBOL_REF)
8641 model = SYMBOL_REF_TLS_MODEL (op1);
8642 if (model)
8644 op1 = legitimize_tls_address (op1, model, true);
8645 op1 = force_operand (op1, op0);
8646 if (op1 == op0)
8647 return;
8650 else if (GET_CODE (op1) == CONST
8651 && GET_CODE (XEXP (op1, 0)) == PLUS
8652 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8654 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8655 if (model)
8657 rtx addend = XEXP (XEXP (op1, 0), 1);
8658 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8659 op1 = force_operand (op1, NULL);
8660 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8661 op0, 1, OPTAB_DIRECT);
8662 if (op1 == op0)
8663 return;
8667 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8669 #if TARGET_MACHO
8670 if (MACHOPIC_PURE)
8672 rtx temp = ((reload_in_progress
8673 || ((op0 && GET_CODE (op0) == REG)
8674 && mode == Pmode))
8675 ? op0 : gen_reg_rtx (Pmode));
8676 op1 = machopic_indirect_data_reference (op1, temp);
8677 op1 = machopic_legitimize_pic_address (op1, mode,
8678 temp == op1 ? 0 : temp);
8680 else if (MACHOPIC_INDIRECT)
8681 op1 = machopic_indirect_data_reference (op1, 0);
8682 if (op0 == op1)
8683 return;
8684 #else
8685 if (GET_CODE (op0) == MEM)
8686 op1 = force_reg (Pmode, op1);
8687 else
8688 op1 = legitimize_address (op1, op1, Pmode);
8689 #endif /* TARGET_MACHO */
8691 else
8693 if (GET_CODE (op0) == MEM
8694 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8695 || !push_operand (op0, mode))
8696 && GET_CODE (op1) == MEM)
8697 op1 = force_reg (mode, op1);
8699 if (push_operand (op0, mode)
8700 && ! general_no_elim_operand (op1, mode))
8701 op1 = copy_to_mode_reg (mode, op1);
8703 /* Force large constants in 64bit compilation into a register
8704 to get them CSEed. */
8705 if (TARGET_64BIT && mode == DImode
8706 && immediate_operand (op1, mode)
8707 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8708 && !register_operand (op0, mode)
8709 && optimize && !reload_completed && !reload_in_progress)
8710 op1 = copy_to_mode_reg (mode, op1);
8712 if (FLOAT_MODE_P (mode))
8714 /* If we are loading a floating point constant to a register,
8715 force the value to memory now, since we'll get better code
8716 out the back end. */
8718 if (strict)
8720 else if (GET_CODE (op1) == CONST_DOUBLE)
8722 op1 = validize_mem (force_const_mem (mode, op1));
8723 if (!register_operand (op0, mode))
8725 rtx temp = gen_reg_rtx (mode);
8726 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8727 emit_move_insn (op0, temp);
8728 return;
8734 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8737 void
8738 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8740 rtx op0 = operands[0], op1 = operands[1];
8742 /* Force constants other than zero into memory. We do not know how
8743 the instructions used to build constants modify the upper 64 bits
8744 of the register; once we have that information we may be able
8745 to handle some of them more efficiently. */
8746 if ((reload_in_progress | reload_completed) == 0
8747 && register_operand (op0, mode)
8748 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8749 op1 = validize_mem (force_const_mem (mode, op1));
8751 /* Make operand1 a register if it isn't already. */
8752 if (!no_new_pseudos
8753 && !register_operand (op0, mode)
8754 && !register_operand (op1, mode))
8756 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8757 return;
8760 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8763 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8764 straight to ix86_expand_vector_move. */
8766 void
8767 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8769 rtx op0, op1, m;
8771 op0 = operands[0];
8772 op1 = operands[1];
8774 if (MEM_P (op1))
8776 /* If we're optimizing for size, movups is the smallest. */
8777 if (optimize_size)
8779 op0 = gen_lowpart (V4SFmode, op0);
8780 op1 = gen_lowpart (V4SFmode, op1);
8781 emit_insn (gen_sse_movups (op0, op1));
8782 return;
8785 /* ??? If we have typed data, then it would appear that using
8786 movdqu is the only way to get unaligned data loaded with
8787 integer type. */
8788 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8790 op0 = gen_lowpart (V16QImode, op0);
8791 op1 = gen_lowpart (V16QImode, op1);
8792 emit_insn (gen_sse2_movdqu (op0, op1));
8793 return;
8796 if (TARGET_SSE2 && mode == V2DFmode)
8798 rtx zero;
8800 /* When SSE registers are split into halves, we can avoid
8801 writing to the top half twice. */
8802 if (TARGET_SSE_SPLIT_REGS)
8804 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8805 zero = op0;
8807 else
8809 /* ??? Not sure about the best option for the Intel chips.
8810 The following would seem to satisfy; the register is
8811 entirely cleared, breaking the dependency chain. We
8812 then store to the upper half, with a dependency depth
8813 of one. A rumor has it that Intel recommends two movsd
8814 followed by an unpacklpd, but this is unconfirmed. And
8815 given that the dependency depth of the unpacklpd would
8816 still be one, I'm not sure why this would be better. */
8817 zero = CONST0_RTX (V2DFmode);
8820 m = adjust_address (op1, DFmode, 0);
8821 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8822 m = adjust_address (op1, DFmode, 8);
8823 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8825 else
8827 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8828 emit_move_insn (op0, CONST0_RTX (mode));
8829 else
8830 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8832 if (mode != V4SFmode)
8833 op0 = gen_lowpart (V4SFmode, op0);
8834 m = adjust_address (op1, V2SFmode, 0);
8835 emit_insn (gen_sse_loadlps (op0, op0, m));
8836 m = adjust_address (op1, V2SFmode, 8);
8837 emit_insn (gen_sse_loadhps (op0, op0, m));
8840 else if (MEM_P (op0))
8842 /* If we're optimizing for size, movups is the smallest. */
8843 if (optimize_size)
8845 op0 = gen_lowpart (V4SFmode, op0);
8846 op1 = gen_lowpart (V4SFmode, op1);
8847 emit_insn (gen_sse_movups (op0, op1));
8848 return;
8851 /* ??? Similar to above, only less clear because of quote
8852 typeless stores unquote. */
8853 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8854 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8856 op0 = gen_lowpart (V16QImode, op0);
8857 op1 = gen_lowpart (V16QImode, op1);
8858 emit_insn (gen_sse2_movdqu (op0, op1));
8859 return;
8862 if (TARGET_SSE2 && mode == V2DFmode)
8864 m = adjust_address (op0, DFmode, 0);
8865 emit_insn (gen_sse2_storelpd (m, op1));
8866 m = adjust_address (op0, DFmode, 8);
8867 emit_insn (gen_sse2_storehpd (m, op1));
8869 else
8871 if (mode != V4SFmode)
8872 op1 = gen_lowpart (V4SFmode, op1);
8873 m = adjust_address (op0, V2SFmode, 0);
8874 emit_insn (gen_sse_storelps (m, op1));
8875 m = adjust_address (op0, V2SFmode, 8);
8876 emit_insn (gen_sse_storehps (m, op1));
8879 else
8880 gcc_unreachable ();
8883 /* Expand a push in MODE. This is some mode for which we do not support
8884 proper push instructions, at least from the registers that we expect
8885 the value to live in. */
8887 void
8888 ix86_expand_push (enum machine_mode mode, rtx x)
8890 rtx tmp;
8892 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8893 GEN_INT (-GET_MODE_SIZE (mode)),
8894 stack_pointer_rtx, 1, OPTAB_DIRECT);
8895 if (tmp != stack_pointer_rtx)
8896 emit_move_insn (stack_pointer_rtx, tmp);
8898 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8899 emit_move_insn (tmp, x);
8902 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8903 destination to use for the operation. If different from the true
8904 destination in operands[0], a copy operation will be required. */
8907 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8908 rtx operands[])
8910 int matching_memory;
8911 rtx src1, src2, dst;
8913 dst = operands[0];
8914 src1 = operands[1];
8915 src2 = operands[2];
8917 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8918 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8919 && (rtx_equal_p (dst, src2)
8920 || immediate_operand (src1, mode)))
8922 rtx temp = src1;
8923 src1 = src2;
8924 src2 = temp;
8927 /* If the destination is memory, and we do not have matching source
8928 operands, do things in registers. */
8929 matching_memory = 0;
8930 if (GET_CODE (dst) == MEM)
8932 if (rtx_equal_p (dst, src1))
8933 matching_memory = 1;
8934 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8935 && rtx_equal_p (dst, src2))
8936 matching_memory = 2;
8937 else
8938 dst = gen_reg_rtx (mode);
8941 /* Both source operands cannot be in memory. */
8942 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8944 if (matching_memory != 2)
8945 src2 = force_reg (mode, src2);
8946 else
8947 src1 = force_reg (mode, src1);
8950 /* If the operation is not commutative, source 1 cannot be a constant
8951 or non-matching memory. */
8952 if ((CONSTANT_P (src1)
8953 || (!matching_memory && GET_CODE (src1) == MEM))
8954 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8955 src1 = force_reg (mode, src1);
8957 src1 = operands[1] = src1;
8958 src2 = operands[2] = src2;
8959 return dst;
8962 /* Similarly, but assume that the destination has already been
8963 set up properly. */
8965 void
8966 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8967 enum machine_mode mode, rtx operands[])
8969 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8970 gcc_assert (dst == operands[0]);
8973 /* Attempt to expand a binary operator. Make the expansion closer to the
8974 actual machine than just general_operand, which would allow 3 separate
8975 memory references (one output, two input) in a single insn. */
8977 void
8978 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8979 rtx operands[])
8981 rtx src1, src2, dst, op, clob;
8983 dst = ix86_fixup_binary_operands (code, mode, operands);
8984 src1 = operands[1];
8985 src2 = operands[2];
8987 /* Emit the instruction. */
8989 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8990 if (reload_in_progress)
8992 /* Reload doesn't know about the flags register, and doesn't know that
8993 it doesn't want to clobber it. We can only do this with PLUS. */
8994 gcc_assert (code == PLUS);
8995 emit_insn (op);
8997 else
8999 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9000 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9003 /* Fix up the destination if needed. */
9004 if (dst != operands[0])
9005 emit_move_insn (operands[0], dst);
9008 /* Return TRUE or FALSE depending on whether the binary operator meets the
9009 appropriate constraints. */
9012 ix86_binary_operator_ok (enum rtx_code code,
9013 enum machine_mode mode ATTRIBUTE_UNUSED,
9014 rtx operands[3])
9016 /* Both source operands cannot be in memory. */
9017 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9018 return 0;
9019 /* If the operation is not commutative, source 1 cannot be a constant. */
9020 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9021 return 0;
9022 /* If the destination is memory, we must have a matching source operand. */
9023 if (GET_CODE (operands[0]) == MEM
9024 && ! (rtx_equal_p (operands[0], operands[1])
9025 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9026 && rtx_equal_p (operands[0], operands[2]))))
9027 return 0;
9028 /* If the operation is not commutative and source 1 is memory, we must
9029 have a matching destination. */
9030 if (GET_CODE (operands[1]) == MEM
9031 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9032 && ! rtx_equal_p (operands[0], operands[1]))
9033 return 0;
9034 return 1;
9037 /* Attempt to expand a unary operator. Make the expansion closer to the
9038 actual machine than just general_operand, which would allow 2 separate
9039 memory references (one output, one input) in a single insn. */
9041 void
9042 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9043 rtx operands[])
9045 int matching_memory;
9046 rtx src, dst, op, clob;
9048 dst = operands[0];
9049 src = operands[1];
9051 /* If the destination is memory, and we do not have matching source
9052 operands, do things in registers. */
9053 matching_memory = 0;
9054 if (MEM_P (dst))
9056 if (rtx_equal_p (dst, src))
9057 matching_memory = 1;
9058 else
9059 dst = gen_reg_rtx (mode);
9062 /* When source operand is memory, destination must match. */
9063 if (MEM_P (src) && !matching_memory)
9064 src = force_reg (mode, src);
9066 /* Emit the instruction. */
9068 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9069 if (reload_in_progress || code == NOT)
9071 /* Reload doesn't know about the flags register, and doesn't know that
9072 it doesn't want to clobber it. */
9073 gcc_assert (code == NOT);
9074 emit_insn (op);
9076 else
9078 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9079 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9082 /* Fix up the destination if needed. */
9083 if (dst != operands[0])
9084 emit_move_insn (operands[0], dst);
9087 /* Return TRUE or FALSE depending on whether the unary operator meets the
9088 appropriate constraints. */
9091 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9092 enum machine_mode mode ATTRIBUTE_UNUSED,
9093 rtx operands[2] ATTRIBUTE_UNUSED)
9095 /* If one of operands is memory, source and destination must match. */
9096 if ((GET_CODE (operands[0]) == MEM
9097 || GET_CODE (operands[1]) == MEM)
9098 && ! rtx_equal_p (operands[0], operands[1]))
9099 return FALSE;
9100 return TRUE;
9103 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9104 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9105 true, then replicate the mask for all elements of the vector register.
9106 If INVERT is true, then create a mask excluding the sign bit. */
9109 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9111 enum machine_mode vec_mode;
9112 HOST_WIDE_INT hi, lo;
9113 int shift = 63;
9114 rtvec v;
9115 rtx mask;
9117 /* Find the sign bit, sign extended to 2*HWI. */
9118 if (mode == SFmode)
9119 lo = 0x80000000, hi = lo < 0;
9120 else if (HOST_BITS_PER_WIDE_INT >= 64)
9121 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9122 else
9123 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9125 if (invert)
9126 lo = ~lo, hi = ~hi;
9128 /* Force this value into the low part of a fp vector constant. */
9129 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9130 mask = gen_lowpart (mode, mask);
9132 if (mode == SFmode)
9134 if (vect)
9135 v = gen_rtvec (4, mask, mask, mask, mask);
9136 else
9137 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9138 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9139 vec_mode = V4SFmode;
9141 else
9143 if (vect)
9144 v = gen_rtvec (2, mask, mask);
9145 else
9146 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9147 vec_mode = V2DFmode;
9150 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
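/* Worked example (illustrative only): for DFmode with VECT and INVERT both
   false this returns a register holding the V2DF constant { -0.0, 0.0 }
   (only the sign bit of the low element set); with INVERT true the low
   element instead has every bit except the sign bit set, the mask needed
   for absolute value.  */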
9153 /* Generate code for floating point ABS or NEG. */
9155 void
9156 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9157 rtx operands[])
9159 rtx mask, set, use, clob, dst, src;
9160 bool matching_memory;
9161 bool use_sse = false;
9162 bool vector_mode = VECTOR_MODE_P (mode);
9163 enum machine_mode elt_mode = mode;
9165 if (vector_mode)
9167 elt_mode = GET_MODE_INNER (mode);
9168 use_sse = true;
9170 else if (TARGET_SSE_MATH)
9171 use_sse = SSE_FLOAT_MODE_P (mode);
9173 /* NEG and ABS performed with SSE use bitwise mask operations.
9174 Create the appropriate mask now. */
9175 if (use_sse)
9176 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9177 else
9179 /* When not using SSE, we don't use the mask, but prefer to keep the
9180 same general form of the insn pattern to reduce duplication when
9181 it comes time to split. */
9182 mask = const0_rtx;
9185 dst = operands[0];
9186 src = operands[1];
9188 /* If the destination is memory, and we don't have matching source
9189 operands, do things in registers. */
9190 matching_memory = false;
9191 if (MEM_P (dst))
9193 if (rtx_equal_p (dst, src))
9194 matching_memory = true;
9195 else
9196 dst = gen_reg_rtx (mode);
9198 if (MEM_P (src) && !matching_memory)
9199 src = force_reg (mode, src);
9201 if (vector_mode)
9203 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9204 set = gen_rtx_SET (VOIDmode, dst, set);
9205 emit_insn (set);
9207 else
9209 set = gen_rtx_fmt_e (code, mode, src);
9210 set = gen_rtx_SET (VOIDmode, dst, set);
9211 use = gen_rtx_USE (VOIDmode, mask);
9212 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9213 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9216 if (dst != operands[0])
9217 emit_move_insn (operands[0], dst);
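/* Sketch of the resulting insns (illustrative only): with SSE math, NEG is
   expressed as an XOR with the sign-bit mask built above and ABS as an AND
   with the inverted mask; without SSE the mask operand is just const0_rtx,
   kept only so both forms share one pattern until they are split.  */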
9220 /* Expand a copysign operation. Special case operand 0 being a constant. */
9222 void
9223 ix86_expand_copysign (rtx operands[])
9225 enum machine_mode mode, vmode;
9226 rtx dest, op0, op1, mask, nmask;
9228 dest = operands[0];
9229 op0 = operands[1];
9230 op1 = operands[2];
9232 mode = GET_MODE (dest);
9233 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9235 if (GET_CODE (op0) == CONST_DOUBLE)
9237 rtvec v;
9239 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9240 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9242 if (op0 == CONST0_RTX (mode))
9243 op0 = CONST0_RTX (vmode);
9244 else
9246 if (mode == SFmode)
9247 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9248 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9249 else
9250 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9251 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9254 mask = ix86_build_signbit_mask (mode, 0, 0);
9256 if (mode == SFmode)
9257 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9258 else
9259 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9261 else
9263 nmask = ix86_build_signbit_mask (mode, 0, 1);
9264 mask = ix86_build_signbit_mask (mode, 0, 0);
9266 if (mode == SFmode)
9267 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9268 else
9269 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9273 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9274 be a constant, and so has already been expanded into a vector constant. */
9276 void
9277 ix86_split_copysign_const (rtx operands[])
9279 enum machine_mode mode, vmode;
9280 rtx dest, op0, op1, mask, x;
9282 dest = operands[0];
9283 op0 = operands[1];
9284 op1 = operands[2];
9285 mask = operands[3];
9287 mode = GET_MODE (dest);
9288 vmode = GET_MODE (mask);
9290 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9291 x = gen_rtx_AND (vmode, dest, mask);
9292 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9294 if (op0 != CONST0_RTX (vmode))
9296 x = gen_rtx_IOR (vmode, dest, op0);
9297 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9301 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9302 so we have to do two masks. */
9304 void
9305 ix86_split_copysign_var (rtx operands[])
9307 enum machine_mode mode, vmode;
9308 rtx dest, scratch, op0, op1, mask, nmask, x;
9310 dest = operands[0];
9311 scratch = operands[1];
9312 op0 = operands[2];
9313 op1 = operands[3];
9314 nmask = operands[4];
9315 mask = operands[5];
9317 mode = GET_MODE (dest);
9318 vmode = GET_MODE (mask);
9320 if (rtx_equal_p (op0, op1))
9322 /* Shouldn't happen often (it's useless, obviously), but when it does
9323 we'd generate incorrect code if we continue below. */
9324 emit_move_insn (dest, op0);
9325 return;
9328 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9330 gcc_assert (REGNO (op1) == REGNO (scratch));
9332 x = gen_rtx_AND (vmode, scratch, mask);
9333 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9335 dest = mask;
9336 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9337 x = gen_rtx_NOT (vmode, dest);
9338 x = gen_rtx_AND (vmode, x, op0);
9339 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9341 else
9343 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9345 x = gen_rtx_AND (vmode, scratch, mask);
9347 else /* alternative 2,4 */
9349 gcc_assert (REGNO (mask) == REGNO (scratch));
9350 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9351 x = gen_rtx_AND (vmode, scratch, op1);
9353 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9355 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9357 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9358 x = gen_rtx_AND (vmode, dest, nmask);
9360 else /* alternative 3,4 */
9362 gcc_assert (REGNO (nmask) == REGNO (dest));
9363 dest = nmask;
9364 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9365 x = gen_rtx_AND (vmode, dest, op0);
9367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9370 x = gen_rtx_IOR (vmode, dest, scratch);
9371 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9374 /* Return TRUE or FALSE depending on whether the first SET in INSN
9375 has source and destination with matching CC modes and whether the
9376 CC mode is at least as constrained as REQ_MODE. */
9379 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9381 rtx set;
9382 enum machine_mode set_mode;
9384 set = PATTERN (insn);
9385 if (GET_CODE (set) == PARALLEL)
9386 set = XVECEXP (set, 0, 0);
9387 gcc_assert (GET_CODE (set) == SET);
9388 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9390 set_mode = GET_MODE (SET_DEST (set));
9391 switch (set_mode)
9393 case CCNOmode:
9394 if (req_mode != CCNOmode
9395 && (req_mode != CCmode
9396 || XEXP (SET_SRC (set), 1) != const0_rtx))
9397 return 0;
9398 break;
9399 case CCmode:
9400 if (req_mode == CCGCmode)
9401 return 0;
9402 /* FALLTHRU */
9403 case CCGCmode:
9404 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9405 return 0;
9406 /* FALLTHRU */
9407 case CCGOCmode:
9408 if (req_mode == CCZmode)
9409 return 0;
9410 /* FALLTHRU */
9411 case CCZmode:
9412 break;
9414 default:
9415 gcc_unreachable ();
9418 return (GET_MODE (SET_SRC (set)) == set_mode);
9421 /* Generate insn patterns to do an integer compare of OPERANDS. */
9423 static rtx
9424 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9426 enum machine_mode cmpmode;
9427 rtx tmp, flags;
9429 cmpmode = SELECT_CC_MODE (code, op0, op1);
9430 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9432 /* This is very simple, but making the interface the same as in the
9433 FP case makes the rest of the code easier. */
9434 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9435 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9437 /* Return the test that should be put into the flags user, i.e.
9438 the bcc, scc, or cmov instruction. */
9439 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9442 /* Figure out whether to use ordered or unordered fp comparisons.
9443 Return the appropriate mode to use. */
9445 enum machine_mode
9446 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9448 /* ??? In order to make all comparisons reversible, we do all comparisons
9449 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9450 all forms of trapping and non-trapping comparisons, we can make inequality
9451 comparisons trapping again, since it results in better code when using
9452 FCOM based compares. */
9453 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9456 enum machine_mode
9457 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9459 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9460 return ix86_fp_compare_mode (code);
9461 switch (code)
9463 /* Only zero flag is needed. */
9464 case EQ: /* ZF=0 */
9465 case NE: /* ZF!=0 */
9466 return CCZmode;
9467 /* Codes needing carry flag. */
9468 case GEU: /* CF=0 */
9469 case GTU: /* CF=0 & ZF=0 */
9470 case LTU: /* CF=1 */
9471 case LEU: /* CF=1 | ZF=1 */
9472 return CCmode;
9473 /* Codes possibly doable only with sign flag when
9474 comparing against zero. */
9475 case GE: /* SF=OF or SF=0 */
9476 case LT: /* SF<>OF or SF=1 */
9477 if (op1 == const0_rtx)
9478 return CCGOCmode;
9479 else
9480 /* For other cases Carry flag is not required. */
9481 return CCGCmode;
9482 /* Codes doable only with sign flag when comparing
9483 against zero, but we lack a jump instruction for it,
9484 so we need to use relational tests against overflow,
9485 which thus needs to be zero.
9486 case GT: /* ZF=0 & SF=OF */
9487 case LE: /* ZF=1 | SF<>OF */
9488 if (op1 == const0_rtx)
9489 return CCNOmode;
9490 else
9491 return CCGCmode;
9492 /* The strcmp pattern does (use flags), and combine may ask us for the
9493 proper mode. */
9494 case USE:
9495 return CCmode;
9496 default:
9497 gcc_unreachable ();
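/* Summarizing the mapping above: a == b needs only ZF (CCZmode); an
   unsigned a < b needs CF (CCmode); a < 0 can be tested from the sign
   flag alone (CCGOCmode); a > 0 needs ZF plus SF with OF known to be
   zero (CCNOmode); signed <, <=, >, >= against a general operand fall
   back to CCGCmode.  */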
9501 /* Return the fixed registers used for condition codes. */
9503 static bool
9504 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9506 *p1 = FLAGS_REG;
9507 *p2 = FPSR_REG;
9508 return true;
9511 /* If two condition code modes are compatible, return a condition code
9512 mode which is compatible with both. Otherwise, return
9513 VOIDmode. */
9515 static enum machine_mode
9516 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9518 if (m1 == m2)
9519 return m1;
9521 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9522 return VOIDmode;
9524 if ((m1 == CCGCmode && m2 == CCGOCmode)
9525 || (m1 == CCGOCmode && m2 == CCGCmode))
9526 return CCGCmode;
9528 switch (m1)
9530 default:
9531 gcc_unreachable ();
9533 case CCmode:
9534 case CCGCmode:
9535 case CCGOCmode:
9536 case CCNOmode:
9537 case CCZmode:
9538 switch (m2)
9540 default:
9541 return VOIDmode;
9543 case CCmode:
9544 case CCGCmode:
9545 case CCGOCmode:
9546 case CCNOmode:
9547 case CCZmode:
9548 return CCmode;
9551 case CCFPmode:
9552 case CCFPUmode:
9553 /* These are only compatible with themselves, which we already
9554 checked above. */
9555 return VOIDmode;
9559 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9561 int
9562 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9564 enum rtx_code swapped_code = swap_condition (code);
9565 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9566 || (ix86_fp_comparison_cost (swapped_code)
9567 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9570 /* Swap, force into registers, or otherwise massage the two operands
9571 to a fp comparison. The operands are updated in place; the new
9572 comparison code is returned. */
9574 static enum rtx_code
9575 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9577 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9578 rtx op0 = *pop0, op1 = *pop1;
9579 enum machine_mode op_mode = GET_MODE (op0);
9580 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9582 /* All of the unordered compare instructions only work on registers.
9583 The same is true of the fcomi compare instructions. The XFmode
9584 compare instructions require registers except when comparing
9585 against zero or when converting operand 1 from fixed point to
9586 floating point. */
9588 if (!is_sse
9589 && (fpcmp_mode == CCFPUmode
9590 || (op_mode == XFmode
9591 && ! (standard_80387_constant_p (op0) == 1
9592 || standard_80387_constant_p (op1) == 1)
9593 && GET_CODE (op1) != FLOAT)
9594 || ix86_use_fcomi_compare (code)))
9596 op0 = force_reg (op_mode, op0);
9597 op1 = force_reg (op_mode, op1);
9599 else
9601 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9602 things around if they appear profitable, otherwise force op0
9603 into a register. */
9605 if (standard_80387_constant_p (op0) == 0
9606 || (GET_CODE (op0) == MEM
9607 && ! (standard_80387_constant_p (op1) == 0
9608 || GET_CODE (op1) == MEM)))
9610 rtx tmp;
9611 tmp = op0, op0 = op1, op1 = tmp;
9612 code = swap_condition (code);
9615 if (GET_CODE (op0) != REG)
9616 op0 = force_reg (op_mode, op0);
9618 if (CONSTANT_P (op1))
9620 int tmp = standard_80387_constant_p (op1);
9621 if (tmp == 0)
9622 op1 = validize_mem (force_const_mem (op_mode, op1));
9623 else if (tmp == 1)
9625 if (TARGET_CMOVE)
9626 op1 = force_reg (op_mode, op1);
9628 else
9629 op1 = force_reg (op_mode, op1);
9633 /* Try to rearrange the comparison to make it cheaper. */
9634 if (ix86_fp_comparison_cost (code)
9635 > ix86_fp_comparison_cost (swap_condition (code))
9636 && (GET_CODE (op1) == REG || !no_new_pseudos))
9638 rtx tmp;
9639 tmp = op0, op0 = op1, op1 = tmp;
9640 code = swap_condition (code);
9641 if (GET_CODE (op0) != REG)
9642 op0 = force_reg (op_mode, op0);
9645 *pop0 = op0;
9646 *pop1 = op1;
9647 return code;
9650 /* Convert comparison codes we use to represent FP comparison to integer
9651 code that will result in proper branch. Return UNKNOWN if no such code
9652 is available. */
9654 enum rtx_code
9655 ix86_fp_compare_code_to_integer (enum rtx_code code)
9657 switch (code)
9659 case GT:
9660 return GTU;
9661 case GE:
9662 return GEU;
9663 case ORDERED:
9664 case UNORDERED:
9665 return code;
9666 break;
9667 case UNEQ:
9668 return EQ;
9669 break;
9670 case UNLT:
9671 return LTU;
9672 break;
9673 case UNLE:
9674 return LEU;
9675 break;
9676 case LTGT:
9677 return NE;
9678 break;
9679 default:
9680 return UNKNOWN;
9684 /* Split comparison code CODE into comparisons we can do using branch
9685 instructions. BYPASS_CODE is comparison code for branch that will
9686 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9687 is not required, its value is set to UNKNOWN.
9688 We never require more than two branches. */
9690 void
9691 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9692 enum rtx_code *first_code,
9693 enum rtx_code *second_code)
9695 *first_code = code;
9696 *bypass_code = UNKNOWN;
9697 *second_code = UNKNOWN;
9699 /* The fcomi comparison sets flags as follows:
9701 cmp ZF PF CF
9702 > 0 0 0
9703 < 0 0 1
9704 = 1 0 0
9705 un 1 1 1 */
9707 switch (code)
9709 case GT: /* GTU - CF=0 & ZF=0 */
9710 case GE: /* GEU - CF=0 */
9711 case ORDERED: /* PF=0 */
9712 case UNORDERED: /* PF=1 */
9713 case UNEQ: /* EQ - ZF=1 */
9714 case UNLT: /* LTU - CF=1 */
9715 case UNLE: /* LEU - CF=1 | ZF=1 */
9716 case LTGT: /* EQ - ZF=0 */
9717 break;
9718 case LT: /* LTU - CF=1 - fails on unordered */
9719 *first_code = UNLT;
9720 *bypass_code = UNORDERED;
9721 break;
9722 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9723 *first_code = UNLE;
9724 *bypass_code = UNORDERED;
9725 break;
9726 case EQ: /* EQ - ZF=1 - fails on unordered */
9727 *first_code = UNEQ;
9728 *bypass_code = UNORDERED;
9729 break;
9730 case NE: /* NE - ZF=0 - fails on unordered */
9731 *first_code = LTGT;
9732 *second_code = UNORDERED;
9733 break;
9734 case UNGE: /* GEU - CF=0 - fails on unordered */
9735 *first_code = GE;
9736 *second_code = UNORDERED;
9737 break;
9738 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9739 *first_code = GT;
9740 *second_code = UNORDERED;
9741 break;
9742 default:
9743 gcc_unreachable ();
9745 if (!TARGET_IEEE_FP)
9747 *second_code = UNKNOWN;
9748 *bypass_code = UNKNOWN;
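/* For example, with TARGET_IEEE_FP a < b is split into first_code = UNLT
   with bypass_code = UNORDERED (branch around the UNLT test when the
   operands are unordered), while a != b is split into first_code = LTGT
   with second_code = UNORDERED (also take the branch when unordered).
   Without TARGET_IEEE_FP both extra codes are cleared just above.  */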
9752 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9753 All of the following functions use the number of instructions as the cost metric.
9754 In the future this should be tweaked to compute bytes for optimize_size and
9755 to take into account the performance of various instructions on various CPUs. */
9756 static int
9757 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9759 if (!TARGET_IEEE_FP)
9760 return 4;
9761 /* The cost of code output by ix86_expand_fp_compare. */
9762 switch (code)
9764 case UNLE:
9765 case UNLT:
9766 case LTGT:
9767 case GT:
9768 case GE:
9769 case UNORDERED:
9770 case ORDERED:
9771 case UNEQ:
9772 return 4;
9773 break;
9774 case LT:
9775 case NE:
9776 case EQ:
9777 case UNGE:
9778 return 5;
9779 break;
9780 case LE:
9781 case UNGT:
9782 return 6;
9783 break;
9784 default:
9785 gcc_unreachable ();
9789 /* Return cost of comparison done using fcomi operation.
9790 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9791 static int
9792 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9794 enum rtx_code bypass_code, first_code, second_code;
9795 /* Return arbitrarily high cost when instruction is not supported - this
9796 prevents gcc from using it. */
9797 if (!TARGET_CMOVE)
9798 return 1024;
9799 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9800 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9803 /* Return cost of comparison done using sahf operation.
9804 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9805 static int
9806 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9808 enum rtx_code bypass_code, first_code, second_code;
9809 /* Return an arbitrarily high cost when the instruction is not preferred - this
9810 keeps gcc from using it. */
9811 if (!TARGET_USE_SAHF && !optimize_size)
9812 return 1024;
9813 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9814 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9817 /* Compute cost of the comparison done using any method.
9818 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9819 static int
9820 ix86_fp_comparison_cost (enum rtx_code code)
9822 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9823 int min;
9825 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9826 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9828 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9829 if (min > sahf_cost)
9830 min = sahf_cost;
9831 if (min > fcomi_cost)
9832 min = fcomi_cost;
9833 return min;
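/* Worked example: for EQ with TARGET_IEEE_FP the split is UNEQ plus an
   UNORDERED bypass, so the fcomi cost is 1 + 2 = 3 (when cmov exists),
   the sahf cost is 1 + 3 = 4 (when sahf is usable) and the arithmetic
   cost is 5; the fcomi sequence is therefore preferred when available.  */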
9836 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9838 static rtx
9839 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9840 rtx *second_test, rtx *bypass_test)
9842 enum machine_mode fpcmp_mode, intcmp_mode;
9843 rtx tmp, tmp2;
9844 int cost = ix86_fp_comparison_cost (code);
9845 enum rtx_code bypass_code, first_code, second_code;
9847 fpcmp_mode = ix86_fp_compare_mode (code);
9848 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9850 if (second_test)
9851 *second_test = NULL_RTX;
9852 if (bypass_test)
9853 *bypass_test = NULL_RTX;
9855 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9857 /* Do fcomi/sahf based test when profitable. */
9858 if ((bypass_code == UNKNOWN || bypass_test)
9859 && (second_code == UNKNOWN || second_test)
9860 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9862 if (TARGET_CMOVE)
9864 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9865 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9866 tmp);
9867 emit_insn (tmp);
9869 else
9871 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9872 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9873 if (!scratch)
9874 scratch = gen_reg_rtx (HImode);
9875 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9876 emit_insn (gen_x86_sahf_1 (scratch));
9879 /* The FP codes work out to act like unsigned. */
9880 intcmp_mode = fpcmp_mode;
9881 code = first_code;
9882 if (bypass_code != UNKNOWN)
9883 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9884 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9885 const0_rtx);
9886 if (second_code != UNKNOWN)
9887 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9888 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9889 const0_rtx);
9891 else
9893 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9894 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9895 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9896 if (!scratch)
9897 scratch = gen_reg_rtx (HImode);
9898 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9900 /* In the unordered case, we have to check C2 for NaN's, which
9901 doesn't happen to work out to anything nice combination-wise.
9902 So do some bit twiddling on the value we've got in AH to come
9903 up with an appropriate set of condition codes. */
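/* The FNSTSW status word has C0 in bit 8, C2 in bit 10 and C3 in bit 14;
   the *_ext patterns below examine the high byte of SCRATCH, so the
   masks used are 0x01 = C0, 0x04 = C2 (set when unordered), 0x40 = C3
   and 0x45 = C0|C2|C3.  */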
9905 intcmp_mode = CCNOmode;
9906 switch (code)
9908 case GT:
9909 case UNGT:
9910 if (code == GT || !TARGET_IEEE_FP)
9912 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9913 code = EQ;
9915 else
9917 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9918 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9919 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9920 intcmp_mode = CCmode;
9921 code = GEU;
9923 break;
9924 case LT:
9925 case UNLT:
9926 if (code == LT && TARGET_IEEE_FP)
9928 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9929 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9930 intcmp_mode = CCmode;
9931 code = EQ;
9933 else
9935 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9936 code = NE;
9938 break;
9939 case GE:
9940 case UNGE:
9941 if (code == GE || !TARGET_IEEE_FP)
9943 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9944 code = EQ;
9946 else
9948 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9949 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9950 GEN_INT (0x01)));
9951 code = NE;
9953 break;
9954 case LE:
9955 case UNLE:
9956 if (code == LE && TARGET_IEEE_FP)
9958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9959 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9960 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9961 intcmp_mode = CCmode;
9962 code = LTU;
9964 else
9966 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9967 code = NE;
9969 break;
9970 case EQ:
9971 case UNEQ:
9972 if (code == EQ && TARGET_IEEE_FP)
9974 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9975 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9976 intcmp_mode = CCmode;
9977 code = EQ;
9979 else
9981 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9982 code = NE;
9983 break;
9985 break;
9986 case NE:
9987 case LTGT:
9988 if (code == NE && TARGET_IEEE_FP)
9990 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9991 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9992 GEN_INT (0x40)));
9993 code = NE;
9995 else
9997 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9998 code = EQ;
10000 break;
10002 case UNORDERED:
10003 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10004 code = NE;
10005 break;
10006 case ORDERED:
10007 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10008 code = EQ;
10009 break;
10011 default:
10012 gcc_unreachable ();
10016 /* Return the test that should be put into the flags user, i.e.
10017 the bcc, scc, or cmov instruction. */
10018 return gen_rtx_fmt_ee (code, VOIDmode,
10019 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10020 const0_rtx);
10023 static rtx
10024 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10026 rtx op0, op1, ret;
10027 op0 = ix86_compare_op0;
10028 op1 = ix86_compare_op1;
10030 if (second_test)
10031 *second_test = NULL_RTX;
10032 if (bypass_test)
10033 *bypass_test = NULL_RTX;
10035 if (ix86_compare_emitted)
10037 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10038 ix86_compare_emitted = NULL_RTX;
10040 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10041 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10042 second_test, bypass_test);
10043 else
10044 ret = ix86_expand_int_compare (code, op0, op1);
10046 return ret;
10049 /* Return true if the CODE will result in nontrivial jump sequence. */
10050 bool
10051 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10053 enum rtx_code bypass_code, first_code, second_code;
10054 if (!TARGET_CMOVE)
10055 return true;
10056 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10057 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10060 void
10061 ix86_expand_branch (enum rtx_code code, rtx label)
10063 rtx tmp;
10065 switch (GET_MODE (ix86_compare_op0))
10067 case QImode:
10068 case HImode:
10069 case SImode:
10070 simple:
10071 tmp = ix86_expand_compare (code, NULL, NULL);
10072 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10073 gen_rtx_LABEL_REF (VOIDmode, label),
10074 pc_rtx);
10075 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10076 return;
10078 case SFmode:
10079 case DFmode:
10080 case XFmode:
10082 rtvec vec;
10083 int use_fcomi;
10084 enum rtx_code bypass_code, first_code, second_code;
10086 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10087 &ix86_compare_op1);
10089 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10091 /* Check whether we will use the natural sequence with one jump. If
10092 so, we can expand the jump early. Otherwise delay expansion by
10093 creating a compound insn so as not to confuse the optimizers. */
10094 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10095 && TARGET_CMOVE)
10097 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10098 gen_rtx_LABEL_REF (VOIDmode, label),
10099 pc_rtx, NULL_RTX, NULL_RTX);
10101 else
10103 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10104 ix86_compare_op0, ix86_compare_op1);
10105 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10106 gen_rtx_LABEL_REF (VOIDmode, label),
10107 pc_rtx);
10108 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10110 use_fcomi = ix86_use_fcomi_compare (code);
10111 vec = rtvec_alloc (3 + !use_fcomi);
10112 RTVEC_ELT (vec, 0) = tmp;
10113 RTVEC_ELT (vec, 1)
10114 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10115 RTVEC_ELT (vec, 2)
10116 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10117 if (! use_fcomi)
10118 RTVEC_ELT (vec, 3)
10119 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10121 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10123 return;
10126 case DImode:
10127 if (TARGET_64BIT)
10128 goto simple;
10129 case TImode:
10130 /* Expand DImode branch into multiple compare+branch. */
10132 rtx lo[2], hi[2], label2;
10133 enum rtx_code code1, code2, code3;
10134 enum machine_mode submode;
10136 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10138 tmp = ix86_compare_op0;
10139 ix86_compare_op0 = ix86_compare_op1;
10140 ix86_compare_op1 = tmp;
10141 code = swap_condition (code);
10143 if (GET_MODE (ix86_compare_op0) == DImode)
10145 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10146 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10147 submode = SImode;
10149 else
10151 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10152 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10153 submode = DImode;
10156 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10157 avoid two branches. This costs one extra insn, so disable when
10158 optimizing for size. */
10160 if ((code == EQ || code == NE)
10161 && (!optimize_size
10162 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10164 rtx xor0, xor1;
10166 xor1 = hi[0];
10167 if (hi[1] != const0_rtx)
10168 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10169 NULL_RTX, 0, OPTAB_WIDEN);
10171 xor0 = lo[0];
10172 if (lo[1] != const0_rtx)
10173 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10174 NULL_RTX, 0, OPTAB_WIDEN);
10176 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10177 NULL_RTX, 0, OPTAB_WIDEN);
10179 ix86_compare_op0 = tmp;
10180 ix86_compare_op1 = const0_rtx;
10181 ix86_expand_branch (code, label);
10182 return;
10185 /* Otherwise, if we are doing less-than or greater-or-equal-than,
10186 op1 is a constant and the low word is zero, then we can just
10187 examine the high word. */
10189 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10190 switch (code)
10192 case LT: case LTU: case GE: case GEU:
10193 ix86_compare_op0 = hi[0];
10194 ix86_compare_op1 = hi[1];
10195 ix86_expand_branch (code, label);
10196 return;
10197 default:
10198 break;
10201 /* Otherwise, we need two or three jumps. */
10203 label2 = gen_label_rtx ();
10205 code1 = code;
10206 code2 = swap_condition (code);
10207 code3 = unsigned_condition (code);
10209 switch (code)
10211 case LT: case GT: case LTU: case GTU:
10212 break;
10214 case LE: code1 = LT; code2 = GT; break;
10215 case GE: code1 = GT; code2 = LT; break;
10216 case LEU: code1 = LTU; code2 = GTU; break;
10217 case GEU: code1 = GTU; code2 = LTU; break;
10219 case EQ: code1 = UNKNOWN; code2 = NE; break;
10220 case NE: code2 = UNKNOWN; break;
10222 default:
10223 gcc_unreachable ();
10227 * a < b =>
10228 * if (hi(a) < hi(b)) goto true;
10229 * if (hi(a) > hi(b)) goto false;
10230 * if (lo(a) < lo(b)) goto true;
10231 * false:
10234 ix86_compare_op0 = hi[0];
10235 ix86_compare_op1 = hi[1];
10237 if (code1 != UNKNOWN)
10238 ix86_expand_branch (code1, label);
10239 if (code2 != UNKNOWN)
10240 ix86_expand_branch (code2, label2);
10242 ix86_compare_op0 = lo[0];
10243 ix86_compare_op1 = lo[1];
10244 ix86_expand_branch (code3, label);
10246 if (code2 != UNKNOWN)
10247 emit_label (label2);
10248 return;
10251 default:
10252 gcc_unreachable ();
10256 /* Split branch based on floating point condition. */
10257 void
10258 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10259 rtx target1, rtx target2, rtx tmp, rtx pushed)
10261 rtx second, bypass;
10262 rtx label = NULL_RTX;
10263 rtx condition;
10264 int bypass_probability = -1, second_probability = -1, probability = -1;
10265 rtx i;
10267 if (target2 != pc_rtx)
10269 rtx tmp = target2;
10270 code = reverse_condition_maybe_unordered (code);
10271 target2 = target1;
10272 target1 = tmp;
10275 condition = ix86_expand_fp_compare (code, op1, op2,
10276 tmp, &second, &bypass);
10278 /* Remove pushed operand from stack. */
10279 if (pushed)
10280 ix86_free_from_memory (GET_MODE (pushed));
10282 if (split_branch_probability >= 0)
10284 /* Distribute the probabilities across the jumps.
10285 Assume that BYPASS and SECOND always test
10286 for UNORDERED. */
10287 probability = split_branch_probability;
10289 /* A value of 1 is low enough that there is no need for the probability
10290 to be updated. Later we may run some experiments and see
10291 if unordered values are more frequent in practice. */
10292 if (bypass)
10293 bypass_probability = 1;
10294 if (second)
10295 second_probability = 1;
10297 if (bypass != NULL_RTX)
10299 label = gen_label_rtx ();
10300 i = emit_jump_insn (gen_rtx_SET
10301 (VOIDmode, pc_rtx,
10302 gen_rtx_IF_THEN_ELSE (VOIDmode,
10303 bypass,
10304 gen_rtx_LABEL_REF (VOIDmode,
10305 label),
10306 pc_rtx)));
10307 if (bypass_probability >= 0)
10308 REG_NOTES (i)
10309 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10310 GEN_INT (bypass_probability),
10311 REG_NOTES (i));
10313 i = emit_jump_insn (gen_rtx_SET
10314 (VOIDmode, pc_rtx,
10315 gen_rtx_IF_THEN_ELSE (VOIDmode,
10316 condition, target1, target2)));
10317 if (probability >= 0)
10318 REG_NOTES (i)
10319 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10320 GEN_INT (probability),
10321 REG_NOTES (i));
10322 if (second != NULL_RTX)
10324 i = emit_jump_insn (gen_rtx_SET
10325 (VOIDmode, pc_rtx,
10326 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10327 target2)));
10328 if (second_probability >= 0)
10329 REG_NOTES (i)
10330 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10331 GEN_INT (second_probability),
10332 REG_NOTES (i));
10334 if (label != NULL_RTX)
10335 emit_label (label);
10338 int
10339 ix86_expand_setcc (enum rtx_code code, rtx dest)
10341 rtx ret, tmp, tmpreg, equiv;
10342 rtx second_test, bypass_test;
10344 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10345 return 0; /* FAIL */
10347 gcc_assert (GET_MODE (dest) == QImode);
10349 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10350 PUT_MODE (ret, QImode);
10352 tmp = dest;
10353 tmpreg = dest;
10355 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10356 if (bypass_test || second_test)
10358 rtx test = second_test;
10359 int bypass = 0;
10360 rtx tmp2 = gen_reg_rtx (QImode);
10361 if (bypass_test)
10363 gcc_assert (!second_test);
10364 test = bypass_test;
10365 bypass = 1;
10366 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10368 PUT_MODE (test, QImode);
10369 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10371 if (bypass)
10372 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10373 else
10374 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10377 /* Attach a REG_EQUAL note describing the comparison result. */
10378 if (ix86_compare_op0 && ix86_compare_op1)
10380 equiv = simplify_gen_relational (code, QImode,
10381 GET_MODE (ix86_compare_op0),
10382 ix86_compare_op0, ix86_compare_op1);
10383 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10386 return 1; /* DONE */
10389 /* Expand comparison setting or clearing carry flag. Return true when
10390 successful and set pop for the operation. */
10391 static bool
10392 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10394 enum machine_mode mode =
10395 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10397 /* Do not handle DImode compares that go through the special path. Also we can't
10398 deal with FP compares yet. This is possible to add. */
10399 if (mode == (TARGET_64BIT ? TImode : DImode))
10400 return false;
10401 if (FLOAT_MODE_P (mode))
10403 rtx second_test = NULL, bypass_test = NULL;
10404 rtx compare_op, compare_seq;
10406 /* Shortcut: following common codes never translate into carry flag compares. */
10407 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10408 || code == ORDERED || code == UNORDERED)
10409 return false;
10411 /* These comparisons require zero flag; swap operands so they won't. */
10412 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10413 && !TARGET_IEEE_FP)
10415 rtx tmp = op0;
10416 op0 = op1;
10417 op1 = tmp;
10418 code = swap_condition (code);
10421 /* Try to expand the comparison and verify that we end up with a carry flag
10422 based comparison. This fails to be true only when we decide to expand the
10423 comparison using arithmetic, which is not a common scenario. */
10424 start_sequence ();
10425 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10426 &second_test, &bypass_test);
10427 compare_seq = get_insns ();
10428 end_sequence ();
10430 if (second_test || bypass_test)
10431 return false;
10432 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10433 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10434 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10435 else
10436 code = GET_CODE (compare_op);
10437 if (code != LTU && code != GEU)
10438 return false;
10439 emit_insn (compare_seq);
10440 *pop = compare_op;
10441 return true;
10443 if (!INTEGRAL_MODE_P (mode))
10444 return false;
10445 switch (code)
10447 case LTU:
10448 case GEU:
10449 break;
10451 /* Convert a==0 into (unsigned)a<1. */
10452 case EQ:
10453 case NE:
10454 if (op1 != const0_rtx)
10455 return false;
10456 op1 = const1_rtx;
10457 code = (code == EQ ? LTU : GEU);
10458 break;
10460 /* Convert a>b into b<a or a>=b-1. */
10461 case GTU:
10462 case LEU:
10463 if (GET_CODE (op1) == CONST_INT)
10465 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10466 /* Bail out on overflow. We could still swap the operands, but that
10467 would force loading the constant into a register. */
10468 if (op1 == const0_rtx
10469 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10470 return false;
10471 code = (code == GTU ? GEU : LTU);
10473 else
10475 rtx tmp = op1;
10476 op1 = op0;
10477 op0 = tmp;
10478 code = (code == GTU ? LTU : GEU);
10480 break;
10482 /* Convert a>=0 into (unsigned)a<0x80000000. */
10483 case LT:
10484 case GE:
10485 if (mode == DImode || op1 != const0_rtx)
10486 return false;
10487 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10488 code = (code == LT ? GEU : LTU);
10489 break;
10490 case LE:
10491 case GT:
10492 if (mode == DImode || op1 != constm1_rtx)
10493 return false;
10494 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10495 code = (code == LE ? GEU : LTU);
10496 break;
10498 default:
10499 return false;
10501 /* Swapping operands may cause the constant to appear as the first operand. */
10502 if (!nonimmediate_operand (op0, VOIDmode))
10504 if (no_new_pseudos)
10505 return false;
10506 op0 = force_reg (mode, op0);
10508 ix86_compare_op0 = op0;
10509 ix86_compare_op1 = op1;
10510 *pop = ix86_expand_compare (code, NULL, NULL);
10511 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10512 return true;
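/* For example, a == 0 is rewritten above as (unsigned) a < 1, and
   a >u 42 (with a constant operand) as (unsigned) a >= 43, so the
   returned test is always LTU or GEU and can drive the sbb/adc based
   sequences in the callers.  */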
10515 int
10516 ix86_expand_int_movcc (rtx operands[])
10518 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10519 rtx compare_seq, compare_op;
10520 rtx second_test, bypass_test;
10521 enum machine_mode mode = GET_MODE (operands[0]);
10522 bool sign_bit_compare_p = false;
10524 start_sequence ();
10525 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10526 compare_seq = get_insns ();
10527 end_sequence ();
10529 compare_code = GET_CODE (compare_op);
10531 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10532 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10533 sign_bit_compare_p = true;
10535 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10536 HImode insns, we'd be swallowed in word prefix ops. */
10538 if ((mode != HImode || TARGET_FAST_PREFIX)
10539 && (mode != (TARGET_64BIT ? TImode : DImode))
10540 && GET_CODE (operands[2]) == CONST_INT
10541 && GET_CODE (operands[3]) == CONST_INT)
10543 rtx out = operands[0];
10544 HOST_WIDE_INT ct = INTVAL (operands[2]);
10545 HOST_WIDE_INT cf = INTVAL (operands[3]);
10546 HOST_WIDE_INT diff;
10548 diff = ct - cf;
10549 /* Sign bit compares are better done using shifts than by using
10550 sbb. */
10551 if (sign_bit_compare_p
10552 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10553 ix86_compare_op1, &compare_op))
10555 /* Detect overlap between destination and compare sources. */
10556 rtx tmp = out;
10558 if (!sign_bit_compare_p)
10560 bool fpcmp = false;
10562 compare_code = GET_CODE (compare_op);
10564 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10565 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10567 fpcmp = true;
10568 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10571 /* To simplify rest of code, restrict to the GEU case. */
10572 if (compare_code == LTU)
10574 HOST_WIDE_INT tmp = ct;
10575 ct = cf;
10576 cf = tmp;
10577 compare_code = reverse_condition (compare_code);
10578 code = reverse_condition (code);
10580 else
10582 if (fpcmp)
10583 PUT_CODE (compare_op,
10584 reverse_condition_maybe_unordered
10585 (GET_CODE (compare_op)));
10586 else
10587 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10589 diff = ct - cf;
10591 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10592 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10593 tmp = gen_reg_rtx (mode);
10595 if (mode == DImode)
10596 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10597 else
10598 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10600 else
10602 if (code == GT || code == GE)
10603 code = reverse_condition (code);
10604 else
10606 HOST_WIDE_INT tmp = ct;
10607 ct = cf;
10608 cf = tmp;
10609 diff = ct - cf;
10611 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10612 ix86_compare_op1, VOIDmode, 0, -1);
10615 if (diff == 1)
10618 * cmpl op0,op1
10619 * sbbl dest,dest
10620 * [addl dest, ct]
10622 * Size 5 - 8.
10624 if (ct)
10625 tmp = expand_simple_binop (mode, PLUS,
10626 tmp, GEN_INT (ct),
10627 copy_rtx (tmp), 1, OPTAB_DIRECT);
10629 else if (cf == -1)
10632 * cmpl op0,op1
10633 * sbbl dest,dest
10634 * orl $ct, dest
10636 * Size 8.
10638 tmp = expand_simple_binop (mode, IOR,
10639 tmp, GEN_INT (ct),
10640 copy_rtx (tmp), 1, OPTAB_DIRECT);
10642 else if (diff == -1 && ct)
10645 * cmpl op0,op1
10646 * sbbl dest,dest
10647 * notl dest
10648 * [addl dest, cf]
10650 * Size 8 - 11.
10652 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10653 if (cf)
10654 tmp = expand_simple_binop (mode, PLUS,
10655 copy_rtx (tmp), GEN_INT (cf),
10656 copy_rtx (tmp), 1, OPTAB_DIRECT);
10658 else
10661 * cmpl op0,op1
10662 * sbbl dest,dest
10663 * [notl dest]
10664 * andl cf - ct, dest
10665 * [addl dest, ct]
10667 * Size 8 - 11.
10670 if (cf == 0)
10672 cf = ct;
10673 ct = 0;
10674 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10677 tmp = expand_simple_binop (mode, AND,
10678 copy_rtx (tmp),
10679 gen_int_mode (cf - ct, mode),
10680 copy_rtx (tmp), 1, OPTAB_DIRECT);
10681 if (ct)
10682 tmp = expand_simple_binop (mode, PLUS,
10683 copy_rtx (tmp), GEN_INT (ct),
10684 copy_rtx (tmp), 1, OPTAB_DIRECT);
10687 if (!rtx_equal_p (tmp, out))
10688 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10690 return 1; /* DONE */
10693 if (diff < 0)
10695 HOST_WIDE_INT tmp;
10696 tmp = ct, ct = cf, cf = tmp;
10697 diff = -diff;
10698 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10700 /* We may be reversing an unordered compare to a normal compare, which
10701 is not valid in general (we may convert a non-trapping condition
10702 to a trapping one); however, on i386 we currently emit all
10703 comparisons unordered. */
10704 compare_code = reverse_condition_maybe_unordered (compare_code);
10705 code = reverse_condition_maybe_unordered (code);
10707 else
10709 compare_code = reverse_condition (compare_code);
10710 code = reverse_condition (code);
10714 compare_code = UNKNOWN;
10715 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10716 && GET_CODE (ix86_compare_op1) == CONST_INT)
10718 if (ix86_compare_op1 == const0_rtx
10719 && (code == LT || code == GE))
10720 compare_code = code;
10721 else if (ix86_compare_op1 == constm1_rtx)
10723 if (code == LE)
10724 compare_code = LT;
10725 else if (code == GT)
10726 compare_code = GE;
10730 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10731 if (compare_code != UNKNOWN
10732 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10733 && (cf == -1 || ct == -1))
10735 /* If lea code below could be used, only optimize
10736 if it results in a 2 insn sequence. */
10738 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10739 || diff == 3 || diff == 5 || diff == 9)
10740 || (compare_code == LT && ct == -1)
10741 || (compare_code == GE && cf == -1))
10744 * notl op1 (if necessary)
10745 * sarl $31, op1
10746 * orl cf, op1
10748 if (ct != -1)
10750 cf = ct;
10751 ct = -1;
10752 code = reverse_condition (code);
10755 out = emit_store_flag (out, code, ix86_compare_op0,
10756 ix86_compare_op1, VOIDmode, 0, -1);
10758 out = expand_simple_binop (mode, IOR,
10759 out, GEN_INT (cf),
10760 out, 1, OPTAB_DIRECT);
10761 if (out != operands[0])
10762 emit_move_insn (operands[0], out);
10764 return 1; /* DONE */
10769 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10770 || diff == 3 || diff == 5 || diff == 9)
10771 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10772 && (mode != DImode
10773 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10776 * xorl dest,dest
10777 * cmpl op1,op2
10778 * setcc dest
10779 * lea cf(dest*(ct-cf)),dest
10781 * Size 14.
10783 * This also catches the degenerate setcc-only case.
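 *
 * Illustrative instance: dest = (a < b) ? 7 : 3 gives ct = 7, cf = 3,
 * diff = 4, so the code is
 *    setcc dest
 *    lea 3(,dest,4), dest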
10786 rtx tmp;
10787 int nops;
10789 out = emit_store_flag (out, code, ix86_compare_op0,
10790 ix86_compare_op1, VOIDmode, 0, 1);
10792 nops = 0;
10793 /* On x86_64 the lea instruction operates on Pmode, so we need
10794 to get the arithmetic done in the proper mode to match. */
10795 if (diff == 1)
10796 tmp = copy_rtx (out);
10797 else
10799 rtx out1;
10800 out1 = copy_rtx (out);
10801 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10802 nops++;
10803 if (diff & 1)
10805 tmp = gen_rtx_PLUS (mode, tmp, out1);
10806 nops++;
10809 if (cf != 0)
10811 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10812 nops++;
10814 if (!rtx_equal_p (tmp, out))
10816 if (nops == 1)
10817 out = force_operand (tmp, copy_rtx (out));
10818 else
10819 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10821 if (!rtx_equal_p (out, operands[0]))
10822 emit_move_insn (operands[0], copy_rtx (out));
10824 return 1; /* DONE */
10828 * General case: Jumpful:
10829 * xorl dest,dest cmpl op1, op2
10830 * cmpl op1, op2 movl ct, dest
10831 * setcc dest jcc 1f
10832 * decl dest movl cf, dest
10833 * andl (cf-ct),dest 1:
10834 * addl ct,dest
10836 * Size 20. Size 14.
10838 * This is reasonably steep, but branch mispredict costs are
10839 * high on modern cpus, so consider failing only if optimizing
10840 * for space.
10843 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10844 && BRANCH_COST >= 2)
10846 if (cf == 0)
10848 cf = ct;
10849 ct = 0;
10850 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10851 /* We may be reversing an unordered compare to a normal compare,
10852 which is not valid in general (we may convert a non-trapping
10853 condition to a trapping one); however, on i386 we currently
10854 emit all comparisons unordered. */
10855 code = reverse_condition_maybe_unordered (code);
10856 else
10858 code = reverse_condition (code);
10859 if (compare_code != UNKNOWN)
10860 compare_code = reverse_condition (compare_code);
10864 if (compare_code != UNKNOWN)
10866 /* notl op1 (if needed)
10867 sarl $31, op1
10868 andl (cf-ct), op1
10869 addl ct, op1
10871 For x < 0 (resp. x <= -1) there will be no notl,
10872 so if possible swap the constants to get rid of the
10873 complement.
10874 True/false will be -1/0 while code below (store flag
10875 followed by decrement) is 0/-1, so the constants need
10876 to be exchanged once more. */
10878 if (compare_code == GE || !cf)
10880 code = reverse_condition (code);
10881 compare_code = LT;
10883 else
10885 HOST_WIDE_INT tmp = cf;
10886 cf = ct;
10887 ct = tmp;
10890 out = emit_store_flag (out, code, ix86_compare_op0,
10891 ix86_compare_op1, VOIDmode, 0, -1);
10893 else
10895 out = emit_store_flag (out, code, ix86_compare_op0,
10896 ix86_compare_op1, VOIDmode, 0, 1);
10898 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10899 copy_rtx (out), 1, OPTAB_DIRECT);
10902 out = expand_simple_binop (mode, AND, copy_rtx (out),
10903 gen_int_mode (cf - ct, mode),
10904 copy_rtx (out), 1, OPTAB_DIRECT);
10905 if (ct)
10906 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10907 copy_rtx (out), 1, OPTAB_DIRECT);
10908 if (!rtx_equal_p (out, operands[0]))
10909 emit_move_insn (operands[0], copy_rtx (out));
10911 return 1; /* DONE */
10915 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10917 /* Try a few things more with specific constants and a variable. */
10919 optab op;
10920 rtx var, orig_out, out, tmp;
10922 if (BRANCH_COST <= 2)
10923 return 0; /* FAIL */
10925 /* If one of the two operands is an interesting constant, load a
10926 constant with the above and mask it in with a logical operation. */
10928 if (GET_CODE (operands[2]) == CONST_INT)
10930 var = operands[3];
10931 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10932 operands[3] = constm1_rtx, op = and_optab;
10933 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10934 operands[3] = const0_rtx, op = ior_optab;
10935 else
10936 return 0; /* FAIL */
10938 else if (GET_CODE (operands[3]) == CONST_INT)
10940 var = operands[2];
10941 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10942 operands[2] = constm1_rtx, op = and_optab;
10943 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10944 operands[2] = const0_rtx, op = ior_optab;
10945 else
10946 return 0; /* FAIL */
10948 else
10949 return 0; /* FAIL */
10951 orig_out = operands[0];
10952 tmp = gen_reg_rtx (mode);
10953 operands[0] = tmp;
10955 /* Recurse to get the constant loaded. */
10956 if (ix86_expand_int_movcc (operands) == 0)
10957 return 0; /* FAIL */
10959 /* Mask in the interesting variable. */
10960 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10961 OPTAB_WIDEN);
10962 if (!rtx_equal_p (out, orig_out))
10963 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10965 return 1; /* DONE */
10969 * For comparison with above,
10971 * movl cf,dest
10972 * movl ct,tmp
10973 * cmpl op1,op2
10974 * cmovcc tmp,dest
10976 * Size 15.
10979 if (! nonimmediate_operand (operands[2], mode))
10980 operands[2] = force_reg (mode, operands[2]);
10981 if (! nonimmediate_operand (operands[3], mode))
10982 operands[3] = force_reg (mode, operands[3]);
10984 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10986 rtx tmp = gen_reg_rtx (mode);
10987 emit_move_insn (tmp, operands[3]);
10988 operands[3] = tmp;
10990 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10992 rtx tmp = gen_reg_rtx (mode);
10993 emit_move_insn (tmp, operands[2]);
10994 operands[2] = tmp;
10997 if (! register_operand (operands[2], VOIDmode)
10998 && (mode == QImode
10999 || ! register_operand (operands[3], VOIDmode)))
11000 operands[2] = force_reg (mode, operands[2]);
11002 if (mode == QImode
11003 && ! register_operand (operands[3], VOIDmode))
11004 operands[3] = force_reg (mode, operands[3]);
11006 emit_insn (compare_seq);
11007 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11008 gen_rtx_IF_THEN_ELSE (mode,
11009 compare_op, operands[2],
11010 operands[3])));
11011 if (bypass_test)
11012 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11013 gen_rtx_IF_THEN_ELSE (mode,
11014 bypass_test,
11015 copy_rtx (operands[3]),
11016 copy_rtx (operands[0]))));
11017 if (second_test)
11018 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11019 gen_rtx_IF_THEN_ELSE (mode,
11020 second_test,
11021 copy_rtx (operands[2]),
11022 copy_rtx (operands[0]))));
11024 return 1; /* DONE */
11027 /* Swap, force into registers, or otherwise massage the two operands
11028 to an sse comparison with a mask result. Thus we differ a bit from
11029 ix86_prepare_fp_compare_args which expects to produce a flags result.
11031 The DEST operand exists to help determine whether to commute commutative
11032 operators. The POP0/POP1 operands are updated in place. The new
11033 comparison code is returned, or UNKNOWN if not implementable. */
11035 static enum rtx_code
11036 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11037 rtx *pop0, rtx *pop1)
11039 rtx tmp;
11041 switch (code)
11043 case LTGT:
11044 case UNEQ:
11045 /* We have no LTGT as an operator. We could implement it with
11046 NE & ORDERED, but this requires an extra temporary. It's
11047 not clear that it's worth it. */
11048 return UNKNOWN;
11050 case LT:
11051 case LE:
11052 case UNGT:
11053 case UNGE:
11054 /* These are supported directly. */
11055 break;
11057 case EQ:
11058 case NE:
11059 case UNORDERED:
11060 case ORDERED:
11061 /* For commutative operators, try to canonicalize the destination
11062 operand to be first in the comparison - this helps reload to
11063 avoid extra moves. */
11064 if (!dest || !rtx_equal_p (dest, *pop1))
11065 break;
11066 /* FALLTHRU */
11068 case GE:
11069 case GT:
11070 case UNLE:
11071 case UNLT:
11072 /* These are not supported directly. Swap the comparison operands
11073 to transform into something that is supported. */
11074 tmp = *pop0;
11075 *pop0 = *pop1;
11076 *pop1 = tmp;
11077 code = swap_condition (code);
11078 break;
11080 default:
11081 gcc_unreachable ();
11084 return code;
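/* E.g. a GT comparison is rewritten as LT with the operands swapped,
   since only the LT/LE/UNGT/UNGE forms (plus the commutative
   EQ/NE/ORDERED/UNORDERED ones) are handled directly by the SSE
   compare patterns used here.  */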
11087 /* Detect conditional moves that exactly match min/max operational
11088 semantics. Note that this is IEEE safe, as long as we don't
11089 interchange the operands.
11091 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11092 and TRUE if the operation is successful and instructions are emitted. */
11094 static bool
11095 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11096 rtx cmp_op1, rtx if_true, rtx if_false)
11098 enum machine_mode mode;
11099 bool is_min;
11100 rtx tmp;
11102 if (code == LT)
11104 else if (code == UNGE)
11106 tmp = if_true;
11107 if_true = if_false;
11108 if_false = tmp;
11110 else
11111 return false;
11113 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11114 is_min = true;
11115 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11116 is_min = false;
11117 else
11118 return false;
11120 mode = GET_MODE (dest);
11122 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11123 but MODE may be a vector mode and thus not appropriate. */
11124 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11126 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11127 rtvec v;
11129 if_true = force_reg (mode, if_true);
11130 v = gen_rtvec (2, if_true, if_false);
11131 tmp = gen_rtx_UNSPEC (mode, v, u);
11133 else
11135 code = is_min ? SMIN : SMAX;
11136 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11139 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11140 return true;
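/* For instance, dest = (a < b) ? a : b is recognized as a minimum and
   becomes SMIN (e.g. minss/minps), or the UNSPEC_IEEE_MIN form when
   NaNs or signed zeros must be honored and the operand order must be
   preserved.  */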
11143 /* Expand an sse vector comparison. Return the register with the result. */
11145 static rtx
11146 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11147 rtx op_true, rtx op_false)
11149 enum machine_mode mode = GET_MODE (dest);
11150 rtx x;
11152 cmp_op0 = force_reg (mode, cmp_op0);
11153 if (!nonimmediate_operand (cmp_op1, mode))
11154 cmp_op1 = force_reg (mode, cmp_op1);
11156 if (optimize
11157 || reg_overlap_mentioned_p (dest, op_true)
11158 || reg_overlap_mentioned_p (dest, op_false))
11159 dest = gen_reg_rtx (mode);
11161 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11162 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11164 return dest;
11167 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11168 operations. This is used for both scalar and vector conditional moves. */
11170 static void
11171 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11173 enum machine_mode mode = GET_MODE (dest);
11174 rtx t2, t3, x;
11176 if (op_false == CONST0_RTX (mode))
11178 op_true = force_reg (mode, op_true);
11179 x = gen_rtx_AND (mode, cmp, op_true);
11180 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11182 else if (op_true == CONST0_RTX (mode))
11184 op_false = force_reg (mode, op_false);
11185 x = gen_rtx_NOT (mode, cmp);
11186 x = gen_rtx_AND (mode, x, op_false);
11187 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11189 else
11191 op_true = force_reg (mode, op_true);
11192 op_false = force_reg (mode, op_false);
11194 t2 = gen_reg_rtx (mode);
11195 if (optimize)
11196 t3 = gen_reg_rtx (mode);
11197 else
11198 t3 = dest;
11200 x = gen_rtx_AND (mode, op_true, cmp);
11201 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11203 x = gen_rtx_NOT (mode, cmp);
11204 x = gen_rtx_AND (mode, x, op_false);
11205 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11207 x = gen_rtx_IOR (mode, t3, t2);
11208 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
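/* In other words, the general case computes
      dest = (cmp & op_true) | (~cmp & op_false)
   with and/andnot/or, relying on CMP being an all-ones or all-zeros
   mask (per element) as produced by ix86_expand_sse_cmp.  */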
11212 /* Expand a floating-point conditional move. Return true if successful. */
11214 int
11215 ix86_expand_fp_movcc (rtx operands[])
11217 enum machine_mode mode = GET_MODE (operands[0]);
11218 enum rtx_code code = GET_CODE (operands[1]);
11219 rtx tmp, compare_op, second_test, bypass_test;
11221 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11223 enum machine_mode cmode;
11225 /* Since we've no cmove for sse registers, don't force bad register
11226 allocation just to gain access to it. Deny movcc when the
11227 comparison mode doesn't match the move mode. */
11228 cmode = GET_MODE (ix86_compare_op0);
11229 if (cmode == VOIDmode)
11230 cmode = GET_MODE (ix86_compare_op1);
11231 if (cmode != mode)
11232 return 0;
11234 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11235 &ix86_compare_op0,
11236 &ix86_compare_op1);
11237 if (code == UNKNOWN)
11238 return 0;
11240 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11241 ix86_compare_op1, operands[2],
11242 operands[3]))
11243 return 1;
11245 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11246 ix86_compare_op1, operands[2], operands[3]);
11247 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11248 return 1;
11251 /* The floating point conditional move instructions don't directly
11252 support conditions resulting from a signed integer comparison. */
11254 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11256 /* The floating point conditional move instructions don't directly
11257 support signed integer comparisons. */
11259 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11261 gcc_assert (!second_test && !bypass_test);
11262 tmp = gen_reg_rtx (QImode);
11263 ix86_expand_setcc (code, tmp);
11264 code = NE;
11265 ix86_compare_op0 = tmp;
11266 ix86_compare_op1 = const0_rtx;
11267 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11269 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11271 tmp = gen_reg_rtx (mode);
11272 emit_move_insn (tmp, operands[3]);
11273 operands[3] = tmp;
11275 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11277 tmp = gen_reg_rtx (mode);
11278 emit_move_insn (tmp, operands[2]);
11279 operands[2] = tmp;
11282 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11283 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11284 operands[2], operands[3])));
11285 if (bypass_test)
11286 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11287 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11288 operands[3], operands[0])));
11289 if (second_test)
11290 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11291 gen_rtx_IF_THEN_ELSE (mode, second_test,
11292 operands[2], operands[0])));
11294 return 1;
11297 /* Expand a floating-point vector conditional move; a vcond operation
11298 rather than a movcc operation. */
11300 bool
11301 ix86_expand_fp_vcond (rtx operands[])
11303 enum rtx_code code = GET_CODE (operands[3]);
11304 rtx cmp;
11306 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11307 &operands[4], &operands[5]);
11308 if (code == UNKNOWN)
11309 return false;
11311 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11312 operands[5], operands[1], operands[2]))
11313 return true;
11315 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11316 operands[1], operands[2]);
11317 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11318 return true;
11321 /* Expand a signed integral vector conditional move. */
11323 bool
11324 ix86_expand_int_vcond (rtx operands[])
11326 enum machine_mode mode = GET_MODE (operands[0]);
11327 enum rtx_code code = GET_CODE (operands[3]);
11328 bool negate = false;
11329 rtx x, cop0, cop1;
11331 cop0 = operands[4];
11332 cop1 = operands[5];
11334 /* Canonicalize the comparison to EQ, GT, GTU. */
11335 switch (code)
11337 case EQ:
11338 case GT:
11339 case GTU:
11340 break;
11342 case NE:
11343 case LE:
11344 case LEU:
11345 code = reverse_condition (code);
11346 negate = true;
11347 break;
11349 case GE:
11350 case GEU:
11351 code = reverse_condition (code);
11352 negate = true;
11353 /* FALLTHRU */
11355 case LT:
11356 case LTU:
11357 code = swap_condition (code);
11358 x = cop0, cop0 = cop1, cop1 = x;
11359 break;
11361 default:
11362 gcc_unreachable ();
11365 /* Unsigned parallel compare is not supported by the hardware. Play some
11366 tricks to turn this into a signed comparison against 0. */
11367 if (code == GTU)
11369 switch (mode)
11371 case V4SImode:
11373 rtx t1, t2, mask;
11375 /* Perform a parallel modulo subtraction. */
11376 t1 = gen_reg_rtx (mode);
11377 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11379 /* Extract the original sign bit of op0. */
11380 mask = GEN_INT (-0x80000000);
11381 mask = gen_rtx_CONST_VECTOR (mode,
11382 gen_rtvec (4, mask, mask, mask, mask));
11383 mask = force_reg (mode, mask);
11384 t2 = gen_reg_rtx (mode);
11385 emit_insn (gen_andv4si3 (t2, cop0, mask));
11387 /* XOR it back into the result of the subtraction. This results
11388 in the sign bit set iff we saw unsigned underflow. */
11389 x = gen_reg_rtx (mode);
11390 emit_insn (gen_xorv4si3 (x, t1, t2));
11392 code = GT;
11394 break;
11396 case V16QImode:
11397 case V8HImode:
11398 /* Perform a parallel unsigned saturating subtraction. */
11399 x = gen_reg_rtx (mode);
11400 emit_insn (gen_rtx_SET (VOIDmode, x,
11401 gen_rtx_US_MINUS (mode, cop0, cop1)));
11403 code = EQ;
11404 negate = !negate;
11405 break;
11407 default:
11408 gcc_unreachable ();
11411 cop0 = x;
11412 cop1 = CONST0_RTX (mode);
11415 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11416 operands[1+negate], operands[2-negate]);
11418 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11419 operands[2-negate]);
11420 return true;
11423 /* Expand conditional increment or decrement using adc/sbb instructions.
11424 The default case using setcc followed by the conditional move can be
11425 done by generic code. */
11426 int
11427 ix86_expand_int_addcc (rtx operands[])
11429 enum rtx_code code = GET_CODE (operands[1]);
11430 rtx compare_op;
11431 rtx val = const0_rtx;
11432 bool fpcmp = false;
11433 enum machine_mode mode = GET_MODE (operands[0]);
11435 if (operands[3] != const1_rtx
11436 && operands[3] != constm1_rtx)
11437 return 0;
11438 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11439 ix86_compare_op1, &compare_op))
11440 return 0;
11441 code = GET_CODE (compare_op);
11443 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11444 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11446 fpcmp = true;
11447 code = ix86_fp_compare_code_to_integer (code);
11450 if (code != LTU)
11452 val = constm1_rtx;
11453 if (fpcmp)
11454 PUT_CODE (compare_op,
11455 reverse_condition_maybe_unordered
11456 (GET_CODE (compare_op)));
11457 else
11458 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11460 PUT_MODE (compare_op, mode);
11462 /* Construct either adc or sbb insn. */
11463 if ((code == LTU) == (operands[3] == constm1_rtx))
11465 switch (GET_MODE (operands[0]))
11467 case QImode:
11468 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11469 break;
11470 case HImode:
11471 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11472 break;
11473 case SImode:
11474 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11475 break;
11476 case DImode:
11477 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11478 break;
11479 default:
11480 gcc_unreachable ();
11483 else
11485 switch (GET_MODE (operands[0]))
11487 case QImode:
11488 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11489 break;
11490 case HImode:
11491 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11492 break;
11493 case SImode:
11494 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11495 break;
11496 case DImode:
11497 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11498 break;
11499 default:
11500 gcc_unreachable ();
11503 return 1; /* DONE */
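/* For example, x = x + (a <u b) emits the compare of a and b followed by
   adc $0, x, adding the carry flag directly; the decrement and the
   constm1/sbb variants are handled symmetrically above.  */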
11507 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11508 works for floating point parameters and non-offsettable memories.
11509 For pushes, it returns just stack offsets; the values will be saved
11510 in the right order. At most three parts are generated. */
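/* For instance, on !TARGET_64BIT an XFmode operand is split into three
   SImode parts and a DFmode operand into two, while on TARGET_64BIT an
   XFmode or TFmode operand becomes a DImode part plus an SImode or
   DImode upper part.  */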
11512 static int
11513 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11515 int size;
11517 if (!TARGET_64BIT)
11518 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11519 else
11520 size = (GET_MODE_SIZE (mode) + 4) / 8;
11522 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11523 gcc_assert (size >= 2 && size <= 3);
11525 /* Optimize constant pool reference to immediates. This is used by fp
11526 moves that force all constants to memory to allow combining. */
11527 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11529 rtx tmp = maybe_get_pool_constant (operand);
11530 if (tmp)
11531 operand = tmp;
11534 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11536 /* The only non-offsettable memories we handle are pushes. */
11537 int ok = push_operand (operand, VOIDmode);
11539 gcc_assert (ok);
11541 operand = copy_rtx (operand);
11542 PUT_MODE (operand, Pmode);
11543 parts[0] = parts[1] = parts[2] = operand;
11544 return size;
11547 if (GET_CODE (operand) == CONST_VECTOR)
11549 enum machine_mode imode = int_mode_for_mode (mode);
11550 /* Caution: if we looked through a constant pool memory above,
11551 the operand may actually have a different mode now. That's
11552 ok, since we want to pun this all the way back to an integer. */
11553 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11554 gcc_assert (operand != NULL);
11555 mode = imode;
11558 if (!TARGET_64BIT)
11560 if (mode == DImode)
11561 split_di (&operand, 1, &parts[0], &parts[1]);
11562 else
11564 if (REG_P (operand))
11566 gcc_assert (reload_completed);
11567 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11568 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11569 if (size == 3)
11570 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11572 else if (offsettable_memref_p (operand))
11574 operand = adjust_address (operand, SImode, 0);
11575 parts[0] = operand;
11576 parts[1] = adjust_address (operand, SImode, 4);
11577 if (size == 3)
11578 parts[2] = adjust_address (operand, SImode, 8);
11580 else if (GET_CODE (operand) == CONST_DOUBLE)
11582 REAL_VALUE_TYPE r;
11583 long l[4];
11585 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11586 switch (mode)
11588 case XFmode:
11589 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11590 parts[2] = gen_int_mode (l[2], SImode);
11591 break;
11592 case DFmode:
11593 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11594 break;
11595 default:
11596 gcc_unreachable ();
11598 parts[1] = gen_int_mode (l[1], SImode);
11599 parts[0] = gen_int_mode (l[0], SImode);
11601 else
11602 gcc_unreachable ();
11605 else
11607 if (mode == TImode)
11608 split_ti (&operand, 1, &parts[0], &parts[1]);
11609 if (mode == XFmode || mode == TFmode)
11611 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11612 if (REG_P (operand))
11614 gcc_assert (reload_completed);
11615 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11616 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11618 else if (offsettable_memref_p (operand))
11620 operand = adjust_address (operand, DImode, 0);
11621 parts[0] = operand;
11622 parts[1] = adjust_address (operand, upper_mode, 8);
11624 else if (GET_CODE (operand) == CONST_DOUBLE)
11626 REAL_VALUE_TYPE r;
11627 long l[4];
11629 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11630 real_to_target (l, &r, mode);
11632 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11633 if (HOST_BITS_PER_WIDE_INT >= 64)
11634 parts[0]
11635 = gen_int_mode
11636 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11637 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11638 DImode);
11639 else
11640 parts[0] = immed_double_const (l[0], l[1], DImode);
11642 if (upper_mode == SImode)
11643 parts[1] = gen_int_mode (l[2], SImode);
11644 else if (HOST_BITS_PER_WIDE_INT >= 64)
11645 parts[1]
11646 = gen_int_mode
11647 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11648 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11649 DImode);
11650 else
11651 parts[1] = immed_double_const (l[2], l[3], DImode);
11653 else
11654 gcc_unreachable ();
11658 return size;
11661 /* Emit insns to perform a move or push of DI, DF, and XF values.
11662 All required insns are emitted directly. Operands 2-4 hold the
11663 destination parts in the correct order; operands 5-7 hold the
11664 corresponding source parts. */
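/* As an illustrative example: moving a 64-bit value from memory addressed by
   %eax into the %eax/%edx pair on a 32-bit target must load the half that
   does not overwrite %eax first (or materialize the address with an lea),
   otherwise the address would be clobbered before the second load; the
   collision handling below exists for exactly this kind of case. */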
11666 void
11667 ix86_split_long_move (rtx operands[])
11669 rtx part[2][3];
11670 int nparts;
11671 int push = 0;
11672 int collisions = 0;
11673 enum machine_mode mode = GET_MODE (operands[0]);
11675 /* The DFmode expanders may ask us to move a double.
11676 For a 64-bit target this is a single move. By hiding that fact
11677 here we simplify the i386.md splitters. */
11678 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11680 /* Optimize constant pool references to immediates. This is used by
11681 fp moves, which force all constants to memory to allow combining. */
11683 if (GET_CODE (operands[1]) == MEM
11684 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11685 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11686 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11687 if (push_operand (operands[0], VOIDmode))
11689 operands[0] = copy_rtx (operands[0]);
11690 PUT_MODE (operands[0], Pmode);
11692 else
11693 operands[0] = gen_lowpart (DImode, operands[0]);
11694 operands[1] = gen_lowpart (DImode, operands[1]);
11695 emit_move_insn (operands[0], operands[1]);
11696 return;
11699 /* The only non-offsettable memory we handle is a push. */
11700 if (push_operand (operands[0], VOIDmode))
11701 push = 1;
11702 else
11703 gcc_assert (GET_CODE (operands[0]) != MEM
11704 || offsettable_memref_p (operands[0]));
11706 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11707 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11709 /* When emitting a push, take care with source operands on the stack. */
11710 if (push && GET_CODE (operands[1]) == MEM
11711 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11713 if (nparts == 3)
11714 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11715 XEXP (part[1][2], 0));
11716 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11717 XEXP (part[1][1], 0));
11720 /* We need to do the copy in the right order in case an address register
11721 of the source overlaps the destination. */
11722 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11724 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11725 collisions++;
11726 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11727 collisions++;
11728 if (nparts == 3
11729 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11730 collisions++;
11732 /* Collision in the middle part can be handled by reordering. */
11733 if (collisions == 1 && nparts == 3
11734 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11736 rtx tmp;
11737 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11738 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11741 /* If there are more collisions, we can't handle them by reordering.
11742 Do an lea to the last part and use only one colliding move. */
11743 else if (collisions > 1)
11745 rtx base;
11747 collisions = 1;
11749 base = part[0][nparts - 1];
11751 /* Handle the case when the last part isn't valid for lea.
11752 This happens in 64-bit mode when storing the 12-byte XFmode value. */
11753 if (GET_MODE (base) != Pmode)
11754 base = gen_rtx_REG (Pmode, REGNO (base));
11756 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11757 part[1][0] = replace_equiv_address (part[1][0], base);
11758 part[1][1] = replace_equiv_address (part[1][1],
11759 plus_constant (base, UNITS_PER_WORD));
11760 if (nparts == 3)
11761 part[1][2] = replace_equiv_address (part[1][2],
11762 plus_constant (base, 8));
11766 if (push)
11768 if (!TARGET_64BIT)
11770 if (nparts == 3)
11772 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11773 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11774 emit_move_insn (part[0][2], part[1][2]);
11777 else
11779 /* In 64-bit mode we don't have a 32-bit push available. If this is a
11780 register, that is OK - we will just use the larger counterpart. We also
11781 retype the memory - this comes from an attempt to avoid a REX prefix
11782 when moving the second half of a TFmode value. */
11783 if (GET_MODE (part[1][1]) == SImode)
11785 switch (GET_CODE (part[1][1]))
11787 case MEM:
11788 part[1][1] = adjust_address (part[1][1], DImode, 0);
11789 break;
11791 case REG:
11792 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11793 break;
11795 default:
11796 gcc_unreachable ();
11799 if (GET_MODE (part[1][0]) == SImode)
11800 part[1][0] = part[1][1];
11803 emit_move_insn (part[0][1], part[1][1]);
11804 emit_move_insn (part[0][0], part[1][0]);
11805 return;
11808 /* Choose the correct order so we do not overwrite the source before it is copied. */
11809 if ((REG_P (part[0][0])
11810 && REG_P (part[1][1])
11811 && (REGNO (part[0][0]) == REGNO (part[1][1])
11812 || (nparts == 3
11813 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11814 || (collisions > 0
11815 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11817 if (nparts == 3)
11819 operands[2] = part[0][2];
11820 operands[3] = part[0][1];
11821 operands[4] = part[0][0];
11822 operands[5] = part[1][2];
11823 operands[6] = part[1][1];
11824 operands[7] = part[1][0];
11826 else
11828 operands[2] = part[0][1];
11829 operands[3] = part[0][0];
11830 operands[5] = part[1][1];
11831 operands[6] = part[1][0];
11834 else
11836 if (nparts == 3)
11838 operands[2] = part[0][0];
11839 operands[3] = part[0][1];
11840 operands[4] = part[0][2];
11841 operands[5] = part[1][0];
11842 operands[6] = part[1][1];
11843 operands[7] = part[1][2];
11845 else
11847 operands[2] = part[0][0];
11848 operands[3] = part[0][1];
11849 operands[5] = part[1][0];
11850 operands[6] = part[1][1];
11854 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11855 if (optimize_size)
11857 if (GET_CODE (operands[5]) == CONST_INT
11858 && operands[5] != const0_rtx
11859 && REG_P (operands[2]))
11861 if (GET_CODE (operands[6]) == CONST_INT
11862 && INTVAL (operands[6]) == INTVAL (operands[5]))
11863 operands[6] = operands[2];
11865 if (nparts == 3
11866 && GET_CODE (operands[7]) == CONST_INT
11867 && INTVAL (operands[7]) == INTVAL (operands[5]))
11868 operands[7] = operands[2];
11871 if (nparts == 3
11872 && GET_CODE (operands[6]) == CONST_INT
11873 && operands[6] != const0_rtx
11874 && REG_P (operands[3])
11875 && GET_CODE (operands[7]) == CONST_INT
11876 && INTVAL (operands[7]) == INTVAL (operands[6]))
11877 operands[7] = operands[3];
11880 emit_move_insn (operands[2], operands[5]);
11881 emit_move_insn (operands[3], operands[6]);
11882 if (nparts == 3)
11883 emit_move_insn (operands[4], operands[7]);
11885 return;
11888 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11889 left shift by a constant, either using a single shift or
11890 a sequence of add instructions. */
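/* For instance, a left shift by 1 is emitted as a single "add reg, reg", and
   when not optimizing for size a small constant shift may instead become a
   short chain of such adds if their combined cost does not exceed the cost
   of one constant shift on the CPU being tuned for. */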
11892 static void
11893 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11895 if (count == 1)
11897 emit_insn ((mode == DImode
11898 ? gen_addsi3
11899 : gen_adddi3) (operand, operand, operand));
11901 else if (!optimize_size
11902 && count * ix86_cost->add <= ix86_cost->shift_const)
11904 int i;
11905 for (i=0; i<count; i++)
11907 emit_insn ((mode == DImode
11908 ? gen_addsi3
11909 : gen_adddi3) (operand, operand, operand));
11912 else
11913 emit_insn ((mode == DImode
11914 ? gen_ashlsi3
11915 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11918 void
11919 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11921 rtx low[2], high[2];
11922 int count;
11923 const int single_width = mode == DImode ? 32 : 64;
11925 if (GET_CODE (operands[2]) == CONST_INT)
11927 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11928 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11930 if (count >= single_width)
11932 emit_move_insn (high[0], low[1]);
11933 emit_move_insn (low[0], const0_rtx);
11935 if (count > single_width)
11936 ix86_expand_ashl_const (high[0], count - single_width, mode);
11938 else
11940 if (!rtx_equal_p (operands[0], operands[1]))
11941 emit_move_insn (operands[0], operands[1]);
11942 emit_insn ((mode == DImode
11943 ? gen_x86_shld_1
11944 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11945 ix86_expand_ashl_const (low[0], count, mode);
11947 return;
11950 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11952 if (operands[1] == const1_rtx)
11954 /* Assuming we've chosen QImode-capable registers, 1 << N
11955 can be done with two 32/64-bit shifts, no branches, no cmoves. */
11956 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11958 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11960 ix86_expand_clear (low[0]);
11961 ix86_expand_clear (high[0]);
11962 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11964 d = gen_lowpart (QImode, low[0]);
11965 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11966 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11967 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11969 d = gen_lowpart (QImode, high[0]);
11970 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11971 s = gen_rtx_NE (QImode, flags, const0_rtx);
11972 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11975 /* Otherwise, we can get the same results by manually performing
11976 a bit extract operation on bit 5/6, and then performing the two
11977 shifts. The two methods of getting 0/1 into low/high are exactly
11978 the same size. Avoiding the shift in the bit extract case helps
11979 pentium4 a bit; no one else seems to care much either way. */
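/* A sketch of that bit-extract method for the DImode case (illustrative):
     high = (count >> 5) & 1;          bit 5 says which half receives the 1
     low  = high ^ 1;
     low <<= count;  high <<= count;   the hardware masks the count mod 32  */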
11980 else
11982 rtx x;
11984 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11985 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
11986 else
11987 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
11988 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11990 emit_insn ((mode == DImode
11991 ? gen_lshrsi3
11992 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
11993 emit_insn ((mode == DImode
11994 ? gen_andsi3
11995 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
11996 emit_move_insn (low[0], high[0]);
11997 emit_insn ((mode == DImode
11998 ? gen_xorsi3
11999 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12002 emit_insn ((mode == DImode
12003 ? gen_ashlsi3
12004 : gen_ashldi3) (low[0], low[0], operands[2]));
12005 emit_insn ((mode == DImode
12006 ? gen_ashlsi3
12007 : gen_ashldi3) (high[0], high[0], operands[2]));
12008 return;
12011 if (operands[1] == constm1_rtx)
12013 /* For -1 << N, we can avoid the shld instruction, because we
12014 know that we're shifting 0...31/63 ones into a -1. */
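/* In other words: both halves start as all ones, so the bits the shift would
   bring from the low half into the high half are ones as well, leaving the
   high half unchanged; only the low half still needs the plain shift emitted
   further below. */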
12015 emit_move_insn (low[0], constm1_rtx);
12016 if (optimize_size)
12017 emit_move_insn (high[0], low[0]);
12018 else
12019 emit_move_insn (high[0], constm1_rtx);
12021 else
12023 if (!rtx_equal_p (operands[0], operands[1]))
12024 emit_move_insn (operands[0], operands[1]);
12026 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12027 emit_insn ((mode == DImode
12028 ? gen_x86_shld_1
12029 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12032 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12034 if (TARGET_CMOVE && scratch)
12036 ix86_expand_clear (scratch);
12037 emit_insn ((mode == DImode
12038 ? gen_x86_shift_adj_1
12039 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12041 else
12042 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12045 void
12046 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12048 rtx low[2], high[2];
12049 int count;
12050 const int single_width = mode == DImode ? 32 : 64;
12052 if (GET_CODE (operands[2]) == CONST_INT)
12054 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12055 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12057 if (count == single_width * 2 - 1)
12059 emit_move_insn (high[0], high[1]);
12060 emit_insn ((mode == DImode
12061 ? gen_ashrsi3
12062 : gen_ashrdi3) (high[0], high[0],
12063 GEN_INT (single_width - 1)));
12064 emit_move_insn (low[0], high[0]);
12067 else if (count >= single_width)
12069 emit_move_insn (low[0], high[1]);
12070 emit_move_insn (high[0], low[0]);
12071 emit_insn ((mode == DImode
12072 ? gen_ashrsi3
12073 : gen_ashrdi3) (high[0], high[0],
12074 GEN_INT (single_width - 1)));
12075 if (count > single_width)
12076 emit_insn ((mode == DImode
12077 ? gen_ashrsi3
12078 : gen_ashrdi3) (low[0], low[0],
12079 GEN_INT (count - single_width)));
12081 else
12083 if (!rtx_equal_p (operands[0], operands[1]))
12084 emit_move_insn (operands[0], operands[1]);
12085 emit_insn ((mode == DImode
12086 ? gen_x86_shrd_1
12087 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12088 emit_insn ((mode == DImode
12089 ? gen_ashrsi3
12090 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12093 else
12095 if (!rtx_equal_p (operands[0], operands[1]))
12096 emit_move_insn (operands[0], operands[1]);
12098 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12100 emit_insn ((mode == DImode
12101 ? gen_x86_shrd_1
12102 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12103 emit_insn ((mode == DImode
12104 ? gen_ashrsi3
12105 : gen_ashrdi3) (high[0], high[0], operands[2]));
12107 if (TARGET_CMOVE && scratch)
12109 emit_move_insn (scratch, high[0]);
12110 emit_insn ((mode == DImode
12111 ? gen_ashrsi3
12112 : gen_ashrdi3) (scratch, scratch,
12113 GEN_INT (single_width - 1)));
12114 emit_insn ((mode == DImode
12115 ? gen_x86_shift_adj_1
12116 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12117 scratch));
12119 else
12120 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12124 void
12125 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12127 rtx low[2], high[2];
12128 int count;
12129 const int single_width = mode == DImode ? 32 : 64;
12131 if (GET_CODE (operands[2]) == CONST_INT)
12133 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12134 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12136 if (count >= single_width)
12138 emit_move_insn (low[0], high[1]);
12139 ix86_expand_clear (high[0]);
12141 if (count > single_width)
12142 emit_insn ((mode == DImode
12143 ? gen_lshrsi3
12144 : gen_lshrdi3) (low[0], low[0],
12145 GEN_INT (count - single_width)));
12147 else
12149 if (!rtx_equal_p (operands[0], operands[1]))
12150 emit_move_insn (operands[0], operands[1]);
12151 emit_insn ((mode == DImode
12152 ? gen_x86_shrd_1
12153 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12154 emit_insn ((mode == DImode
12155 ? gen_lshrsi3
12156 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12159 else
12161 if (!rtx_equal_p (operands[0], operands[1]))
12162 emit_move_insn (operands[0], operands[1]);
12164 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12166 emit_insn ((mode == DImode
12167 ? gen_x86_shrd_1
12168 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12169 emit_insn ((mode == DImode
12170 ? gen_lshrsi3
12171 : gen_lshrdi3) (high[0], high[0], operands[2]));
12173 /* Heh. By reversing the arguments, we can reuse this pattern. */
12174 if (TARGET_CMOVE && scratch)
12176 ix86_expand_clear (scratch);
12177 emit_insn ((mode == DImode
12178 ? gen_x86_shift_adj_1
12179 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12180 scratch));
12182 else
12183 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12187 /* Helper function for the string operations below. Test whether VARIABLE
12188 is aligned to VALUE bytes. If so, jump to the returned label. */
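/* Typical use in the expanders below (illustrative):
     rtx label = ix86_expand_aligntest (destreg, 1);
     ... emit code handling one unaligned byte ...
     emit_label (label);
   i.e. the single-byte fixup is skipped when the address is already even. */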
12189 static rtx
12190 ix86_expand_aligntest (rtx variable, int value)
12192 rtx label = gen_label_rtx ();
12193 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12194 if (GET_MODE (variable) == DImode)
12195 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12196 else
12197 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12198 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12199 1, label);
12200 return label;
12203 /* Decrease COUNTREG by VALUE. */
12204 static void
12205 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12207 if (GET_MODE (countreg) == DImode)
12208 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12209 else
12210 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12213 /* Zero extend EXP, which may be SImode, into a Pmode register. */
12215 ix86_zero_extend_to_Pmode (rtx exp)
12217 rtx r;
12218 if (GET_MODE (exp) == VOIDmode)
12219 return force_reg (Pmode, exp);
12220 if (GET_MODE (exp) == Pmode)
12221 return copy_to_mode_reg (Pmode, exp);
12222 r = gen_reg_rtx (Pmode);
12223 emit_insn (gen_zero_extendsidi2 (r, exp));
12224 return r;
12227 /* Expand string move (memcpy) operation. Use i386 string operations when
12228 profitable. ix86_expand_clrmem contains similar code. */
12230 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12232 rtx srcreg, destreg, countreg, srcexp, destexp;
12233 enum machine_mode counter_mode;
12234 HOST_WIDE_INT align = 0;
12235 unsigned HOST_WIDE_INT count = 0;
12237 if (GET_CODE (align_exp) == CONST_INT)
12238 align = INTVAL (align_exp);
12240 /* Can't use any of this if the user has appropriated esi or edi. */
12241 if (global_regs[4] || global_regs[5])
12242 return 0;
12244 /* This simple hack avoids all inlining code and simplifies code below. */
12245 if (!TARGET_ALIGN_STRINGOPS)
12246 align = 64;
12248 if (GET_CODE (count_exp) == CONST_INT)
12250 count = INTVAL (count_exp);
12251 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12252 return 0;
12255 /* Figure out the proper mode for the counter. For 32-bit targets it is
12256 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
12257 Set count to the number of bytes copied when known at compile time. */
12258 if (!TARGET_64BIT
12259 || GET_MODE (count_exp) == SImode
12260 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12261 counter_mode = SImode;
12262 else
12263 counter_mode = DImode;
12265 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12267 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12268 if (destreg != XEXP (dst, 0))
12269 dst = replace_equiv_address_nv (dst, destreg);
12270 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12271 if (srcreg != XEXP (src, 0))
12272 src = replace_equiv_address_nv (src, srcreg);
12274 /* When optimizing for size, emit a simple rep; movsb instruction for
12275 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12276 sequence is shorter than mov{b,l} $count, %{cl,ecx}; rep; movsb.
12277 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12278 count / 4 + (count & 3) bytes, while the other sequence is either 4 or 7
12279 bytes, but we don't know whether the upper 24 (resp. 56) bits of %ecx
12280 will be known to be zero or not. The rep; movsb sequence causes higher
12281 register pressure though, so take that into account. */
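/* A worked example of the size estimate above (illustrative): when optimizing
   for size, count == 30 gives an inline sequence of 30 / 4 + (30 & 3) = 9
   bytes versus at most 7 for the rep; movsb form, so rep; movsb is used,
   while count == 6 gives only 1 + 2 = 3 bytes inline and falls through to
   the code below. */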
12283 if ((!optimize || optimize_size)
12284 && (count == 0
12285 || ((count & 0x03)
12286 && (!optimize_size
12287 || count > 5 * 4
12288 || (count & 3) + count / 4 > 6))))
12290 emit_insn (gen_cld ());
12291 countreg = ix86_zero_extend_to_Pmode (count_exp);
12292 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12293 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12294 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12295 destexp, srcexp));
12298 /* For constant aligned (or small unaligned) copies use rep movsl
12299 followed by code copying the rest. For PentiumPro, ensure 8-byte
12300 alignment to allow rep movsl acceleration. */
12302 else if (count != 0
12303 && (align >= 8
12304 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12305 || optimize_size || count < (unsigned int) 64))
12307 unsigned HOST_WIDE_INT offset = 0;
12308 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12309 rtx srcmem, dstmem;
12311 emit_insn (gen_cld ());
12312 if (count & ~(size - 1))
12314 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12316 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12318 while (offset < (count & ~(size - 1)))
12320 srcmem = adjust_automodify_address_nv (src, movs_mode,
12321 srcreg, offset);
12322 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12323 destreg, offset);
12324 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12325 offset += size;
12328 else
12330 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12331 & (TARGET_64BIT ? -1 : 0x3fffffff));
12332 countreg = copy_to_mode_reg (counter_mode, countreg);
12333 countreg = ix86_zero_extend_to_Pmode (countreg);
12335 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12336 GEN_INT (size == 4 ? 2 : 3));
12337 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12338 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12340 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12341 countreg, destexp, srcexp));
12342 offset = count & ~(size - 1);
12345 if (size == 8 && (count & 0x04))
12347 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12348 offset);
12349 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12350 offset);
12351 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12352 offset += 4;
12354 if (count & 0x02)
12356 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12357 offset);
12358 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12359 offset);
12360 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12361 offset += 2;
12363 if (count & 0x01)
12365 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12366 offset);
12367 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12368 offset);
12369 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12372 /* The generic code, based on the glibc implementation:
12373 - align the destination to 4 bytes (8-byte alignment is used for PentiumPro,
12374 allowing accelerated copying there)
12375 - copy the data using rep movsl
12376 - copy the rest. */
12377 else
12379 rtx countreg2;
12380 rtx label = NULL;
12381 rtx srcmem, dstmem;
12382 int desired_alignment = (TARGET_PENTIUMPRO
12383 && (count == 0 || count >= (unsigned int) 260)
12384 ? 8 : UNITS_PER_WORD);
12385 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12386 dst = change_address (dst, BLKmode, destreg);
12387 src = change_address (src, BLKmode, srcreg);
12389 /* In case we don't know anything about the alignment, default to
12390 the library version, since it is usually equally fast and results in
12391 shorter code.
12393 Also emit a library call when we know that the count is large and call
12394 overhead will not be important. */
12395 if (!TARGET_INLINE_ALL_STRINGOPS
12396 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12397 return 0;
12399 if (TARGET_SINGLE_STRINGOP)
12400 emit_insn (gen_cld ());
12402 countreg2 = gen_reg_rtx (Pmode);
12403 countreg = copy_to_mode_reg (counter_mode, count_exp);
12405 /* We don't use loops to align the destination or to copy parts smaller
12406 than 4 bytes, because gcc is able to optimize such code better (when
12407 the destination or the count really is aligned, gcc is often
12408 able to predict the branches) and also it is friendlier to
12409 hardware branch prediction.
12411 Using loops would be beneficial for the generic case, because we could
12412 handle small counts using the loops. Many CPUs (such as Athlon)
12413 have large REP prefix setup costs.
12415 This is quite costly. Maybe we can revisit this decision later or
12416 add some customizability to this code. */
12418 if (count == 0 && align < desired_alignment)
12420 label = gen_label_rtx ();
12421 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12422 LEU, 0, counter_mode, 1, label);
12424 if (align <= 1)
12426 rtx label = ix86_expand_aligntest (destreg, 1);
12427 srcmem = change_address (src, QImode, srcreg);
12428 dstmem = change_address (dst, QImode, destreg);
12429 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12430 ix86_adjust_counter (countreg, 1);
12431 emit_label (label);
12432 LABEL_NUSES (label) = 1;
12434 if (align <= 2)
12436 rtx label = ix86_expand_aligntest (destreg, 2);
12437 srcmem = change_address (src, HImode, srcreg);
12438 dstmem = change_address (dst, HImode, destreg);
12439 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12440 ix86_adjust_counter (countreg, 2);
12441 emit_label (label);
12442 LABEL_NUSES (label) = 1;
12444 if (align <= 4 && desired_alignment > 4)
12446 rtx label = ix86_expand_aligntest (destreg, 4);
12447 srcmem = change_address (src, SImode, srcreg);
12448 dstmem = change_address (dst, SImode, destreg);
12449 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12450 ix86_adjust_counter (countreg, 4);
12451 emit_label (label);
12452 LABEL_NUSES (label) = 1;
12455 if (label && desired_alignment > 4 && !TARGET_64BIT)
12457 emit_label (label);
12458 LABEL_NUSES (label) = 1;
12459 label = NULL_RTX;
12461 if (!TARGET_SINGLE_STRINGOP)
12462 emit_insn (gen_cld ());
12463 if (TARGET_64BIT)
12465 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12466 GEN_INT (3)));
12467 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12469 else
12471 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12472 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12474 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12475 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12476 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12477 countreg2, destexp, srcexp));
12479 if (label)
12481 emit_label (label);
12482 LABEL_NUSES (label) = 1;
12484 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12486 srcmem = change_address (src, SImode, srcreg);
12487 dstmem = change_address (dst, SImode, destreg);
12488 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12490 if ((align <= 4 || count == 0) && TARGET_64BIT)
12492 rtx label = ix86_expand_aligntest (countreg, 4);
12493 srcmem = change_address (src, SImode, srcreg);
12494 dstmem = change_address (dst, SImode, destreg);
12495 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12496 emit_label (label);
12497 LABEL_NUSES (label) = 1;
12499 if (align > 2 && count != 0 && (count & 2))
12501 srcmem = change_address (src, HImode, srcreg);
12502 dstmem = change_address (dst, HImode, destreg);
12503 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12505 if (align <= 2 || count == 0)
12507 rtx label = ix86_expand_aligntest (countreg, 2);
12508 srcmem = change_address (src, HImode, srcreg);
12509 dstmem = change_address (dst, HImode, destreg);
12510 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12511 emit_label (label);
12512 LABEL_NUSES (label) = 1;
12514 if (align > 1 && count != 0 && (count & 1))
12516 srcmem = change_address (src, QImode, srcreg);
12517 dstmem = change_address (dst, QImode, destreg);
12518 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12520 if (align <= 1 || count == 0)
12522 rtx label = ix86_expand_aligntest (countreg, 1);
12523 srcmem = change_address (src, QImode, srcreg);
12524 dstmem = change_address (dst, QImode, destreg);
12525 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12526 emit_label (label);
12527 LABEL_NUSES (label) = 1;
12531 return 1;
12534 /* Expand string clear operation (bzero). Use i386 string operations when
12535 profitable. ix86_expand_movmem contains similar code. */
12537 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12539 rtx destreg, zeroreg, countreg, destexp;
12540 enum machine_mode counter_mode;
12541 HOST_WIDE_INT align = 0;
12542 unsigned HOST_WIDE_INT count = 0;
12544 if (GET_CODE (align_exp) == CONST_INT)
12545 align = INTVAL (align_exp);
12547 /* Can't use any of this if the user has appropriated esi. */
12548 if (global_regs[4])
12549 return 0;
12551 /* This simple hack avoids all inlining code and simplifies code below. */
12552 if (!TARGET_ALIGN_STRINGOPS)
12553 align = 32;
12555 if (GET_CODE (count_exp) == CONST_INT)
12557 count = INTVAL (count_exp);
12558 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12559 return 0;
12561 /* Figure out the proper mode for the counter. For 32-bit targets it is
12562 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
12563 Set count to the number of bytes cleared when known at compile time. */
12564 if (!TARGET_64BIT
12565 || GET_MODE (count_exp) == SImode
12566 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12567 counter_mode = SImode;
12568 else
12569 counter_mode = DImode;
12571 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12572 if (destreg != XEXP (dst, 0))
12573 dst = replace_equiv_address_nv (dst, destreg);
12576 /* When optimizing for size, emit a simple rep; stosb instruction for
12577 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12578 sequence is 7 bytes long, so if optimizing for size and the count is
12579 small enough that some stosl, stosw and stosb instructions without
12580 rep are shorter, fall through to the next if. */
12582 if ((!optimize || optimize_size)
12583 && (count == 0
12584 || ((count & 0x03)
12585 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12587 emit_insn (gen_cld ());
12589 countreg = ix86_zero_extend_to_Pmode (count_exp);
12590 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12591 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12592 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12594 else if (count != 0
12595 && (align >= 8
12596 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12597 || optimize_size || count < (unsigned int) 64))
12599 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12600 unsigned HOST_WIDE_INT offset = 0;
12602 emit_insn (gen_cld ());
12604 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12605 if (count & ~(size - 1))
12607 unsigned HOST_WIDE_INT repcount;
12608 unsigned int max_nonrep;
12610 repcount = count >> (size == 4 ? 2 : 3);
12611 if (!TARGET_64BIT)
12612 repcount &= 0x3fffffff;
12614 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12615 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12616 bytes. In both cases the latter seems to be faster for small
12617 values of N. */
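/* For example (illustrative): clearing 12 bytes with size == 4 gives
   repcount == 3, and three 1-byte stosl instructions (3 bytes) beat the
   7-byte movl $3, %ecx; rep; stosl form, so the non-rep sequence is used
   whenever repcount is at most max_nonrep below. */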
12618 max_nonrep = size == 4 ? 7 : 4;
12619 if (!optimize_size)
12620 switch (ix86_tune)
12622 case PROCESSOR_PENTIUM4:
12623 case PROCESSOR_NOCONA:
12624 max_nonrep = 3;
12625 break;
12626 default:
12627 break;
12630 if (repcount <= max_nonrep)
12631 while (repcount-- > 0)
12633 rtx mem = adjust_automodify_address_nv (dst,
12634 GET_MODE (zeroreg),
12635 destreg, offset);
12636 emit_insn (gen_strset (destreg, mem, zeroreg));
12637 offset += size;
12639 else
12641 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12642 countreg = ix86_zero_extend_to_Pmode (countreg);
12643 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12644 GEN_INT (size == 4 ? 2 : 3));
12645 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12646 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12647 destexp));
12648 offset = count & ~(size - 1);
12651 if (size == 8 && (count & 0x04))
12653 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12654 offset);
12655 emit_insn (gen_strset (destreg, mem,
12656 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12657 offset += 4;
12659 if (count & 0x02)
12661 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12662 offset);
12663 emit_insn (gen_strset (destreg, mem,
12664 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12665 offset += 2;
12667 if (count & 0x01)
12669 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12670 offset);
12671 emit_insn (gen_strset (destreg, mem,
12672 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12675 else
12677 rtx countreg2;
12678 rtx label = NULL;
12679 /* Compute desired alignment of the string operation. */
12680 int desired_alignment = (TARGET_PENTIUMPRO
12681 && (count == 0 || count >= (unsigned int) 260)
12682 ? 8 : UNITS_PER_WORD);
12684 /* In case we don't know anything about the alignment, default to
12685 the library version, since it is usually equally fast and results in
12686 shorter code.
12688 Also emit a library call when we know that the count is large and call
12689 overhead will not be important. */
12690 if (!TARGET_INLINE_ALL_STRINGOPS
12691 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12692 return 0;
12694 if (TARGET_SINGLE_STRINGOP)
12695 emit_insn (gen_cld ());
12697 countreg2 = gen_reg_rtx (Pmode);
12698 countreg = copy_to_mode_reg (counter_mode, count_exp);
12699 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12700 /* Get rid of MEM_OFFSET, it won't be accurate. */
12701 dst = change_address (dst, BLKmode, destreg);
12703 if (count == 0 && align < desired_alignment)
12705 label = gen_label_rtx ();
12706 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12707 LEU, 0, counter_mode, 1, label);
12709 if (align <= 1)
12711 rtx label = ix86_expand_aligntest (destreg, 1);
12712 emit_insn (gen_strset (destreg, dst,
12713 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12714 ix86_adjust_counter (countreg, 1);
12715 emit_label (label);
12716 LABEL_NUSES (label) = 1;
12718 if (align <= 2)
12720 rtx label = ix86_expand_aligntest (destreg, 2);
12721 emit_insn (gen_strset (destreg, dst,
12722 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12723 ix86_adjust_counter (countreg, 2);
12724 emit_label (label);
12725 LABEL_NUSES (label) = 1;
12727 if (align <= 4 && desired_alignment > 4)
12729 rtx label = ix86_expand_aligntest (destreg, 4);
12730 emit_insn (gen_strset (destreg, dst,
12731 (TARGET_64BIT
12732 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12733 : zeroreg)));
12734 ix86_adjust_counter (countreg, 4);
12735 emit_label (label);
12736 LABEL_NUSES (label) = 1;
12739 if (label && desired_alignment > 4 && !TARGET_64BIT)
12741 emit_label (label);
12742 LABEL_NUSES (label) = 1;
12743 label = NULL_RTX;
12746 if (!TARGET_SINGLE_STRINGOP)
12747 emit_insn (gen_cld ());
12748 if (TARGET_64BIT)
12750 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12751 GEN_INT (3)));
12752 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12754 else
12756 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12757 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12759 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12760 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12762 if (label)
12764 emit_label (label);
12765 LABEL_NUSES (label) = 1;
12768 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12769 emit_insn (gen_strset (destreg, dst,
12770 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12771 if (TARGET_64BIT && (align <= 4 || count == 0))
12773 rtx label = ix86_expand_aligntest (countreg, 4);
12774 emit_insn (gen_strset (destreg, dst,
12775 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12776 emit_label (label);
12777 LABEL_NUSES (label) = 1;
12779 if (align > 2 && count != 0 && (count & 2))
12780 emit_insn (gen_strset (destreg, dst,
12781 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12782 if (align <= 2 || count == 0)
12784 rtx label = ix86_expand_aligntest (countreg, 2);
12785 emit_insn (gen_strset (destreg, dst,
12786 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12787 emit_label (label);
12788 LABEL_NUSES (label) = 1;
12790 if (align > 1 && count != 0 && (count & 1))
12791 emit_insn (gen_strset (destreg, dst,
12792 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12793 if (align <= 1 || count == 0)
12795 rtx label = ix86_expand_aligntest (countreg, 1);
12796 emit_insn (gen_strset (destreg, dst,
12797 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12798 emit_label (label);
12799 LABEL_NUSES (label) = 1;
12802 return 1;
12805 /* Expand strlen. */
12807 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12809 rtx addr, scratch1, scratch2, scratch3, scratch4;
12811 /* The generic case of the strlen expander is long. Avoid expanding it
12812 unless TARGET_INLINE_ALL_STRINGOPS. */
12814 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12815 && !TARGET_INLINE_ALL_STRINGOPS
12816 && !optimize_size
12817 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12818 return 0;
12820 addr = force_reg (Pmode, XEXP (src, 0));
12821 scratch1 = gen_reg_rtx (Pmode);
12823 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12824 && !optimize_size)
12826 /* Well, it seems that some optimizer does not combine a call like
12827 foo(strlen(bar), strlen(bar));
12828 when the move and the subtraction are done here. It does calculate
12829 the length just once when these instructions are done inside
12830 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
12831 and this way one fewer register is live for the lifetime of
12832 output_strlen_unroll(), this is better. */
12834 emit_move_insn (out, addr);
12836 ix86_expand_strlensi_unroll_1 (out, src, align);
12838 /* strlensi_unroll_1 returns the address of the zero at the end of
12839 the string, like memchr(), so compute the length by subtracting
12840 the start address. */
12841 if (TARGET_64BIT)
12842 emit_insn (gen_subdi3 (out, out, addr));
12843 else
12844 emit_insn (gen_subsi3 (out, out, addr));
12846 else
12848 rtx unspec;
12849 scratch2 = gen_reg_rtx (Pmode);
12850 scratch3 = gen_reg_rtx (Pmode);
12851 scratch4 = force_reg (Pmode, constm1_rtx);
12853 emit_move_insn (scratch3, addr);
12854 eoschar = force_reg (QImode, eoschar);
12856 emit_insn (gen_cld ());
12857 src = replace_equiv_address_nv (src, scratch3);
12859 /* If .md starts supporting :P, this can be done in .md. */
12860 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12861 scratch4), UNSPEC_SCAS);
12862 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12863 if (TARGET_64BIT)
12865 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12866 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12868 else
12870 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12871 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12874 return 1;
12877 /* Expand the appropriate insns for doing strlen if not just doing
12878 repnz; scasb.
12880 out = result, initialized with the start address
12881 align_rtx = alignment of the address.
12882 scratch = scratch register, initialized with the start address when
12883 not aligned, otherwise undefined
12885 This is just the body. It needs the initializations mentioned above and
12886 some address computation at the end. These things are done in i386.md. */
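/* Roughly, the body emitted below proceeds as follows (sketch):
     1. compare up to three leading bytes one at a time until OUT is
        4-byte aligned, jumping to the end when a zero byte is found;
     2. loop loading 4 bytes at a time and testing them all at once with
        the "has a zero byte" bit trick used further below;
     3. once a word containing the terminator is found, adjust OUT to point
        at the exact zero byte without a branch (cmov/setcc or sbb). */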
12888 static void
12889 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12891 int align;
12892 rtx tmp;
12893 rtx align_2_label = NULL_RTX;
12894 rtx align_3_label = NULL_RTX;
12895 rtx align_4_label = gen_label_rtx ();
12896 rtx end_0_label = gen_label_rtx ();
12897 rtx mem;
12898 rtx tmpreg = gen_reg_rtx (SImode);
12899 rtx scratch = gen_reg_rtx (SImode);
12900 rtx cmp;
12902 align = 0;
12903 if (GET_CODE (align_rtx) == CONST_INT)
12904 align = INTVAL (align_rtx);
12906 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12908 /* Is there a known alignment and is it less than 4? */
12909 if (align < 4)
12911 rtx scratch1 = gen_reg_rtx (Pmode);
12912 emit_move_insn (scratch1, out);
12913 /* Is there a known alignment and is it not 2? */
12914 if (align != 2)
12916 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12917 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12919 /* Leave just the 3 lower bits. */
12920 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12921 NULL_RTX, 0, OPTAB_WIDEN);
12923 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12924 Pmode, 1, align_4_label);
12925 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12926 Pmode, 1, align_2_label);
12927 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12928 Pmode, 1, align_3_label);
12930 else
12932 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12933 check whether it is aligned to 4 bytes. */
12935 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12936 NULL_RTX, 0, OPTAB_WIDEN);
12938 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12939 Pmode, 1, align_4_label);
12942 mem = change_address (src, QImode, out);
12944 /* Now compare the bytes. */
12946 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
12947 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12948 QImode, 1, end_0_label);
12950 /* Increment the address. */
12951 if (TARGET_64BIT)
12952 emit_insn (gen_adddi3 (out, out, const1_rtx));
12953 else
12954 emit_insn (gen_addsi3 (out, out, const1_rtx));
12956 /* Not needed with an alignment of 2 */
12957 if (align != 2)
12959 emit_label (align_2_label);
12961 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12962 end_0_label);
12964 if (TARGET_64BIT)
12965 emit_insn (gen_adddi3 (out, out, const1_rtx));
12966 else
12967 emit_insn (gen_addsi3 (out, out, const1_rtx));
12969 emit_label (align_3_label);
12972 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12973 end_0_label);
12975 if (TARGET_64BIT)
12976 emit_insn (gen_adddi3 (out, out, const1_rtx));
12977 else
12978 emit_insn (gen_addsi3 (out, out, const1_rtx));
12981 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
12982 align this loop: it only makes the program bigger and does not help
12983 to speed it up. */
12984 emit_label (align_4_label);
12986 mem = change_address (src, SImode, out);
12987 emit_move_insn (scratch, mem);
12988 if (TARGET_64BIT)
12989 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12990 else
12991 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12993 /* This formula yields a nonzero result iff one of the bytes is zero.
12994 This saves three branches inside the loop and many cycles. */
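/* The test computed below is, in C terms (illustrative):
     tmp = (x - 0x01010101) & ~x & 0x80808080;
   For a zero byte, the subtraction borrows into bit 7 of that byte and ~x
   keeps that bit set, so tmp is nonzero iff some byte of x is zero. */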
12996 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12997 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12998 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12999 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13000 gen_int_mode (0x80808080, SImode)));
13001 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13002 align_4_label);
13004 if (TARGET_CMOVE)
13006 rtx reg = gen_reg_rtx (SImode);
13007 rtx reg2 = gen_reg_rtx (Pmode);
13008 emit_move_insn (reg, tmpreg);
13009 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13011 /* If zero is not in the first two bytes, move two bytes forward. */
13012 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13013 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13014 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13015 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13016 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13017 reg,
13018 tmpreg)));
13019 /* Emit lea manually to avoid clobbering of flags. */
13020 emit_insn (gen_rtx_SET (SImode, reg2,
13021 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13023 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13024 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13025 emit_insn (gen_rtx_SET (VOIDmode, out,
13026 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13027 reg2,
13028 out)));
13031 else
13033 rtx end_2_label = gen_label_rtx ();
13034 /* Is zero in the first two bytes? */
13036 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13037 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13038 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13039 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13040 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13041 pc_rtx);
13042 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13043 JUMP_LABEL (tmp) = end_2_label;
13045 /* Not in the first two. Move two bytes forward. */
13046 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13047 if (TARGET_64BIT)
13048 emit_insn (gen_adddi3 (out, out, const2_rtx));
13049 else
13050 emit_insn (gen_addsi3 (out, out, const2_rtx));
13052 emit_label (end_2_label);
13056 /* Avoid branch in fixing the byte. */
13057 tmpreg = gen_lowpart (QImode, tmpreg);
13058 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13059 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13060 if (TARGET_64BIT)
13061 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13062 else
13063 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13065 emit_label (end_0_label);
13068 void
13069 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13070 rtx callarg2 ATTRIBUTE_UNUSED,
13071 rtx pop, int sibcall)
13073 rtx use = NULL, call;
13075 if (pop == const0_rtx)
13076 pop = NULL;
13077 gcc_assert (!TARGET_64BIT || !pop);
13079 #if TARGET_MACHO
13080 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13081 fnaddr = machopic_indirect_call_target (fnaddr);
13082 #else
13083 /* Static functions and indirect calls don't need the pic register. */
13084 if (! TARGET_64BIT && flag_pic
13085 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13086 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13087 use_reg (&use, pic_offset_table_rtx);
13089 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13091 rtx al = gen_rtx_REG (QImode, 0);
13092 emit_move_insn (al, callarg2);
13093 use_reg (&use, al);
13095 #endif /* TARGET_MACHO */
13097 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13099 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13100 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13102 if (sibcall && TARGET_64BIT
13103 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13105 rtx addr;
13106 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13107 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13108 emit_move_insn (fnaddr, addr);
13109 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13112 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13113 if (retval)
13114 call = gen_rtx_SET (VOIDmode, retval, call);
13115 if (pop)
13117 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13118 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13119 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13122 call = emit_call_insn (call);
13123 if (use)
13124 CALL_INSN_FUNCTION_USAGE (call) = use;
13128 /* Clear stack slot assignments remembered from previous functions.
13129 This is called from INIT_EXPANDERS once before RTL is emitted for each
13130 function. */
13132 static struct machine_function *
13133 ix86_init_machine_status (void)
13135 struct machine_function *f;
13137 f = ggc_alloc_cleared (sizeof (struct machine_function));
13138 f->use_fast_prologue_epilogue_nregs = -1;
13139 f->tls_descriptor_call_expanded_p = 0;
13141 return f;
13144 /* Return a MEM corresponding to a stack slot with mode MODE.
13145 Allocate a new slot if necessary.
13147 The RTL for a function can have several slots available: N is
13148 which slot to use. */
13151 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13153 struct stack_local_entry *s;
13155 gcc_assert (n < MAX_386_STACK_LOCALS);
13157 for (s = ix86_stack_locals; s; s = s->next)
13158 if (s->mode == mode && s->n == n)
13159 return s->rtl;
13161 s = (struct stack_local_entry *)
13162 ggc_alloc (sizeof (struct stack_local_entry));
13163 s->n = n;
13164 s->mode = mode;
13165 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13167 s->next = ix86_stack_locals;
13168 ix86_stack_locals = s;
13169 return s->rtl;
13172 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13174 static GTY(()) rtx ix86_tls_symbol;
13176 ix86_tls_get_addr (void)
13179 if (!ix86_tls_symbol)
13181 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13182 (TARGET_ANY_GNU_TLS
13183 && !TARGET_64BIT)
13184 ? "___tls_get_addr"
13185 : "__tls_get_addr");
13188 return ix86_tls_symbol;
13191 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13193 static GTY(()) rtx ix86_tls_module_base_symbol;
13195 ix86_tls_module_base (void)
13198 if (!ix86_tls_module_base_symbol)
13200 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13201 "_TLS_MODULE_BASE_");
13202 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13203 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13206 return ix86_tls_module_base_symbol;
13209 /* Calculate the length of the memory address in the instruction
13210 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13213 memory_address_length (rtx addr)
13215 struct ix86_address parts;
13216 rtx base, index, disp;
13217 int len;
13218 int ok;
13220 if (GET_CODE (addr) == PRE_DEC
13221 || GET_CODE (addr) == POST_INC
13222 || GET_CODE (addr) == PRE_MODIFY
13223 || GET_CODE (addr) == POST_MODIFY)
13224 return 0;
13226 ok = ix86_decompose_address (addr, &parts);
13227 gcc_assert (ok);
13229 if (parts.base && GET_CODE (parts.base) == SUBREG)
13230 parts.base = SUBREG_REG (parts.base);
13231 if (parts.index && GET_CODE (parts.index) == SUBREG)
13232 parts.index = SUBREG_REG (parts.index);
13234 base = parts.base;
13235 index = parts.index;
13236 disp = parts.disp;
13237 len = 0;
13239 /* Rule of thumb:
13240 - esp as the base always wants an index,
13241 - ebp as the base always wants a displacement. */
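/* Some illustrative encodings (lengths here exclude the modrm byte itself):
   (%eax) adds 0 bytes, (%esp) needs a SIB byte (+1), 8(%ebp) needs a disp8
   (+1), a bare 32-bit displacement needs a disp32 (+4), and 8(%ebx,%ecx,4)
   needs a SIB byte plus a disp8 (+2). */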
13243 /* Register Indirect. */
13244 if (base && !index && !disp)
13246 /* esp (for its index) and ebp (for its displacement) need
13247 the two-byte modrm form. */
13248 if (addr == stack_pointer_rtx
13249 || addr == arg_pointer_rtx
13250 || addr == frame_pointer_rtx
13251 || addr == hard_frame_pointer_rtx)
13252 len = 1;
13255 /* Direct Addressing. */
13256 else if (disp && !base && !index)
13257 len = 4;
13259 else
13261 /* Find the length of the displacement constant. */
13262 if (disp)
13264 if (GET_CODE (disp) == CONST_INT
13265 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
13266 && base)
13267 len = 1;
13268 else
13269 len = 4;
13271 /* ebp always wants a displacement. */
13272 else if (base == hard_frame_pointer_rtx)
13273 len = 1;
13275 /* An index requires the two-byte modrm form.... */
13276 if (index
13277 /* ...like esp, which always wants an index. */
13278 || base == stack_pointer_rtx
13279 || base == arg_pointer_rtx
13280 || base == frame_pointer_rtx)
13281 len += 1;
13284 return len;
13287 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
13288 is set, expect that the insn has an 8-bit immediate alternative. */
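/* For example (illustrative): with SHORTFORM set, "add $4, %eax" counts a
   1-byte immediate because 4 fits the signed 8-bit 'K' constraint, while
   "add $1000, %eax" counts 4 bytes; in MODE_HI the immediate is 2 bytes, and
   DImode immediates count as 4 since they are sign-extended 32-bit values. */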
13290 ix86_attr_length_immediate_default (rtx insn, int shortform)
13292 int len = 0;
13293 int i;
13294 extract_insn_cached (insn);
13295 for (i = recog_data.n_operands - 1; i >= 0; --i)
13296 if (CONSTANT_P (recog_data.operand[i]))
13298 gcc_assert (!len);
13299 if (shortform
13300 && GET_CODE (recog_data.operand[i]) == CONST_INT
13301 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
13302 len = 1;
13303 else
13305 switch (get_attr_mode (insn))
13307 case MODE_QI:
13308 len+=1;
13309 break;
13310 case MODE_HI:
13311 len+=2;
13312 break;
13313 case MODE_SI:
13314 len+=4;
13315 break;
13316 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13317 case MODE_DI:
13318 len+=4;
13319 break;
13320 default:
13321 fatal_insn ("unknown insn mode", insn);
13325 return len;
13327 /* Compute default value for "length_address" attribute. */
13329 ix86_attr_length_address_default (rtx insn)
13331 int i;
13333 if (get_attr_type (insn) == TYPE_LEA)
13335 rtx set = PATTERN (insn);
13337 if (GET_CODE (set) == PARALLEL)
13338 set = XVECEXP (set, 0, 0);
13340 gcc_assert (GET_CODE (set) == SET);
13342 return memory_address_length (SET_SRC (set));
13345 extract_insn_cached (insn);
13346 for (i = recog_data.n_operands - 1; i >= 0; --i)
13347 if (GET_CODE (recog_data.operand[i]) == MEM)
13349 return memory_address_length (XEXP (recog_data.operand[i], 0));
13350 break;
13352 return 0;
13355 /* Return the maximum number of instructions a cpu can issue. */
13357 static int
13358 ix86_issue_rate (void)
13360 switch (ix86_tune)
13362 case PROCESSOR_PENTIUM:
13363 case PROCESSOR_K6:
13364 return 2;
13366 case PROCESSOR_PENTIUMPRO:
13367 case PROCESSOR_PENTIUM4:
13368 case PROCESSOR_ATHLON:
13369 case PROCESSOR_K8:
13370 case PROCESSOR_NOCONA:
13371 case PROCESSOR_GENERIC32:
13372 case PROCESSOR_GENERIC64:
13373 return 3;
13375 default:
13376 return 1;
13380 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
13381 by DEP_INSN and nothing else set by DEP_INSN. */
13383 static int
13384 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13386 rtx set, set2;
13388 /* Simplify the test for uninteresting insns. */
13389 if (insn_type != TYPE_SETCC
13390 && insn_type != TYPE_ICMOV
13391 && insn_type != TYPE_FCMOV
13392 && insn_type != TYPE_IBR)
13393 return 0;
13395 if ((set = single_set (dep_insn)) != 0)
13397 set = SET_DEST (set);
13398 set2 = NULL_RTX;
13400 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13401 && XVECLEN (PATTERN (dep_insn), 0) == 2
13402 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13403 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13405 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13406 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13408 else
13409 return 0;
13411 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13412 return 0;
13414 /* This test is true if the dependent insn reads the flags but
13415 not any other potentially set register. */
13416 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13417 return 0;
13419 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13420 return 0;
13422 return 1;
13425 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13426 address with operands set by DEP_INSN. */
13428 static int
13429 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13431 rtx addr;
13433 if (insn_type == TYPE_LEA
13434 && TARGET_PENTIUM)
13436 addr = PATTERN (insn);
13438 if (GET_CODE (addr) == PARALLEL)
13439 addr = XVECEXP (addr, 0, 0);
13441 gcc_assert (GET_CODE (addr) == SET);
13443 addr = SET_SRC (addr);
13445 else
13447 int i;
13448 extract_insn_cached (insn);
13449 for (i = recog_data.n_operands - 1; i >= 0; --i)
13450 if (GET_CODE (recog_data.operand[i]) == MEM)
13452 addr = XEXP (recog_data.operand[i], 0);
13453 goto found;
13455 return 0;
13456 found:;
13459 return modified_in_p (addr, dep_insn);
13462 static int
13463 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13465 enum attr_type insn_type, dep_insn_type;
13466 enum attr_memory memory;
13467 rtx set, set2;
13468 int dep_insn_code_number;
13470 /* Anti and output dependencies have zero cost on all CPUs. */
13471 if (REG_NOTE_KIND (link) != 0)
13472 return 0;
13474 dep_insn_code_number = recog_memoized (dep_insn);
13476 /* If we can't recognize the insns, we can't really do anything. */
13477 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13478 return cost;
13480 insn_type = get_attr_type (insn);
13481 dep_insn_type = get_attr_type (dep_insn);
13483 switch (ix86_tune)
13485 case PROCESSOR_PENTIUM:
13486 /* Address Generation Interlock adds a cycle of latency. */
13487 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13488 cost += 1;
13490 /* ??? Compares pair with jump/setcc. */
13491 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13492 cost = 0;
13494 /* Floating point stores require the value to be ready one cycle earlier.  */
13495 if (insn_type == TYPE_FMOV
13496 && get_attr_memory (insn) == MEMORY_STORE
13497 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13498 cost += 1;
13499 break;
13501 case PROCESSOR_PENTIUMPRO:
13502 memory = get_attr_memory (insn);
13504 /* INT->FP conversion is expensive. */
13505 if (get_attr_fp_int_src (dep_insn))
13506 cost += 5;
13508 /* There is one cycle extra latency between an FP op and a store. */
13509 if (insn_type == TYPE_FMOV
13510 && (set = single_set (dep_insn)) != NULL_RTX
13511 && (set2 = single_set (insn)) != NULL_RTX
13512 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13513 && GET_CODE (SET_DEST (set2)) == MEM)
13514 cost += 1;
13516 /* Model the ability of the reorder buffer to hide the latency of a load
13517 by executing it in parallel with the previous instruction when that
13518 instruction is not needed to compute the address.  */
13519 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13520 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13522 /* Claim moves to take one cycle, as the core can issue one load
13523 at a time and the next load can start a cycle later.  */
13524 if (dep_insn_type == TYPE_IMOV
13525 || dep_insn_type == TYPE_FMOV)
13526 cost = 1;
13527 else if (cost > 1)
13528 cost--;
13530 break;
13532 case PROCESSOR_K6:
13533 memory = get_attr_memory (insn);
13535 /* The esp dependency is resolved before the instruction is really
13536 finished. */
13537 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13538 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13539 return 1;
13541 /* INT->FP conversion is expensive. */
13542 if (get_attr_fp_int_src (dep_insn))
13543 cost += 5;
13545 /* Model the ability of the reorder buffer to hide the latency of a load
13546 by executing it in parallel with the previous instruction when that
13547 instruction is not needed to compute the address.  */
13548 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13549 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13551 /* Claim moves to take one cycle, as the core can issue one load
13552 at a time and the next load can start a cycle later.  */
13553 if (dep_insn_type == TYPE_IMOV
13554 || dep_insn_type == TYPE_FMOV)
13555 cost = 1;
13556 else if (cost > 2)
13557 cost -= 2;
13558 else
13559 cost = 1;
13561 break;
13563 case PROCESSOR_ATHLON:
13564 case PROCESSOR_K8:
13565 case PROCESSOR_GENERIC32:
13566 case PROCESSOR_GENERIC64:
13567 memory = get_attr_memory (insn);
13569 /* Model the ability of the reorder buffer to hide the latency of a load
13570 by executing it in parallel with the previous instruction when that
13571 instruction is not needed to compute the address.  */
13572 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13573 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13575 enum attr_unit unit = get_attr_unit (insn);
13576 int loadcost = 3;
13578 /* Because of the difference between the length of integer and
13579 floating unit pipeline preparation stages, the memory operands
13580 for floating point are cheaper.
13582 ??? For Athlon the difference is most probably 2.  */
13583 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13584 loadcost = 3;
13585 else
13586 loadcost = TARGET_ATHLON ? 2 : 0;
13588 if (cost >= loadcost)
13589 cost -= loadcost;
13590 else
13591 cost = 0;
13594 default:
13595 break;
13598 return cost;
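/* Worked example (added for clarity): on K8/Athlon, if the recorded
   dependence cost is 3 and the consuming instruction executes in the
   integer unit, loadcost is 3 and the adjusted cost becomes 0 -- the
   reorder buffer is assumed to overlap the load with its producer.  On
   Athlon a floating-point consumer uses loadcost 2 instead, so the same
   dependence keeps a residual cost of 1.  */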
13601 /* How many alternative schedules to try. This should be as wide as the
13602 scheduling freedom in the DFA, but no wider.  Making this value too
13603 large results in extra work for the scheduler.  */
13605 static int
13606 ia32_multipass_dfa_lookahead (void)
13608 if (ix86_tune == PROCESSOR_PENTIUM)
13609 return 2;
13611 if (ix86_tune == PROCESSOR_PENTIUMPRO
13612 || ix86_tune == PROCESSOR_K6)
13613 return 1;
13615 else
13616 return 0;
13620 /* Compute the alignment given to a constant that is being placed in memory.
13621 EXP is the constant and ALIGN is the alignment that the object would
13622 ordinarily have.
13623 The value of this function is used instead of that alignment to align
13624 the object. */
13627 ix86_constant_alignment (tree exp, int align)
13629 if (TREE_CODE (exp) == REAL_CST)
13631 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13632 return 64;
13633 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13634 return 128;
13636 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13637 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13638 return BITS_PER_WORD;
13640 return align;
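/* Example (added for clarity): a DFmode REAL_CST that would normally get
   only 32-bit alignment is bumped to 64 bits here so it can be loaded
   without penalty, and a string constant of 31 or more characters is
   given word alignment (unless optimizing for size), presumably so block
   moves can use word-sized accesses.  */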
13643 /* Compute the alignment for a static variable.
13644 TYPE is the data type, and ALIGN is the alignment that
13645 the object would ordinarily have. The value of this function is used
13646 instead of that alignment to align the object. */
13649 ix86_data_alignment (tree type, int align)
13651 int max_align = optimize_size ? BITS_PER_WORD : 256;
13653 if (AGGREGATE_TYPE_P (type)
13654 && TYPE_SIZE (type)
13655 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13656 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13657 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13658 && align < max_align)
13659 align = max_align;
13661 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
13662 to a 16-byte boundary.  */
13663 if (TARGET_64BIT)
13665 if (AGGREGATE_TYPE_P (type)
13666 && TYPE_SIZE (type)
13667 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13668 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13669 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13670 return 128;
13673 if (TREE_CODE (type) == ARRAY_TYPE)
13675 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13676 return 64;
13677 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13678 return 128;
13680 else if (TREE_CODE (type) == COMPLEX_TYPE)
13683 if (TYPE_MODE (type) == DCmode && align < 64)
13684 return 64;
13685 if (TYPE_MODE (type) == XCmode && align < 128)
13686 return 128;
13688 else if ((TREE_CODE (type) == RECORD_TYPE
13689 || TREE_CODE (type) == UNION_TYPE
13690 || TREE_CODE (type) == QUAL_UNION_TYPE)
13691 && TYPE_FIELDS (type))
13693 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13694 return 64;
13695 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13696 return 128;
13698 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13699 || TREE_CODE (type) == INTEGER_TYPE)
13701 if (TYPE_MODE (type) == DFmode && align < 64)
13702 return 64;
13703 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13704 return 128;
13707 return align;
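/* Examples (added for clarity): on x86-64 a static "char buf[20]" (160
   bits) is raised to 128-bit alignment by the ABI clause above, while on
   ia32 a static struct whose first field is a double -- ordinarily only
   32-bit aligned by the ABI -- is raised to 64-bit alignment by the
   DFmode field check.  */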
13710 /* Compute the alignment for a local variable.
13711 TYPE is the data type, and ALIGN is the alignment that
13712 the object would ordinarily have.  The value of this function is used
13713 instead of that alignment to align the object. */
13716 ix86_local_alignment (tree type, int align)
13718 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
13719 to a 16-byte boundary.  */
13720 if (TARGET_64BIT)
13722 if (AGGREGATE_TYPE_P (type)
13723 && TYPE_SIZE (type)
13724 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13725 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13726 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13727 return 128;
13729 if (TREE_CODE (type) == ARRAY_TYPE)
13731 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13732 return 64;
13733 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13734 return 128;
13736 else if (TREE_CODE (type) == COMPLEX_TYPE)
13738 if (TYPE_MODE (type) == DCmode && align < 64)
13739 return 64;
13740 if (TYPE_MODE (type) == XCmode && align < 128)
13741 return 128;
13743 else if ((TREE_CODE (type) == RECORD_TYPE
13744 || TREE_CODE (type) == UNION_TYPE
13745 || TREE_CODE (type) == QUAL_UNION_TYPE)
13746 && TYPE_FIELDS (type))
13748 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13749 return 64;
13750 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13751 return 128;
13753 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13754 || TREE_CODE (type) == INTEGER_TYPE)
13757 if (TYPE_MODE (type) == DFmode && align < 64)
13758 return 64;
13759 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13760 return 128;
13762 return align;
13765 /* Emit RTL insns to initialize the variable parts of a trampoline.
13766 FNADDR is an RTX for the address of the function's pure code.
13767 CXT is an RTX for the static chain value for the function. */
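/* Rough sketch of the code the stores below emit (editorial note, derived
   from the opcode bytes used):

     32-bit:   b9 <imm32>            movl   $cxt, %ecx
               e9 <rel32>            jmp    fnaddr        (rel. to tramp+10)

     64-bit:   41 bb <imm32>         movl   $fnaddr, %r11d  (when it fits)
        or     49 bb <imm64>         movabs $fnaddr, %r11
               49 ba <imm64>         movabs $cxt, %r10
               49 ff e3              jmp    *%r11  */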
13768 void
13769 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13771 if (!TARGET_64BIT)
13773 /* Compute offset from the end of the jmp to the target function. */
13774 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13775 plus_constant (tramp, 10),
13776 NULL_RTX, 1, OPTAB_DIRECT);
13777 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13778 gen_int_mode (0xb9, QImode));
13779 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13780 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13781 gen_int_mode (0xe9, QImode));
13782 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13784 else
13786 int offset = 0;
13787 /* Try to load address using shorter movl instead of movabs.
13788 We may want to support movq for kernel mode, but the kernel does not use
13789 trampolines at the moment. */
13790 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13792 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13793 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13794 gen_int_mode (0xbb41, HImode));
13795 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13796 gen_lowpart (SImode, fnaddr));
13797 offset += 6;
13799 else
13801 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13802 gen_int_mode (0xbb49, HImode));
13803 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13804 fnaddr);
13805 offset += 10;
13807 /* Load static chain using movabs to r10. */
13808 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13809 gen_int_mode (0xba49, HImode));
13810 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13811 cxt);
13812 offset += 10;
13813 /* Jump to r11.  */
13814 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13815 gen_int_mode (0xff49, HImode));
13816 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13817 gen_int_mode (0xe3, QImode));
13818 offset += 3;
13819 gcc_assert (offset <= TRAMPOLINE_SIZE);
13822 #ifdef ENABLE_EXECUTE_STACK
13823 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13824 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13825 #endif
13828 /* Codes for all the SSE/MMX builtins. */
13829 enum ix86_builtins
13831 IX86_BUILTIN_ADDPS,
13832 IX86_BUILTIN_ADDSS,
13833 IX86_BUILTIN_DIVPS,
13834 IX86_BUILTIN_DIVSS,
13835 IX86_BUILTIN_MULPS,
13836 IX86_BUILTIN_MULSS,
13837 IX86_BUILTIN_SUBPS,
13838 IX86_BUILTIN_SUBSS,
13840 IX86_BUILTIN_CMPEQPS,
13841 IX86_BUILTIN_CMPLTPS,
13842 IX86_BUILTIN_CMPLEPS,
13843 IX86_BUILTIN_CMPGTPS,
13844 IX86_BUILTIN_CMPGEPS,
13845 IX86_BUILTIN_CMPNEQPS,
13846 IX86_BUILTIN_CMPNLTPS,
13847 IX86_BUILTIN_CMPNLEPS,
13848 IX86_BUILTIN_CMPNGTPS,
13849 IX86_BUILTIN_CMPNGEPS,
13850 IX86_BUILTIN_CMPORDPS,
13851 IX86_BUILTIN_CMPUNORDPS,
13852 IX86_BUILTIN_CMPEQSS,
13853 IX86_BUILTIN_CMPLTSS,
13854 IX86_BUILTIN_CMPLESS,
13855 IX86_BUILTIN_CMPNEQSS,
13856 IX86_BUILTIN_CMPNLTSS,
13857 IX86_BUILTIN_CMPNLESS,
13858 IX86_BUILTIN_CMPNGTSS,
13859 IX86_BUILTIN_CMPNGESS,
13860 IX86_BUILTIN_CMPORDSS,
13861 IX86_BUILTIN_CMPUNORDSS,
13863 IX86_BUILTIN_COMIEQSS,
13864 IX86_BUILTIN_COMILTSS,
13865 IX86_BUILTIN_COMILESS,
13866 IX86_BUILTIN_COMIGTSS,
13867 IX86_BUILTIN_COMIGESS,
13868 IX86_BUILTIN_COMINEQSS,
13869 IX86_BUILTIN_UCOMIEQSS,
13870 IX86_BUILTIN_UCOMILTSS,
13871 IX86_BUILTIN_UCOMILESS,
13872 IX86_BUILTIN_UCOMIGTSS,
13873 IX86_BUILTIN_UCOMIGESS,
13874 IX86_BUILTIN_UCOMINEQSS,
13876 IX86_BUILTIN_CVTPI2PS,
13877 IX86_BUILTIN_CVTPS2PI,
13878 IX86_BUILTIN_CVTSI2SS,
13879 IX86_BUILTIN_CVTSI642SS,
13880 IX86_BUILTIN_CVTSS2SI,
13881 IX86_BUILTIN_CVTSS2SI64,
13882 IX86_BUILTIN_CVTTPS2PI,
13883 IX86_BUILTIN_CVTTSS2SI,
13884 IX86_BUILTIN_CVTTSS2SI64,
13886 IX86_BUILTIN_MAXPS,
13887 IX86_BUILTIN_MAXSS,
13888 IX86_BUILTIN_MINPS,
13889 IX86_BUILTIN_MINSS,
13891 IX86_BUILTIN_LOADUPS,
13892 IX86_BUILTIN_STOREUPS,
13893 IX86_BUILTIN_MOVSS,
13895 IX86_BUILTIN_MOVHLPS,
13896 IX86_BUILTIN_MOVLHPS,
13897 IX86_BUILTIN_LOADHPS,
13898 IX86_BUILTIN_LOADLPS,
13899 IX86_BUILTIN_STOREHPS,
13900 IX86_BUILTIN_STORELPS,
13902 IX86_BUILTIN_MASKMOVQ,
13903 IX86_BUILTIN_MOVMSKPS,
13904 IX86_BUILTIN_PMOVMSKB,
13906 IX86_BUILTIN_MOVNTPS,
13907 IX86_BUILTIN_MOVNTQ,
13909 IX86_BUILTIN_LOADDQU,
13910 IX86_BUILTIN_STOREDQU,
13912 IX86_BUILTIN_PACKSSWB,
13913 IX86_BUILTIN_PACKSSDW,
13914 IX86_BUILTIN_PACKUSWB,
13916 IX86_BUILTIN_PADDB,
13917 IX86_BUILTIN_PADDW,
13918 IX86_BUILTIN_PADDD,
13919 IX86_BUILTIN_PADDQ,
13920 IX86_BUILTIN_PADDSB,
13921 IX86_BUILTIN_PADDSW,
13922 IX86_BUILTIN_PADDUSB,
13923 IX86_BUILTIN_PADDUSW,
13924 IX86_BUILTIN_PSUBB,
13925 IX86_BUILTIN_PSUBW,
13926 IX86_BUILTIN_PSUBD,
13927 IX86_BUILTIN_PSUBQ,
13928 IX86_BUILTIN_PSUBSB,
13929 IX86_BUILTIN_PSUBSW,
13930 IX86_BUILTIN_PSUBUSB,
13931 IX86_BUILTIN_PSUBUSW,
13933 IX86_BUILTIN_PAND,
13934 IX86_BUILTIN_PANDN,
13935 IX86_BUILTIN_POR,
13936 IX86_BUILTIN_PXOR,
13938 IX86_BUILTIN_PAVGB,
13939 IX86_BUILTIN_PAVGW,
13941 IX86_BUILTIN_PCMPEQB,
13942 IX86_BUILTIN_PCMPEQW,
13943 IX86_BUILTIN_PCMPEQD,
13944 IX86_BUILTIN_PCMPGTB,
13945 IX86_BUILTIN_PCMPGTW,
13946 IX86_BUILTIN_PCMPGTD,
13948 IX86_BUILTIN_PMADDWD,
13950 IX86_BUILTIN_PMAXSW,
13951 IX86_BUILTIN_PMAXUB,
13952 IX86_BUILTIN_PMINSW,
13953 IX86_BUILTIN_PMINUB,
13955 IX86_BUILTIN_PMULHUW,
13956 IX86_BUILTIN_PMULHW,
13957 IX86_BUILTIN_PMULLW,
13959 IX86_BUILTIN_PSADBW,
13960 IX86_BUILTIN_PSHUFW,
13962 IX86_BUILTIN_PSLLW,
13963 IX86_BUILTIN_PSLLD,
13964 IX86_BUILTIN_PSLLQ,
13965 IX86_BUILTIN_PSRAW,
13966 IX86_BUILTIN_PSRAD,
13967 IX86_BUILTIN_PSRLW,
13968 IX86_BUILTIN_PSRLD,
13969 IX86_BUILTIN_PSRLQ,
13970 IX86_BUILTIN_PSLLWI,
13971 IX86_BUILTIN_PSLLDI,
13972 IX86_BUILTIN_PSLLQI,
13973 IX86_BUILTIN_PSRAWI,
13974 IX86_BUILTIN_PSRADI,
13975 IX86_BUILTIN_PSRLWI,
13976 IX86_BUILTIN_PSRLDI,
13977 IX86_BUILTIN_PSRLQI,
13979 IX86_BUILTIN_PUNPCKHBW,
13980 IX86_BUILTIN_PUNPCKHWD,
13981 IX86_BUILTIN_PUNPCKHDQ,
13982 IX86_BUILTIN_PUNPCKLBW,
13983 IX86_BUILTIN_PUNPCKLWD,
13984 IX86_BUILTIN_PUNPCKLDQ,
13986 IX86_BUILTIN_SHUFPS,
13988 IX86_BUILTIN_RCPPS,
13989 IX86_BUILTIN_RCPSS,
13990 IX86_BUILTIN_RSQRTPS,
13991 IX86_BUILTIN_RSQRTSS,
13992 IX86_BUILTIN_SQRTPS,
13993 IX86_BUILTIN_SQRTSS,
13995 IX86_BUILTIN_UNPCKHPS,
13996 IX86_BUILTIN_UNPCKLPS,
13998 IX86_BUILTIN_ANDPS,
13999 IX86_BUILTIN_ANDNPS,
14000 IX86_BUILTIN_ORPS,
14001 IX86_BUILTIN_XORPS,
14003 IX86_BUILTIN_EMMS,
14004 IX86_BUILTIN_LDMXCSR,
14005 IX86_BUILTIN_STMXCSR,
14006 IX86_BUILTIN_SFENCE,
14008 /* 3DNow! Original */
14009 IX86_BUILTIN_FEMMS,
14010 IX86_BUILTIN_PAVGUSB,
14011 IX86_BUILTIN_PF2ID,
14012 IX86_BUILTIN_PFACC,
14013 IX86_BUILTIN_PFADD,
14014 IX86_BUILTIN_PFCMPEQ,
14015 IX86_BUILTIN_PFCMPGE,
14016 IX86_BUILTIN_PFCMPGT,
14017 IX86_BUILTIN_PFMAX,
14018 IX86_BUILTIN_PFMIN,
14019 IX86_BUILTIN_PFMUL,
14020 IX86_BUILTIN_PFRCP,
14021 IX86_BUILTIN_PFRCPIT1,
14022 IX86_BUILTIN_PFRCPIT2,
14023 IX86_BUILTIN_PFRSQIT1,
14024 IX86_BUILTIN_PFRSQRT,
14025 IX86_BUILTIN_PFSUB,
14026 IX86_BUILTIN_PFSUBR,
14027 IX86_BUILTIN_PI2FD,
14028 IX86_BUILTIN_PMULHRW,
14030 /* 3DNow! Athlon Extensions */
14031 IX86_BUILTIN_PF2IW,
14032 IX86_BUILTIN_PFNACC,
14033 IX86_BUILTIN_PFPNACC,
14034 IX86_BUILTIN_PI2FW,
14035 IX86_BUILTIN_PSWAPDSI,
14036 IX86_BUILTIN_PSWAPDSF,
14038 /* SSE2 */
14039 IX86_BUILTIN_ADDPD,
14040 IX86_BUILTIN_ADDSD,
14041 IX86_BUILTIN_DIVPD,
14042 IX86_BUILTIN_DIVSD,
14043 IX86_BUILTIN_MULPD,
14044 IX86_BUILTIN_MULSD,
14045 IX86_BUILTIN_SUBPD,
14046 IX86_BUILTIN_SUBSD,
14048 IX86_BUILTIN_CMPEQPD,
14049 IX86_BUILTIN_CMPLTPD,
14050 IX86_BUILTIN_CMPLEPD,
14051 IX86_BUILTIN_CMPGTPD,
14052 IX86_BUILTIN_CMPGEPD,
14053 IX86_BUILTIN_CMPNEQPD,
14054 IX86_BUILTIN_CMPNLTPD,
14055 IX86_BUILTIN_CMPNLEPD,
14056 IX86_BUILTIN_CMPNGTPD,
14057 IX86_BUILTIN_CMPNGEPD,
14058 IX86_BUILTIN_CMPORDPD,
14059 IX86_BUILTIN_CMPUNORDPD,
14060 IX86_BUILTIN_CMPNEPD,
14061 IX86_BUILTIN_CMPEQSD,
14062 IX86_BUILTIN_CMPLTSD,
14063 IX86_BUILTIN_CMPLESD,
14064 IX86_BUILTIN_CMPNEQSD,
14065 IX86_BUILTIN_CMPNLTSD,
14066 IX86_BUILTIN_CMPNLESD,
14067 IX86_BUILTIN_CMPORDSD,
14068 IX86_BUILTIN_CMPUNORDSD,
14069 IX86_BUILTIN_CMPNESD,
14071 IX86_BUILTIN_COMIEQSD,
14072 IX86_BUILTIN_COMILTSD,
14073 IX86_BUILTIN_COMILESD,
14074 IX86_BUILTIN_COMIGTSD,
14075 IX86_BUILTIN_COMIGESD,
14076 IX86_BUILTIN_COMINEQSD,
14077 IX86_BUILTIN_UCOMIEQSD,
14078 IX86_BUILTIN_UCOMILTSD,
14079 IX86_BUILTIN_UCOMILESD,
14080 IX86_BUILTIN_UCOMIGTSD,
14081 IX86_BUILTIN_UCOMIGESD,
14082 IX86_BUILTIN_UCOMINEQSD,
14084 IX86_BUILTIN_MAXPD,
14085 IX86_BUILTIN_MAXSD,
14086 IX86_BUILTIN_MINPD,
14087 IX86_BUILTIN_MINSD,
14089 IX86_BUILTIN_ANDPD,
14090 IX86_BUILTIN_ANDNPD,
14091 IX86_BUILTIN_ORPD,
14092 IX86_BUILTIN_XORPD,
14094 IX86_BUILTIN_SQRTPD,
14095 IX86_BUILTIN_SQRTSD,
14097 IX86_BUILTIN_UNPCKHPD,
14098 IX86_BUILTIN_UNPCKLPD,
14100 IX86_BUILTIN_SHUFPD,
14102 IX86_BUILTIN_LOADUPD,
14103 IX86_BUILTIN_STOREUPD,
14104 IX86_BUILTIN_MOVSD,
14106 IX86_BUILTIN_LOADHPD,
14107 IX86_BUILTIN_LOADLPD,
14109 IX86_BUILTIN_CVTDQ2PD,
14110 IX86_BUILTIN_CVTDQ2PS,
14112 IX86_BUILTIN_CVTPD2DQ,
14113 IX86_BUILTIN_CVTPD2PI,
14114 IX86_BUILTIN_CVTPD2PS,
14115 IX86_BUILTIN_CVTTPD2DQ,
14116 IX86_BUILTIN_CVTTPD2PI,
14118 IX86_BUILTIN_CVTPI2PD,
14119 IX86_BUILTIN_CVTSI2SD,
14120 IX86_BUILTIN_CVTSI642SD,
14122 IX86_BUILTIN_CVTSD2SI,
14123 IX86_BUILTIN_CVTSD2SI64,
14124 IX86_BUILTIN_CVTSD2SS,
14125 IX86_BUILTIN_CVTSS2SD,
14126 IX86_BUILTIN_CVTTSD2SI,
14127 IX86_BUILTIN_CVTTSD2SI64,
14129 IX86_BUILTIN_CVTPS2DQ,
14130 IX86_BUILTIN_CVTPS2PD,
14131 IX86_BUILTIN_CVTTPS2DQ,
14133 IX86_BUILTIN_MOVNTI,
14134 IX86_BUILTIN_MOVNTPD,
14135 IX86_BUILTIN_MOVNTDQ,
14137 /* SSE2 MMX */
14138 IX86_BUILTIN_MASKMOVDQU,
14139 IX86_BUILTIN_MOVMSKPD,
14140 IX86_BUILTIN_PMOVMSKB128,
14142 IX86_BUILTIN_PACKSSWB128,
14143 IX86_BUILTIN_PACKSSDW128,
14144 IX86_BUILTIN_PACKUSWB128,
14146 IX86_BUILTIN_PADDB128,
14147 IX86_BUILTIN_PADDW128,
14148 IX86_BUILTIN_PADDD128,
14149 IX86_BUILTIN_PADDQ128,
14150 IX86_BUILTIN_PADDSB128,
14151 IX86_BUILTIN_PADDSW128,
14152 IX86_BUILTIN_PADDUSB128,
14153 IX86_BUILTIN_PADDUSW128,
14154 IX86_BUILTIN_PSUBB128,
14155 IX86_BUILTIN_PSUBW128,
14156 IX86_BUILTIN_PSUBD128,
14157 IX86_BUILTIN_PSUBQ128,
14158 IX86_BUILTIN_PSUBSB128,
14159 IX86_BUILTIN_PSUBSW128,
14160 IX86_BUILTIN_PSUBUSB128,
14161 IX86_BUILTIN_PSUBUSW128,
14163 IX86_BUILTIN_PAND128,
14164 IX86_BUILTIN_PANDN128,
14165 IX86_BUILTIN_POR128,
14166 IX86_BUILTIN_PXOR128,
14168 IX86_BUILTIN_PAVGB128,
14169 IX86_BUILTIN_PAVGW128,
14171 IX86_BUILTIN_PCMPEQB128,
14172 IX86_BUILTIN_PCMPEQW128,
14173 IX86_BUILTIN_PCMPEQD128,
14174 IX86_BUILTIN_PCMPGTB128,
14175 IX86_BUILTIN_PCMPGTW128,
14176 IX86_BUILTIN_PCMPGTD128,
14178 IX86_BUILTIN_PMADDWD128,
14180 IX86_BUILTIN_PMAXSW128,
14181 IX86_BUILTIN_PMAXUB128,
14182 IX86_BUILTIN_PMINSW128,
14183 IX86_BUILTIN_PMINUB128,
14185 IX86_BUILTIN_PMULUDQ,
14186 IX86_BUILTIN_PMULUDQ128,
14187 IX86_BUILTIN_PMULHUW128,
14188 IX86_BUILTIN_PMULHW128,
14189 IX86_BUILTIN_PMULLW128,
14191 IX86_BUILTIN_PSADBW128,
14192 IX86_BUILTIN_PSHUFHW,
14193 IX86_BUILTIN_PSHUFLW,
14194 IX86_BUILTIN_PSHUFD,
14196 IX86_BUILTIN_PSLLW128,
14197 IX86_BUILTIN_PSLLD128,
14198 IX86_BUILTIN_PSLLQ128,
14199 IX86_BUILTIN_PSRAW128,
14200 IX86_BUILTIN_PSRAD128,
14201 IX86_BUILTIN_PSRLW128,
14202 IX86_BUILTIN_PSRLD128,
14203 IX86_BUILTIN_PSRLQ128,
14204 IX86_BUILTIN_PSLLDQI128,
14205 IX86_BUILTIN_PSLLWI128,
14206 IX86_BUILTIN_PSLLDI128,
14207 IX86_BUILTIN_PSLLQI128,
14208 IX86_BUILTIN_PSRAWI128,
14209 IX86_BUILTIN_PSRADI128,
14210 IX86_BUILTIN_PSRLDQI128,
14211 IX86_BUILTIN_PSRLWI128,
14212 IX86_BUILTIN_PSRLDI128,
14213 IX86_BUILTIN_PSRLQI128,
14215 IX86_BUILTIN_PUNPCKHBW128,
14216 IX86_BUILTIN_PUNPCKHWD128,
14217 IX86_BUILTIN_PUNPCKHDQ128,
14218 IX86_BUILTIN_PUNPCKHQDQ128,
14219 IX86_BUILTIN_PUNPCKLBW128,
14220 IX86_BUILTIN_PUNPCKLWD128,
14221 IX86_BUILTIN_PUNPCKLDQ128,
14222 IX86_BUILTIN_PUNPCKLQDQ128,
14224 IX86_BUILTIN_CLFLUSH,
14225 IX86_BUILTIN_MFENCE,
14226 IX86_BUILTIN_LFENCE,
14228 /* Prescott New Instructions. */
14229 IX86_BUILTIN_ADDSUBPS,
14230 IX86_BUILTIN_HADDPS,
14231 IX86_BUILTIN_HSUBPS,
14232 IX86_BUILTIN_MOVSHDUP,
14233 IX86_BUILTIN_MOVSLDUP,
14234 IX86_BUILTIN_ADDSUBPD,
14235 IX86_BUILTIN_HADDPD,
14236 IX86_BUILTIN_HSUBPD,
14237 IX86_BUILTIN_LDDQU,
14239 IX86_BUILTIN_MONITOR,
14240 IX86_BUILTIN_MWAIT,
14242 IX86_BUILTIN_VEC_INIT_V2SI,
14243 IX86_BUILTIN_VEC_INIT_V4HI,
14244 IX86_BUILTIN_VEC_INIT_V8QI,
14245 IX86_BUILTIN_VEC_EXT_V2DF,
14246 IX86_BUILTIN_VEC_EXT_V2DI,
14247 IX86_BUILTIN_VEC_EXT_V4SF,
14248 IX86_BUILTIN_VEC_EXT_V4SI,
14249 IX86_BUILTIN_VEC_EXT_V8HI,
14250 IX86_BUILTIN_VEC_EXT_V2SI,
14251 IX86_BUILTIN_VEC_EXT_V4HI,
14252 IX86_BUILTIN_VEC_SET_V8HI,
14253 IX86_BUILTIN_VEC_SET_V4HI,
14255 /* SSE2 ABI functions. */
14256 IX86_BUILTIN_SSE2_ACOS,
14257 IX86_BUILTIN_SSE2_ACOSF,
14258 IX86_BUILTIN_SSE2_ASIN,
14259 IX86_BUILTIN_SSE2_ASINF,
14260 IX86_BUILTIN_SSE2_ATAN,
14261 IX86_BUILTIN_SSE2_ATANF,
14262 IX86_BUILTIN_SSE2_ATAN2,
14263 IX86_BUILTIN_SSE2_ATAN2F,
14264 IX86_BUILTIN_SSE2_COS,
14265 IX86_BUILTIN_SSE2_COSF,
14266 IX86_BUILTIN_SSE2_EXP,
14267 IX86_BUILTIN_SSE2_EXPF,
14268 IX86_BUILTIN_SSE2_LOG10,
14269 IX86_BUILTIN_SSE2_LOG10F,
14270 IX86_BUILTIN_SSE2_LOG,
14271 IX86_BUILTIN_SSE2_LOGF,
14272 IX86_BUILTIN_SSE2_SIN,
14273 IX86_BUILTIN_SSE2_SINF,
14274 IX86_BUILTIN_SSE2_TAN,
14275 IX86_BUILTIN_SSE2_TANF,
14277 IX86_BUILTIN_MAX
14280 #define def_builtin(MASK, NAME, TYPE, CODE) \
14281 do { \
14282 if ((MASK) & target_flags \
14283 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14284 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14285 NULL, NULL_TREE); \
14286 } while (0)
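/* Usage sketch (editorial): builtins are registered from the init routines
   below with calls of the form

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The MASK argument keeps the builtin from being created unless the
   corresponding ISA bit is present in target_flags, and MASK_64BIT
   entries are additionally skipped on 32-bit targets.  */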
14288 /* Bits for builtin_description.flag. */
14290 /* Set when we don't support the comparison natively, and should
14291 swap_comparison in order to support it. */
14292 #define BUILTIN_DESC_SWAP_OPERANDS 1
14294 struct builtin_description
14296 const unsigned int mask;
14297 const enum insn_code icode;
14298 const char *const name;
14299 const enum ix86_builtins code;
14300 const enum rtx_code comparison;
14301 const unsigned int flag;
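/* How these descriptors are consumed (editorial summary): each entry in
   the bdesc_* tables below ties a __builtin_ia32_* name (or 0 when the
   name is supplied elsewhere) to the insn pattern that implements it.
   For the comparison tables the "comparison" field supplies the rtx code
   to plug into the pattern, and BUILTIN_DESC_SWAP_OPERANDS marks
   predicates that are only available with the operands reversed.  */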
14304 static const struct builtin_description bdesc_comi[] =
14306 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14307 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14308 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14309 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14310 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14311 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14312 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14313 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14314 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14315 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14316 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14317 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14318 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14319 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14320 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14321 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14322 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14323 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14324 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14325 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14326 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14327 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14328 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14329 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14332 static const struct builtin_description bdesc_2arg[] =
14334 /* SSE */
14335 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14336 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14337 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14338 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14339 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14340 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14341 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14342 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14344 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14345 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14346 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14347 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14348 BUILTIN_DESC_SWAP_OPERANDS },
14349 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14350 BUILTIN_DESC_SWAP_OPERANDS },
14351 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14352 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14353 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14354 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14355 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14356 BUILTIN_DESC_SWAP_OPERANDS },
14357 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14358 BUILTIN_DESC_SWAP_OPERANDS },
14359 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14360 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14361 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14362 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14363 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14364 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14365 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14366 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14367 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14368 BUILTIN_DESC_SWAP_OPERANDS },
14369 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14370 BUILTIN_DESC_SWAP_OPERANDS },
14371 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14373 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14374 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14375 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14376 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14378 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14379 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14380 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14381 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14383 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14384 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14385 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14386 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14387 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14389 /* MMX */
14390 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14391 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14392 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14393 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14394 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14395 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14396 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14397 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14399 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14400 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14401 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14402 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14403 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14404 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14405 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14406 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14408 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14409 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14410 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14412 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14413 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14414 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14415 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14417 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14418 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14420 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14421 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14422 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14423 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14424 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14425 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14427 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14428 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14429 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14430 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14432 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14433 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14434 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14435 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14436 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14437 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14439 /* Special. */
14440 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14441 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14442 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14444 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14445 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14446 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14448 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14449 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14450 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14451 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14452 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14453 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14455 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14456 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14457 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14458 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14459 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14460 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14462 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14463 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14464 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14465 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14467 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14468 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14470 /* SSE2 */
14471 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14472 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14473 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14474 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14475 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14476 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14477 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14478 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14480 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14481 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14482 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14483 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14484 BUILTIN_DESC_SWAP_OPERANDS },
14485 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14486 BUILTIN_DESC_SWAP_OPERANDS },
14487 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14488 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14489 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14490 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14491 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14492 BUILTIN_DESC_SWAP_OPERANDS },
14493 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14494 BUILTIN_DESC_SWAP_OPERANDS },
14495 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14496 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14497 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14498 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14499 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14500 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14501 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14502 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14503 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14505 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14506 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14507 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14508 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14510 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14511 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14512 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14513 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14515 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14516 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14517 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14519 /* SSE2 MMX */
14520 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14521 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14522 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14523 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14524 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14525 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14526 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14527 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14529 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14530 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14531 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14532 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14533 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14534 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14535 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14536 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14538 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14539 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14541 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14542 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14543 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14544 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14546 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14547 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14549 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14550 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14551 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14552 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14553 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14554 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14556 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14557 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14558 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14559 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14561 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14562 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14563 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14564 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14565 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14566 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14567 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14568 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14570 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14571 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14572 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14574 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14575 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14578 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14580 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14581 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14582 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14584 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14585 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14586 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14588 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14589 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14591 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14594 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14595 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14598 /* SSE3 MMX */
14599 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14600 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14601 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14602 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14603 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14604 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14607 static const struct builtin_description bdesc_1arg[] =
14609 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14610 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14612 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14613 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14614 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14616 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14617 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14618 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14619 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14620 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14621 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14623 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14624 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14626 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14628 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14629 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14631 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14632 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14633 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14634 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14635 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14637 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14639 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14640 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14641 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14642 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14644 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14645 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14646 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14648 /* SSE3 */
14649 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14650 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14653 static void
14654 ix86_init_builtins (void)
14656 if (TARGET_MMX)
14657 ix86_init_mmx_sse_builtins ();
14658 if (TARGET_SSE2)
14659 ix86_init_sse_abi_builtins ();
14662 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14663 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14664 builtins. */
14665 static void
14666 ix86_init_mmx_sse_builtins (void)
14668 const struct builtin_description * d;
14669 size_t i;
14671 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14672 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14673 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14674 tree V2DI_type_node
14675 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14676 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14677 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14678 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14679 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14680 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14681 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14683 tree pchar_type_node = build_pointer_type (char_type_node);
14684 tree pcchar_type_node = build_pointer_type (
14685 build_type_variant (char_type_node, 1, 0));
14686 tree pfloat_type_node = build_pointer_type (float_type_node);
14687 tree pcfloat_type_node = build_pointer_type (
14688 build_type_variant (float_type_node, 1, 0));
14689 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14690 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14691 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14693 /* Comparisons. */
14694 tree int_ftype_v4sf_v4sf
14695 = build_function_type_list (integer_type_node,
14696 V4SF_type_node, V4SF_type_node, NULL_TREE);
14697 tree v4si_ftype_v4sf_v4sf
14698 = build_function_type_list (V4SI_type_node,
14699 V4SF_type_node, V4SF_type_node, NULL_TREE);
14700 /* MMX/SSE/integer conversions. */
14701 tree int_ftype_v4sf
14702 = build_function_type_list (integer_type_node,
14703 V4SF_type_node, NULL_TREE);
14704 tree int64_ftype_v4sf
14705 = build_function_type_list (long_long_integer_type_node,
14706 V4SF_type_node, NULL_TREE);
14707 tree int_ftype_v8qi
14708 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14709 tree v4sf_ftype_v4sf_int
14710 = build_function_type_list (V4SF_type_node,
14711 V4SF_type_node, integer_type_node, NULL_TREE);
14712 tree v4sf_ftype_v4sf_int64
14713 = build_function_type_list (V4SF_type_node,
14714 V4SF_type_node, long_long_integer_type_node,
14715 NULL_TREE);
14716 tree v4sf_ftype_v4sf_v2si
14717 = build_function_type_list (V4SF_type_node,
14718 V4SF_type_node, V2SI_type_node, NULL_TREE);
14720 /* Miscellaneous. */
14721 tree v8qi_ftype_v4hi_v4hi
14722 = build_function_type_list (V8QI_type_node,
14723 V4HI_type_node, V4HI_type_node, NULL_TREE);
14724 tree v4hi_ftype_v2si_v2si
14725 = build_function_type_list (V4HI_type_node,
14726 V2SI_type_node, V2SI_type_node, NULL_TREE);
14727 tree v4sf_ftype_v4sf_v4sf_int
14728 = build_function_type_list (V4SF_type_node,
14729 V4SF_type_node, V4SF_type_node,
14730 integer_type_node, NULL_TREE);
14731 tree v2si_ftype_v4hi_v4hi
14732 = build_function_type_list (V2SI_type_node,
14733 V4HI_type_node, V4HI_type_node, NULL_TREE);
14734 tree v4hi_ftype_v4hi_int
14735 = build_function_type_list (V4HI_type_node,
14736 V4HI_type_node, integer_type_node, NULL_TREE);
14737 tree v4hi_ftype_v4hi_di
14738 = build_function_type_list (V4HI_type_node,
14739 V4HI_type_node, long_long_unsigned_type_node,
14740 NULL_TREE);
14741 tree v2si_ftype_v2si_di
14742 = build_function_type_list (V2SI_type_node,
14743 V2SI_type_node, long_long_unsigned_type_node,
14744 NULL_TREE);
14745 tree void_ftype_void
14746 = build_function_type (void_type_node, void_list_node);
14747 tree void_ftype_unsigned
14748 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14749 tree void_ftype_unsigned_unsigned
14750 = build_function_type_list (void_type_node, unsigned_type_node,
14751 unsigned_type_node, NULL_TREE);
14752 tree void_ftype_pcvoid_unsigned_unsigned
14753 = build_function_type_list (void_type_node, const_ptr_type_node,
14754 unsigned_type_node, unsigned_type_node,
14755 NULL_TREE);
14756 tree unsigned_ftype_void
14757 = build_function_type (unsigned_type_node, void_list_node);
14758 tree v2si_ftype_v4sf
14759 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14760 /* Loads/stores. */
14761 tree void_ftype_v8qi_v8qi_pchar
14762 = build_function_type_list (void_type_node,
14763 V8QI_type_node, V8QI_type_node,
14764 pchar_type_node, NULL_TREE);
14765 tree v4sf_ftype_pcfloat
14766 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14767 /* @@@ the type is bogus */
14768 tree v4sf_ftype_v4sf_pv2si
14769 = build_function_type_list (V4SF_type_node,
14770 V4SF_type_node, pv2si_type_node, NULL_TREE);
14771 tree void_ftype_pv2si_v4sf
14772 = build_function_type_list (void_type_node,
14773 pv2si_type_node, V4SF_type_node, NULL_TREE);
14774 tree void_ftype_pfloat_v4sf
14775 = build_function_type_list (void_type_node,
14776 pfloat_type_node, V4SF_type_node, NULL_TREE);
14777 tree void_ftype_pdi_di
14778 = build_function_type_list (void_type_node,
14779 pdi_type_node, long_long_unsigned_type_node,
14780 NULL_TREE);
14781 tree void_ftype_pv2di_v2di
14782 = build_function_type_list (void_type_node,
14783 pv2di_type_node, V2DI_type_node, NULL_TREE);
14784 /* Normal vector unops. */
14785 tree v4sf_ftype_v4sf
14786 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14788 /* Normal vector binops. */
14789 tree v4sf_ftype_v4sf_v4sf
14790 = build_function_type_list (V4SF_type_node,
14791 V4SF_type_node, V4SF_type_node, NULL_TREE);
14792 tree v8qi_ftype_v8qi_v8qi
14793 = build_function_type_list (V8QI_type_node,
14794 V8QI_type_node, V8QI_type_node, NULL_TREE);
14795 tree v4hi_ftype_v4hi_v4hi
14796 = build_function_type_list (V4HI_type_node,
14797 V4HI_type_node, V4HI_type_node, NULL_TREE);
14798 tree v2si_ftype_v2si_v2si
14799 = build_function_type_list (V2SI_type_node,
14800 V2SI_type_node, V2SI_type_node, NULL_TREE);
14801 tree di_ftype_di_di
14802 = build_function_type_list (long_long_unsigned_type_node,
14803 long_long_unsigned_type_node,
14804 long_long_unsigned_type_node, NULL_TREE);
14806 tree v2si_ftype_v2sf
14807 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14808 tree v2sf_ftype_v2si
14809 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14810 tree v2si_ftype_v2si
14811 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14812 tree v2sf_ftype_v2sf
14813 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14814 tree v2sf_ftype_v2sf_v2sf
14815 = build_function_type_list (V2SF_type_node,
14816 V2SF_type_node, V2SF_type_node, NULL_TREE);
14817 tree v2si_ftype_v2sf_v2sf
14818 = build_function_type_list (V2SI_type_node,
14819 V2SF_type_node, V2SF_type_node, NULL_TREE);
14820 tree pint_type_node = build_pointer_type (integer_type_node);
14821 tree pdouble_type_node = build_pointer_type (double_type_node);
14822 tree pcdouble_type_node = build_pointer_type (
14823 build_type_variant (double_type_node, 1, 0));
14824 tree int_ftype_v2df_v2df
14825 = build_function_type_list (integer_type_node,
14826 V2DF_type_node, V2DF_type_node, NULL_TREE);
14828 tree void_ftype_pcvoid
14829 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14830 tree v4sf_ftype_v4si
14831 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14832 tree v4si_ftype_v4sf
14833 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14834 tree v2df_ftype_v4si
14835 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14836 tree v4si_ftype_v2df
14837 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14838 tree v2si_ftype_v2df
14839 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14840 tree v4sf_ftype_v2df
14841 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14842 tree v2df_ftype_v2si
14843 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14844 tree v2df_ftype_v4sf
14845 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14846 tree int_ftype_v2df
14847 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14848 tree int64_ftype_v2df
14849 = build_function_type_list (long_long_integer_type_node,
14850 V2DF_type_node, NULL_TREE);
14851 tree v2df_ftype_v2df_int
14852 = build_function_type_list (V2DF_type_node,
14853 V2DF_type_node, integer_type_node, NULL_TREE);
14854 tree v2df_ftype_v2df_int64
14855 = build_function_type_list (V2DF_type_node,
14856 V2DF_type_node, long_long_integer_type_node,
14857 NULL_TREE);
14858 tree v4sf_ftype_v4sf_v2df
14859 = build_function_type_list (V4SF_type_node,
14860 V4SF_type_node, V2DF_type_node, NULL_TREE);
14861 tree v2df_ftype_v2df_v4sf
14862 = build_function_type_list (V2DF_type_node,
14863 V2DF_type_node, V4SF_type_node, NULL_TREE);
14864 tree v2df_ftype_v2df_v2df_int
14865 = build_function_type_list (V2DF_type_node,
14866 V2DF_type_node, V2DF_type_node,
14867 integer_type_node,
14868 NULL_TREE);
14869 tree v2df_ftype_v2df_pcdouble
14870 = build_function_type_list (V2DF_type_node,
14871 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14872 tree void_ftype_pdouble_v2df
14873 = build_function_type_list (void_type_node,
14874 pdouble_type_node, V2DF_type_node, NULL_TREE);
14875 tree void_ftype_pint_int
14876 = build_function_type_list (void_type_node,
14877 pint_type_node, integer_type_node, NULL_TREE);
14878 tree void_ftype_v16qi_v16qi_pchar
14879 = build_function_type_list (void_type_node,
14880 V16QI_type_node, V16QI_type_node,
14881 pchar_type_node, NULL_TREE);
14882 tree v2df_ftype_pcdouble
14883 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14884 tree v2df_ftype_v2df_v2df
14885 = build_function_type_list (V2DF_type_node,
14886 V2DF_type_node, V2DF_type_node, NULL_TREE);
14887 tree v16qi_ftype_v16qi_v16qi
14888 = build_function_type_list (V16QI_type_node,
14889 V16QI_type_node, V16QI_type_node, NULL_TREE);
14890 tree v8hi_ftype_v8hi_v8hi
14891 = build_function_type_list (V8HI_type_node,
14892 V8HI_type_node, V8HI_type_node, NULL_TREE);
14893 tree v4si_ftype_v4si_v4si
14894 = build_function_type_list (V4SI_type_node,
14895 V4SI_type_node, V4SI_type_node, NULL_TREE);
14896 tree v2di_ftype_v2di_v2di
14897 = build_function_type_list (V2DI_type_node,
14898 V2DI_type_node, V2DI_type_node, NULL_TREE);
14899 tree v2di_ftype_v2df_v2df
14900 = build_function_type_list (V2DI_type_node,
14901 V2DF_type_node, V2DF_type_node, NULL_TREE);
14902 tree v2df_ftype_v2df
14903 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14904 tree v2di_ftype_v2di_int
14905 = build_function_type_list (V2DI_type_node,
14906 V2DI_type_node, integer_type_node, NULL_TREE);
14907 tree v4si_ftype_v4si_int
14908 = build_function_type_list (V4SI_type_node,
14909 V4SI_type_node, integer_type_node, NULL_TREE);
14910 tree v8hi_ftype_v8hi_int
14911 = build_function_type_list (V8HI_type_node,
14912 V8HI_type_node, integer_type_node, NULL_TREE);
14913 tree v8hi_ftype_v8hi_v2di
14914 = build_function_type_list (V8HI_type_node,
14915 V8HI_type_node, V2DI_type_node, NULL_TREE);
14916 tree v4si_ftype_v4si_v2di
14917 = build_function_type_list (V4SI_type_node,
14918 V4SI_type_node, V2DI_type_node, NULL_TREE);
14919 tree v4si_ftype_v8hi_v8hi
14920 = build_function_type_list (V4SI_type_node,
14921 V8HI_type_node, V8HI_type_node, NULL_TREE);
14922 tree di_ftype_v8qi_v8qi
14923 = build_function_type_list (long_long_unsigned_type_node,
14924 V8QI_type_node, V8QI_type_node, NULL_TREE);
14925 tree di_ftype_v2si_v2si
14926 = build_function_type_list (long_long_unsigned_type_node,
14927 V2SI_type_node, V2SI_type_node, NULL_TREE);
14928 tree v2di_ftype_v16qi_v16qi
14929 = build_function_type_list (V2DI_type_node,
14930 V16QI_type_node, V16QI_type_node, NULL_TREE);
14931 tree v2di_ftype_v4si_v4si
14932 = build_function_type_list (V2DI_type_node,
14933 V4SI_type_node, V4SI_type_node, NULL_TREE);
14934 tree int_ftype_v16qi
14935 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14936 tree v16qi_ftype_pcchar
14937 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14938 tree void_ftype_pchar_v16qi
14939 = build_function_type_list (void_type_node,
14940 pchar_type_node, V16QI_type_node, NULL_TREE);
14942 tree float80_type;
14943 tree float128_type;
14944 tree ftype;
14946 /* The __float80 type. */
14947 if (TYPE_MODE (long_double_type_node) == XFmode)
14948 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14949 "__float80");
14950 else
14952 /* The __float80 type. */
14953 float80_type = make_node (REAL_TYPE);
14954 TYPE_PRECISION (float80_type) = 80;
14955 layout_type (float80_type);
14956 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14959 if (TARGET_64BIT)
14961 float128_type = make_node (REAL_TYPE);
14962 TYPE_PRECISION (float128_type) = 128;
14963 layout_type (float128_type);
14964 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
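/* With these registrations the front end accepts __float80 (and, on
   64-bit targets, __float128) as type names; when long double already
   has XFmode the existing long double node is simply reused under the
   __float80 name.  */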
14967 /* Add all builtins that are more or less simple operations on two
14968 operands. */
14969 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14971 /* Use one of the operands; the target can have a different mode for
14972 mask-generating compares. */
14973 enum machine_mode mode;
14974 tree type;
14976 if (d->name == 0)
14977 continue;
14978 mode = insn_data[d->icode].operand[1].mode;
14980 switch (mode)
14982 case V16QImode:
14983 type = v16qi_ftype_v16qi_v16qi;
14984 break;
14985 case V8HImode:
14986 type = v8hi_ftype_v8hi_v8hi;
14987 break;
14988 case V4SImode:
14989 type = v4si_ftype_v4si_v4si;
14990 break;
14991 case V2DImode:
14992 type = v2di_ftype_v2di_v2di;
14993 break;
14994 case V2DFmode:
14995 type = v2df_ftype_v2df_v2df;
14996 break;
14997 case V4SFmode:
14998 type = v4sf_ftype_v4sf_v4sf;
14999 break;
15000 case V8QImode:
15001 type = v8qi_ftype_v8qi_v8qi;
15002 break;
15003 case V4HImode:
15004 type = v4hi_ftype_v4hi_v4hi;
15005 break;
15006 case V2SImode:
15007 type = v2si_ftype_v2si_v2si;
15008 break;
15009 case DImode:
15010 type = di_ftype_di_di;
15011 break;
15013 default:
15014 gcc_unreachable ();
15017 /* Override for comparisons. */
15018 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15019 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15020 type = v4si_ftype_v4sf_v4sf;
15022 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15023 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15024 type = v2di_ftype_v2df_v2df;
15026 def_builtin (d->mask, d->name, type, d->code);
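/* For illustration: a bdesc_2arg entry whose insn pattern works on
   V4SFmode operands (e.g. the builtin underlying _mm_add_ps) picks up
   the v4sf_ftype_v4sf_v4sf signature from the switch above, while the
   SSE/SSE2 mask-generating compares are overridden to return an
   integer vector of the same width.  */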
15029 /* Add the remaining MMX insns with somewhat more complicated types. */
15030 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15031 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15032 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15033 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15035 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15036 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15037 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15039 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15040 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15042 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15043 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15045 /* comi/ucomi insns. */
15046 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15047 if (d->mask == MASK_SSE2)
15048 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15049 else
15050 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15052 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15053 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15054 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15056 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15057 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15058 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15059 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15060 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15061 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15062 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15063 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15064 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15065 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15066 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15068 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15070 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15071 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15073 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15074 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15075 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15076 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15078 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15079 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15080 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15081 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15083 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15085 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15087 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15088 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15089 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15090 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15091 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15092 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15094 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
15096 /* Original 3DNow! */
15097 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15098 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15099 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15100 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15101 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15102 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15103 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15104 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15105 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15106 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15107 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15108 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15109 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15110 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15111 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15112 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15113 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15114 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15115 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15116 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15118 /* 3DNow! extension as used in the Athlon CPU. */
15119 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15120 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15121 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15122 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15123 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15124 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
15126 /* SSE2 */
15127 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15129 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15130 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15132 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15133 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15135 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15136 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15137 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15138 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15139 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15141 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15142 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15143 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15144 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15146 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15147 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15149 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15151 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15152 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15154 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15155 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15156 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15157 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15158 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15160 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15162 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15163 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15164 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15165 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15167 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15168 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15169 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15171 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15172 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15173 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15174 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15176 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15177 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15178 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15180 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15181 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15183 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15184 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15186 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15187 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15188 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15190 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15191 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15192 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15194 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15195 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15197 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15198 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15199 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15200 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15202 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15203 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15204 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15205 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15207 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15208 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
15210 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15212 /* Prescott New Instructions. */
15213 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15214 void_ftype_pcvoid_unsigned_unsigned,
15215 IX86_BUILTIN_MONITOR);
15216 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15217 void_ftype_unsigned_unsigned,
15218 IX86_BUILTIN_MWAIT);
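/* __builtin_ia32_monitor takes the address to be monitored plus two
   extension/hint words, and __builtin_ia32_mwait takes two such words;
   both are expanded to the SSE3 MONITOR/MWAIT patterns in
   ix86_expand_builtin below.  */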
15219 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15220 v4sf_ftype_v4sf,
15221 IX86_BUILTIN_MOVSHDUP);
15222 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15223 v4sf_ftype_v4sf,
15224 IX86_BUILTIN_MOVSLDUP);
15225 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15226 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
15228 /* Access to the vec_init patterns. */
15229 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15230 integer_type_node, NULL_TREE);
15231 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15232 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15234 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15235 short_integer_type_node,
15236 short_integer_type_node,
15237 short_integer_type_node, NULL_TREE);
15238 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15239 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15241 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15242 char_type_node, char_type_node,
15243 char_type_node, char_type_node,
15244 char_type_node, char_type_node,
15245 char_type_node, NULL_TREE);
15246 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15247 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
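/* Illustration, assuming the usual mmintrin.h mapping: intrinsics such
   as _mm_set_pi32 are built on these vec_init builtins, so MMX vector
   construction is only exposed to users who include the intrinsic
   header (see the comment above ix86_expand_vec_init_builtin below).  */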
15249 /* Access to the vec_extract patterns. */
15250 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15251 integer_type_node, NULL_TREE);
15252 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15253 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15255 ftype = build_function_type_list (long_long_integer_type_node,
15256 V2DI_type_node, integer_type_node,
15257 NULL_TREE);
15258 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15259 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15261 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15262 integer_type_node, NULL_TREE);
15263 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15264 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15266 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15267 integer_type_node, NULL_TREE);
15268 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15269 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15271 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15272 integer_type_node, NULL_TREE);
15273 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15274 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15276 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15277 integer_type_node, NULL_TREE);
15278 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15279 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15281 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15282 integer_type_node, NULL_TREE);
15283 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15284 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
15286 /* Access to the vec_set patterns. */
15287 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15288 intHI_type_node,
15289 integer_type_node, NULL_TREE);
15290 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15291 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15293 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15294 intHI_type_node,
15295 integer_type_node, NULL_TREE);
15296 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15297 ftype, IX86_BUILTIN_VEC_SET_V4HI);
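/* Similarly, the vec_ext and vec_set builtins are what intrinsics such
   as _mm_extract_pi16 and _mm_insert_pi16 are expected to expand to;
   the element-number argument must be a compile-time constant, which
   get_element_number below enforces.  */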
15299 #undef def_builtin
15301 /* Set up all the SSE ABI builtins that we may use to override
15302 the normal builtins. */
15303 static void
15304 ix86_init_sse_abi_builtins (void)
15306 tree dbl, flt, dbl2, flt2;
15308 /* Bail out in case the template definitions are not available. */
15309 if (! built_in_decls [BUILT_IN_SIN]
15310 || ! built_in_decls [BUILT_IN_SINF]
15311 || ! built_in_decls [BUILT_IN_ATAN2]
15312 || ! built_in_decls [BUILT_IN_ATAN2F])
15313 return;
15315 /* Build the function types as variants of the existing ones. */
15316 dbl = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SIN]));
15317 TYPE_ATTRIBUTES (dbl)
15318 = tree_cons (get_identifier ("sseregparm"),
15319 NULL_TREE, TYPE_ATTRIBUTES (dbl));
15320 flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
15321 TYPE_ATTRIBUTES (flt)
15322 = tree_cons (get_identifier ("sseregparm"),
15323 NULL_TREE, TYPE_ATTRIBUTES (flt));
15324 dbl2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2]));
15325 TYPE_ATTRIBUTES (dbl2)
15326 = tree_cons (get_identifier ("sseregparm"),
15327 NULL_TREE, TYPE_ATTRIBUTES (dbl2));
15328 flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
15329 TYPE_ATTRIBUTES (flt2)
15330 = tree_cons (get_identifier ("sseregparm"),
15331 NULL_TREE, TYPE_ATTRIBUTES (flt2));
15333 #define def_builtin(capname, name, type) \
15334 ix86_builtin_function_variants [BUILT_IN_ ## capname] \
15335 = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
15336 IX86_BUILTIN_SSE2_ ## capname, \
15337 BUILT_IN_NORMAL, \
15338 "__libm_sse2_" # name, NULL_TREE)
15340 def_builtin (ACOS, acos, dbl);
15341 def_builtin (ACOSF, acosf, flt);
15342 def_builtin (ASIN, asin, dbl);
15343 def_builtin (ASINF, asinf, flt);
15344 def_builtin (ATAN, atan, dbl);
15345 def_builtin (ATANF, atanf, flt);
15346 def_builtin (ATAN2, atan2, dbl2);
15347 def_builtin (ATAN2F, atan2f, flt2);
15348 def_builtin (COS, cos, dbl);
15349 def_builtin (COSF, cosf, flt);
15350 def_builtin (EXP, exp, dbl);
15351 def_builtin (EXPF, expf, flt);
15352 def_builtin (LOG10, log10, dbl);
15353 def_builtin (LOG10F, log10f, flt);
15354 def_builtin (LOG, log, dbl);
15355 def_builtin (LOGF, logf, flt);
15356 def_builtin (SIN, sin, dbl);
15357 def_builtin (SINF, sinf, flt);
15358 def_builtin (TAN, tan, dbl);
15359 def_builtin (TANF, tanf, flt);
15361 #undef def_builtin
15364 /* Errors in the source file can cause expand_expr to return const0_rtx
15365 where we expect a vector. To avoid crashing, use one of the vector
15366 clear instructions. */
15367 static rtx
15368 safe_vector_operand (rtx x, enum machine_mode mode)
15370 if (x == const0_rtx)
15371 x = CONST0_RTX (mode);
15372 return x;
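/* E.g. if an erroneous argument expanded to (const_int 0) where a
   V4SFmode value was expected, this substitutes CONST0_RTX (V4SFmode)
   so the insn predicates below still match.  */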
15375 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15377 static rtx
15378 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15380 rtx pat, xops[3];
15381 tree arg0 = TREE_VALUE (arglist);
15382 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15383 rtx op0 = expand_normal (arg0);
15384 rtx op1 = expand_normal (arg1);
15385 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15386 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15387 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15389 if (VECTOR_MODE_P (mode0))
15390 op0 = safe_vector_operand (op0, mode0);
15391 if (VECTOR_MODE_P (mode1))
15392 op1 = safe_vector_operand (op1, mode1);
15394 if (optimize || !target
15395 || GET_MODE (target) != tmode
15396 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15397 target = gen_reg_rtx (tmode);
15399 if (GET_MODE (op1) == SImode && mode1 == TImode)
15401 rtx x = gen_reg_rtx (V4SImode);
15402 emit_insn (gen_sse2_loadd (x, op1));
15403 op1 = gen_lowpart (TImode, x);
15406 /* The insn must want input operands in the same modes as the
15407 result. */
15408 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15409 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15411 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15412 op0 = copy_to_mode_reg (mode0, op0);
15413 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15414 op1 = copy_to_mode_reg (mode1, op1);
15416 /* ??? Using ix86_fixup_binary_operands is problematic when
15417 we've got mismatched modes. Fake it. */
15419 xops[0] = target;
15420 xops[1] = op0;
15421 xops[2] = op1;
15423 if (tmode == mode0 && tmode == mode1)
15425 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15426 op0 = xops[1];
15427 op1 = xops[2];
15429 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15431 op0 = force_reg (mode0, op0);
15432 op1 = force_reg (mode1, op1);
15433 target = gen_reg_rtx (tmode);
15436 pat = GEN_FCN (icode) (target, op0, op1);
15437 if (! pat)
15438 return 0;
15439 emit_insn (pat);
15440 return target;
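/* A typical flow, using the builtin behind _mm_add_ps as an example:
   tmode, mode0 and mode1 are all V4SFmode, both operands are forced
   into registers that satisfy the insn predicates, and the pattern is
   emitted into a fresh (or reusable) V4SFmode target.  */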
15443 /* Subroutine of ix86_expand_builtin to take care of stores. */
15445 static rtx
15446 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15448 rtx pat;
15449 tree arg0 = TREE_VALUE (arglist);
15450 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15451 rtx op0 = expand_normal (arg0);
15452 rtx op1 = expand_normal (arg1);
15453 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15454 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15456 if (VECTOR_MODE_P (mode1))
15457 op1 = safe_vector_operand (op1, mode1);
15459 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15460 op1 = copy_to_mode_reg (mode1, op1);
15462 pat = GEN_FCN (icode) (op0, op1);
15463 if (pat)
15464 emit_insn (pat);
15465 return 0;
15468 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15470 static rtx
15471 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15472 rtx target, int do_load)
15474 rtx pat;
15475 tree arg0 = TREE_VALUE (arglist);
15476 rtx op0 = expand_normal (arg0);
15477 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15478 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15480 if (optimize || !target
15481 || GET_MODE (target) != tmode
15482 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15483 target = gen_reg_rtx (tmode);
15484 if (do_load)
15485 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15486 else
15488 if (VECTOR_MODE_P (mode0))
15489 op0 = safe_vector_operand (op0, mode0);
15491 if ((optimize && !register_operand (op0, mode0))
15492 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15493 op0 = copy_to_mode_reg (mode0, op0);
15496 pat = GEN_FCN (icode) (target, op0);
15497 if (! pat)
15498 return 0;
15499 emit_insn (pat);
15500 return target;
15503 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15504 sqrtss, rsqrtss, rcpss. */
15506 static rtx
15507 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15509 rtx pat;
15510 tree arg0 = TREE_VALUE (arglist);
15511 rtx op1, op0 = expand_normal (arg0);
15512 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15513 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15515 if (optimize || !target
15516 || GET_MODE (target) != tmode
15517 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15518 target = gen_reg_rtx (tmode);
15520 if (VECTOR_MODE_P (mode0))
15521 op0 = safe_vector_operand (op0, mode0);
15523 if ((optimize && !register_operand (op0, mode0))
15524 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15525 op0 = copy_to_mode_reg (mode0, op0);
15527 op1 = op0;
15528 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15529 op1 = copy_to_mode_reg (mode0, op1);
15531 pat = GEN_FCN (icode) (target, op0, op1);
15532 if (! pat)
15533 return 0;
15534 emit_insn (pat);
15535 return target;
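/* These scalar "vm" patterns take two vector inputs even though the
   builtin has a single argument: op1 is a copy of op0, so only the low
   element is replaced by the sqrt/rsqrt/rcp result and the remaining
   elements are carried over from the same input.  */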
15538 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15540 static rtx
15541 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15542 rtx target)
15544 rtx pat;
15545 tree arg0 = TREE_VALUE (arglist);
15546 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15547 rtx op0 = expand_normal (arg0);
15548 rtx op1 = expand_normal (arg1);
15549 rtx op2;
15550 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15551 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15552 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15553 enum rtx_code comparison = d->comparison;
15555 if (VECTOR_MODE_P (mode0))
15556 op0 = safe_vector_operand (op0, mode0);
15557 if (VECTOR_MODE_P (mode1))
15558 op1 = safe_vector_operand (op1, mode1);
15560 /* Swap operands if we have a comparison that isn't available in
15561 hardware. */
15562 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15564 rtx tmp = gen_reg_rtx (mode1);
15565 emit_move_insn (tmp, op1);
15566 op1 = op0;
15567 op0 = tmp;
15570 if (optimize || !target
15571 || GET_MODE (target) != tmode
15572 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15573 target = gen_reg_rtx (tmode);
15575 if ((optimize && !register_operand (op0, mode0))
15576 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15577 op0 = copy_to_mode_reg (mode0, op0);
15578 if ((optimize && !register_operand (op1, mode1))
15579 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15580 op1 = copy_to_mode_reg (mode1, op1);
15582 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15583 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15584 if (! pat)
15585 return 0;
15586 emit_insn (pat);
15587 return target;
15590 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15592 static rtx
15593 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15594 rtx target)
15596 rtx pat;
15597 tree arg0 = TREE_VALUE (arglist);
15598 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15599 rtx op0 = expand_normal (arg0);
15600 rtx op1 = expand_normal (arg1);
15601 rtx op2;
15602 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15603 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15604 enum rtx_code comparison = d->comparison;
15606 if (VECTOR_MODE_P (mode0))
15607 op0 = safe_vector_operand (op0, mode0);
15608 if (VECTOR_MODE_P (mode1))
15609 op1 = safe_vector_operand (op1, mode1);
15611 /* Swap operands if we have a comparison that isn't available in
15612 hardware. */
15613 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15615 rtx tmp = op1;
15616 op1 = op0;
15617 op0 = tmp;
15620 target = gen_reg_rtx (SImode);
15621 emit_move_insn (target, const0_rtx);
15622 target = gen_rtx_SUBREG (QImode, target, 0);
15624 if ((optimize && !register_operand (op0, mode0))
15625 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15626 op0 = copy_to_mode_reg (mode0, op0);
15627 if ((optimize && !register_operand (op1, mode1))
15628 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15629 op1 = copy_to_mode_reg (mode1, op1);
15631 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15632 pat = GEN_FCN (d->icode) (op0, op1);
15633 if (! pat)
15634 return 0;
15635 emit_insn (pat);
15636 emit_insn (gen_rtx_SET (VOIDmode,
15637 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15638 gen_rtx_fmt_ee (comparison, QImode,
15639 SET_DEST (pat),
15640 const0_rtx)));
15642 return SUBREG_REG (target);
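/* The comparison insn itself only sets the flags; the result is
   materialized by storing the flag condition into the low QImode part
   of a zeroed SImode register, which is what the SUBREG /
   STRICT_LOW_PART sequence above implements.  */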
15645 /* Return the integer constant in ARG. Constrain it to be in the range
15646 of the subparts of VEC_TYPE; issue an error if not. */
15648 static int
15649 get_element_number (tree vec_type, tree arg)
15651 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15653 if (!host_integerp (arg, 1)
15654 || (elt = tree_low_cst (arg, 1), elt > max))
15656 error ("selector must be an integer constant in the range 0..%wi", max);
15657 return 0;
15660 return elt;
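/* For instance, __builtin_ia32_vec_ext_v4sf (x, 7) is rejected here
   because TYPE_VECTOR_SUBPARTS is 4, so MAX is 3; callers then carry on
   with element 0 to avoid cascading errors.  */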
15663 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15664 ix86_expand_vector_init. We DO have language-level syntax for this, in
15665 the form of (type){ init-list }. Except that since we can't place emms
15666 instructions from inside the compiler, we can't allow the use of MMX
15667 registers unless the user explicitly asks for it. So we do *not* define
15668 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15669 we have builtins invoked by mmintrin.h that give us license to emit
15670 these sorts of instructions. */
15672 static rtx
15673 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15675 enum machine_mode tmode = TYPE_MODE (type);
15676 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15677 int i, n_elt = GET_MODE_NUNITS (tmode);
15678 rtvec v = rtvec_alloc (n_elt);
15680 gcc_assert (VECTOR_MODE_P (tmode));
15682 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15684 rtx x = expand_normal (TREE_VALUE (arglist));
15685 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15688 gcc_assert (arglist == NULL);
15690 if (!target || !register_operand (target, tmode))
15691 target = gen_reg_rtx (tmode);
15693 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15694 return target;
15697 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15698 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15699 had a language-level syntax for referencing vector elements. */
15701 static rtx
15702 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15704 enum machine_mode tmode, mode0;
15705 tree arg0, arg1;
15706 int elt;
15707 rtx op0;
15709 arg0 = TREE_VALUE (arglist);
15710 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15712 op0 = expand_normal (arg0);
15713 elt = get_element_number (TREE_TYPE (arg0), arg1);
15715 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15716 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15717 gcc_assert (VECTOR_MODE_P (mode0));
15719 op0 = force_reg (mode0, op0);
15721 if (optimize || !target || !register_operand (target, tmode))
15722 target = gen_reg_rtx (tmode);
15724 ix86_expand_vector_extract (true, target, op0, elt);
15726 return target;
15729 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15730 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15731 a language-level syntax for referencing vector elements. */
15733 static rtx
15734 ix86_expand_vec_set_builtin (tree arglist)
15736 enum machine_mode tmode, mode1;
15737 tree arg0, arg1, arg2;
15738 int elt;
15739 rtx op0, op1;
15741 arg0 = TREE_VALUE (arglist);
15742 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15743 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15745 tmode = TYPE_MODE (TREE_TYPE (arg0));
15746 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15747 gcc_assert (VECTOR_MODE_P (tmode));
15749 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15750 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15751 elt = get_element_number (TREE_TYPE (arg0), arg2);
15753 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15754 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15756 op0 = force_reg (tmode, op0);
15757 op1 = force_reg (mode1, op1);
15759 ix86_expand_vector_set (true, op0, op1, elt);
15761 return op0;
15764 /* Expand an expression EXP that calls a built-in function,
15765 with result going to TARGET if that's convenient
15766 (and in mode MODE if that's convenient).
15767 SUBTARGET may be used as the target for computing one of EXP's operands.
15768 IGNORE is nonzero if the value is to be ignored. */
15770 static rtx
15771 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15772 enum machine_mode mode ATTRIBUTE_UNUSED,
15773 int ignore ATTRIBUTE_UNUSED)
15775 const struct builtin_description *d;
15776 size_t i;
15777 enum insn_code icode;
15778 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15779 tree arglist = TREE_OPERAND (exp, 1);
15780 tree arg0, arg1, arg2;
15781 rtx op0, op1, op2, pat;
15782 enum machine_mode tmode, mode0, mode1, mode2;
15783 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15785 switch (fcode)
15787 case IX86_BUILTIN_EMMS:
15788 emit_insn (gen_mmx_emms ());
15789 return 0;
15791 case IX86_BUILTIN_SFENCE:
15792 emit_insn (gen_sse_sfence ());
15793 return 0;
15795 case IX86_BUILTIN_MASKMOVQ:
15796 case IX86_BUILTIN_MASKMOVDQU:
15797 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15798 ? CODE_FOR_mmx_maskmovq
15799 : CODE_FOR_sse2_maskmovdqu);
15800 /* Note the arg order is different from the operand order. */
15801 arg1 = TREE_VALUE (arglist);
15802 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15803 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15804 op0 = expand_normal (arg0);
15805 op1 = expand_normal (arg1);
15806 op2 = expand_normal (arg2);
15807 mode0 = insn_data[icode].operand[0].mode;
15808 mode1 = insn_data[icode].operand[1].mode;
15809 mode2 = insn_data[icode].operand[2].mode;
15811 op0 = force_reg (Pmode, op0);
15812 op0 = gen_rtx_MEM (mode1, op0);
15814 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15815 op0 = copy_to_mode_reg (mode0, op0);
15816 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15817 op1 = copy_to_mode_reg (mode1, op1);
15818 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15819 op2 = copy_to_mode_reg (mode2, op2);
15820 pat = GEN_FCN (icode) (op0, op1, op2);
15821 if (! pat)
15822 return 0;
15823 emit_insn (pat);
15824 return 0;
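/* I.e. for __builtin_ia32_maskmovq (data, mask, p) the pointer P is the
   builtin's last argument but becomes operand 0 of the insn (the memory
   destination), with DATA and MASK following as operands 1 and 2.  */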
15826 case IX86_BUILTIN_SQRTSS:
15827 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15828 case IX86_BUILTIN_RSQRTSS:
15829 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15830 case IX86_BUILTIN_RCPSS:
15831 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15833 case IX86_BUILTIN_LOADUPS:
15834 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15836 case IX86_BUILTIN_STOREUPS:
15837 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15839 case IX86_BUILTIN_LOADHPS:
15840 case IX86_BUILTIN_LOADLPS:
15841 case IX86_BUILTIN_LOADHPD:
15842 case IX86_BUILTIN_LOADLPD:
15843 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15844 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15845 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15846 : CODE_FOR_sse2_loadlpd);
15847 arg0 = TREE_VALUE (arglist);
15848 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15849 op0 = expand_normal (arg0);
15850 op1 = expand_normal (arg1);
15851 tmode = insn_data[icode].operand[0].mode;
15852 mode0 = insn_data[icode].operand[1].mode;
15853 mode1 = insn_data[icode].operand[2].mode;
15855 op0 = force_reg (mode0, op0);
15856 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15857 if (optimize || target == 0
15858 || GET_MODE (target) != tmode
15859 || !register_operand (target, tmode))
15860 target = gen_reg_rtx (tmode);
15861 pat = GEN_FCN (icode) (target, op0, op1);
15862 if (! pat)
15863 return 0;
15864 emit_insn (pat);
15865 return target;
15867 case IX86_BUILTIN_STOREHPS:
15868 case IX86_BUILTIN_STORELPS:
15869 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15870 : CODE_FOR_sse_storelps);
15871 arg0 = TREE_VALUE (arglist);
15872 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15873 op0 = expand_normal (arg0);
15874 op1 = expand_normal (arg1);
15875 mode0 = insn_data[icode].operand[0].mode;
15876 mode1 = insn_data[icode].operand[1].mode;
15878 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15879 op1 = force_reg (mode1, op1);
15881 pat = GEN_FCN (icode) (op0, op1);
15882 if (! pat)
15883 return 0;
15884 emit_insn (pat);
15885 return const0_rtx;
15887 case IX86_BUILTIN_MOVNTPS:
15888 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15889 case IX86_BUILTIN_MOVNTQ:
15890 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15892 case IX86_BUILTIN_LDMXCSR:
15893 op0 = expand_normal (TREE_VALUE (arglist));
15894 target = assign_386_stack_local (SImode, SLOT_TEMP);
15895 emit_move_insn (target, op0);
15896 emit_insn (gen_sse_ldmxcsr (target));
15897 return 0;
15899 case IX86_BUILTIN_STMXCSR:
15900 target = assign_386_stack_local (SImode, SLOT_TEMP);
15901 emit_insn (gen_sse_stmxcsr (target));
15902 return copy_to_mode_reg (SImode, target);
15904 case IX86_BUILTIN_SHUFPS:
15905 case IX86_BUILTIN_SHUFPD:
15906 icode = (fcode == IX86_BUILTIN_SHUFPS
15907 ? CODE_FOR_sse_shufps
15908 : CODE_FOR_sse2_shufpd);
15909 arg0 = TREE_VALUE (arglist);
15910 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15911 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15912 op0 = expand_normal (arg0);
15913 op1 = expand_normal (arg1);
15914 op2 = expand_normal (arg2);
15915 tmode = insn_data[icode].operand[0].mode;
15916 mode0 = insn_data[icode].operand[1].mode;
15917 mode1 = insn_data[icode].operand[2].mode;
15918 mode2 = insn_data[icode].operand[3].mode;
15920 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15921 op0 = copy_to_mode_reg (mode0, op0);
15922 if ((optimize && !register_operand (op1, mode1))
15923 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15924 op1 = copy_to_mode_reg (mode1, op1);
15925 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15927 /* @@@ better error message */
15928 error ("mask must be an immediate");
15929 return gen_reg_rtx (tmode);
15931 if (optimize || target == 0
15932 || GET_MODE (target) != tmode
15933 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15934 target = gen_reg_rtx (tmode);
15935 pat = GEN_FCN (icode) (target, op0, op1, op2);
15936 if (! pat)
15937 return 0;
15938 emit_insn (pat);
15939 return target;
15941 case IX86_BUILTIN_PSHUFW:
15942 case IX86_BUILTIN_PSHUFD:
15943 case IX86_BUILTIN_PSHUFHW:
15944 case IX86_BUILTIN_PSHUFLW:
15945 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15946 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15947 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15948 : CODE_FOR_mmx_pshufw);
15949 arg0 = TREE_VALUE (arglist);
15950 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15951 op0 = expand_normal (arg0);
15952 op1 = expand_normal (arg1);
15953 tmode = insn_data[icode].operand[0].mode;
15954 mode1 = insn_data[icode].operand[1].mode;
15955 mode2 = insn_data[icode].operand[2].mode;
15957 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15958 op0 = copy_to_mode_reg (mode1, op0);
15959 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15961 /* @@@ better error message */
15962 error ("mask must be an immediate");
15963 return const0_rtx;
15965 if (target == 0
15966 || GET_MODE (target) != tmode
15967 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15968 target = gen_reg_rtx (tmode);
15969 pat = GEN_FCN (icode) (target, op0, op1);
15970 if (! pat)
15971 return 0;
15972 emit_insn (pat);
15973 return target;
15975 case IX86_BUILTIN_PSLLDQI128:
15976 case IX86_BUILTIN_PSRLDQI128:
15977 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
15978 : CODE_FOR_sse2_lshrti3);
15979 arg0 = TREE_VALUE (arglist);
15980 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15981 op0 = expand_normal (arg0);
15982 op1 = expand_normal (arg1);
15983 tmode = insn_data[icode].operand[0].mode;
15984 mode1 = insn_data[icode].operand[1].mode;
15985 mode2 = insn_data[icode].operand[2].mode;
15987 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15989 op0 = copy_to_reg (op0);
15990 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
15992 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15994 error ("shift must be an immediate");
15995 return const0_rtx;
15997 target = gen_reg_rtx (V2DImode);
15998 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
15999 if (! pat)
16000 return 0;
16001 emit_insn (pat);
16002 return target;
16004 case IX86_BUILTIN_FEMMS:
16005 emit_insn (gen_mmx_femms ());
16006 return NULL_RTX;
16008 case IX86_BUILTIN_PAVGUSB:
16009 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16011 case IX86_BUILTIN_PF2ID:
16012 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16014 case IX86_BUILTIN_PFACC:
16015 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16017 case IX86_BUILTIN_PFADD:
16018 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16020 case IX86_BUILTIN_PFCMPEQ:
16021 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16023 case IX86_BUILTIN_PFCMPGE:
16024 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16026 case IX86_BUILTIN_PFCMPGT:
16027 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16029 case IX86_BUILTIN_PFMAX:
16030 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16032 case IX86_BUILTIN_PFMIN:
16033 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16035 case IX86_BUILTIN_PFMUL:
16036 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16038 case IX86_BUILTIN_PFRCP:
16039 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16041 case IX86_BUILTIN_PFRCPIT1:
16042 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16044 case IX86_BUILTIN_PFRCPIT2:
16045 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16047 case IX86_BUILTIN_PFRSQIT1:
16048 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16050 case IX86_BUILTIN_PFRSQRT:
16051 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16053 case IX86_BUILTIN_PFSUB:
16054 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16056 case IX86_BUILTIN_PFSUBR:
16057 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16059 case IX86_BUILTIN_PI2FD:
16060 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16062 case IX86_BUILTIN_PMULHRW:
16063 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16065 case IX86_BUILTIN_PF2IW:
16066 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16068 case IX86_BUILTIN_PFNACC:
16069 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16071 case IX86_BUILTIN_PFPNACC:
16072 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16074 case IX86_BUILTIN_PI2FW:
16075 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16077 case IX86_BUILTIN_PSWAPDSI:
16078 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16080 case IX86_BUILTIN_PSWAPDSF:
16081 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16083 case IX86_BUILTIN_SQRTSD:
16084 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16085 case IX86_BUILTIN_LOADUPD:
16086 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16087 case IX86_BUILTIN_STOREUPD:
16088 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16090 case IX86_BUILTIN_MFENCE:
16091 emit_insn (gen_sse2_mfence ());
16092 return 0;
16093 case IX86_BUILTIN_LFENCE:
16094 emit_insn (gen_sse2_lfence ());
16095 return 0;
16097 case IX86_BUILTIN_CLFLUSH:
16098 arg0 = TREE_VALUE (arglist);
16099 op0 = expand_normal (arg0);
16100 icode = CODE_FOR_sse2_clflush;
16101 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16102 op0 = copy_to_mode_reg (Pmode, op0);
16104 emit_insn (gen_sse2_clflush (op0));
16105 return 0;
16107 case IX86_BUILTIN_MOVNTPD:
16108 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16109 case IX86_BUILTIN_MOVNTDQ:
16110 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16111 case IX86_BUILTIN_MOVNTI:
16112 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16114 case IX86_BUILTIN_LOADDQU:
16115 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16116 case IX86_BUILTIN_STOREDQU:
16117 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16119 case IX86_BUILTIN_MONITOR:
16120 arg0 = TREE_VALUE (arglist);
16121 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16122 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16123 op0 = expand_normal (arg0);
16124 op1 = expand_normal (arg1);
16125 op2 = expand_normal (arg2);
16126 if (!REG_P (op0))
16127 op0 = copy_to_mode_reg (SImode, op0);
16128 if (!REG_P (op1))
16129 op1 = copy_to_mode_reg (SImode, op1);
16130 if (!REG_P (op2))
16131 op2 = copy_to_mode_reg (SImode, op2);
16132 emit_insn (gen_sse3_monitor (op0, op1, op2));
16133 return 0;
16135 case IX86_BUILTIN_MWAIT:
16136 arg0 = TREE_VALUE (arglist);
16137 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16138 op0 = expand_normal (arg0);
16139 op1 = expand_normal (arg1);
16140 if (!REG_P (op0))
16141 op0 = copy_to_mode_reg (SImode, op0);
16142 if (!REG_P (op1))
16143 op1 = copy_to_mode_reg (SImode, op1);
16144 emit_insn (gen_sse3_mwait (op0, op1));
16145 return 0;
16147 case IX86_BUILTIN_LDDQU:
16148 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16149 target, 1);
16151 case IX86_BUILTIN_VEC_INIT_V2SI:
16152 case IX86_BUILTIN_VEC_INIT_V4HI:
16153 case IX86_BUILTIN_VEC_INIT_V8QI:
16154 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16156 case IX86_BUILTIN_VEC_EXT_V2DF:
16157 case IX86_BUILTIN_VEC_EXT_V2DI:
16158 case IX86_BUILTIN_VEC_EXT_V4SF:
16159 case IX86_BUILTIN_VEC_EXT_V4SI:
16160 case IX86_BUILTIN_VEC_EXT_V8HI:
16161 case IX86_BUILTIN_VEC_EXT_V2SI:
16162 case IX86_BUILTIN_VEC_EXT_V4HI:
16163 return ix86_expand_vec_ext_builtin (arglist, target);
16165 case IX86_BUILTIN_VEC_SET_V8HI:
16166 case IX86_BUILTIN_VEC_SET_V4HI:
16167 return ix86_expand_vec_set_builtin (arglist);
16169 default:
16170 break;
16173 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16174 if (d->code == fcode)
16176 /* Compares are treated specially. */
16177 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16178 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16179 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16180 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16181 return ix86_expand_sse_compare (d, arglist, target);
16183 return ix86_expand_binop_builtin (d->icode, arglist, target);
16186 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16187 if (d->code == fcode)
16188 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16190 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16191 if (d->code == fcode)
16192 return ix86_expand_sse_comi (d, arglist, target);
16194 gcc_unreachable ();
16197 /* Expand an expression EXP that calls a built-in library function,
16198 with result going to TARGET if that's convenient
16199 (and in mode MODE if that's convenient).
16200 SUBTARGET may be used as the target for computing one of EXP's operands.
16201 IGNORE is nonzero if the value is to be ignored. */
16203 static rtx
16204 ix86_expand_library_builtin (tree exp, rtx target,
16205 rtx subtarget ATTRIBUTE_UNUSED,
16206 enum machine_mode mode ATTRIBUTE_UNUSED,
16207 int ignore)
16209 enum built_in_function fncode;
16210 tree fndecl, newfn, call;
16212 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16213 if (!TARGET_SSELIBM)
16214 return NULL_RTX;
16216 fncode = builtin_mathfn_code (exp);
16217 if (!ix86_builtin_function_variants [(int)fncode])
16218 return NULL_RTX;
16220 fndecl = get_callee_fndecl (exp);
16221 if (DECL_RTL_SET_P (fndecl))
16222 return NULL_RTX;
16224 /* Build the redirected call and expand it. */
16225 newfn = ix86_builtin_function_variants [(int)fncode];
16226 call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
16227 return expand_call (call, target, ignore);
16230 /* Store OPERAND to the memory after reload is completed. This means
16231 that we can't easily use assign_stack_local. */
16233 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16235 rtx result;
16237 gcc_assert (reload_completed);
16238 if (TARGET_RED_ZONE)
16240 result = gen_rtx_MEM (mode,
16241 gen_rtx_PLUS (Pmode,
16242 stack_pointer_rtx,
16243 GEN_INT (-RED_ZONE_SIZE)));
16244 emit_move_insn (result, operand);
16246 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16248 switch (mode)
16250 case HImode:
16251 case SImode:
16252 operand = gen_lowpart (DImode, operand);
16253 /* FALLTHRU */
16254 case DImode:
16255 emit_insn (
16256 gen_rtx_SET (VOIDmode,
16257 gen_rtx_MEM (DImode,
16258 gen_rtx_PRE_DEC (DImode,
16259 stack_pointer_rtx)),
16260 operand));
16261 break;
16262 default:
16263 gcc_unreachable ();
16265 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16267 else
16269 switch (mode)
16271 case DImode:
16273 rtx operands[2];
16274 split_di (&operand, 1, operands, operands + 1);
16275 emit_insn (
16276 gen_rtx_SET (VOIDmode,
16277 gen_rtx_MEM (SImode,
16278 gen_rtx_PRE_DEC (Pmode,
16279 stack_pointer_rtx)),
16280 operands[1]));
16281 emit_insn (
16282 gen_rtx_SET (VOIDmode,
16283 gen_rtx_MEM (SImode,
16284 gen_rtx_PRE_DEC (Pmode,
16285 stack_pointer_rtx)),
16286 operands[0]));
16288 break;
16289 case HImode:
16290 /* Store HImodes as SImodes. */
16291 operand = gen_lowpart (SImode, operand);
16292 /* FALLTHRU */
16293 case SImode:
16294 emit_insn (
16295 gen_rtx_SET (VOIDmode,
16296 gen_rtx_MEM (GET_MODE (operand),
16297 gen_rtx_PRE_DEC (SImode,
16298 stack_pointer_rtx)),
16299 operand));
16300 break;
16301 default:
16302 gcc_unreachable ();
16304 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16306 return result;
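/* Editorial sketch, not part of the original file: on !TARGET_64BIT the
   DImode branch above splits the value with split_di and pushes the two
   SImode halves, high word first, so the low word ends up at the lower
   address (the usual little-endian layout).  A plain C model of that
   ordering, with a made-up "sp"/"stack" pair standing in for the real
   stack pointer: */
#if 0
#include <stdint.h>

/* Simulate two PRE_DEC SImode stores of a 64-bit value; SP points just
   past the live end of the simulated stack.  */
static uint32_t *
push_di (uint32_t *sp, uint64_t value)
{
  *--sp = (uint32_t) (value >> 32);   /* high half pushed first */
  *--sp = (uint32_t) value;           /* low half lands at the lower address */
  return sp;                          /* SP now addresses the DImode value */
}
#endif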
16309 /* Free operand from the memory. */
16310 void
16311 ix86_free_from_memory (enum machine_mode mode)
16313 if (!TARGET_RED_ZONE)
16315 int size;
16317 if (mode == DImode || TARGET_64BIT)
16318 size = 8;
16319 else
16320 size = 4;
16321 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16322 to a pop or add instruction if registers are available. */
16323 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16324 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16325 GEN_INT (size))));
16329 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16330 QImode must go into class Q_REGS.
16331 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16332 movdf to do mem-to-mem moves through integer regs. */
16333 enum reg_class
16334 ix86_preferred_reload_class (rtx x, enum reg_class class)
16336 /* We're only allowed to return a subclass of CLASS. Many of the
16337 following checks fail for NO_REGS, so eliminate that early. */
16338 if (class == NO_REGS)
16339 return NO_REGS;
16341 /* All classes can load zeros. */
16342 if (x == CONST0_RTX (GET_MODE (x)))
16343 return class;
16345 /* Floating-point constants need more complex checks. */
16346 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16348 /* General regs can load everything. */
16349 if (reg_class_subset_p (class, GENERAL_REGS))
16350 return class;
16352 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16353 zero above. We only want to wind up preferring 80387 registers if
16354 we plan on doing computation with them. */
16355 if (TARGET_80387
16356 && (TARGET_MIX_SSE_I387
16357 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
16358 && standard_80387_constant_p (x))
16360 /* Limit class to non-sse. */
16361 if (class == FLOAT_SSE_REGS)
16362 return FLOAT_REGS;
16363 if (class == FP_TOP_SSE_REGS)
16364 return FP_TOP_REG;
16365 if (class == FP_SECOND_SSE_REGS)
16366 return FP_SECOND_REG;
16367 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16368 return class;
16371 return NO_REGS;
16373 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
16374 return NO_REGS;
16375 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
16376 return NO_REGS;
16378 /* Generally when we see PLUS here, it's the function invariant
16379 (plus soft-fp const_int), which can only be computed into general
16380 regs. */
16381 if (GET_CODE (x) == PLUS)
16382 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16384 /* QImode constants are easy to load, but non-constant QImode data
16385 must go into Q_REGS. */
16386 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16388 if (reg_class_subset_p (class, Q_REGS))
16389 return class;
16390 if (reg_class_subset_p (Q_REGS, class))
16391 return Q_REGS;
16392 return NO_REGS;
16395 return class;
16398 /* If we are copying between general and FP registers, we need a memory
16399 location. The same is true for SSE and MMX registers.
16401 The macro can't work reliably when one of the CLASSES is a class containing
16402 registers from multiple units (SSE, MMX, integer). We avoid this by never
16403 combining those units in a single alternative in the machine description.
16404 Ensure that this constraint holds to avoid unexpected surprises.
16406 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16407 enforce these sanity checks. */
16410 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16411 enum machine_mode mode, int strict)
16413 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16414 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16415 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16416 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16417 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16418 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16420 gcc_assert (!strict);
16421 return true;
16424 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16425 return true;
16427 /* ??? This is a lie. We do have moves between mmx/general, and for
16428 mmx/sse2. But by saying we need secondary memory we discourage the
16429 register allocator from using the mmx registers unless needed. */
16430 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16431 return true;
16433 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16435 /* SSE1 doesn't have any direct moves from other classes. */
16436 if (!TARGET_SSE2)
16437 return true;
16439 /* If the target says that inter-unit moves are more expensive
16440 than moving through memory, then don't generate them. */
16441 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16442 return true;
16444 /* Between SSE and general, we have moves no larger than word size. */
16445 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16446 return true;
16448 /* ??? For the cost of one register reformat penalty, we could use
16449 the same instructions to move SFmode and DFmode data, but the
16450 relevant move patterns don't support those alternatives. */
16451 if (mode == SFmode || mode == DFmode)
16452 return true;
16455 return false;
16458 /* Return true if the registers in CLASS cannot represent the change from
16459 modes FROM to TO. */
16461 bool
16462 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16463 enum reg_class class)
16465 if (from == to)
16466 return false;
16468 /* x87 registers can't do subreg at all, as all values are reformatted
16469 to extended precision. */
16470 if (MAYBE_FLOAT_CLASS_P (class))
16471 return true;
16473 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16475 /* Vector registers do not support QI or HImode loads. If we don't
16476 disallow a change to these modes, reload will assume it's ok to
16477 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16478 the vec_dupv4hi pattern. */
16479 if (GET_MODE_SIZE (from) < 4)
16480 return true;
16482 /* Vector registers do not support subreg with nonzero offsets, which
16483 are otherwise valid for integer registers. Since we can't see
16484 whether we have a nonzero offset from here, prohibit all
16485 nonparadoxical subregs changing size. */
16486 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16487 return true;
16490 return false;
16493 /* Return the cost of moving data from a register in class CLASS1 to
16494 one in class CLASS2.
16496 It is not required that the cost always equal 2 when FROM is the same as TO;
16497 on some machines it is expensive to move between registers if they are not
16498 general registers. */
16501 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16502 enum reg_class class2)
16504 /* In case we require secondary memory, compute cost of the store followed
16505 by load. In order to avoid bad register allocation choices, we need
16506 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16508 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16510 int cost = 1;
16512 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16513 MEMORY_MOVE_COST (mode, class1, 1));
16514 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16515 MEMORY_MOVE_COST (mode, class2, 1));
16517 /* In the case of copying from a general purpose register we may emit multiple
16518 stores followed by a single load, causing a memory size mismatch stall.
16519 Count this as an arbitrarily high cost of 20. */
16520 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16521 cost += 20;
16523 /* In the case of FP/MMX moves, the registers actually overlap, and we
16524 have to switch modes in order to treat them differently. */
16525 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16526 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16527 cost += 20;
16529 return cost;
16532 /* Moves between SSE/MMX and integer unit are expensive. */
16533 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16534 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16535 return ix86_cost->mmxsse_to_integer;
16536 if (MAYBE_FLOAT_CLASS_P (class1))
16537 return ix86_cost->fp_move;
16538 if (MAYBE_SSE_CLASS_P (class1))
16539 return ix86_cost->sse_move;
16540 if (MAYBE_MMX_CLASS_P (class1))
16541 return ix86_cost->mmx_move;
16542 return 2;
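/* Editorial sketch, not part of the original file: when the copy has to
   bounce through memory, the cost computed above is roughly "spill in the
   source class plus reload in the destination class", each taken as the
   worse of its load/store costs, plus the flat penalties.  A scalar model
   of that composition with made-up cost numbers: */
#if 0
static int
max_of (int a, int b)
{
  return a > b ? a : b;
}

static int
through_memory_cost (int load1, int store1, int load2, int store2,
                     int size_mismatch_penalty, int fp_mmx_overlap_penalty)
{
  int cost = 1;                        /* base cost */
  cost += max_of (load1, store1);      /* store/load in the source class */
  cost += max_of (load2, store2);      /* store/load in the destination class */
  cost += size_mismatch_penalty;       /* e.g. 20 when nregs(src) > nregs(dst) */
  cost += fp_mmx_overlap_penalty;      /* e.g. 20 for FP<->MMX mode switching */
  return cost;
}
#endif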
16545 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16547 bool
16548 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16550 /* Flags and only flags can only hold CCmode values. */
16551 if (CC_REGNO_P (regno))
16552 return GET_MODE_CLASS (mode) == MODE_CC;
16553 if (GET_MODE_CLASS (mode) == MODE_CC
16554 || GET_MODE_CLASS (mode) == MODE_RANDOM
16555 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16556 return 0;
16557 if (FP_REGNO_P (regno))
16558 return VALID_FP_MODE_P (mode);
16559 if (SSE_REGNO_P (regno))
16561 /* We implement the move patterns for all vector modes into and
16562 out of SSE registers, even when no operation instructions
16563 are available. */
16564 return (VALID_SSE_REG_MODE (mode)
16565 || VALID_SSE2_REG_MODE (mode)
16566 || VALID_MMX_REG_MODE (mode)
16567 || VALID_MMX_REG_MODE_3DNOW (mode));
16569 if (MMX_REGNO_P (regno))
16571 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16572 so if the register is available at all, then we can move data of
16573 the given mode into or out of it. */
16574 return (VALID_MMX_REG_MODE (mode)
16575 || VALID_MMX_REG_MODE_3DNOW (mode));
16578 if (mode == QImode)
16580 /* Take care for QImode values - they can be in non-QI regs,
16581 but then they do cause partial register stalls. */
16582 if (regno < 4 || TARGET_64BIT)
16583 return 1;
16584 if (!TARGET_PARTIAL_REG_STALL)
16585 return 1;
16586 return reload_in_progress || reload_completed;
16588 /* We handle both integers and floats in the general purpose registers. */
16589 else if (VALID_INT_MODE_P (mode))
16590 return 1;
16591 else if (VALID_FP_MODE_P (mode))
16592 return 1;
16593 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16594 on to use that value in smaller contexts, this can easily force a
16595 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16596 supporting DImode, allow it. */
16597 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16598 return 1;
16600 return 0;
16603 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16604 tieable integer mode. */
16606 static bool
16607 ix86_tieable_integer_mode_p (enum machine_mode mode)
16609 switch (mode)
16611 case HImode:
16612 case SImode:
16613 return true;
16615 case QImode:
16616 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16618 case DImode:
16619 return TARGET_64BIT;
16621 default:
16622 return false;
16626 /* Return true if MODE1 is accessible in a register that can hold MODE2
16627 without copying. That is, all register classes that can hold MODE2
16628 can also hold MODE1. */
16630 bool
16631 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16633 if (mode1 == mode2)
16634 return true;
16636 if (ix86_tieable_integer_mode_p (mode1)
16637 && ix86_tieable_integer_mode_p (mode2))
16638 return true;
16640 /* MODE2 being XFmode implies fp stack or general regs, which means we
16641 can tie any smaller floating point modes to it. Note that we do not
16642 tie this with TFmode. */
16643 if (mode2 == XFmode)
16644 return mode1 == SFmode || mode1 == DFmode;
16646 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16647 that we can tie it with SFmode. */
16648 if (mode2 == DFmode)
16649 return mode1 == SFmode;
16651 /* If MODE2 is only appropriate for an SSE register, then tie with
16652 any other mode acceptable to SSE registers. */
16653 if (GET_MODE_SIZE (mode2) >= 8
16654 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16655 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16657 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16658 with any other mode acceptable to MMX registers. */
16659 if (GET_MODE_SIZE (mode2) == 8
16660 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16661 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16663 return false;
16666 /* Return the cost of moving data of mode M between a
16667 register and memory. A value of 2 is the default; this cost is
16668 relative to those in `REGISTER_MOVE_COST'.
16670 If moving between registers and memory is more expensive than
16671 between two registers, you should define this macro to express the
16672 relative cost.
16674 Also model the increased cost of moving QImode registers in non
16675 Q_REGS classes.
16678 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16680 if (FLOAT_CLASS_P (class))
16682 int index;
16683 switch (mode)
16685 case SFmode:
16686 index = 0;
16687 break;
16688 case DFmode:
16689 index = 1;
16690 break;
16691 case XFmode:
16692 index = 2;
16693 break;
16694 default:
16695 return 100;
16697 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16699 if (SSE_CLASS_P (class))
16701 int index;
16702 switch (GET_MODE_SIZE (mode))
16704 case 4:
16705 index = 0;
16706 break;
16707 case 8:
16708 index = 1;
16709 break;
16710 case 16:
16711 index = 2;
16712 break;
16713 default:
16714 return 100;
16716 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16718 if (MMX_CLASS_P (class))
16720 int index;
16721 switch (GET_MODE_SIZE (mode))
16723 case 4:
16724 index = 0;
16725 break;
16726 case 8:
16727 index = 1;
16728 break;
16729 default:
16730 return 100;
16732 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16734 switch (GET_MODE_SIZE (mode))
16736 case 1:
16737 if (in)
16738 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16739 : ix86_cost->movzbl_load);
16740 else
16741 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16742 : ix86_cost->int_store[0] + 4);
16743 break;
16744 case 2:
16745 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16746 default:
16747 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16748 if (mode == TFmode)
16749 mode = XFmode;
16750 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16751 * (((int) GET_MODE_SIZE (mode)
16752 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16756 /* Compute a (partial) cost for rtx X. Return true if the complete
16757 cost has been computed, and false if subexpressions should be
16758 scanned. In either case, *TOTAL contains the cost result. */
16760 static bool
16761 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16763 enum machine_mode mode = GET_MODE (x);
16765 switch (code)
16767 case CONST_INT:
16768 case CONST:
16769 case LABEL_REF:
16770 case SYMBOL_REF:
16771 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16772 *total = 3;
16773 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16774 *total = 2;
16775 else if (flag_pic && SYMBOLIC_CONST (x)
16776 && (!TARGET_64BIT
16777 || (GET_CODE (x) != LABEL_REF
16778 && (GET_CODE (x) != SYMBOL_REF
16779 || !SYMBOL_REF_LOCAL_P (x)))))
16780 *total = 1;
16781 else
16782 *total = 0;
16783 return true;
16785 case CONST_DOUBLE:
16786 if (mode == VOIDmode)
16787 *total = 0;
16788 else
16789 switch (standard_80387_constant_p (x))
16791 case 1: /* 0.0 */
16792 *total = 1;
16793 break;
16794 default: /* Other constants */
16795 *total = 2;
16796 break;
16797 case 0:
16798 case -1:
16799 /* Start with (MEM (SYMBOL_REF)), since that's where
16800 it'll probably end up. Add a penalty for size. */
16801 *total = (COSTS_N_INSNS (1)
16802 + (flag_pic != 0 && !TARGET_64BIT)
16803 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16804 break;
16806 return true;
16808 case ZERO_EXTEND:
16809 /* The zero extension is often completely free on x86_64, so make
16810 it as cheap as possible. */
16811 if (TARGET_64BIT && mode == DImode
16812 && GET_MODE (XEXP (x, 0)) == SImode)
16813 *total = 1;
16814 else if (TARGET_ZERO_EXTEND_WITH_AND)
16815 *total = ix86_cost->add;
16816 else
16817 *total = ix86_cost->movzx;
16818 return false;
16820 case SIGN_EXTEND:
16821 *total = ix86_cost->movsx;
16822 return false;
16824 case ASHIFT:
16825 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16826 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16828 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16829 if (value == 1)
16831 *total = ix86_cost->add;
16832 return false;
16834 if ((value == 2 || value == 3)
16835 && ix86_cost->lea <= ix86_cost->shift_const)
16837 *total = ix86_cost->lea;
16838 return false;
16841 /* FALLTHRU */
16843 case ROTATE:
16844 case ASHIFTRT:
16845 case LSHIFTRT:
16846 case ROTATERT:
16847 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16849 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16851 if (INTVAL (XEXP (x, 1)) > 32)
16852 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
16853 else
16854 *total = ix86_cost->shift_const * 2;
16856 else
16858 if (GET_CODE (XEXP (x, 1)) == AND)
16859 *total = ix86_cost->shift_var * 2;
16860 else
16861 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
16864 else
16866 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16867 *total = ix86_cost->shift_const;
16868 else
16869 *total = ix86_cost->shift_var;
16871 return false;
16873 case MULT:
16874 if (FLOAT_MODE_P (mode))
16876 *total = ix86_cost->fmul;
16877 return false;
16879 else
16881 rtx op0 = XEXP (x, 0);
16882 rtx op1 = XEXP (x, 1);
16883 int nbits;
16884 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16886 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16887 for (nbits = 0; value != 0; value &= value - 1)
16888 nbits++;
16890 else
16891 /* This is arbitrary. */
16892 nbits = 7;
16894 /* Compute costs correctly for widening multiplication. */
16895 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16896 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16897 == GET_MODE_SIZE (mode))
16899 int is_mulwiden = 0;
16900 enum machine_mode inner_mode = GET_MODE (op0);
16902 if (GET_CODE (op0) == GET_CODE (op1))
16903 is_mulwiden = 1, op1 = XEXP (op1, 0);
16904 else if (GET_CODE (op1) == CONST_INT)
16906 if (GET_CODE (op0) == SIGN_EXTEND)
16907 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16908 == INTVAL (op1);
16909 else
16910 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16913 if (is_mulwiden)
16914 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16917 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16918 + nbits * ix86_cost->mult_bit
16919 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
16921 return true;
16924 case DIV:
16925 case UDIV:
16926 case MOD:
16927 case UMOD:
16928 if (FLOAT_MODE_P (mode))
16929 *total = ix86_cost->fdiv;
16930 else
16931 *total = ix86_cost->divide[MODE_INDEX (mode)];
16932 return false;
16934 case PLUS:
16935 if (FLOAT_MODE_P (mode))
16936 *total = ix86_cost->fadd;
16937 else if (GET_MODE_CLASS (mode) == MODE_INT
16938 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16940 if (GET_CODE (XEXP (x, 0)) == PLUS
16941 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16942 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16943 && CONSTANT_P (XEXP (x, 1)))
16945 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16946 if (val == 2 || val == 4 || val == 8)
16948 *total = ix86_cost->lea;
16949 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16950 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16951 outer_code);
16952 *total += rtx_cost (XEXP (x, 1), outer_code);
16953 return true;
16956 else if (GET_CODE (XEXP (x, 0)) == MULT
16957 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16959 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16960 if (val == 2 || val == 4 || val == 8)
16962 *total = ix86_cost->lea;
16963 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16964 *total += rtx_cost (XEXP (x, 1), outer_code);
16965 return true;
16968 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16970 *total = ix86_cost->lea;
16971 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16972 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16973 *total += rtx_cost (XEXP (x, 1), outer_code);
16974 return true;
16977 /* FALLTHRU */
16979 case MINUS:
16980 if (FLOAT_MODE_P (mode))
16982 *total = ix86_cost->fadd;
16983 return false;
16985 /* FALLTHRU */
16987 case AND:
16988 case IOR:
16989 case XOR:
16990 if (!TARGET_64BIT && mode == DImode)
16992 *total = (ix86_cost->add * 2
16993 + (rtx_cost (XEXP (x, 0), outer_code)
16994 << (GET_MODE (XEXP (x, 0)) != DImode))
16995 + (rtx_cost (XEXP (x, 1), outer_code)
16996 << (GET_MODE (XEXP (x, 1)) != DImode)));
16997 return true;
16999 /* FALLTHRU */
17001 case NEG:
17002 if (FLOAT_MODE_P (mode))
17004 *total = ix86_cost->fchs;
17005 return false;
17007 /* FALLTHRU */
17009 case NOT:
17010 if (!TARGET_64BIT && mode == DImode)
17011 *total = ix86_cost->add * 2;
17012 else
17013 *total = ix86_cost->add;
17014 return false;
17016 case COMPARE:
17017 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17018 && XEXP (XEXP (x, 0), 1) == const1_rtx
17019 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17020 && XEXP (x, 1) == const0_rtx)
17022 /* This kind of construct is implemented using test[bwl].
17023 Treat it as if we had an AND. */
17024 *total = (ix86_cost->add
17025 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17026 + rtx_cost (const1_rtx, outer_code));
17027 return true;
17029 return false;
17031 case FLOAT_EXTEND:
17032 if (!TARGET_SSE_MATH
17033 || mode == XFmode
17034 || (mode == DFmode && !TARGET_SSE2))
17035 *total = 0;
17036 return false;
17038 case ABS:
17039 if (FLOAT_MODE_P (mode))
17040 *total = ix86_cost->fabs;
17041 return false;
17043 case SQRT:
17044 if (FLOAT_MODE_P (mode))
17045 *total = ix86_cost->fsqrt;
17046 return false;
17048 case UNSPEC:
17049 if (XINT (x, 1) == UNSPEC_TP)
17050 *total = 0;
17051 return false;
17053 default:
17054 return false;
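/* Editorial sketch, not part of the original file: the MULT case above
   estimates the cost of multiplying by a constant from the number of set
   bits in that constant, counted with the classic "clear the lowest set
   bit" loop.  A standalone version of that counting step: */
#if 0
#include <stdio.h>

static int
count_set_bits (unsigned long long value)
{
  int nbits;
  for (nbits = 0; value != 0; value &= value - 1)
    nbits++;
  return nbits;
}

int
main (void)
{
  /* 0x90 has two set bits, so a multiply by it would be costed as
     mult_init + 2 * mult_bit in the scheme above.  */
  printf ("%d\n", count_set_bits (0x90));
  return 0;
}
#endif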
17058 #if TARGET_MACHO
17060 static int current_machopic_label_num;
17062 /* Given a symbol name and its associated stub, write out the
17063 definition of the stub. */
17065 void
17066 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17068 unsigned int length;
17069 char *binder_name, *symbol_name, lazy_ptr_name[32];
17070 int label = ++current_machopic_label_num;
17072 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17073 symb = (*targetm.strip_name_encoding) (symb);
17075 length = strlen (stub);
17076 binder_name = alloca (length + 32);
17077 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17079 length = strlen (symb);
17080 symbol_name = alloca (length + 32);
17081 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17083 sprintf (lazy_ptr_name, "L%d$lz", label);
17085 if (MACHOPIC_PURE)
17086 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17087 else
17088 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17090 fprintf (file, "%s:\n", stub);
17091 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17093 if (MACHOPIC_PURE)
17095 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
17096 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17097 fprintf (file, "\tjmp %%edx\n");
17099 else
17100 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
17102 fprintf (file, "%s:\n", binder_name);
17104 if (MACHOPIC_PURE)
17106 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17107 fprintf (file, "\tpushl %%eax\n");
17109 else
17110 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
17112 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
17114 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17115 fprintf (file, "%s:\n", lazy_ptr_name);
17116 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17117 fprintf (file, "\t.long %s\n", binder_name);
17120 void
17121 darwin_x86_file_end (void)
17123 darwin_file_end ();
17124 ix86_file_end ();
17126 #endif /* TARGET_MACHO */
17128 /* Order the registers for register allocator. */
17130 void
17131 x86_order_regs_for_local_alloc (void)
17133 int pos = 0;
17134 int i;
17136 /* First allocate the local general purpose registers. */
17137 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17138 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17139 reg_alloc_order [pos++] = i;
17141 /* Global general purpose registers. */
17142 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17143 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17144 reg_alloc_order [pos++] = i;
17146 /* x87 registers come first in case we are doing FP math
17147 using them. */
17148 if (!TARGET_SSE_MATH)
17149 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17150 reg_alloc_order [pos++] = i;
17152 /* SSE registers. */
17153 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17154 reg_alloc_order [pos++] = i;
17155 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17156 reg_alloc_order [pos++] = i;
17158 /* x87 registers. */
17159 if (TARGET_SSE_MATH)
17160 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17161 reg_alloc_order [pos++] = i;
17163 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17164 reg_alloc_order [pos++] = i;
17166 /* Initialize the rest of the array, as we do not allocate some registers
17167 at all. */
17168 while (pos < FIRST_PSEUDO_REGISTER)
17169 reg_alloc_order [pos++] = 0;
17172 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17173 struct attribute_spec.handler. */
17174 static tree
17175 ix86_handle_struct_attribute (tree *node, tree name,
17176 tree args ATTRIBUTE_UNUSED,
17177 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17179 tree *type = NULL;
17180 if (DECL_P (*node))
17182 if (TREE_CODE (*node) == TYPE_DECL)
17183 type = &TREE_TYPE (*node);
17185 else
17186 type = node;
17188 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17189 || TREE_CODE (*type) == UNION_TYPE)))
17191 warning (OPT_Wattributes, "%qs attribute ignored",
17192 IDENTIFIER_POINTER (name));
17193 *no_add_attrs = true;
17196 else if ((is_attribute_p ("ms_struct", name)
17197 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17198 || ((is_attribute_p ("gcc_struct", name)
17199 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17201 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17202 IDENTIFIER_POINTER (name));
17203 *no_add_attrs = true;
17206 return NULL_TREE;
17209 static bool
17210 ix86_ms_bitfield_layout_p (tree record_type)
17212 return (TARGET_MS_BITFIELD_LAYOUT &&
17213 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17214 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
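/* Editorial sketch, not part of the original file: the two attributes
   handled above are applied per type.  With -mms-bitfields (i.e. when
   TARGET_MS_BITFIELD_LAYOUT is set) the first struct below opts back out
   to the GCC layout, while the second opts in to the MSVC bit-field rules
   even without the flag; the struct and member names are illustrative.  */
#if 0
struct __attribute__ ((gcc_struct)) gnu_layout
{
  char c;
  int bits : 7;
};

struct __attribute__ ((ms_struct)) msvc_layout
{
  char c;
  int bits : 7;
};
#endif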
17217 /* Returns an expression indicating where the this parameter is
17218 located on entry to the FUNCTION. */
17220 static rtx
17221 x86_this_parameter (tree function)
17223 tree type = TREE_TYPE (function);
17225 if (TARGET_64BIT)
17227 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17228 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17231 if (ix86_function_regparm (type, function) > 0)
17233 tree parm;
17235 parm = TYPE_ARG_TYPES (type);
17236 /* Figure out whether or not the function has a variable number of
17237 arguments. */
17238 for (; parm; parm = TREE_CHAIN (parm))
17239 if (TREE_VALUE (parm) == void_type_node)
17240 break;
17241 /* If not, the this parameter is in the first argument. */
17242 if (parm)
17244 int regno = 0;
17245 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17246 regno = 2;
17247 return gen_rtx_REG (SImode, regno);
17251 if (aggregate_value_p (TREE_TYPE (type), type))
17252 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17253 else
17254 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17257 /* Determine whether x86_output_mi_thunk can succeed. */
17259 static bool
17260 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17261 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17262 HOST_WIDE_INT vcall_offset, tree function)
17264 /* 64-bit can handle anything. */
17265 if (TARGET_64BIT)
17266 return true;
17268 /* For 32-bit, everything's fine if we have one free register. */
17269 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17270 return true;
17272 /* Need a free register for vcall_offset. */
17273 if (vcall_offset)
17274 return false;
17276 /* Need a free register for GOT references. */
17277 if (flag_pic && !(*targetm.binds_local_p) (function))
17278 return false;
17280 /* Otherwise ok. */
17281 return true;
17284 /* Output the assembler code for a thunk function. THUNK_DECL is the
17285 declaration for the thunk function itself, FUNCTION is the decl for
17286 the target function. DELTA is an immediate constant offset to be
17287 added to THIS. If VCALL_OFFSET is nonzero, the word at
17288 *(*this + vcall_offset) should be added to THIS. */
17290 static void
17291 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17292 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17293 HOST_WIDE_INT vcall_offset, tree function)
17295 rtx xops[3];
17296 rtx this = x86_this_parameter (function);
17297 rtx this_reg, tmp;
17299 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17300 pull it in now and let DELTA benefit. */
17301 if (REG_P (this))
17302 this_reg = this;
17303 else if (vcall_offset)
17305 /* Put the this parameter into %eax. */
17306 xops[0] = this;
17307 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17308 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17310 else
17311 this_reg = NULL_RTX;
17313 /* Adjust the this parameter by a fixed constant. */
17314 if (delta)
17316 xops[0] = GEN_INT (delta);
17317 xops[1] = this_reg ? this_reg : this;
17318 if (TARGET_64BIT)
17320 if (!x86_64_general_operand (xops[0], DImode))
17322 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17323 xops[1] = tmp;
17324 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17325 xops[0] = tmp;
17326 xops[1] = this;
17328 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17330 else
17331 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17334 /* Adjust the this parameter by a value stored in the vtable. */
17335 if (vcall_offset)
17337 if (TARGET_64BIT)
17338 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17339 else
17341 int tmp_regno = 2 /* ECX */;
17342 if (lookup_attribute ("fastcall",
17343 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17344 tmp_regno = 0 /* EAX */;
17345 tmp = gen_rtx_REG (SImode, tmp_regno);
17348 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17349 xops[1] = tmp;
17350 if (TARGET_64BIT)
17351 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17352 else
17353 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17355 /* Adjust the this parameter. */
17356 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17357 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17359 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17360 xops[0] = GEN_INT (vcall_offset);
17361 xops[1] = tmp2;
17362 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17363 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17365 xops[1] = this_reg;
17366 if (TARGET_64BIT)
17367 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17368 else
17369 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17372 /* If necessary, drop THIS back to its stack slot. */
17373 if (this_reg && this_reg != this)
17375 xops[0] = this_reg;
17376 xops[1] = this;
17377 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17380 xops[0] = XEXP (DECL_RTL (function), 0);
17381 if (TARGET_64BIT)
17383 if (!flag_pic || (*targetm.binds_local_p) (function))
17384 output_asm_insn ("jmp\t%P0", xops);
17385 else
17387 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17388 tmp = gen_rtx_CONST (Pmode, tmp);
17389 tmp = gen_rtx_MEM (QImode, tmp);
17390 xops[0] = tmp;
17391 output_asm_insn ("jmp\t%A0", xops);
17394 else
17396 if (!flag_pic || (*targetm.binds_local_p) (function))
17397 output_asm_insn ("jmp\t%P0", xops);
17398 else
17399 #if TARGET_MACHO
17400 if (TARGET_MACHO)
17402 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17403 tmp = (gen_rtx_SYMBOL_REF
17404 (Pmode,
17405 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17406 tmp = gen_rtx_MEM (QImode, tmp);
17407 xops[0] = tmp;
17408 output_asm_insn ("jmp\t%0", xops);
17410 else
17411 #endif /* TARGET_MACHO */
17413 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17414 output_set_got (tmp, NULL_RTX);
17416 xops[1] = tmp;
17417 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17418 output_asm_insn ("jmp\t{*}%1", xops);
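/* Editorial sketch, not part of the original file: independent of the
   exact assembly emitted above, the pointer adjustment a thunk performs
   is "add DELTA, then optionally add the word at *(*this + VCALL_OFFSET)".
   A plain C model of that adjustment (function and parameter names are
   made up): */
#if 0
#include <stddef.h>

static void *
adjust_this_pointer (void *this_ptr, ptrdiff_t delta, ptrdiff_t vcall_offset)
{
  char *p = (char *) this_ptr + delta;          /* THIS += DELTA */
  if (vcall_offset != 0)
    {
      char *vtable = *(char **) p;              /* first word of *THIS */
      p += *(ptrdiff_t *) (vtable + vcall_offset); /* THIS += *(*THIS + VCALL_OFFSET) */
    }
  return p;
}
#endif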
17423 static void
17424 x86_file_start (void)
17426 default_file_start ();
17427 if (X86_FILE_START_VERSION_DIRECTIVE)
17428 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17429 if (X86_FILE_START_FLTUSED)
17430 fputs ("\t.global\t__fltused\n", asm_out_file);
17431 if (ix86_asm_dialect == ASM_INTEL)
17432 fputs ("\t.intel_syntax\n", asm_out_file);
17436 x86_field_alignment (tree field, int computed)
17438 enum machine_mode mode;
17439 tree type = TREE_TYPE (field);
17441 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17442 return computed;
17443 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17444 ? get_inner_array_type (type) : type);
17445 if (mode == DFmode || mode == DCmode
17446 || GET_MODE_CLASS (mode) == MODE_INT
17447 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17448 return MIN (32, computed);
17449 return computed;
17452 /* Output assembler code to FILE to increment profiler label # LABELNO
17453 for profiling a function entry. */
17454 void
17455 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17457 if (TARGET_64BIT)
17458 if (flag_pic)
17460 #ifndef NO_PROFILE_COUNTERS
17461 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17462 #endif
17463 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17465 else
17467 #ifndef NO_PROFILE_COUNTERS
17468 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17469 #endif
17470 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17472 else if (flag_pic)
17474 #ifndef NO_PROFILE_COUNTERS
17475 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17476 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17477 #endif
17478 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17480 else
17482 #ifndef NO_PROFILE_COUNTERS
17483 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17484 PROFILE_COUNT_REGISTER);
17485 #endif
17486 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17490 /* We don't have exact information about the insn sizes, but we may assume
17491 quite safely that we are informed about all 1 byte insns and memory
17492 address sizes. This is enough to eliminate unnecessary padding in
17493 99% of cases. */
17495 static int
17496 min_insn_size (rtx insn)
17498 int l = 0;
17500 if (!INSN_P (insn) || !active_insn_p (insn))
17501 return 0;
17503 /* Discard alignments we've emitted and jump tables. */
17504 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17505 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17506 return 0;
17507 if (GET_CODE (insn) == JUMP_INSN
17508 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17509 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17510 return 0;
17512 /* Important case - calls are always 5 bytes.
17513 It is common to have many calls in a row. */
17514 if (GET_CODE (insn) == CALL_INSN
17515 && symbolic_reference_mentioned_p (PATTERN (insn))
17516 && !SIBLING_CALL_P (insn))
17517 return 5;
17518 if (get_attr_length (insn) <= 1)
17519 return 1;
17521 /* For normal instructions we may rely on the sizes of addresses
17522 and the presence of a symbol to require 4 bytes of encoding.
17523 This is not the case for jumps, where references are PC relative. */
17524 if (GET_CODE (insn) != JUMP_INSN)
17526 l = get_attr_length_address (insn);
17527 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17528 l = 4;
17530 if (l)
17531 return 1+l;
17532 else
17533 return 2;
17536 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
17537 window. */
17539 static void
17540 ix86_avoid_jump_misspredicts (void)
17542 rtx insn, start = get_insns ();
17543 int nbytes = 0, njumps = 0;
17544 int isjump = 0;
17546 /* Look for all minimal intervals of instructions containing 4 jumps.
17547 The intervals are bounded by START and INSN. NBYTES is the total
17548 size of instructions in the interval including INSN and not including
17549 START. When NBYTES is smaller than 16 bytes, it is possible
17550 that the end of START and INSN end up in the same 16 byte page.
17552 The smallest offset in the page at which INSN can start occurs when START
17553 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
17554 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
17556 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17559 nbytes += min_insn_size (insn);
17560 if (dump_file)
17561 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17562 INSN_UID (insn), min_insn_size (insn));
17563 if ((GET_CODE (insn) == JUMP_INSN
17564 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17565 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17566 || GET_CODE (insn) == CALL_INSN)
17567 njumps++;
17568 else
17569 continue;
17571 while (njumps > 3)
17573 start = NEXT_INSN (start);
17574 if ((GET_CODE (start) == JUMP_INSN
17575 && GET_CODE (PATTERN (start)) != ADDR_VEC
17576 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17577 || GET_CODE (start) == CALL_INSN)
17578 njumps--, isjump = 1;
17579 else
17580 isjump = 0;
17581 nbytes -= min_insn_size (start);
17583 gcc_assert (njumps >= 0);
17584 if (dump_file)
17585 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17586 INSN_UID (start), INSN_UID (insn), nbytes);
17588 if (njumps == 3 && isjump && nbytes < 16)
17590 int padsize = 15 - nbytes + min_insn_size (insn);
17592 if (dump_file)
17593 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17594 INSN_UID (insn), padsize);
17595 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
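/* Editorial sketch, not part of the original file: the pass above slides a
   window over the insn stream and requests padding whenever a fourth
   branch could share a 16 byte fetch block with the previous three.  A
   simplified, self-contained model of the window bookkeeping (the insn
   records and the boundary handling are approximations): */
#if 0
#include <stdio.h>

struct model_insn { int size; int is_jump; };

static void
pad_jump_windows (const struct model_insn *insn, int n)
{
  int start = 0, nbytes = 0, njumps = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      int dropped_jump = 0;

      nbytes += insn[i].size;
      if (!insn[i].is_jump)
        continue;
      njumps++;

      /* Shrink the window until it holds at most three jumps.  */
      while (njumps > 3)
        {
          nbytes -= insn[start].size;
          dropped_jump = insn[start].is_jump;
          njumps -= dropped_jump;
          start++;
        }

      /* Four jumps would have fit in one 16 byte block: pad before I.  */
      if (njumps == 3 && dropped_jump && nbytes < 16)
        printf ("pad insn %d by %d bytes\n", i, 15 - nbytes + insn[i].size);
    }
}
#endif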
17600 /* AMD Athlon works faster
17601 when RET is not the destination of a conditional jump or directly preceded
17602 by another jump instruction. We avoid the penalty by inserting a NOP just
17603 before the RET instructions in such cases. */
17604 static void
17605 ix86_pad_returns (void)
17607 edge e;
17608 edge_iterator ei;
17610 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17612 basic_block bb = e->src;
17613 rtx ret = BB_END (bb);
17614 rtx prev;
17615 bool replace = false;
17617 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17618 || !maybe_hot_bb_p (bb))
17619 continue;
17620 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17621 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17622 break;
17623 if (prev && GET_CODE (prev) == CODE_LABEL)
17625 edge e;
17626 edge_iterator ei;
17628 FOR_EACH_EDGE (e, ei, bb->preds)
17629 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17630 && !(e->flags & EDGE_FALLTHRU))
17631 replace = true;
17633 if (!replace)
17635 prev = prev_active_insn (ret);
17636 if (prev
17637 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17638 || GET_CODE (prev) == CALL_INSN))
17639 replace = true;
17640 /* Empty functions get a branch mispredict even when the jump destination
17641 is not visible to us. */
17642 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17643 replace = true;
17645 if (replace)
17647 emit_insn_before (gen_return_internal_long (), ret);
17648 delete_insn (ret);
17653 /* Implement machine specific optimizations. We implement padding of returns
17654 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
17655 static void
17656 ix86_reorg (void)
17658 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17659 ix86_pad_returns ();
17660 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17661 ix86_avoid_jump_misspredicts ();
17664 /* Return nonzero when a QImode register that must be represented via a REX prefix
17665 is used. */
17666 bool
17667 x86_extended_QIreg_mentioned_p (rtx insn)
17669 int i;
17670 extract_insn_cached (insn);
17671 for (i = 0; i < recog_data.n_operands; i++)
17672 if (REG_P (recog_data.operand[i])
17673 && REGNO (recog_data.operand[i]) >= 4)
17674 return true;
17675 return false;
17678 /* Return nonzero when P points to a register encoded via a REX prefix.
17679 Called via for_each_rtx. */
17680 static int
17681 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17683 unsigned int regno;
17684 if (!REG_P (*p))
17685 return 0;
17686 regno = REGNO (*p);
17687 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17690 /* Return true when INSN mentions a register that must be encoded using a REX
17691 prefix. */
17692 bool
17693 x86_extended_reg_mentioned_p (rtx insn)
17695 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17698 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17699 optabs would emit if we didn't have TFmode patterns. */
17701 void
17702 x86_emit_floatuns (rtx operands[2])
17704 rtx neglab, donelab, i0, i1, f0, in, out;
17705 enum machine_mode mode, inmode;
17707 inmode = GET_MODE (operands[1]);
17708 gcc_assert (inmode == SImode || inmode == DImode);
17710 out = operands[0];
17711 in = force_reg (inmode, operands[1]);
17712 mode = GET_MODE (out);
17713 neglab = gen_label_rtx ();
17714 donelab = gen_label_rtx ();
17715 i1 = gen_reg_rtx (Pmode);
17716 f0 = gen_reg_rtx (mode);
17718 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17720 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17721 emit_jump_insn (gen_jump (donelab));
17722 emit_barrier ();
17724 emit_label (neglab);
17726 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17727 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17728 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17729 expand_float (f0, i0, 0);
17730 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17732 emit_label (donelab);
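/* Editorial sketch, not part of the original file: the expansion above is
   the standard unsigned-to-float trick -- inputs with the sign bit set are
   halved (folding the lost bit back into bit 0 so rounding stays correct),
   converted as signed, and then doubled.  The same idea in plain C for a
   64-bit input: */
#if 0
static double
u64_to_double (unsigned long long u)
{
  unsigned long long halved;
  double d;

  if ((long long) u >= 0)              /* fits in a signed conversion */
    return (double) (long long) u;

  halved = (u >> 1) | (u & 1);         /* halve, keeping the low bit */
  d = (double) (long long) halved;
  return d + d;                        /* undo the halving */
}
#endif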
17735 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17736 with all elements equal to VAR. Return true if successful. */
17738 static bool
17739 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17740 rtx target, rtx val)
17742 enum machine_mode smode, wsmode, wvmode;
17743 rtx x;
17745 switch (mode)
17747 case V2SImode:
17748 case V2SFmode:
17749 if (!mmx_ok && !TARGET_SSE)
17750 return false;
17751 /* FALLTHRU */
17753 case V2DFmode:
17754 case V2DImode:
17755 case V4SFmode:
17756 case V4SImode:
17757 val = force_reg (GET_MODE_INNER (mode), val);
17758 x = gen_rtx_VEC_DUPLICATE (mode, val);
17759 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17760 return true;
17762 case V4HImode:
17763 if (!mmx_ok)
17764 return false;
17765 if (TARGET_SSE || TARGET_3DNOW_A)
17767 val = gen_lowpart (SImode, val);
17768 x = gen_rtx_TRUNCATE (HImode, val);
17769 x = gen_rtx_VEC_DUPLICATE (mode, x);
17770 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17771 return true;
17773 else
17775 smode = HImode;
17776 wsmode = SImode;
17777 wvmode = V2SImode;
17778 goto widen;
17781 case V8QImode:
17782 if (!mmx_ok)
17783 return false;
17784 smode = QImode;
17785 wsmode = HImode;
17786 wvmode = V4HImode;
17787 goto widen;
17788 case V8HImode:
17789 smode = HImode;
17790 wsmode = SImode;
17791 wvmode = V4SImode;
17792 goto widen;
17793 case V16QImode:
17794 smode = QImode;
17795 wsmode = HImode;
17796 wvmode = V8HImode;
17797 goto widen;
17798 widen:
17799 /* Replicate the value once into the next wider mode and recurse. */
17800 val = convert_modes (wsmode, smode, val, true);
17801 x = expand_simple_binop (wsmode, ASHIFT, val,
17802 GEN_INT (GET_MODE_BITSIZE (smode)),
17803 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17804 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17806 x = gen_reg_rtx (wvmode);
17807 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17808 gcc_unreachable ();
17809 emit_move_insn (target, gen_lowpart (mode, x));
17810 return true;
17812 default:
17813 return false;
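/* Editorial sketch, not part of the original file: the "widen" path above
   replicates a narrow scalar by OR-ing it with a shifted copy of itself,
   doubling the replicated width at each step before handing the result to
   the next wider vector mode.  The scalar form of that step: */
#if 0
#include <stdint.h>

static uint32_t
replicate_byte (uint8_t b)
{
  uint32_t v = b;
  v |= v << 8;        /* QImode -> HImode: two copies */
  v |= v << 16;       /* HImode -> SImode: four copies */
  return v;           /* e.g. 0xab becomes 0xabababab */
}
#endif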
17817 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17818 whose low element is VAR, and other elements are zero. Return true
17819 if successful. */
17821 static bool
17822 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17823 rtx target, rtx var)
17825 enum machine_mode vsimode;
17826 rtx x;
17828 switch (mode)
17830 case V2SFmode:
17831 case V2SImode:
17832 if (!mmx_ok && !TARGET_SSE)
17833 return false;
17834 /* FALLTHRU */
17836 case V2DFmode:
17837 case V2DImode:
17838 var = force_reg (GET_MODE_INNER (mode), var);
17839 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17840 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17841 return true;
17843 case V4SFmode:
17844 case V4SImode:
17845 var = force_reg (GET_MODE_INNER (mode), var);
17846 x = gen_rtx_VEC_DUPLICATE (mode, var);
17847 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17848 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17849 return true;
17851 case V8HImode:
17852 case V16QImode:
17853 vsimode = V4SImode;
17854 goto widen;
17855 case V4HImode:
17856 case V8QImode:
17857 if (!mmx_ok)
17858 return false;
17859 vsimode = V2SImode;
17860 goto widen;
17861 widen:
17862 /* Zero extend the variable element to SImode and recurse. */
17863 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17865 x = gen_reg_rtx (vsimode);
17866 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17867 gcc_unreachable ();
17869 emit_move_insn (target, gen_lowpart (mode, x));
17870 return true;
17872 default:
17873 return false;
17877 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17878 consisting of the values in VALS. It is known that all elements
17879 except ONE_VAR are constants. Return true if successful. */
17881 static bool
17882 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17883 rtx target, rtx vals, int one_var)
17885 rtx var = XVECEXP (vals, 0, one_var);
17886 enum machine_mode wmode;
17887 rtx const_vec, x;
17889 const_vec = copy_rtx (vals);
17890 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17891 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17893 switch (mode)
17895 case V2DFmode:
17896 case V2DImode:
17897 case V2SFmode:
17898 case V2SImode:
17899 /* For the two element vectors, it's just as easy to use
17900 the general case. */
17901 return false;
17903 case V4SFmode:
17904 case V4SImode:
17905 case V8HImode:
17906 case V4HImode:
17907 break;
17909 case V16QImode:
17910 wmode = V8HImode;
17911 goto widen;
17912 case V8QImode:
17913 wmode = V4HImode;
17914 goto widen;
17915 widen:
17916 /* There's no way to set one QImode entry easily. Combine
17917 the variable value with its adjacent constant value, and
17918 promote to an HImode set. */
17919 x = XVECEXP (vals, 0, one_var ^ 1);
17920 if (one_var & 1)
17922 var = convert_modes (HImode, QImode, var, true);
17923 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17924 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17925 x = GEN_INT (INTVAL (x) & 0xff);
17927 else
17929 var = convert_modes (HImode, QImode, var, true);
17930 x = gen_int_mode (INTVAL (x) << 8, HImode);
17932 if (x != const0_rtx)
17933 var = expand_simple_binop (HImode, IOR, var, x, var,
17934 1, OPTAB_LIB_WIDEN);
17936 x = gen_reg_rtx (wmode);
17937 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17938 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17940 emit_move_insn (target, gen_lowpart (mode, x));
17941 return true;
17943 default:
17944 return false;
17947 emit_move_insn (target, const_vec);
17948 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17949 return true;
17952 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17953 all values variable, and none identical. */
17955 static void
17956 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17957 rtx target, rtx vals)
17959 enum machine_mode half_mode = GET_MODE_INNER (mode);
17960 rtx op0 = NULL, op1 = NULL;
17961 bool use_vec_concat = false;
17963 switch (mode)
17965 case V2SFmode:
17966 case V2SImode:
17967 if (!mmx_ok && !TARGET_SSE)
17968 break;
17969 /* FALLTHRU */
17971 case V2DFmode:
17972 case V2DImode:
17973 /* For the two element vectors, we always implement VEC_CONCAT. */
17974 op0 = XVECEXP (vals, 0, 0);
17975 op1 = XVECEXP (vals, 0, 1);
17976 use_vec_concat = true;
17977 break;
17979 case V4SFmode:
17980 half_mode = V2SFmode;
17981 goto half;
17982 case V4SImode:
17983 half_mode = V2SImode;
17984 goto half;
17985 half:
17987 rtvec v;
17989 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
17990 Recurse to load the two halves. */
17992 op0 = gen_reg_rtx (half_mode);
17993 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
17994 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
17996 op1 = gen_reg_rtx (half_mode);
17997 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
17998 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18000 use_vec_concat = true;
18002 break;
18004 case V8HImode:
18005 case V16QImode:
18006 case V4HImode:
18007 case V8QImode:
18008 break;
18010 default:
18011 gcc_unreachable ();
18014 if (use_vec_concat)
18016 if (!register_operand (op0, half_mode))
18017 op0 = force_reg (half_mode, op0);
18018 if (!register_operand (op1, half_mode))
18019 op1 = force_reg (half_mode, op1);
18021 emit_insn (gen_rtx_SET (VOIDmode, target,
18022 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18024 else
18026 int i, j, n_elts, n_words, n_elt_per_word;
18027 enum machine_mode inner_mode;
18028 rtx words[4], shift;
18030 inner_mode = GET_MODE_INNER (mode);
18031 n_elts = GET_MODE_NUNITS (mode);
18032 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18033 n_elt_per_word = n_elts / n_words;
18034 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
18036 for (i = 0; i < n_words; ++i)
18038 rtx word = NULL_RTX;
18040 for (j = 0; j < n_elt_per_word; ++j)
18042 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18043 elt = convert_modes (word_mode, inner_mode, elt, true);
18045 if (j == 0)
18046 word = elt;
18047 else
18049 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18050 word, 1, OPTAB_LIB_WIDEN);
18051 word = expand_simple_binop (word_mode, IOR, word, elt,
18052 word, 1, OPTAB_LIB_WIDEN);
18056 words[i] = word;
18059 if (n_words == 1)
18060 emit_move_insn (target, gen_lowpart (mode, words[0]));
18061 else if (n_words == 2)
18063 rtx tmp = gen_reg_rtx (mode);
18064 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18065 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18066 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18067 emit_move_insn (target, tmp);
18069 else if (n_words == 4)
18071 rtx tmp = gen_reg_rtx (V4SImode);
18072 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18073 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18074 emit_move_insn (target, gen_lowpart (mode, tmp));
18076 else
18077 gcc_unreachable ();
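/* Editorial sketch, not part of the original file: the non-VEC_CONCAT path
   above packs the elements of each word by repeatedly shifting the
   accumulated word left by one element and OR-ing in the next element,
   starting from the highest-index element so that element 0 ends up in the
   low bits.  The packing step in scalar form for four 16-bit elements: */
#if 0
#include <stdint.h>

static uint64_t
pack_4x16 (const uint16_t elt[4])
{
  uint64_t word = 0;
  int j;

  for (j = 3; j >= 0; j--)
    {
      word <<= 16;     /* make room for the next element */
      word |= elt[j];  /* highest-index element is inserted first */
    }
  return word;         /* elt[0] ends up in the low 16 bits */
}
#endif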
18081 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18082 instructions unless MMX_OK is true. */
18084 void
18085 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18087 enum machine_mode mode = GET_MODE (target);
18088 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18089 int n_elts = GET_MODE_NUNITS (mode);
18090 int n_var = 0, one_var = -1;
18091 bool all_same = true, all_const_zero = true;
18092 int i;
18093 rtx x;
18095 for (i = 0; i < n_elts; ++i)
18097 x = XVECEXP (vals, 0, i);
18098 if (!CONSTANT_P (x))
18099 n_var++, one_var = i;
18100 else if (x != CONST0_RTX (inner_mode))
18101 all_const_zero = false;
18102 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18103 all_same = false;
18106 /* Constants are best loaded from the constant pool. */
18107 if (n_var == 0)
18109 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18110 return;
18113 /* If all values are identical, broadcast the value. */
18114 if (all_same
18115 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18116 XVECEXP (vals, 0, 0)))
18117 return;
18119 /* Values where only one field is non-constant are best loaded from
18120 the pool and overwritten via move later. */
18121 if (n_var == 1)
18123 if (all_const_zero && one_var == 0
18124 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
18125 XVECEXP (vals, 0, 0)))
18126 return;
18128 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18129 return;
18132 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
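/* Editorial sketch, not part of the original file: initializers written
   with GCC's generic vector extension are one of the consumers of this
   expander -- the all-constant case goes to the constant pool, an
   all-identical case is broadcast, and mixed cases fall through to the
   helpers above.  Illustrative only; the V4SI case is best with SSE2: */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
make_vectors (int a)
{
  v4si all_const = { 1, 2, 3, 4 };     /* constant-pool path */
  v4si broadcast = { a, a, a, a };     /* duplicate path */
  v4si mixed = { a, 0, 0, 0 };         /* low-nonzero / one-var path */
  return all_const + broadcast + mixed;
}
#endif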
18135 void
18136 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18138 enum machine_mode mode = GET_MODE (target);
18139 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18140 bool use_vec_merge = false;
18141 rtx tmp;
18143 switch (mode)
18145 case V2SFmode:
18146 case V2SImode:
18147 if (mmx_ok)
18149 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18150 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18151 if (elt == 0)
18152 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18153 else
18154 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18155 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18156 return;
18158 break;
18160 case V2DFmode:
18161 case V2DImode:
18163 rtx op0, op1;
18165 /* For the two element vectors, we implement a VEC_CONCAT with
18166 the extraction of the other element. */
18168 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18169 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18171 if (elt == 0)
18172 op0 = val, op1 = tmp;
18173 else
18174 op0 = tmp, op1 = val;
18176 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18177 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18179 return;
18181 case V4SFmode:
18182 switch (elt)
18184 case 0:
18185 use_vec_merge = true;
18186 break;
18188 case 1:
18189 /* tmp = target = A B C D */
18190 tmp = copy_to_reg (target);
18191 /* target = A A B B */
18192 emit_insn (gen_sse_unpcklps (target, target, target));
18193 /* target = X A B B */
18194 ix86_expand_vector_set (false, target, val, 0);
18195 /* target = A X C D */
18196 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18197 GEN_INT (1), GEN_INT (0),
18198 GEN_INT (2+4), GEN_INT (3+4)));
18199 return;
18201 case 2:
18202 /* tmp = target = A B C D */
18203 tmp = copy_to_reg (target);
18204 /* tmp = X B C D */
18205 ix86_expand_vector_set (false, tmp, val, 0);
18206 /* target = A B X D */
18207 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18208 GEN_INT (0), GEN_INT (1),
18209 GEN_INT (0+4), GEN_INT (3+4)));
18210 return;
18212 case 3:
18213 /* tmp = target = A B C D */
18214 tmp = copy_to_reg (target);
18215 /* tmp = X B C D */
18216 ix86_expand_vector_set (false, tmp, val, 0);
18217 /* target = A B C X */
18218 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18219 GEN_INT (0), GEN_INT (1),
18220 GEN_INT (2+4), GEN_INT (0+4)));
18221 return;
18223 default:
18224 gcc_unreachable ();
18226 break;
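/* A minimal user-level sketch of the ELT == 1 sequence above, written with
   SSE intrinsics instead of RTL (assumes <xmmintrin.h>; "insert_elt1" is a
   hypothetical illustration, not something defined by GCC):

     #include <xmmintrin.h>

     static __m128
     insert_elt1 (__m128 target, float x)
     {
       __m128 tmp = target;                            // A B C D
       target = _mm_unpacklo_ps (target, target);      // A A B B
       target = _mm_move_ss (target, _mm_set_ss (x));  // X A B B
       // Lanes 1,0 from target, lanes 2,3 from tmp: A X C D.
       return _mm_shuffle_ps (target, tmp, _MM_SHUFFLE (3, 2, 0, 1));
     }
*/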
18228 case V4SImode:
18229 /* Element 0 handled by vec_merge below. */
18230 if (elt == 0)
18232 use_vec_merge = true;
18233 break;
18236 if (TARGET_SSE2)
18238 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18239 store into element 0, then shuffle them back. */
18241 rtx order[4];
18243 order[0] = GEN_INT (elt);
18244 order[1] = const1_rtx;
18245 order[2] = const2_rtx;
18246 order[3] = GEN_INT (3);
18247 order[elt] = const0_rtx;
18249 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18250 order[1], order[2], order[3]));
18252 ix86_expand_vector_set (false, target, val, 0);
18254 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18255 order[1], order[2], order[3]));
18257 else
18259 /* For SSE1, we have to reuse the V4SF code. */
18260 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18261 gen_lowpart (SFmode, val), elt);
18263 return;
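/* Worked example of the SSE2 path above for ELT == 2 (elements written low
   to high): ORDER becomes { 2, 1, 0, 3 }, so

     target = { A, B, C, D }
     pshufd { 2, 1, 0, 3 }   ->  { C, B, A, D }   (elements 0 and 2 swapped)
     insert X at element 0   ->  { X, B, A, D }
     pshufd { 2, 1, 0, 3 }   ->  { A, B, X, D }   (swap undone, X now at 2)  */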
18265 case V8HImode:
18266 use_vec_merge = TARGET_SSE2;
18267 break;
18268 case V4HImode:
18269 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18270 break;
18272 case V16QImode:
18273 case V8QImode:
18274 default:
18275 break;
18278 if (use_vec_merge)
18280 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18281 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18282 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18284 else
18286 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18288 emit_move_insn (mem, target);
18290 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18291 emit_move_insn (tmp, val);
18293 emit_move_insn (target, mem);
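/* The memory fallback above is roughly equivalent to the following C
   (a sketch only; "v4si" is an illustrative typedef, not a GCC-internal
   type):

     typedef int v4si __attribute__ ((vector_size (16)));

     static v4si
     set_via_memory (v4si target, int val, int elt)
     {
       union { v4si v; int e[4]; } u;   // stack temporary
       u.v = target;                    // spill the whole vector
       u.e[elt] = val;                  // overwrite one element in memory
       return u.v;                      // reload the vector
     }
*/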
18297 void
18298 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18300 enum machine_mode mode = GET_MODE (vec);
18301 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18302 bool use_vec_extr = false;
18303 rtx tmp;
18305 switch (mode)
18307 case V2SImode:
18308 case V2SFmode:
18309 if (!mmx_ok)
18310 break;
18311 /* FALLTHRU */
18313 case V2DFmode:
18314 case V2DImode:
18315 use_vec_extr = true;
18316 break;
18318 case V4SFmode:
18319 switch (elt)
18321 case 0:
18322 tmp = vec;
18323 break;
18325 case 1:
18326 case 3:
18327 tmp = gen_reg_rtx (mode);
18328 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18329 GEN_INT (elt), GEN_INT (elt),
18330 GEN_INT (elt+4), GEN_INT (elt+4)));
18331 break;
18333 case 2:
18334 tmp = gen_reg_rtx (mode);
18335 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18336 break;
18338 default:
18339 gcc_unreachable ();
18341 vec = tmp;
18342 use_vec_extr = true;
18343 elt = 0;
18344 break;
18346 case V4SImode:
18347 if (TARGET_SSE2)
18349 switch (elt)
18351 case 0:
18352 tmp = vec;
18353 break;
18355 case 1:
18356 case 3:
18357 tmp = gen_reg_rtx (mode);
18358 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18359 GEN_INT (elt), GEN_INT (elt),
18360 GEN_INT (elt), GEN_INT (elt)));
18361 break;
18363 case 2:
18364 tmp = gen_reg_rtx (mode);
18365 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18366 break;
18368 default:
18369 gcc_unreachable ();
18371 vec = tmp;
18372 use_vec_extr = true;
18373 elt = 0;
18375 else
18377 /* For SSE1, we have to reuse the V4SF code. */
18378 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18379 gen_lowpart (V4SFmode, vec), elt);
18380 return;
18382 break;
18384 case V8HImode:
18385 use_vec_extr = TARGET_SSE2;
18386 break;
18387 case V4HImode:
18388 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18389 break;
18391 case V16QImode:
18392 case V8QImode:
18393 /* ??? Could extract the appropriate HImode element and shift. */
18394 default:
18395 break;
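/* One possible shape of the missing QImode path hinted at above (a sketch,
   not what is implemented): extract the containing HImode element, i.e.
   element ELT/2 via pextrw, then use the low or high byte of the result
   according to ELT & 1, shifting right by 8 for odd ELT.  */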
18398 if (use_vec_extr)
18400 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18401 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18403 /* Let the rtl optimizers know about the zero extension performed. */
18404 if (inner_mode == HImode)
18406 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18407 target = gen_lowpart (SImode, target);
18410 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18412 else
18414 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18416 emit_move_insn (mem, vec);
18418 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18419 emit_move_insn (target, tmp);
18423 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18424 pattern to reduce; DEST is the destination; IN is the input vector. */
18426 void
18427 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18429 rtx tmp1, tmp2, tmp3;
18431 tmp1 = gen_reg_rtx (V4SFmode);
18432 tmp2 = gen_reg_rtx (V4SFmode);
18433 tmp3 = gen_reg_rtx (V4SFmode);
18435 emit_insn (gen_sse_movhlps (tmp1, in, in));
18436 emit_insn (fn (tmp2, tmp1, in));
18438 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18439 GEN_INT (1), GEN_INT (1),
18440 GEN_INT (1+4), GEN_INT (1+4)));
18441 emit_insn (fn (dest, tmp2, tmp3));
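/* Dataflow of the reduction above, with IN = { A, B, C, D } (element 0
   first) and FN a binary operation "op":

     tmp1 = movhlps (in, in)      =  { C, D, C, D }
     tmp2 = op (tmp1, in)         =  { C op A, D op B, ... }
     tmp3 = broadcast of tmp2[1]  =  { D op B, D op B, D op B, D op B }
     dest = op (tmp2, tmp3)       ->  element 0 = (C op A) op (D op B)

   so the full four-element reduction ends up in element 0 of DEST.  */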
18444 /* Implements target hook vector_mode_supported_p. */
18445 static bool
18446 ix86_vector_mode_supported_p (enum machine_mode mode)
18448 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18449 return true;
18450 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18451 return true;
18452 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18453 return true;
18454 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18455 return true;
18456 return false;
18459 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18461 We do this in the new i386 backend to maintain source compatibility
18462 with the old cc0-based compiler. */
18464 static tree
18465 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18466 tree inputs ATTRIBUTE_UNUSED,
18467 tree clobbers)
18469 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18470 clobbers);
18471 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18472 clobbers);
18473 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18474 clobbers);
18475 return clobbers;
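/* In effect, every user asm statement behaves as if these registers had
   been listed explicitly; e.g. (user-level illustration only, with
   hypothetical variables dst, src and n):

     asm volatile ("cld; rep movsb"
                   : "+D" (dst), "+S" (src), "+c" (n)
                   :
                   : "memory");

   is compiled as if "flags", "fpsr" and "dirflag" also appeared in its
   clobber list, which is what the old cc0-based port assumed.  */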
18478 /* Return true if this goes in large data/bss. */
18480 static bool
18481 ix86_in_large_data_p (tree exp)
18483 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18484 return false;
18486 /* Functions are never large data. */
18487 if (TREE_CODE (exp) == FUNCTION_DECL)
18488 return false;
18490 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18492 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18493 if (strcmp (section, ".ldata") == 0
18494 || strcmp (section, ".lbss") == 0)
18495 return true;
18496 return false;
18498 else
18500 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18502 /* If this is an incomplete type with size 0, then we can't put it
18503 in data because it might be too big when completed. */
18504 if (!size || size > ix86_section_threshold)
18505 return true;
18508 return false;
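/* For example (assuming the x86-64 medium code model and the default
   section threshold), a large static object such as

     static char big_table[1 << 20];   // well above the threshold

   ends up in .ldata (or .lbss if uninitialized) rather than .data, while
   functions and small objects keep their usual sections.  */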
18510 static void
18511 ix86_encode_section_info (tree decl, rtx rtl, int first)
18513 default_encode_section_info (decl, rtl, first);
18515 if (TREE_CODE (decl) == VAR_DECL
18516 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18517 && ix86_in_large_data_p (decl))
18518 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18521 /* Worker function for REVERSE_CONDITION. */
18523 enum rtx_code
18524 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18526 return (mode != CCFPmode && mode != CCFPUmode
18527 ? reverse_condition (code)
18528 : reverse_condition_maybe_unordered (code));
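/* For example, reversing GT in CCFPmode yields UNLE rather than LE, so
   that the reversed branch is still taken when the comparison is
   unordered (one operand is a NaN).  */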
18531 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18532 to OPERANDS[0]. */
18534 const char *
18535 output_387_reg_move (rtx insn, rtx *operands)
18537 if (REG_P (operands[1])
18538 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18540 if (REGNO (operands[0]) == FIRST_STACK_REG
18541 && TARGET_USE_FFREEP)
18542 return "ffreep\t%y0";
18543 return "fstp\t%y0";
18545 if (STACK_TOP_P (operands[0]))
18546 return "fld%z1\t%y1";
18547 return "fst\t%y0";
18550 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18551 FP status register is set. */
18553 void
18554 ix86_emit_fp_unordered_jump (rtx label)
18556 rtx reg = gen_reg_rtx (HImode);
18557 rtx temp;
18559 emit_insn (gen_x86_fnstsw_1 (reg));
18561 if (TARGET_USE_SAHF)
18563 emit_insn (gen_x86_sahf_1 (reg));
18565 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18566 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18568 else
18570 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18572 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18573 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18576 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18577 gen_rtx_LABEL_REF (VOIDmode, label),
18578 pc_rtx);
18579 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18580 emit_jump_insn (temp);
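/* The emitted sequence is roughly (AT&T syntax sketch):

     fnstsw  %ax
     sahf                    # with TARGET_USE_SAHF: C2 ends up in PF
     jp      label

   or, without SAHF:

     fnstsw  %ax
     testb   $0x04, %ah      # C2 is bit 10 of the status word
     jne     label
*/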
18583 /* Output code to perform a log1p XFmode calculation. */
18585 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18587 rtx label1 = gen_label_rtx ();
18588 rtx label2 = gen_label_rtx ();
18590 rtx tmp = gen_reg_rtx (XFmode);
18591 rtx tmp2 = gen_reg_rtx (XFmode);
18593 emit_insn (gen_absxf2 (tmp, op1));
18594 emit_insn (gen_cmpxf (tmp,
18595 CONST_DOUBLE_FROM_REAL_VALUE (
18596 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18597 XFmode)));
18598 emit_jump_insn (gen_bge (label1));
18600 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18601 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18602 emit_jump (label2);
18604 emit_label (label1);
18605 emit_move_insn (tmp, CONST1_RTX (XFmode));
18606 emit_insn (gen_addxf3 (tmp, op1, tmp));
18607 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18608 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18610 emit_label (label2);
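/* The computation above relies on the x87 identities (a sketch of the
   math, no additional code):

     fyl2xp1:  st(1) * log2 (st(0) + 1)   accurate only for small st(0),
                                          |st(0)| < 1 - sqrt(2)/2 ~ 0.2929
     fyl2x:    st(1) * log2 (st(0))

   With st(1) preloaded to ln(2) via fldln2, both yield ln(1 + x): the
   first branch applies fyl2xp1 to x directly, the second computes 1 + x
   explicitly and applies fyl2x.  */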
18613 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18615 static void
18616 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18617 tree decl)
18619 /* With Binutils 2.15, the "@unwind" marker must be specified on
18620 every occurrence of the ".eh_frame" section, not just the first
18621 one. */
18622 if (TARGET_64BIT
18623 && strcmp (name, ".eh_frame") == 0)
18625 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18626 flags & SECTION_WRITE ? "aw" : "a");
18627 return;
18629 default_elf_asm_named_section (name, flags, decl);
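/* For a read-only .eh_frame section this emits, e.g.:

     .section   .eh_frame,"a",@unwind

   with "aw" instead of "a" when SECTION_WRITE is set.  */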
18632 /* Return the mangling of TYPE if it is an extended fundamental type. */
18634 static const char *
18635 ix86_mangle_fundamental_type (tree type)
18637 switch (TYPE_MODE (type))
18639 case TFmode:
18640 /* __float128 is "g". */
18641 return "g";
18642 case XFmode:
18643 /* "long double" or __float80 is "e". */
18644 return "e";
18645 default:
18646 return NULL;
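/* For example, under the Itanium C++ ABI a function such as
   void f (__float128) mangles as _Z1fg, while void f (long double)
   (or __float80) mangles as _Z1fe.  */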
18650 /* For 32-bit code we can save PIC register setup by using
18651 __stack_chk_fail_local hidden function instead of calling
18652 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
18653 register, so it is better to call __stack_chk_fail directly. */
18655 static tree
18656 ix86_stack_protect_fail (void)
18658 return TARGET_64BIT
18659 ? default_external_stack_protect_fail ()
18660 : default_hidden_stack_protect_fail ();
18663 /* Select a format to encode pointers in exception handling data. CODE
18664 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18665 true if the symbol may be affected by dynamic relocations.
18667 ??? All x86 object file formats are capable of representing this.
18668 After all, the relocation needed is the same as for the call insn.
18669 Whether or not a particular assembler allows us to enter such, I
18670 guess we'll have to see. */
18671 int
18672 asm_preferred_eh_data_format (int code, int global)
18674 if (flag_pic)
18676 int type = DW_EH_PE_sdata8;
18677 if (!TARGET_64BIT
18678 || ix86_cmodel == CM_SMALL_PIC
18679 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18680 type = DW_EH_PE_sdata4;
18681 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18683 if (ix86_cmodel == CM_SMALL
18684 || (ix86_cmodel == CM_MEDIUM && code))
18685 return DW_EH_PE_udata4;
18686 return DW_EH_PE_absptr;
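/* Worked examples of the choices above: with -fpic on 32-bit the result is
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (plus DW_EH_PE_indirect for symbols that
   may be dynamically relocated); 64-bit -fpic uses sdata4 for the small
   model (and for code and global data in the medium model) but sdata8
   otherwise; without -fpic the small model yields DW_EH_PE_udata4, the
   medium model udata4 for code only, and everything else DW_EH_PE_absptr.  */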
18689 #include "gt-i386.h"