1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
55 #endif
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
63 : 4)
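/* Editor's sketch, not part of the original source: MODE_INDEX is meant to
   index the five-entry per-mode arrays in struct processor_costs below,
   along the lines of

     ix86_cost->mult_init[MODE_INDEX (mode)]
     ix86_cost->divide[MODE_INDEX (mode)]

   (field names assumed from i386.h); any mode wider than DImode falls into
   the final "other" slot at index 4.  */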
65 /* Processor costs (relative to an add) */
66 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
67 #define COSTS_N_BYTES(N) ((N) * 2)
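/* Editor's note: under the assumption stated above (COSTS_N_INSNS (N) ==
   (N) * 4 and a 2-byte add), the two scales stay comparable, e.g.

     COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1)

   so a 2-byte add in the size table weighs the same as a one-insn add in
   the speed tables.  */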
69 static const
70 struct processor_costs size_cost = { /* costs for tuning for size */
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
88 0, /* "large" insn */
89 2, /* MOVE_RATIO */
90 2, /* cost for loading QImode using movzbl */
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
95 2, /* cost of reg,reg fld/fst */
96 {2, 2, 2}, /* cost of loading fp registers
97 in SFmode, DFmode and XFmode */
98 {2, 2, 2}, /* cost of loading integer registers */
99 3, /* cost of moving MMX register */
100 {3, 3}, /* cost of loading MMX registers
101 in SImode and DImode */
102 {3, 3}, /* cost of storing MMX registers
103 in SImode and DImode */
104 3, /* cost of moving SSE register */
105 {3, 3, 3}, /* cost of loading SSE registers
106 in SImode, DImode and TImode */
107 {3, 3, 3}, /* cost of storing SSE registers
108 in SImode, DImode and TImode */
109 3, /* MMX or SSE register to integer */
110 0, /* size of prefetch block */
111 0, /* number of parallel prefetches */
112 2, /* Branch cost */
113 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
114 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
115 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
116 COSTS_N_BYTES (2), /* cost of FABS instruction. */
117 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
118 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
};
121 /* Processor costs (relative to an add) */
122 static const
123 struct processor_costs i386_cost = { /* 386 specific costs */
124 COSTS_N_INSNS (1), /* cost of an add instruction */
125 COSTS_N_INSNS (1), /* cost of a lea instruction */
126 COSTS_N_INSNS (3), /* variable shift costs */
127 COSTS_N_INSNS (2), /* constant shift costs */
128 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
129 COSTS_N_INSNS (6), /* HI */
130 COSTS_N_INSNS (6), /* SI */
131 COSTS_N_INSNS (6), /* DI */
132 COSTS_N_INSNS (6)}, /* other */
133 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
134 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
135 COSTS_N_INSNS (23), /* HI */
136 COSTS_N_INSNS (23), /* SI */
137 COSTS_N_INSNS (23), /* DI */
138 COSTS_N_INSNS (23)}, /* other */
139 COSTS_N_INSNS (3), /* cost of movsx */
140 COSTS_N_INSNS (2), /* cost of movzx */
141 15, /* "large" insn */
142 3, /* MOVE_RATIO */
143 4, /* cost for loading QImode using movzbl */
144 {2, 4, 2}, /* cost of loading integer registers
145 in QImode, HImode and SImode.
146 Relative to reg-reg move (2). */
147 {2, 4, 2}, /* cost of storing integer registers */
148 2, /* cost of reg,reg fld/fst */
149 {8, 8, 8}, /* cost of loading fp registers
150 in SFmode, DFmode and XFmode */
151 {8, 8, 8}, /* cost of loading integer registers */
152 2, /* cost of moving MMX register */
153 {4, 8}, /* cost of loading MMX registers
154 in SImode and DImode */
155 {4, 8}, /* cost of storing MMX registers
156 in SImode and DImode */
157 2, /* cost of moving SSE register */
158 {4, 8, 16}, /* cost of loading SSE registers
159 in SImode, DImode and TImode */
160 {4, 8, 16}, /* cost of storing SSE registers
161 in SImode, DImode and TImode */
162 3, /* MMX or SSE register to integer */
163 0, /* size of prefetch block */
164 0, /* number of parallel prefetches */
165 1, /* Branch cost */
166 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
167 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
168 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
169 COSTS_N_INSNS (22), /* cost of FABS instruction. */
170 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
171 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
};
174 static const
175 struct processor_costs i486_cost = { /* 486 specific costs */
176 COSTS_N_INSNS (1), /* cost of an add instruction */
177 COSTS_N_INSNS (1), /* cost of a lea instruction */
178 COSTS_N_INSNS (3), /* variable shift costs */
179 COSTS_N_INSNS (2), /* constant shift costs */
180 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
181 COSTS_N_INSNS (12), /* HI */
182 COSTS_N_INSNS (12), /* SI */
183 COSTS_N_INSNS (12), /* DI */
184 COSTS_N_INSNS (12)}, /* other */
185 1, /* cost of multiply per each bit set */
186 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
187 COSTS_N_INSNS (40), /* HI */
188 COSTS_N_INSNS (40), /* SI */
189 COSTS_N_INSNS (40), /* DI */
190 COSTS_N_INSNS (40)}, /* other */
191 COSTS_N_INSNS (3), /* cost of movsx */
192 COSTS_N_INSNS (2), /* cost of movzx */
193 15, /* "large" insn */
194 3, /* MOVE_RATIO */
195 4, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {8, 8, 8}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {8, 8, 8}, /* cost of loading integer registers */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 1, /* Branch cost */
218 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
219 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
220 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
221 COSTS_N_INSNS (3), /* cost of FABS instruction. */
222 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
223 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
};
226 static const
227 struct processor_costs pentium_cost = {
228 COSTS_N_INSNS (1), /* cost of an add instruction */
229 COSTS_N_INSNS (1), /* cost of a lea instruction */
230 COSTS_N_INSNS (4), /* variable shift costs */
231 COSTS_N_INSNS (1), /* constant shift costs */
232 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
233 COSTS_N_INSNS (11), /* HI */
234 COSTS_N_INSNS (11), /* SI */
235 COSTS_N_INSNS (11), /* DI */
236 COSTS_N_INSNS (11)}, /* other */
237 0, /* cost of multiply per each bit set */
238 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
239 COSTS_N_INSNS (25), /* HI */
240 COSTS_N_INSNS (25), /* SI */
241 COSTS_N_INSNS (25), /* DI */
242 COSTS_N_INSNS (25)}, /* other */
243 COSTS_N_INSNS (3), /* cost of movsx */
244 COSTS_N_INSNS (2), /* cost of movzx */
245 8, /* "large" insn */
246 6, /* MOVE_RATIO */
247 6, /* cost for loading QImode using movzbl */
248 {2, 4, 2}, /* cost of loading integer registers
249 in QImode, HImode and SImode.
250 Relative to reg-reg move (2). */
251 {2, 4, 2}, /* cost of storing integer registers */
252 2, /* cost of reg,reg fld/fst */
253 {2, 2, 6}, /* cost of loading fp registers
254 in SFmode, DFmode and XFmode */
255 {4, 4, 6}, /* cost of loading integer registers */
256 8, /* cost of moving MMX register */
257 {8, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {8, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, /* cost of moving SSE register */
262 {4, 8, 16}, /* cost of loading SSE registers
263 in SImode, DImode and TImode */
264 {4, 8, 16}, /* cost of storing SSE registers
265 in SImode, DImode and TImode */
266 3, /* MMX or SSE register to integer */
267 0, /* size of prefetch block */
268 0, /* number of parallel prefetches */
269 2, /* Branch cost */
270 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
271 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
272 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
273 COSTS_N_INSNS (1), /* cost of FABS instruction. */
274 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
275 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
};
278 static const
279 struct processor_costs pentiumpro_cost = {
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (1), /* variable shift costs */
283 COSTS_N_INSNS (1), /* constant shift costs */
284 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (4), /* HI */
286 COSTS_N_INSNS (4), /* SI */
287 COSTS_N_INSNS (4), /* DI */
288 COSTS_N_INSNS (4)}, /* other */
289 0, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (17), /* HI */
292 COSTS_N_INSNS (17), /* SI */
293 COSTS_N_INSNS (17), /* DI */
294 COSTS_N_INSNS (17)}, /* other */
295 COSTS_N_INSNS (1), /* cost of movsx */
296 COSTS_N_INSNS (1), /* cost of movzx */
297 8, /* "large" insn */
298 6, /* MOVE_RATIO */
299 2, /* cost for loading QImode using movzbl */
300 {4, 4, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 2, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {2, 2, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 6}, /* cost of loading integer registers */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 3, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 6, /* number of parallel prefetches */
321 2, /* Branch cost */
322 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
323 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
324 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
327 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
};
330 static const
331 struct processor_costs k6_cost = {
332 COSTS_N_INSNS (1), /* cost of an add instruction */
333 COSTS_N_INSNS (2), /* cost of a lea instruction */
334 COSTS_N_INSNS (1), /* variable shift costs */
335 COSTS_N_INSNS (1), /* constant shift costs */
336 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
337 COSTS_N_INSNS (3), /* HI */
338 COSTS_N_INSNS (3), /* SI */
339 COSTS_N_INSNS (3), /* DI */
340 COSTS_N_INSNS (3)}, /* other */
341 0, /* cost of multiply per each bit set */
342 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
343 COSTS_N_INSNS (18), /* HI */
344 COSTS_N_INSNS (18), /* SI */
345 COSTS_N_INSNS (18), /* DI */
346 COSTS_N_INSNS (18)}, /* other */
347 COSTS_N_INSNS (2), /* cost of movsx */
348 COSTS_N_INSNS (2), /* cost of movzx */
349 8, /* "large" insn */
350 4, /* MOVE_RATIO */
351 3, /* cost for loading QImode using movzbl */
352 {4, 5, 4}, /* cost of loading integer registers
353 in QImode, HImode and SImode.
354 Relative to reg-reg move (2). */
355 {2, 3, 2}, /* cost of storing integer registers */
356 4, /* cost of reg,reg fld/fst */
357 {6, 6, 6}, /* cost of loading fp registers
358 in SFmode, DFmode and XFmode */
359 {4, 4, 4}, /* cost of loading integer registers */
360 2, /* cost of moving MMX register */
361 {2, 2}, /* cost of loading MMX registers
362 in SImode and DImode */
363 {2, 2}, /* cost of storing MMX registers
364 in SImode and DImode */
365 2, /* cost of moving SSE register */
366 {2, 2, 8}, /* cost of loading SSE registers
367 in SImode, DImode and TImode */
368 {2, 2, 8}, /* cost of storing SSE registers
369 in SImode, DImode and TImode */
370 6, /* MMX or SSE register to integer */
371 32, /* size of prefetch block */
372 1, /* number of parallel prefetches */
373 1, /* Branch cost */
374 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
375 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
376 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
377 COSTS_N_INSNS (2), /* cost of FABS instruction. */
378 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
379 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
};
382 static const
383 struct processor_costs athlon_cost = {
384 COSTS_N_INSNS (1), /* cost of an add instruction */
385 COSTS_N_INSNS (2), /* cost of a lea instruction */
386 COSTS_N_INSNS (1), /* variable shift costs */
387 COSTS_N_INSNS (1), /* constant shift costs */
388 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
389 COSTS_N_INSNS (5), /* HI */
390 COSTS_N_INSNS (5), /* SI */
391 COSTS_N_INSNS (5), /* DI */
392 COSTS_N_INSNS (5)}, /* other */
393 0, /* cost of multiply per each bit set */
394 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
395 COSTS_N_INSNS (26), /* HI */
396 COSTS_N_INSNS (42), /* SI */
397 COSTS_N_INSNS (74), /* DI */
398 COSTS_N_INSNS (74)}, /* other */
399 COSTS_N_INSNS (1), /* cost of movsx */
400 COSTS_N_INSNS (1), /* cost of movzx */
401 8, /* "large" insn */
402 9, /* MOVE_RATIO */
403 4, /* cost for loading QImode using movzbl */
404 {3, 4, 3}, /* cost of loading integer registers
405 in QImode, HImode and SImode.
406 Relative to reg-reg move (2). */
407 {3, 4, 3}, /* cost of storing integer registers */
408 4, /* cost of reg,reg fld/fst */
409 {4, 4, 12}, /* cost of loading fp registers
410 in SFmode, DFmode and XFmode */
411 {6, 6, 8}, /* cost of loading integer registers */
412 2, /* cost of moving MMX register */
413 {4, 4}, /* cost of loading MMX registers
414 in SImode and DImode */
415 {4, 4}, /* cost of storing MMX registers
416 in SImode and DImode */
417 2, /* cost of moving SSE register */
418 {4, 4, 6}, /* cost of loading SSE registers
419 in SImode, DImode and TImode */
420 {4, 4, 5}, /* cost of storing SSE registers
421 in SImode, DImode and TImode */
422 5, /* MMX or SSE register to integer */
423 64, /* size of prefetch block */
424 6, /* number of parallel prefetches */
425 5, /* Branch cost */
426 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (2), /* cost of FABS instruction. */
430 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
434 static const
435 struct processor_costs k8_cost = {
436 COSTS_N_INSNS (1), /* cost of an add instruction */
437 COSTS_N_INSNS (2), /* cost of a lea instruction */
438 COSTS_N_INSNS (1), /* variable shift costs */
439 COSTS_N_INSNS (1), /* constant shift costs */
440 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
441 COSTS_N_INSNS (4), /* HI */
442 COSTS_N_INSNS (3), /* SI */
443 COSTS_N_INSNS (4), /* DI */
444 COSTS_N_INSNS (5)}, /* other */
445 0, /* cost of multiply per each bit set */
446 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
447 COSTS_N_INSNS (26), /* HI */
448 COSTS_N_INSNS (42), /* SI */
449 COSTS_N_INSNS (74), /* DI */
450 COSTS_N_INSNS (74)}, /* other */
451 COSTS_N_INSNS (1), /* cost of movsx */
452 COSTS_N_INSNS (1), /* cost of movzx */
453 8, /* "large" insn */
454 9, /* MOVE_RATIO */
455 4, /* cost for loading QImode using movzbl */
456 {3, 4, 3}, /* cost of loading integer registers
457 in QImode, HImode and SImode.
458 Relative to reg-reg move (2). */
459 {3, 4, 3}, /* cost of storing integer registers */
460 4, /* cost of reg,reg fld/fst */
461 {4, 4, 12}, /* cost of loading fp registers
462 in SFmode, DFmode and XFmode */
463 {6, 6, 8}, /* cost of loading integer registers */
464 2, /* cost of moving MMX register */
465 {3, 3}, /* cost of loading MMX registers
466 in SImode and DImode */
467 {4, 4}, /* cost of storing MMX registers
468 in SImode and DImode */
469 2, /* cost of moving SSE register */
470 {4, 3, 6}, /* cost of loading SSE registers
471 in SImode, DImode and TImode */
472 {4, 4, 5}, /* cost of storing SSE registers
473 in SImode, DImode and TImode */
474 5, /* MMX or SSE register to integer */
475 64, /* size of prefetch block */
476 6, /* number of parallel prefetches */
477 5, /* Branch cost */
478 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
479 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
480 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
481 COSTS_N_INSNS (2), /* cost of FABS instruction. */
482 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
483 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
};
486 static const
487 struct processor_costs pentium4_cost = {
488 COSTS_N_INSNS (1), /* cost of an add instruction */
489 COSTS_N_INSNS (3), /* cost of a lea instruction */
490 COSTS_N_INSNS (4), /* variable shift costs */
491 COSTS_N_INSNS (4), /* constant shift costs */
492 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
493 COSTS_N_INSNS (15), /* HI */
494 COSTS_N_INSNS (15), /* SI */
495 COSTS_N_INSNS (15), /* DI */
496 COSTS_N_INSNS (15)}, /* other */
497 0, /* cost of multiply per each bit set */
498 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
499 COSTS_N_INSNS (56), /* HI */
500 COSTS_N_INSNS (56), /* SI */
501 COSTS_N_INSNS (56), /* DI */
502 COSTS_N_INSNS (56)}, /* other */
503 COSTS_N_INSNS (1), /* cost of movsx */
504 COSTS_N_INSNS (1), /* cost of movzx */
505 16, /* "large" insn */
506 6, /* MOVE_RATIO */
507 2, /* cost for loading QImode using movzbl */
508 {4, 5, 4}, /* cost of loading integer registers
509 in QImode, HImode and SImode.
510 Relative to reg-reg move (2). */
511 {2, 3, 2}, /* cost of storing integer registers */
512 2, /* cost of reg,reg fld/fst */
513 {2, 2, 6}, /* cost of loading fp registers
514 in SFmode, DFmode and XFmode */
515 {4, 4, 6}, /* cost of loading integer registers */
516 2, /* cost of moving MMX register */
517 {2, 2}, /* cost of loading MMX registers
518 in SImode and DImode */
519 {2, 2}, /* cost of storing MMX registers
520 in SImode and DImode */
521 12, /* cost of moving SSE register */
522 {12, 12, 12}, /* cost of loading SSE registers
523 in SImode, DImode and TImode */
524 {2, 2, 8}, /* cost of storing SSE registers
525 in SImode, DImode and TImode */
526 10, /* MMX or SSE register to integer */
527 64, /* size of prefetch block */
528 6, /* number of parallel prefetches */
529 2, /* Branch cost */
530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
};
538 static const
539 struct processor_costs nocona_cost = {
540 COSTS_N_INSNS (1), /* cost of an add instruction */
541 COSTS_N_INSNS (1), /* cost of a lea instruction */
542 COSTS_N_INSNS (1), /* variable shift costs */
543 COSTS_N_INSNS (1), /* constant shift costs */
544 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
545 COSTS_N_INSNS (10), /* HI */
546 COSTS_N_INSNS (10), /* SI */
547 COSTS_N_INSNS (10), /* DI */
548 COSTS_N_INSNS (10)}, /* other */
549 0, /* cost of multiply per each bit set */
550 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
551 COSTS_N_INSNS (66), /* HI */
552 COSTS_N_INSNS (66), /* SI */
553 COSTS_N_INSNS (66), /* DI */
554 COSTS_N_INSNS (66)}, /* other */
555 COSTS_N_INSNS (1), /* cost of movsx */
556 COSTS_N_INSNS (1), /* cost of movzx */
557 16, /* "large" insn */
558 17, /* MOVE_RATIO */
559 4, /* cost for loading QImode using movzbl */
560 {4, 4, 4}, /* cost of loading integer registers
561 in QImode, HImode and SImode.
562 Relative to reg-reg move (2). */
563 {4, 4, 4}, /* cost of storing integer registers */
564 3, /* cost of reg,reg fld/fst */
565 {12, 12, 12}, /* cost of loading fp registers
566 in SFmode, DFmode and XFmode */
567 {4, 4, 4}, /* cost of loading integer registers */
568 6, /* cost of moving MMX register */
569 {12, 12}, /* cost of loading MMX registers
570 in SImode and DImode */
571 {12, 12}, /* cost of storing MMX registers
572 in SImode and DImode */
573 6, /* cost of moving SSE register */
574 {12, 12, 12}, /* cost of loading SSE registers
575 in SImode, DImode and TImode */
576 {12, 12, 12}, /* cost of storing SSE registers
577 in SImode, DImode and TImode */
578 8, /* MMX or SSE register to integer */
579 128, /* size of prefetch block */
580 8, /* number of parallel prefetches */
581 1, /* Branch cost */
582 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
583 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
584 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
585 COSTS_N_INSNS (3), /* cost of FABS instruction. */
586 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
587 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
};
590 /* Generic64 should produce code tuned for Nocona and K8. */
591 static const
592 struct processor_costs generic64_cost = {
593 COSTS_N_INSNS (1), /* cost of an add instruction */
594 /* On all chips taken into consideration, lea is 2 cycles or more. With
595 this cost, however, our current implementation of synth_mult results in
596 the use of unnecessary temporary registers, causing regressions on several
597 SPECfp benchmarks. */
598 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (4), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (4), /* DI */
605 COSTS_N_INSNS (2)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (26), /* HI */
609 COSTS_N_INSNS (42), /* SI */
610 COSTS_N_INSNS (74), /* DI */
611 COSTS_N_INSNS (74)}, /* other */
612 COSTS_N_INSNS (1), /* cost of movsx */
613 COSTS_N_INSNS (1), /* cost of movzx */
614 8, /* "large" insn */
615 17, /* MOVE_RATIO */
616 4, /* cost for loading QImode using movzbl */
617 {4, 4, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {4, 4, 4}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {12, 12, 12}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {6, 6, 8}, /* cost of loading integer registers */
625 2, /* cost of moving MMX register */
626 {8, 8}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {8, 8}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {8, 8, 8}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {8, 8, 8}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
638 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
639 is increased to the perhaps more appropriate value of 5. */
640 3, /* Branch cost */
641 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (8), /* cost of FABS instruction. */
645 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
};
649 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
650 static const
651 struct processor_costs generic32_cost = {
652 COSTS_N_INSNS (1), /* cost of an add instruction */
653 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
654 COSTS_N_INSNS (1), /* variable shift costs */
655 COSTS_N_INSNS (1), /* constant shift costs */
656 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
657 COSTS_N_INSNS (4), /* HI */
658 COSTS_N_INSNS (3), /* SI */
659 COSTS_N_INSNS (4), /* DI */
660 COSTS_N_INSNS (2)}, /* other */
661 0, /* cost of multiply per each bit set */
662 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
663 COSTS_N_INSNS (26), /* HI */
664 COSTS_N_INSNS (42), /* SI */
665 COSTS_N_INSNS (74), /* DI */
666 COSTS_N_INSNS (74)}, /* other */
667 COSTS_N_INSNS (1), /* cost of movsx */
668 COSTS_N_INSNS (1), /* cost of movzx */
669 8, /* "large" insn */
670 17, /* MOVE_RATIO */
671 4, /* cost for loading QImode using movzbl */
672 {4, 4, 4}, /* cost of loading integer registers
673 in QImode, HImode and SImode.
674 Relative to reg-reg move (2). */
675 {4, 4, 4}, /* cost of storing integer registers */
676 4, /* cost of reg,reg fld/fst */
677 {12, 12, 12}, /* cost of loading fp registers
678 in SFmode, DFmode and XFmode */
679 {6, 6, 8}, /* cost of loading integer registers */
680 2, /* cost of moving MMX register */
681 {8, 8}, /* cost of loading MMX registers
682 in SImode and DImode */
683 {8, 8}, /* cost of storing MMX registers
684 in SImode and DImode */
685 2, /* cost of moving SSE register */
686 {8, 8, 8}, /* cost of loading SSE registers
687 in SImode, DImode and TImode */
688 {8, 8, 8}, /* cost of storing SSE registers
689 in SImode, DImode and TImode */
690 5, /* MMX or SSE register to integer */
691 64, /* size of prefetch block */
692 6, /* number of parallel prefetches */
693 3, /* Branch cost */
694 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
695 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
696 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
697 COSTS_N_INSNS (8), /* cost of FABS instruction. */
698 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
699 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
};
702 const struct processor_costs *ix86_cost = &pentium_cost;
704 /* Processor feature/optimization bitmasks. */
705 #define m_386 (1<<PROCESSOR_I386)
706 #define m_486 (1<<PROCESSOR_I486)
707 #define m_PENT (1<<PROCESSOR_PENTIUM)
708 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
709 #define m_K6 (1<<PROCESSOR_K6)
710 #define m_ATHLON (1<<PROCESSOR_ATHLON)
711 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
712 #define m_K8 (1<<PROCESSOR_K8)
713 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
714 #define m_NOCONA (1<<PROCESSOR_NOCONA)
715 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
716 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
717 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
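/* Editor's sketch (assumed usage, not shown in this file): i386.h
   typically tests these masks against the bit of the active tuning
   target, roughly

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so e.g. -mtune=k8 sets ix86_tune to PROCESSOR_K8, and a tuning flag is
   enabled whenever its m_* bit is set in the corresponding x86_* mask
   below.  */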
719 /* Generic instruction choice should be a common subset of the supported CPUs
720 (PPro/PENT4/NOCONA/Athlon/K8). */
722 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
723 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
724 generic because it does not work well with PPro based chips. */
725 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
726 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
727 const int x86_zero_extend_with_and = m_486 | m_PENT;
728 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
729 const int x86_double_with_add = ~m_386;
730 const int x86_use_bit_test = m_386;
731 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
732 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
733 const int x86_fisttp = m_NOCONA;
734 const int x86_3dnow_a = m_ATHLON_K8;
735 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
736 /* Branch hints were put in P4 based on simulation results. But
737 after P4 was made, no performance benefit was observed with
738 branch hints. They also increase the code size. As a result,
739 icc never generates branch hints. */
740 const int x86_branch_hints = 0;
741 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
742 /* We probably ought to watch for partial register stalls on Generic32
743 compilation setting as well. However, in the current implementation the
744 partial register stalls are not eliminated very well - they can
745 be introduced via subregs synthesized by combine and can happen
746 in caller/callee saving sequences.
747 Because this option pays back little on PPro based chips and is in conflict
748 with partial reg. dependencies used by Athlon/P4 based chips, it is better
749 to leave it off for generic32 for now. */
750 const int x86_partial_reg_stall = m_PPRO;
751 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
752 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
753 const int x86_use_mov0 = m_K6;
754 const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
755 const int x86_read_modify_write = ~m_PENT;
756 const int x86_read_modify = ~(m_PENT | m_PPRO);
757 const int x86_split_long_moves = m_PPRO;
758 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_GENERIC; /* m_PENT4 ? */
759 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
760 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
761 const int x86_qimode_math = ~(0);
762 const int x86_promote_qi_regs = 0;
763 /* On PPro this flag is meant to avoid partial register stalls. Just like
764 the x86_partial_reg_stall this option might be considered for Generic32
765 if our scheme for avoiding partial stalls was more effective. */
766 const int x86_himode_math = ~(m_PPRO);
767 const int x86_promote_hi_regs = m_PPRO;
768 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
769 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
770 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
771 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
772 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
773 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
774 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
775 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
776 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
777 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
778 const int x86_shift1 = ~m_486;
779 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
780 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
781 that treat 128bit SSE registers as single units and K8 based chips that
782 divide SSE registers into two 64bit halves.
783 x86_sse_partial_reg_dependency promotes all store destinations to be 128bit
784 to allow register renaming on 128bit SSE units, but usually results in one
785 extra microop on 64bit SSE units. Experimental results show that disabling
786 this option on P4 brings over 20% SPECfp regression, while enabling it on
787 K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
788 of moves. */
789 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
790 /* Set for machines where the type and dependencies are resolved on SSE
791 register parts instead of whole registers, so we may maintain just the
792 lower part of scalar values in the proper format, leaving the upper part
793 undefined. */
794 const int x86_sse_split_regs = m_ATHLON_K8;
795 const int x86_sse_typeless_stores = m_ATHLON_K8;
796 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
797 const int x86_use_ffreep = m_ATHLON_K8;
798 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
799 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
801 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
802 integer data in xmm registers, which results in pretty abysmal code. */
803 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
805 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
806 /* Some CPU cores are not able to predict more than 4 branch instructions in
807 the 16 byte window. */
808 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
809 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
810 const int x86_use_bt = m_ATHLON_K8;
811 /* Compare and exchange was added for 80486. */
812 const int x86_cmpxchg = ~m_386;
813 /* Compare and exchange 8 bytes was added for pentium. */
814 const int x86_cmpxchg8b = ~(m_386 | m_486);
815 /* Compare and exchange 16 bytes was added for nocona. */
816 const int x86_cmpxchg16b = m_NOCONA;
817 /* Exchange and add was added for 80486. */
818 const int x86_xadd = ~m_386;
819 const int x86_pad_returns = m_ATHLON_K8 | m_GENERIC;
821 /* In case the average insn count for a single function invocation is
822 lower than this constant, emit fast (but longer) prologue and
823 epilogue code. */
824 #define FAST_PROLOGUE_INSN_COUNT 20
826 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
827 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
828 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
829 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
831 /* Array of the smallest class containing reg number REGNO, indexed by
832 REGNO. Used by REGNO_REG_CLASS in i386.h. */
834 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
836 /* ax, dx, cx, bx */
837 AREG, DREG, CREG, BREG,
838 /* si, di, bp, sp */
839 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
840 /* FP registers */
841 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
842 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
843 /* arg pointer */
844 NON_Q_REGS,
845 /* flags, fpsr, dirflag, frame */
846 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
847 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
848 SSE_REGS, SSE_REGS,
849 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
850 MMX_REGS, MMX_REGS,
851 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
852 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
853 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
854 SSE_REGS, SSE_REGS,
};
857 /* The "default" register map used in 32bit mode. */
859 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
861 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
862 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
863 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
864 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
865 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
866 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
867 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
870 static int const x86_64_int_parameter_registers[6] =
{
872 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
873 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
876 static int const x86_64_int_return_registers[4] =
{
878 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
881 /* The "default" register map used in 64bit mode. */
882 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
884 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
885 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
886 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
887 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
888 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
889 8,9,10,11,12,13,14,15, /* extended integer registers */
890 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
};
893 /* Define the register numbers to be used in Dwarf debugging information.
894 The SVR4 reference port C compiler uses the following register numbers
895 in its Dwarf output code:
896 0 for %eax (gcc regno = 0)
897 1 for %ecx (gcc regno = 2)
898 2 for %edx (gcc regno = 1)
899 3 for %ebx (gcc regno = 3)
900 4 for %esp (gcc regno = 7)
901 5 for %ebp (gcc regno = 6)
902 6 for %esi (gcc regno = 4)
903 7 for %edi (gcc regno = 5)
904 The following three DWARF register numbers are never generated by
905 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
906 believes these numbers have these meanings.
907 8 for %eip (no gcc equivalent)
908 9 for %eflags (gcc regno = 17)
909 10 for %trapno (no gcc equivalent)
910 It is not at all clear how we should number the FP stack registers
911 for the x86 architecture. If the version of SDB on x86/svr4 were
912 a bit less brain dead with respect to floating-point then we would
913 have a precedent to follow with respect to DWARF register numbers
914 for x86 FP registers, but the SDB on x86/svr4 is so completely
915 broken with respect to FP registers that it is hardly worth thinking
916 of it as something to strive for compatibility with.
917 The version of x86/svr4 SDB I have at the moment does (partially)
918 seem to believe that DWARF register number 11 is associated with
919 the x86 register %st(0), but that's about all. Higher DWARF
920 register numbers don't seem to be associated with anything in
921 particular, and even for DWARF regno 11, SDB only seems to under-
922 stand that it should say that a variable lives in %st(0) (when
923 asked via an `=' command) if we said it was in DWARF regno 11,
924 but SDB still prints garbage when asked for the value of the
925 variable in question (via a `/' command).
926 (Also note that the labels SDB prints for various FP stack regs
927 when doing an `x' command are all wrong.)
928 Note that these problems generally don't affect the native SVR4
929 C compiler because it doesn't allow the use of -O with -g and
930 because when it is *not* optimizing, it allocates a memory
931 location for each floating-point variable, and the memory
932 location is what gets described in the DWARF AT_location
933 attribute for the variable in question.
934 Regardless of the severe mental illness of the x86/svr4 SDB, we
935 do something sensible here and we use the following DWARF
936 register numbers. Note that these are all stack-top-relative
937 numbers.
938 11 for %st(0) (gcc regno = 8)
939 12 for %st(1) (gcc regno = 9)
940 13 for %st(2) (gcc regno = 10)
941 14 for %st(3) (gcc regno = 11)
942 15 for %st(4) (gcc regno = 12)
943 16 for %st(5) (gcc regno = 13)
944 17 for %st(6) (gcc regno = 14)
945 18 for %st(7) (gcc regno = 15)
*/
947 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
949 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
950 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
951 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
952 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
953 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
954 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
955 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
958 /* Test and compare insns in i386.md store the information needed to
959 generate branch and scc insns here. */
961 rtx ix86_compare_op0 = NULL_RTX;
962 rtx ix86_compare_op1 = NULL_RTX;
963 rtx ix86_compare_emitted = NULL_RTX;
965 /* Size of the register save area. */
966 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
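/* Editor's note: with the usual 64bit values (REGPARM_MAX = 6 integer
   registers of UNITS_PER_WORD = 8 bytes, SSE_REGPARM_MAX = 8 SSE registers
   saved as 16 bytes each -- values assumed from i386.h) this evaluates to
   6*8 + 8*16 = 176 bytes, the register save area that the varargs
   prologue spills into.  */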
968 /* Define the structure for the machine field in struct function. */
970 struct stack_local_entry GTY(())
{
972 unsigned short mode;
973 unsigned short n;
974 rtx rtl;
975 struct stack_local_entry *next;
};
978 /* Structure describing stack frame layout.
979 Stack grows downward:
981 [arguments]
982 <- ARG_POINTER
983 saved pc
985 saved frame pointer if frame_pointer_needed
986 <- HARD_FRAME_POINTER
987 [saved regs]
989 [padding1] \
991 [va_arg registers] (
992 > to_allocate <- FRAME_POINTER
993 [frame] (
995 [padding2] /
*/
997 struct ix86_frame
{
999 int nregs;
1000 int padding1;
1001 int va_arg_size;
1002 HOST_WIDE_INT frame;
1003 int padding2;
1004 int outgoing_arguments_size;
1005 int red_zone_size;
1007 HOST_WIDE_INT to_allocate;
1008 /* The offsets relative to ARG_POINTER. */
1009 HOST_WIDE_INT frame_pointer_offset;
1010 HOST_WIDE_INT hard_frame_pointer_offset;
1011 HOST_WIDE_INT stack_pointer_offset;
1013 /* When save_regs_using_mov is set, emit prologue using
1014 move instead of push instructions. */
1015 bool save_regs_using_mov;
};
1018 /* Code model option. */
1019 enum cmodel ix86_cmodel;
1020 /* Asm dialect. */
1021 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1022 /* TLS dialects. */
1023 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1025 /* Which unit we are generating floating point math for. */
1026 enum fpmath_unit ix86_fpmath;
1028 /* Which cpu are we scheduling for. */
1029 enum processor_type ix86_tune;
1030 /* Which instruction set architecture to use. */
1031 enum processor_type ix86_arch;
1033 /* true if sse prefetch instruction is not NOOP. */
1034 int x86_prefetch_sse;
1036 /* ix86_regparm_string as a number */
1037 static int ix86_regparm;
1039 /* Preferred alignment for stack boundary in bits. */
1040 unsigned int ix86_preferred_stack_boundary;
1042 /* Values 1-5: see jump.c */
1043 int ix86_branch_cost;
1045 /* Variables which are this size or smaller are put in the data/bss
1046 or ldata/lbss sections. */
1048 int ix86_section_threshold = 65536;
1050 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1051 char internal_label_prefix[16];
1052 int internal_label_prefix_len;
1054 /* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
1055 static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
1057 static bool ix86_handle_option (size_t, const char *, int);
1058 static void output_pic_addr_const (FILE *, rtx, int);
1059 static void put_condition_code (enum rtx_code, enum machine_mode,
1060 int, int, FILE *);
1061 static const char *get_some_local_dynamic_name (void);
1062 static int get_some_local_dynamic_name_1 (rtx *, void *);
1063 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1064 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1065 rtx *);
1066 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1067 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1068 enum machine_mode);
1069 static rtx get_thread_pointer (int);
1070 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1071 static void get_pc_thunk_name (char [32], unsigned int);
1072 static rtx gen_push (rtx);
1073 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
1074 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
1075 static struct machine_function * ix86_init_machine_status (void);
1076 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1077 static int ix86_nsaved_regs (void);
1078 static void ix86_emit_save_regs (void);
1079 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1080 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1081 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1082 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1083 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1084 static rtx ix86_expand_aligntest (rtx, int);
1085 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1086 static int ix86_issue_rate (void);
1087 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1088 static int ia32_multipass_dfa_lookahead (void);
1089 static void ix86_init_mmx_sse_builtins (void);
1090 static void ix86_init_sse_abi_builtins (void);
1091 static rtx x86_this_parameter (tree);
1092 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1093 HOST_WIDE_INT, tree);
1094 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1095 static void x86_file_start (void);
1096 static void ix86_reorg (void);
1097 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1098 static tree ix86_build_builtin_va_list (void);
1099 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1100 tree, int *, int);
1101 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1102 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1103 static bool ix86_vector_mode_supported_p (enum machine_mode);
1105 static int ix86_address_cost (rtx);
1106 static bool ix86_cannot_force_const_mem (rtx);
1107 static rtx ix86_delegitimize_address (rtx);
1109 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1111 struct builtin_description;
1112 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1113 tree, rtx);
1114 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1115 tree, rtx);
1116 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1117 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1118 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1119 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1120 static rtx safe_vector_operand (rtx, enum machine_mode);
1121 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1122 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1123 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1124 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1125 static int ix86_fp_comparison_cost (enum rtx_code code);
1126 static unsigned int ix86_select_alt_pic_regnum (void);
1127 static int ix86_save_reg (unsigned int, int);
1128 static void ix86_compute_frame_layout (struct ix86_frame *);
1129 static int ix86_comp_type_attributes (tree, tree);
1130 static int ix86_function_regparm (tree, tree);
1131 const struct attribute_spec ix86_attribute_table[];
1132 static bool ix86_function_ok_for_sibcall (tree, tree);
1133 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1134 static int ix86_value_regno (enum machine_mode, tree, tree);
1135 static bool contains_128bit_aligned_vector_p (tree);
1136 static rtx ix86_struct_value_rtx (tree, int);
1137 static bool ix86_ms_bitfield_layout_p (tree);
1138 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1139 static int extended_reg_mentioned_1 (rtx *, void *);
1140 static bool ix86_rtx_costs (rtx, int, int, int *);
1141 static int min_insn_size (rtx);
1142 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1143 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1144 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1145 tree, bool);
1146 static void ix86_init_builtins (void);
1147 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1148 static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
1149 static const char *ix86_mangle_fundamental_type (tree);
1150 static tree ix86_stack_protect_fail (void);
1151 static rtx ix86_internal_arg_pointer (void);
1152 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1154 /* This function is only used on Solaris. */
1155 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1156 ATTRIBUTE_UNUSED;
1158 /* Register class used for passing a given 64bit part of the argument.
1159 These represent classes as documented by the PS ABI, with the exception
1160 of the SSESF and SSEDF classes, which are basically the SSE class; gcc will
1161 use an SF or DFmode move instead of DImode to avoid reformatting penalties.
1163 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1164 whenever possible (the upper half does contain padding). */
1166 enum x86_64_reg_class
{
1168 X86_64_NO_CLASS,
1169 X86_64_INTEGER_CLASS,
1170 X86_64_INTEGERSI_CLASS,
1171 X86_64_SSE_CLASS,
1172 X86_64_SSESF_CLASS,
1173 X86_64_SSEDF_CLASS,
1174 X86_64_SSEUP_CLASS,
1175 X86_64_X87_CLASS,
1176 X86_64_X87UP_CLASS,
1177 X86_64_COMPLEX_X87_CLASS,
1178 X86_64_MEMORY_CLASS
};
1180 static const char * const x86_64_reg_class_name[] = {
1181 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1182 "sseup", "x87", "x87up", "cplx87", "no"
1185 #define MAX_CLASSES 4
1187 /* Table of constants used by fldpi, fldln2, etc.... */
1188 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1189 static bool ext_80387_constants_init = 0;
1190 static void init_ext_80387_constants (void);
1191 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1192 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1193 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1194 static section *x86_64_elf_select_section (tree decl, int reloc,
1195 unsigned HOST_WIDE_INT align)
1196 ATTRIBUTE_UNUSED;
1198 /* Initialize the GCC target structure. */
1199 #undef TARGET_ATTRIBUTE_TABLE
1200 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1201 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1202 # undef TARGET_MERGE_DECL_ATTRIBUTES
1203 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1204 #endif
1206 #undef TARGET_COMP_TYPE_ATTRIBUTES
1207 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1209 #undef TARGET_INIT_BUILTINS
1210 #define TARGET_INIT_BUILTINS ix86_init_builtins
1211 #undef TARGET_EXPAND_BUILTIN
1212 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1213 #undef TARGET_EXPAND_LIBRARY_BUILTIN
1214 #define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
1216 #undef TARGET_ASM_FUNCTION_EPILOGUE
1217 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1219 #undef TARGET_ENCODE_SECTION_INFO
1220 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1221 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1222 #else
1223 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1224 #endif
1226 #undef TARGET_ASM_OPEN_PAREN
1227 #define TARGET_ASM_OPEN_PAREN ""
1228 #undef TARGET_ASM_CLOSE_PAREN
1229 #define TARGET_ASM_CLOSE_PAREN ""
1231 #undef TARGET_ASM_ALIGNED_HI_OP
1232 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1233 #undef TARGET_ASM_ALIGNED_SI_OP
1234 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1235 #ifdef ASM_QUAD
1236 #undef TARGET_ASM_ALIGNED_DI_OP
1237 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1238 #endif
1240 #undef TARGET_ASM_UNALIGNED_HI_OP
1241 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1242 #undef TARGET_ASM_UNALIGNED_SI_OP
1243 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1244 #undef TARGET_ASM_UNALIGNED_DI_OP
1245 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1247 #undef TARGET_SCHED_ADJUST_COST
1248 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1249 #undef TARGET_SCHED_ISSUE_RATE
1250 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1251 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1252 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1253 ia32_multipass_dfa_lookahead
1255 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1256 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1258 #ifdef HAVE_AS_TLS
1259 #undef TARGET_HAVE_TLS
1260 #define TARGET_HAVE_TLS true
1261 #endif
1262 #undef TARGET_CANNOT_FORCE_CONST_MEM
1263 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1265 #undef TARGET_DELEGITIMIZE_ADDRESS
1266 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1268 #undef TARGET_MS_BITFIELD_LAYOUT_P
1269 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1271 #if TARGET_MACHO
1272 #undef TARGET_BINDS_LOCAL_P
1273 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1274 #endif
1276 #undef TARGET_ASM_OUTPUT_MI_THUNK
1277 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1278 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1279 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1281 #undef TARGET_ASM_FILE_START
1282 #define TARGET_ASM_FILE_START x86_file_start
1284 #undef TARGET_DEFAULT_TARGET_FLAGS
1285 #define TARGET_DEFAULT_TARGET_FLAGS \
1286 (TARGET_DEFAULT \
1287 | TARGET_64BIT_DEFAULT \
1288 | TARGET_SUBTARGET_DEFAULT \
1289 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1291 #undef TARGET_HANDLE_OPTION
1292 #define TARGET_HANDLE_OPTION ix86_handle_option
1294 #undef TARGET_RTX_COSTS
1295 #define TARGET_RTX_COSTS ix86_rtx_costs
1296 #undef TARGET_ADDRESS_COST
1297 #define TARGET_ADDRESS_COST ix86_address_cost
1299 #undef TARGET_FIXED_CONDITION_CODE_REGS
1300 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1301 #undef TARGET_CC_MODES_COMPATIBLE
1302 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1304 #undef TARGET_MACHINE_DEPENDENT_REORG
1305 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1307 #undef TARGET_BUILD_BUILTIN_VA_LIST
1308 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1310 #undef TARGET_MD_ASM_CLOBBERS
1311 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1313 #undef TARGET_PROMOTE_PROTOTYPES
1314 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1315 #undef TARGET_STRUCT_VALUE_RTX
1316 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1317 #undef TARGET_SETUP_INCOMING_VARARGS
1318 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1319 #undef TARGET_MUST_PASS_IN_STACK
1320 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1321 #undef TARGET_PASS_BY_REFERENCE
1322 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1323 #undef TARGET_INTERNAL_ARG_POINTER
1324 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1325 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1326 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1328 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1329 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1331 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1332 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1334 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1335 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1337 #ifdef HAVE_AS_TLS
1338 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1339 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1340 #endif
1342 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1343 #undef TARGET_INSERT_ATTRIBUTES
1344 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1345 #endif
1347 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1348 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1350 #undef TARGET_STACK_PROTECT_FAIL
1351 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1353 #undef TARGET_FUNCTION_VALUE
1354 #define TARGET_FUNCTION_VALUE ix86_function_value
1356 struct gcc_target targetm = TARGET_INITIALIZER;
1359 /* The svr4 ABI for the i386 says that records and unions are returned
1360 in memory. */
1361 #ifndef DEFAULT_PCC_STRUCT_RETURN
1362 #define DEFAULT_PCC_STRUCT_RETURN 1
1363 #endif
1365 /* Implement TARGET_HANDLE_OPTION. */
1367 static bool
1368 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1370 switch (code)
1372 case OPT_m3dnow:
1373 if (!value)
1375 target_flags &= ~MASK_3DNOW_A;
1376 target_flags_explicit |= MASK_3DNOW_A;
1378 return true;
1380 case OPT_mmmx:
1381 if (!value)
1383 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1384 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1386 return true;
1388 case OPT_msse:
1389 if (!value)
1391 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1392 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1394 return true;
1396 case OPT_msse2:
1397 if (!value)
1399 target_flags &= ~MASK_SSE3;
1400 target_flags_explicit |= MASK_SSE3;
1402 return true;
1404 default:
1405 return true;
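/* For illustration: the negative forms cascade, so (for example)
   -mno-sse also clears MASK_SSE2 and MASK_SSE3, -mno-sse2 clears
   MASK_SSE3, and -mno-mmx clears MASK_3DNOW and MASK_3DNOW_A.  Later
   code can therefore assume that an enabled ISA level implies the
   lower ones.  */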
1409 /* Sometimes certain combinations of command options do not make
1410 sense on a particular target machine. You can define a macro
1411 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1412 defined, is executed once just after all the command options have
1413 been parsed.
1415 Don't use this macro to turn on various extra optimizations for
1416 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1418 void
1419 override_options (void)
1421 int i;
1422 int ix86_tune_defaulted = 0;
1424 /* Comes from final.c -- no real reason to change it. */
1425 #define MAX_CODE_ALIGN 16
1427 static struct ptt
1429 const struct processor_costs *cost; /* Processor costs */
1430 const int target_enable; /* Target flags to enable. */
1431 const int target_disable; /* Target flags to disable. */
1432 const int align_loop; /* Default alignments. */
1433 const int align_loop_max_skip;
1434 const int align_jump;
1435 const int align_jump_max_skip;
1436 const int align_func;
1438 const processor_target_table[PROCESSOR_max] =
1440 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1441 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1442 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1443 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1444 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1445 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1446 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1447 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1448 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1449 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1450 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1453 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1454 static struct pta
1456 const char *const name; /* processor name or nickname. */
1457 const enum processor_type processor;
1458 const enum pta_flags
1460 PTA_SSE = 1,
1461 PTA_SSE2 = 2,
1462 PTA_SSE3 = 4,
1463 PTA_MMX = 8,
1464 PTA_PREFETCH_SSE = 16,
1465 PTA_3DNOW = 32,
1466 PTA_3DNOW_A = 64,
1467 PTA_64BIT = 128
1468 } flags;
1470 const processor_alias_table[] =
1472 {"i386", PROCESSOR_I386, 0},
1473 {"i486", PROCESSOR_I486, 0},
1474 {"i586", PROCESSOR_PENTIUM, 0},
1475 {"pentium", PROCESSOR_PENTIUM, 0},
1476 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1477 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1478 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1479 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1480 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1481 {"i686", PROCESSOR_PENTIUMPRO, 0},
1482 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1483 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1484 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1485 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1486 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1487 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1488 | PTA_MMX | PTA_PREFETCH_SSE},
1489 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1490 | PTA_MMX | PTA_PREFETCH_SSE},
1491 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1492 | PTA_MMX | PTA_PREFETCH_SSE},
1493 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1494 | PTA_MMX | PTA_PREFETCH_SSE},
1495 {"k6", PROCESSOR_K6, PTA_MMX},
1496 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1497 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1498 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1499 | PTA_3DNOW_A},
1500 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1501 | PTA_3DNOW | PTA_3DNOW_A},
1502 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1503 | PTA_3DNOW_A | PTA_SSE},
1504 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1505 | PTA_3DNOW_A | PTA_SSE},
1506 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1507 | PTA_3DNOW_A | PTA_SSE},
1508 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1509 | PTA_SSE | PTA_SSE2 },
1510 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1511 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1512 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1513 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1514 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1515 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1516 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1517 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1518 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1519 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1522 int const pta_size = ARRAY_SIZE (processor_alias_table);
1524 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1525 SUBTARGET_OVERRIDE_OPTIONS;
1526 #endif
1528 /* Set the default values for switches whose default depends on TARGET_64BIT
1529 in case they weren't overridden by command line options. */
1530 if (TARGET_64BIT)
1532 if (flag_omit_frame_pointer == 2)
1533 flag_omit_frame_pointer = 1;
1534 if (flag_asynchronous_unwind_tables == 2)
1535 flag_asynchronous_unwind_tables = 1;
1536 if (flag_pcc_struct_return == 2)
1537 flag_pcc_struct_return = 0;
1539 else
1541 if (flag_omit_frame_pointer == 2)
1542 flag_omit_frame_pointer = 0;
1543 if (flag_asynchronous_unwind_tables == 2)
1544 flag_asynchronous_unwind_tables = 0;
1545 if (flag_pcc_struct_return == 2)
1546 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1549 /* Need to check -mtune=generic first. */
1550 if (ix86_tune_string)
1552 if (!strcmp (ix86_tune_string, "generic")
1553 || !strcmp (ix86_tune_string, "i686"))
1555 if (TARGET_64BIT)
1556 ix86_tune_string = "generic64";
1557 else
1558 ix86_tune_string = "generic32";
1560 else if (!strncmp (ix86_tune_string, "generic", 7))
1561 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1563 else
1565 if (ix86_arch_string)
1566 ix86_tune_string = ix86_arch_string;
1567 if (!ix86_tune_string)
1569 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1570 ix86_tune_defaulted = 1;
1573 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1574 need to use a sensible tune option. */
1575 if (!strcmp (ix86_tune_string, "generic")
1576 || !strcmp (ix86_tune_string, "x86-64")
1577 || !strcmp (ix86_tune_string, "i686"))
1579 if (TARGET_64BIT)
1580 ix86_tune_string = "generic64";
1581 else
1582 ix86_tune_string = "generic32";
1585 if (!strcmp (ix86_tune_string, "x86-64"))
1586 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1587 "-mtune=generic instead as appropriate.");
1589 if (!ix86_arch_string)
1590 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1591 if (!strcmp (ix86_arch_string, "generic"))
1592 error ("generic CPU can be used only for -mtune= switch");
1593 if (!strncmp (ix86_arch_string, "generic", 7))
1594 error ("bad value (%s) for -march= switch", ix86_arch_string);
1596 if (ix86_cmodel_string != 0)
1598 if (!strcmp (ix86_cmodel_string, "small"))
1599 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1600 else if (!strcmp (ix86_cmodel_string, "medium"))
1601 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1602 else if (flag_pic)
1603 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1604 else if (!strcmp (ix86_cmodel_string, "32"))
1605 ix86_cmodel = CM_32;
1606 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1607 ix86_cmodel = CM_KERNEL;
1608 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1609 ix86_cmodel = CM_LARGE;
1610 else
1611 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1613 else
1615 ix86_cmodel = CM_32;
1616 if (TARGET_64BIT)
1617 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1619 if (ix86_asm_string != 0)
1621 if (! TARGET_MACHO
1622 && !strcmp (ix86_asm_string, "intel"))
1623 ix86_asm_dialect = ASM_INTEL;
1624 else if (!strcmp (ix86_asm_string, "att"))
1625 ix86_asm_dialect = ASM_ATT;
1626 else
1627 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1629 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1630 error ("code model %qs not supported in the %s bit mode",
1631 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1632 if (ix86_cmodel == CM_LARGE)
1633 sorry ("code model %<large%> not supported yet");
1634 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1635 sorry ("%i-bit mode not compiled in",
1636 (target_flags & MASK_64BIT) ? 64 : 32);
1638 for (i = 0; i < pta_size; i++)
1639 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1641 ix86_arch = processor_alias_table[i].processor;
1642 /* Default cpu tuning to the architecture. */
1643 ix86_tune = ix86_arch;
1644 if (processor_alias_table[i].flags & PTA_MMX
1645 && !(target_flags_explicit & MASK_MMX))
1646 target_flags |= MASK_MMX;
1647 if (processor_alias_table[i].flags & PTA_3DNOW
1648 && !(target_flags_explicit & MASK_3DNOW))
1649 target_flags |= MASK_3DNOW;
1650 if (processor_alias_table[i].flags & PTA_3DNOW_A
1651 && !(target_flags_explicit & MASK_3DNOW_A))
1652 target_flags |= MASK_3DNOW_A;
1653 if (processor_alias_table[i].flags & PTA_SSE
1654 && !(target_flags_explicit & MASK_SSE))
1655 target_flags |= MASK_SSE;
1656 if (processor_alias_table[i].flags & PTA_SSE2
1657 && !(target_flags_explicit & MASK_SSE2))
1658 target_flags |= MASK_SSE2;
1659 if (processor_alias_table[i].flags & PTA_SSE3
1660 && !(target_flags_explicit & MASK_SSE3))
1661 target_flags |= MASK_SSE3;
1662 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1663 x86_prefetch_sse = true;
1664 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1665 error ("CPU you selected does not support x86-64 "
1666 "instruction set");
1667 break;
1670 if (i == pta_size)
1671 error ("bad value (%s) for -march= switch", ix86_arch_string);
1673 for (i = 0; i < pta_size; i++)
1674 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1676 ix86_tune = processor_alias_table[i].processor;
1677 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1679 if (ix86_tune_defaulted)
1681 ix86_tune_string = "x86-64";
1682 for (i = 0; i < pta_size; i++)
1683 if (! strcmp (ix86_tune_string,
1684 processor_alias_table[i].name))
1685 break;
1686 ix86_tune = processor_alias_table[i].processor;
1688 else
1689 error ("CPU you selected does not support x86-64 "
1690 "instruction set");
1692 /* Intel CPUs have always interpreted SSE prefetch instructions as
1693 NOPs; so, we can enable SSE prefetch instructions even when
1694 -mtune (rather than -march) points us to a processor that has them.
1695 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1696 higher processors. */
1697 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1698 x86_prefetch_sse = true;
1699 break;
1701 if (i == pta_size)
1702 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1704 if (optimize_size)
1705 ix86_cost = &size_cost;
1706 else
1707 ix86_cost = processor_target_table[ix86_tune].cost;
1708 target_flags |= processor_target_table[ix86_tune].target_enable;
1709 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1711 /* Arrange to set up i386_stack_locals for all functions. */
1712 init_machine_status = ix86_init_machine_status;
1714 /* Validate -mregparm= value. */
1715 if (ix86_regparm_string)
1717 i = atoi (ix86_regparm_string);
1718 if (i < 0 || i > REGPARM_MAX)
1719 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1720 else
1721 ix86_regparm = i;
1723 else
1724 if (TARGET_64BIT)
1725 ix86_regparm = REGPARM_MAX;
1727 /* If the user has provided any of the -malign-* options,
1728 warn and use that value only if -falign-* is not set.
1729 Remove this code in GCC 3.2 or later. */
1730 if (ix86_align_loops_string)
1732 warning (0, "-malign-loops is obsolete, use -falign-loops");
1733 if (align_loops == 0)
1735 i = atoi (ix86_align_loops_string);
1736 if (i < 0 || i > MAX_CODE_ALIGN)
1737 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1738 else
1739 align_loops = 1 << i;
1743 if (ix86_align_jumps_string)
1745 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1746 if (align_jumps == 0)
1748 i = atoi (ix86_align_jumps_string);
1749 if (i < 0 || i > MAX_CODE_ALIGN)
1750 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1751 else
1752 align_jumps = 1 << i;
1756 if (ix86_align_funcs_string)
1758 warning (0, "-malign-functions is obsolete, use -falign-functions");
1759 if (align_functions == 0)
1761 i = atoi (ix86_align_funcs_string);
1762 if (i < 0 || i > MAX_CODE_ALIGN)
1763 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1764 else
1765 align_functions = 1 << i;
1769 /* Default align_* from the processor table. */
1770 if (align_loops == 0)
1772 align_loops = processor_target_table[ix86_tune].align_loop;
1773 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1775 if (align_jumps == 0)
1777 align_jumps = processor_target_table[ix86_tune].align_jump;
1778 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1780 if (align_functions == 0)
1782 align_functions = processor_target_table[ix86_tune].align_func;
1785 /* Validate -mpreferred-stack-boundary= value, or provide default.
1786 The default of 128 bits is for Pentium III's SSE __m128, but we
1787 don't want additional code to keep the stack aligned when
1788 optimizing for code size. */
1789 ix86_preferred_stack_boundary = ((TARGET_64BIT || TARGET_MACHO || !optimize_size)
1790 ? 128 : 32);
1791 if (ix86_preferred_stack_boundary_string)
1793 i = atoi (ix86_preferred_stack_boundary_string);
1794 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1795 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1796 TARGET_64BIT ? 4 : 2);
1797 else
1798 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
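/* Worked example: -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   alignment expected by SSE's __m128.  */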
1801 /* Validate -mbranch-cost= value, or provide default. */
1802 ix86_branch_cost = ix86_cost->branch_cost;
1803 if (ix86_branch_cost_string)
1805 i = atoi (ix86_branch_cost_string);
1806 if (i < 0 || i > 5)
1807 error ("-mbranch-cost=%d is not between 0 and 5", i);
1808 else
1809 ix86_branch_cost = i;
1811 if (ix86_section_threshold_string)
1813 i = atoi (ix86_section_threshold_string);
1814 if (i < 0)
1815 error ("-mlarge-data-threshold=%d is negative", i);
1816 else
1817 ix86_section_threshold = i;
1820 if (ix86_tls_dialect_string)
1822 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1823 ix86_tls_dialect = TLS_DIALECT_GNU;
1824 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
1825 ix86_tls_dialect = TLS_DIALECT_GNU2;
1826 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1827 ix86_tls_dialect = TLS_DIALECT_SUN;
1828 else
1829 error ("bad value (%s) for -mtls-dialect= switch",
1830 ix86_tls_dialect_string);
1833 /* Keep nonleaf frame pointers. */
1834 if (flag_omit_frame_pointer)
1835 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1836 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1837 flag_omit_frame_pointer = 1;
1839 /* If we're doing fast math, we don't care about comparison order
1840 wrt NaNs. This lets us use a shorter comparison sequence. */
1841 if (flag_unsafe_math_optimizations)
1842 target_flags &= ~MASK_IEEE_FP;
1844 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1845 since the insns won't need emulation. */
1846 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1847 target_flags &= ~MASK_NO_FANCY_MATH_387;
1849 /* Likewise, if the target doesn't have a 387, or we've specified
1850 software floating point, don't use 387 inline intrinsics. */
1851 if (!TARGET_80387)
1852 target_flags |= MASK_NO_FANCY_MATH_387;
1854 /* Turn on SSE2 builtins for -msse3. */
1855 if (TARGET_SSE3)
1856 target_flags |= MASK_SSE2;
1858 /* Turn on SSE builtins for -msse2. */
1859 if (TARGET_SSE2)
1860 target_flags |= MASK_SSE;
1862 /* Turn on MMX builtins for -msse. */
1863 if (TARGET_SSE)
1865 target_flags |= MASK_MMX & ~target_flags_explicit;
1866 x86_prefetch_sse = true;
1869 /* Turn on MMX builtins for 3Dnow. */
1870 if (TARGET_3DNOW)
1871 target_flags |= MASK_MMX;
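/* For illustration: the cascades above mean -msse3 behaves roughly as

     target_flags |= MASK_SSE3 | MASK_SSE2 | MASK_SSE | MASK_MMX;

   (MMX only when it was not explicitly disabled), and -m3dnow likewise
   pulls in MASK_MMX.  */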
1873 if (TARGET_64BIT)
1875 if (TARGET_ALIGN_DOUBLE)
1876 error ("-malign-double makes no sense in the 64bit mode");
1877 if (TARGET_RTD)
1878 error ("-mrtd calling convention not supported in the 64bit mode");
1880 /* Enable by default the SSE and MMX builtins. Do allow the user to
1881 explicitly disable any of these. In particular, disabling SSE and
1882 MMX for kernel code is extremely useful. */
1883 target_flags
1884 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1885 & ~target_flags_explicit);
1887 else
1889 /* The i386 ABI does not specify a red zone.  It still makes sense to use
1890 one when the programmer takes care to keep the stack from being clobbered. */
1891 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1892 target_flags |= MASK_NO_RED_ZONE;
1895 /* Accept -msseregparm only if at least SSE support is enabled. */
1896 if (TARGET_SSEREGPARM
1897 && ! TARGET_SSE)
1898 error ("-msseregparm used without SSE enabled");
1900 /* Accept -msselibm only if at least SSE support is enabled. */
1901 if (TARGET_SSELIBM
1902 && ! TARGET_SSE2)
1903 error ("-msselibm used without SSE2 enabled");
1905 /* Ignore -msselibm on 64bit targets. */
1906 if (TARGET_SSELIBM
1907 && TARGET_64BIT)
1908 error ("-msselibm used on a 64bit target");
1910 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1912 if (ix86_fpmath_string != 0)
1914 if (! strcmp (ix86_fpmath_string, "387"))
1915 ix86_fpmath = FPMATH_387;
1916 else if (! strcmp (ix86_fpmath_string, "sse"))
1918 if (!TARGET_SSE)
1920 warning (0, "SSE instruction set disabled, using 387 arithmetic");
1921 ix86_fpmath = FPMATH_387;
1923 else
1924 ix86_fpmath = FPMATH_SSE;
1926 else if (! strcmp (ix86_fpmath_string, "387,sse")
1927 || ! strcmp (ix86_fpmath_string, "sse,387"))
1929 if (!TARGET_SSE)
1931 warning (0, "SSE instruction set disabled, using 387 arithmetic");
1932 ix86_fpmath = FPMATH_387;
1934 else if (!TARGET_80387)
1936 warning (0, "387 instruction set disabled, using SSE arithmetic");
1937 ix86_fpmath = FPMATH_SSE;
1939 else
1940 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1942 else
1943 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1946 /* If the i387 is disabled, then do not return values in it. */
1947 if (!TARGET_80387)
1948 target_flags &= ~MASK_FLOAT_RETURNS;
1950 if ((x86_accumulate_outgoing_args & TUNEMASK)
1951 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1952 && !optimize_size)
1953 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1955 /* ??? Unwind info is not correct around the CFG unless either a frame
1956 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1957 unwind info generation to be aware of the CFG and propagating states
1958 around edges. */
1959 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1960 || flag_exceptions || flag_non_call_exceptions)
1961 && flag_omit_frame_pointer
1962 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1964 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1965 warning (0, "unwind tables currently require either a frame pointer "
1966 "or -maccumulate-outgoing-args for correctness");
1967 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1970 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1972 char *p;
1973 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1974 p = strchr (internal_label_prefix, 'X');
1975 internal_label_prefix_len = p - internal_label_prefix;
1976 *p = '\0';
1979 /* When the scheduling description is not available, disable the scheduler pass
1980 so it won't slow down compilation or make x87 code slower. */
1981 if (!TARGET_SCHEDULE)
1982 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1985 /* Switch to the appropriate section for output of DECL.
1986 DECL is either a `VAR_DECL' node or a constant of some sort.
1987 RELOC indicates whether forming the initial value of DECL requires
1988 link-time relocations. */
1990 static section *
1991 x86_64_elf_select_section (tree decl, int reloc,
1992 unsigned HOST_WIDE_INT align)
1994 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1995 && ix86_in_large_data_p (decl))
1997 const char *sname = NULL;
1998 unsigned int flags = SECTION_WRITE;
1999 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2001 case SECCAT_DATA:
2002 sname = ".ldata";
2003 break;
2004 case SECCAT_DATA_REL:
2005 sname = ".ldata.rel";
2006 break;
2007 case SECCAT_DATA_REL_LOCAL:
2008 sname = ".ldata.rel.local";
2009 break;
2010 case SECCAT_DATA_REL_RO:
2011 sname = ".ldata.rel.ro";
2012 break;
2013 case SECCAT_DATA_REL_RO_LOCAL:
2014 sname = ".ldata.rel.ro.local";
2015 break;
2016 case SECCAT_BSS:
2017 sname = ".lbss";
2018 flags |= SECTION_BSS;
2019 break;
2020 case SECCAT_RODATA:
2021 case SECCAT_RODATA_MERGE_STR:
2022 case SECCAT_RODATA_MERGE_STR_INIT:
2023 case SECCAT_RODATA_MERGE_CONST:
2024 sname = ".lrodata";
2025 flags = 0;
2026 break;
2027 case SECCAT_SRODATA:
2028 case SECCAT_SDATA:
2029 case SECCAT_SBSS:
2030 gcc_unreachable ();
2031 case SECCAT_TEXT:
2032 case SECCAT_TDATA:
2033 case SECCAT_TBSS:
2034 /* We don't split these for the medium model.  Place them into
2035 default sections and hope for the best. */
2036 break;
2038 if (sname)
2040 /* We might get called with string constants, but get_named_section
2041 doesn't like them as they are not DECLs. Also, we need to set
2042 flags in that case. */
2043 if (!DECL_P (decl))
2044 return get_section (sname, flags, NULL);
2045 return get_named_section (decl, sname, reloc);
2048 return default_elf_select_section (decl, reloc, align);
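/* For illustration, a hypothetical definition such as

     static char big_table[1 << 20] = { 1 };

   compiled with -mcmodel=medium and a smaller -mlarge-data-threshold is
   categorized as SECCAT_DATA and placed in ".ldata" instead of the
   default ".data" section.  */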
2051 /* Build up a unique section name, expressed as a
2052 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2053 RELOC indicates whether the initial value of EXP requires
2054 link-time relocations. */
2056 static void
2057 x86_64_elf_unique_section (tree decl, int reloc)
2059 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2060 && ix86_in_large_data_p (decl))
2062 const char *prefix = NULL;
2063 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2064 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2066 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2068 case SECCAT_DATA:
2069 case SECCAT_DATA_REL:
2070 case SECCAT_DATA_REL_LOCAL:
2071 case SECCAT_DATA_REL_RO:
2072 case SECCAT_DATA_REL_RO_LOCAL:
2073 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2074 break;
2075 case SECCAT_BSS:
2076 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2077 break;
2078 case SECCAT_RODATA:
2079 case SECCAT_RODATA_MERGE_STR:
2080 case SECCAT_RODATA_MERGE_STR_INIT:
2081 case SECCAT_RODATA_MERGE_CONST:
2082 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2083 break;
2084 case SECCAT_SRODATA:
2085 case SECCAT_SDATA:
2086 case SECCAT_SBSS:
2087 gcc_unreachable ();
2088 case SECCAT_TEXT:
2089 case SECCAT_TDATA:
2090 case SECCAT_TBSS:
2091 /* We don't split these for the medium model.  Place them into
2092 default sections and hope for the best. */
2093 break;
2095 if (prefix)
2097 const char *name;
2098 size_t nlen, plen;
2099 char *string;
2100 plen = strlen (prefix);
2102 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2103 name = targetm.strip_name_encoding (name);
2104 nlen = strlen (name);
2106 string = alloca (nlen + plen + 1);
2107 memcpy (string, prefix, plen);
2108 memcpy (string + plen, name, nlen + 1);
2110 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2111 return;
2114 default_unique_section (decl, reloc);
2117 #ifdef COMMON_ASM_OP
2118 /* This says how to output assembler code to declare an
2119 uninitialized external linkage data object.
2121 For medium-model x86-64 we need to use the .largecomm directive for
2122 large objects. */
2123 void
2124 x86_elf_aligned_common (FILE *file,
2125 const char *name, unsigned HOST_WIDE_INT size,
2126 int align)
2128 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2129 && size > (unsigned int)ix86_section_threshold)
2130 fprintf (file, ".largecomm\t");
2131 else
2132 fprintf (file, "%s", COMMON_ASM_OP);
2133 assemble_name (file, name);
2134 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2135 size, align / BITS_PER_UNIT);
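/* For illustration, a hypothetical 100000-byte common symbol "buf" with
   256-bit alignment under -mcmodel=medium would be emitted roughly as

     .largecomm	buf,100000,32

   while smaller objects keep the usual COMMON_ASM_OP (".comm") form.  */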
2138 /* Utility function for targets to use in implementing
2139 ASM_OUTPUT_ALIGNED_BSS. */
2141 void
2142 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2143 const char *name, unsigned HOST_WIDE_INT size,
2144 int align)
2146 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2147 && size > (unsigned int)ix86_section_threshold)
2148 switch_to_section (get_named_section (decl, ".lbss", 0));
2149 else
2150 switch_to_section (bss_section);
2151 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2152 #ifdef ASM_DECLARE_OBJECT_NAME
2153 last_assemble_variable_decl = decl;
2154 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2155 #else
2156 /* The standard thing is to just output a label for the object. */
2157 ASM_OUTPUT_LABEL (file, name);
2158 #endif /* ASM_DECLARE_OBJECT_NAME */
2159 ASM_OUTPUT_SKIP (file, size ? size : 1);
2161 #endif
2163 void
2164 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2166 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2167 make the problem with not enough registers even worse. */
2168 #ifdef INSN_SCHEDULING
2169 if (level > 1)
2170 flag_schedule_insns = 0;
2171 #endif
2173 if (TARGET_MACHO)
2174 /* The Darwin libraries never set errno, so we might as well
2175 avoid calling them when that's the only reason we would. */
2176 flag_errno_math = 0;
2178 /* The default values of these switches depend on TARGET_64BIT,
2179 which is not known at this point.  Mark these values with 2 and
2180 let the user override them.  If no command line option
2181 specifies them, we will set the defaults in override_options. */
2182 if (optimize >= 1)
2183 flag_omit_frame_pointer = 2;
2184 flag_pcc_struct_return = 2;
2185 flag_asynchronous_unwind_tables = 2;
2186 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2187 SUBTARGET_OPTIMIZATION_OPTIONS;
2188 #endif
2191 /* Table of valid machine attributes. */
2192 const struct attribute_spec ix86_attribute_table[] =
2194 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2195 /* Stdcall attribute says callee is responsible for popping arguments
2196 if they are not variable. */
2197 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2198 /* Fastcall attribute says callee is responsible for popping arguments
2199 if they are not variable. */
2200 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2201 /* Cdecl attribute says the callee is a normal C declaration. */
2202 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2203 /* Regparm attribute specifies how many integer arguments are to be
2204 passed in registers. */
2205 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2206 /* Sseregparm attribute says we are using x86_64 calling conventions
2207 for FP arguments. */
2208 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2209 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2210 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2211 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2212 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2213 #endif
2214 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2215 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2216 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2217 SUBTARGET_ATTRIBUTE_TABLE,
2218 #endif
2219 { NULL, 0, 0, false, false, false, NULL }
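/* For illustration, hypothetical declarations using the attributes
   registered above (validated by ix86_handle_cconv_attribute below):

     int __attribute__((stdcall))    f (int a, int b);          callee pops args
     int __attribute__((fastcall))   g (int a, int b);          a, b in ECX, EDX
     int __attribute__((regparm(3))) h (int a, int b, int c);   a-c in EAX, EDX, ECX
     int __attribute__((cdecl))      k (int a, ...);            caller pops args  */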
2222 /* Decide whether we can make a sibling call to a function. DECL is the
2223 declaration of the function being targeted by the call and EXP is the
2224 CALL_EXPR representing the call. */
2226 static bool
2227 ix86_function_ok_for_sibcall (tree decl, tree exp)
2229 tree func;
2230 rtx a, b;
2232 /* If we are generating position-independent code, we cannot sibcall
2233 optimize any indirect call, or a direct call to a global function,
2234 as the PLT requires %ebx be live. */
2235 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2236 return false;
2238 if (decl)
2239 func = decl;
2240 else
2242 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2243 if (POINTER_TYPE_P (func))
2244 func = TREE_TYPE (func);
2247 /* Check that the return value locations are the same. For example,
2248 if we are returning floats on the 80387 register stack, we cannot
2249 make a sibcall from a function that doesn't return a float to a
2250 function that does or, conversely, from a function that does return
2251 a float to a function that doesn't; the necessary stack adjustment
2252 would not be executed. This is also the place we notice
2253 differences in the return value ABI. Note that it is ok for one
2254 of the functions to have void return type as long as the return
2255 value of the other is passed in a register. */
2256 a = ix86_function_value (TREE_TYPE (exp), func, false);
2257 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2258 cfun->decl, false);
2259 if (STACK_REG_P (a) || STACK_REG_P (b))
2261 if (!rtx_equal_p (a, b))
2262 return false;
2264 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2266 else if (!rtx_equal_p (a, b))
2267 return false;
2269 /* If this call is indirect, we'll need to be able to use a call-clobbered
2270 register for the address of the target function. Make sure that all
2271 such registers are not used for passing parameters. */
2272 if (!decl && !TARGET_64BIT)
2274 tree type;
2276 /* We're looking at the CALL_EXPR, we need the type of the function. */
2277 type = TREE_OPERAND (exp, 0); /* pointer expression */
2278 type = TREE_TYPE (type); /* pointer type */
2279 type = TREE_TYPE (type); /* function type */
2281 if (ix86_function_regparm (type, NULL) >= 3)
2283 /* ??? Need to count the actual number of registers to be used,
2284 not the possible number of registers. Fix later. */
2285 return false;
2289 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2290 /* Dllimport'd functions are also called indirectly. */
2291 if (decl && DECL_DLLIMPORT_P (decl)
2292 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2293 return false;
2294 #endif
2296 /* If we force-aligned the stack, then sibcalling would unalign the
2297 stack, which may break the called function. */
2298 if (cfun->machine->force_align_arg_pointer)
2299 return false;
2301 /* Otherwise okay. That also includes certain types of indirect calls. */
2302 return true;
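/* For illustration: with -fpic on ia32, a hypothetical tail call such as

     extern int bar (int);
     int foo (int x) { return bar (x); }

   is rejected above because bar does not bind locally and the PLT call
   needs %ebx live; the same call in non-PIC code can be sibcall-optimized
   into a plain jmp.  */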
2305 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2306 calling convention attributes;
2307 arguments as in struct attribute_spec.handler. */
2309 static tree
2310 ix86_handle_cconv_attribute (tree *node, tree name,
2311 tree args,
2312 int flags ATTRIBUTE_UNUSED,
2313 bool *no_add_attrs)
2315 if (TREE_CODE (*node) != FUNCTION_TYPE
2316 && TREE_CODE (*node) != METHOD_TYPE
2317 && TREE_CODE (*node) != FIELD_DECL
2318 && TREE_CODE (*node) != TYPE_DECL)
2320 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2321 IDENTIFIER_POINTER (name));
2322 *no_add_attrs = true;
2323 return NULL_TREE;
2326 /* Can combine regparm with all attributes but fastcall. */
2327 if (is_attribute_p ("regparm", name))
2329 tree cst;
2331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2333 error ("fastcall and regparm attributes are not compatible");
2336 cst = TREE_VALUE (args);
2337 if (TREE_CODE (cst) != INTEGER_CST)
2339 warning (OPT_Wattributes,
2340 "%qs attribute requires an integer constant argument",
2341 IDENTIFIER_POINTER (name));
2342 *no_add_attrs = true;
2344 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2346 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2347 IDENTIFIER_POINTER (name), REGPARM_MAX);
2348 *no_add_attrs = true;
2351 return NULL_TREE;
2354 if (TARGET_64BIT)
2356 warning (OPT_Wattributes, "%qs attribute ignored",
2357 IDENTIFIER_POINTER (name));
2358 *no_add_attrs = true;
2359 return NULL_TREE;
2362 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2363 if (is_attribute_p ("fastcall", name))
2365 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2367 error ("fastcall and cdecl attributes are not compatible");
2369 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2371 error ("fastcall and stdcall attributes are not compatible");
2373 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2375 error ("fastcall and regparm attributes are not compatible");
2379 /* Can combine stdcall with fastcall (redundant), regparm and
2380 sseregparm. */
2381 else if (is_attribute_p ("stdcall", name))
2383 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2385 error ("stdcall and cdecl attributes are not compatible");
2387 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2389 error ("stdcall and fastcall attributes are not compatible");
2393 /* Can combine cdecl with regparm and sseregparm. */
2394 else if (is_attribute_p ("cdecl", name))
2396 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2398 error ("stdcall and cdecl attributes are not compatible");
2400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2402 error ("fastcall and cdecl attributes are not compatible");
2406 /* Can combine sseregparm with all attributes. */
2408 return NULL_TREE;
2411 /* Return 0 if the attributes for two types are incompatible, 1 if they
2412 are compatible, and 2 if they are nearly compatible (which causes a
2413 warning to be generated). */
2415 static int
2416 ix86_comp_type_attributes (tree type1, tree type2)
2418 /* Check for mismatch of non-default calling convention. */
2419 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2421 if (TREE_CODE (type1) != FUNCTION_TYPE)
2422 return 1;
2424 /* Check for mismatched fastcall/regparm types. */
2425 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2426 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2427 || (ix86_function_regparm (type1, NULL)
2428 != ix86_function_regparm (type2, NULL)))
2429 return 0;
2431 /* Check for mismatched sseregparm types. */
2432 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2433 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2434 return 0;
2436 /* Check for mismatched return types (cdecl vs stdcall). */
2437 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2438 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2439 return 0;
2441 return 1;
2444 /* Return the regparm value for a function with the indicated TYPE and DECL.
2445 DECL may be NULL when calling function indirectly
2446 or considering a libcall. */
2448 static int
2449 ix86_function_regparm (tree type, tree decl)
2451 tree attr;
2452 int regparm = ix86_regparm;
2453 bool user_convention = false;
2455 if (!TARGET_64BIT)
2457 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2458 if (attr)
2460 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2461 user_convention = true;
2464 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2466 regparm = 2;
2467 user_convention = true;
2470 /* Use register calling convention for local functions when possible. */
2471 if (!TARGET_64BIT && !user_convention && decl
2472 && flag_unit_at_a_time && !profile_flag)
2474 struct cgraph_local_info *i = cgraph_local_info (decl);
2475 if (i && i->local)
2477 int local_regparm, globals = 0, regno;
2479 /* Make sure no regparm register is taken by a global register
2480 variable. */
2481 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2482 if (global_regs[local_regparm])
2483 break;
2484 /* We can't use regparm(3) for nested functions as these use
2485 the static chain pointer in the third argument. */
2486 if (local_regparm == 3
2487 && decl_function_context (decl)
2488 && !DECL_NO_STATIC_CHAIN (decl))
2489 local_regparm = 2;
2490 /* Each global register variable increases register pressure,
2491 so the more global reg vars there are, the less the regparm
2492 optimization can be used, unless requested by the user explicitly. */
2493 for (regno = 0; regno < 6; regno++)
2494 if (global_regs[regno])
2495 globals++;
2496 local_regparm
2497 = globals < local_regparm ? local_regparm - globals : 0;
2499 if (local_regparm > regparm)
2500 regparm = local_regparm;
2504 return regparm;
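/* For illustration: with -funit-at-a-time and no profiling, a
   hypothetical file-local function

     static int sum3 (int a, int b, int c) { return a + b + c; }

   that cgraph proves local is given regparm(3) here automatically,
   reduced by the number of global register variables, or to 2 if it
   needs a static chain.  */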
2507 /* Return 1 or 2 if we can pass up to 8 SFmode (1) or SFmode and DFmode (2) arguments
2508 in SSE registers for a function with the indicated TYPE and DECL.
2509 DECL may be NULL when calling function indirectly
2510 or considering a libcall. Otherwise return 0. */
2512 static int
2513 ix86_function_sseregparm (tree type, tree decl)
2515 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2516 by the sseregparm attribute. */
2517 if (TARGET_SSEREGPARM
2518 || (type
2519 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2521 if (!TARGET_SSE)
2523 if (decl)
2524 error ("Calling %qD with attribute sseregparm without "
2525 "SSE/SSE2 enabled", decl);
2526 else
2527 error ("Calling %qT with attribute sseregparm without "
2528 "SSE/SSE2 enabled", type);
2529 return 0;
2532 return 2;
2535 /* For local functions, pass SFmode (and DFmode for SSE2) arguments
2536 in SSE registers even for 32-bit mode and not just 3, but up to
2537 8 SSE arguments in registers. */
2538 if (!TARGET_64BIT && decl
2539 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2541 struct cgraph_local_info *i = cgraph_local_info (decl);
2542 if (i && i->local)
2543 return TARGET_SSE2 ? 2 : 1;
2546 return 0;
2549 /* Return true if EAX is live at the start of the function. Used by
2550 ix86_expand_prologue to determine if we need special help before
2551 calling allocate_stack_worker. */
2553 static bool
2554 ix86_eax_live_at_start_p (void)
2556 /* Cheat. Don't bother working forward from ix86_function_regparm
2557 to the function type to whether an actual argument is located in
2558 eax. Instead just look at cfg info, which is still close enough
2559 to correct at this point. This gives false positives for broken
2560 functions that might use uninitialized data that happens to be
2561 allocated in eax, but who cares? */
2562 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2565 /* Value is the number of bytes of arguments automatically
2566 popped when returning from a subroutine call.
2567 FUNDECL is the declaration node of the function (as a tree),
2568 FUNTYPE is the data type of the function (as a tree),
2569 or for a library call it is an identifier node for the subroutine name.
2570 SIZE is the number of bytes of arguments passed on the stack.
2572 On the 80386, the RTD insn may be used to pop them if the number
2573 of args is fixed, but if the number is variable then the caller
2574 must pop them all. RTD can't be used for library calls now
2575 because the library is compiled with the Unix compiler.
2576 Use of RTD is a selectable option, since it is incompatible with
2577 standard Unix calling sequences. If the option is not selected,
2578 the caller must always pop the args.
2580 The attribute stdcall is equivalent to RTD on a per module basis. */
2583 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2585 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2587 /* Cdecl functions override -mrtd, and never pop the stack. */
2588 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2590 /* Stdcall and fastcall functions will pop the stack if they do not
2591 take variable args. */
2592 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2593 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2594 rtd = 1;
2596 if (rtd
2597 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2598 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2599 == void_type_node)))
2600 return size;
2603 /* Lose any fake structure return argument if it is passed on the stack. */
2604 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2605 && !TARGET_64BIT
2606 && !KEEP_AGGREGATE_RETURN_POINTER)
2608 int nregs = ix86_function_regparm (funtype, fundecl);
2610 if (!nregs)
2611 return GET_MODE_SIZE (Pmode);
2614 return 0;
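/* For illustration: for a hypothetical declaration

     void __attribute__((stdcall)) f (int a, int b);

   the argument list is fixed, so this returns 8 and the callee pops its
   own arguments (ret $8); a variadic or cdecl function returns 0 and the
   caller pops instead.  */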
2617 /* Argument support functions. */
2619 /* Return true when register may be used to pass function parameters. */
2620 bool
2621 ix86_function_arg_regno_p (int regno)
2623 int i;
2624 if (!TARGET_64BIT)
2625 return (regno < REGPARM_MAX
2626 || (TARGET_MMX && MMX_REGNO_P (regno)
2627 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2628 || (TARGET_SSE && SSE_REGNO_P (regno)
2629 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2631 if (TARGET_SSE && SSE_REGNO_P (regno)
2632 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2633 return true;
2634 /* RAX is used as hidden argument to va_arg functions. */
2635 if (!regno)
2636 return true;
2637 for (i = 0; i < REGPARM_MAX; i++)
2638 if (regno == x86_64_int_parameter_registers[i])
2639 return true;
2640 return false;
2643 /* Return true if we do not know how to pass TYPE solely in registers. */
2645 static bool
2646 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2648 if (must_pass_in_stack_var_size_or_pad (mode, type))
2649 return true;
2651 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2652 The layout_type routine is crafty and tries to trick us into passing
2653 currently unsupported vector types on the stack by using TImode. */
2654 return (!TARGET_64BIT && mode == TImode
2655 && type && TREE_CODE (type) != VECTOR_TYPE);
2658 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2659 for a call to a function whose data type is FNTYPE.
2660 For a library call, FNTYPE is 0. */
2662 void
2663 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2664 tree fntype, /* tree ptr for function decl */
2665 rtx libname, /* SYMBOL_REF of library name or 0 */
2666 tree fndecl)
2668 static CUMULATIVE_ARGS zero_cum;
2669 tree param, next_param;
2671 if (TARGET_DEBUG_ARG)
2673 fprintf (stderr, "\ninit_cumulative_args (");
2674 if (fntype)
2675 fprintf (stderr, "fntype code = %s, ret code = %s",
2676 tree_code_name[(int) TREE_CODE (fntype)],
2677 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2678 else
2679 fprintf (stderr, "no fntype");
2681 if (libname)
2682 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2685 *cum = zero_cum;
2687 /* Set up the number of registers to use for passing arguments. */
2688 cum->nregs = ix86_regparm;
2689 if (TARGET_SSE)
2690 cum->sse_nregs = SSE_REGPARM_MAX;
2691 if (TARGET_MMX)
2692 cum->mmx_nregs = MMX_REGPARM_MAX;
2693 cum->warn_sse = true;
2694 cum->warn_mmx = true;
2695 cum->maybe_vaarg = false;
2697 /* Use ecx and edx registers if function has fastcall attribute,
2698 else look for regparm information. */
2699 if (fntype && !TARGET_64BIT)
2701 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2703 cum->nregs = 2;
2704 cum->fastcall = 1;
2706 else
2707 cum->nregs = ix86_function_regparm (fntype, fndecl);
2710 /* Set up the number of SSE registers used for passing SFmode
2711 and DFmode arguments. Warn for mismatching ABI. */
2712 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2714 /* Determine if this function has variable arguments. This is
2715 indicated by the last argument being 'void_type_node' if there
2716 are no variable arguments. If there are variable arguments, then
2717 we won't pass anything in registers in 32-bit mode. */
2719 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2721 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2722 param != 0; param = next_param)
2724 next_param = TREE_CHAIN (param);
2725 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2727 if (!TARGET_64BIT)
2729 cum->nregs = 0;
2730 cum->sse_nregs = 0;
2731 cum->mmx_nregs = 0;
2732 cum->warn_sse = 0;
2733 cum->warn_mmx = 0;
2734 cum->fastcall = 0;
2735 cum->float_in_sse = 0;
2737 cum->maybe_vaarg = true;
2741 if ((!fntype && !libname)
2742 || (fntype && !TYPE_ARG_TYPES (fntype)))
2743 cum->maybe_vaarg = true;
2745 if (TARGET_DEBUG_ARG)
2746 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2748 return;
2751 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2752 But in the case of vector types, it is some vector mode.
2754 When we have only some of our vector isa extensions enabled, then there
2755 are some modes for which vector_mode_supported_p is false. For these
2756 modes, the generic vector support in gcc will choose some non-vector mode
2757 in order to implement the type. By computing the natural mode, we'll
2758 select the proper ABI location for the operand and not depend on whatever
2759 the middle-end decides to do with these vector types. */
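/* For illustration: for a hypothetical type

     typedef float v4sf __attribute__((vector_size (16)));

   this returns V4SFmode even when -mno-sse makes the middle end fall
   back to a non-vector mode, so the ABI location of a v4sf argument does
   not depend on which ISA extensions are enabled.  */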
2761 static enum machine_mode
2762 type_natural_mode (tree type)
2764 enum machine_mode mode = TYPE_MODE (type);
2766 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2768 HOST_WIDE_INT size = int_size_in_bytes (type);
2769 if ((size == 8 || size == 16)
2770 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2771 && TYPE_VECTOR_SUBPARTS (type) > 1)
2773 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2775 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2776 mode = MIN_MODE_VECTOR_FLOAT;
2777 else
2778 mode = MIN_MODE_VECTOR_INT;
2780 /* Get the mode which has this inner mode and number of units. */
2781 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2782 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2783 && GET_MODE_INNER (mode) == innermode)
2784 return mode;
2786 gcc_unreachable ();
2790 return mode;
2793 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2794 this may not agree with the mode that the type system has chosen for the
2795 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2796 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2798 static rtx
2799 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2800 unsigned int regno)
2802 rtx tmp;
2804 if (orig_mode != BLKmode)
2805 tmp = gen_rtx_REG (orig_mode, regno);
2806 else
2808 tmp = gen_rtx_REG (mode, regno);
2809 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2810 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2813 return tmp;
2816 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
2817 of this code is to classify each eightbyte of an incoming argument by register
2818 class and assign registers accordingly. */
2820 /* Return the union class of CLASS1 and CLASS2.
2821 See the x86-64 PS ABI for details. */
2823 static enum x86_64_reg_class
2824 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2826 /* Rule #1: If both classes are equal, this is the resulting class. */
2827 if (class1 == class2)
2828 return class1;
2830 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2831 the other class. */
2832 if (class1 == X86_64_NO_CLASS)
2833 return class2;
2834 if (class2 == X86_64_NO_CLASS)
2835 return class1;
2837 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2838 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2839 return X86_64_MEMORY_CLASS;
2841 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2842 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2843 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2844 return X86_64_INTEGERSI_CLASS;
2845 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2846 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2847 return X86_64_INTEGER_CLASS;
2849 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2850 MEMORY is used. */
2851 if (class1 == X86_64_X87_CLASS
2852 || class1 == X86_64_X87UP_CLASS
2853 || class1 == X86_64_COMPLEX_X87_CLASS
2854 || class2 == X86_64_X87_CLASS
2855 || class2 == X86_64_X87UP_CLASS
2856 || class2 == X86_64_COMPLEX_X87_CLASS)
2857 return X86_64_MEMORY_CLASS;
2859 /* Rule #6: Otherwise class SSE is used. */
2860 return X86_64_SSE_CLASS;
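/* For illustration: classifying a hypothetical

     union u { int i; float f; };

   merges X86_64_INTEGERSI_CLASS (from the int) with X86_64_SSESF_CLASS
   (from the float); Rule #4 makes the result X86_64_INTEGERSI_CLASS, so
   the union is passed in a general-purpose register.  */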
2863 /* Classify the argument of type TYPE and mode MODE.
2864 CLASSES will be filled with the register class used to pass each word
2865 of the operand.  The number of words is returned.  If the parameter
2866 should be passed in memory, 0 is returned.  As a special case for
2867 zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
2869 BIT_OFFSET is used internally for handling records; it specifies the
2870 offset in bits, modulo 256, to avoid overflow cases.
2872 See the x86-64 PS ABI for details. */
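/* Worked example: a hypothetical

     struct s { double d; int i; };

   is 16 bytes, i.e. two eightbytes.  The first classifies as
   X86_64_SSEDF_CLASS (the double) and the second as
   X86_64_INTEGER_CLASS (the int), so 2 is returned and the struct is
   passed in one SSE register and one integer register.  */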
2875 static int
2876 classify_argument (enum machine_mode mode, tree type,
2877 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2879 HOST_WIDE_INT bytes =
2880 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2881 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2883 /* Variable sized entities are always passed/returned in memory. */
2884 if (bytes < 0)
2885 return 0;
2887 if (mode != VOIDmode
2888 && targetm.calls.must_pass_in_stack (mode, type))
2889 return 0;
2891 if (type && AGGREGATE_TYPE_P (type))
2893 int i;
2894 tree field;
2895 enum x86_64_reg_class subclasses[MAX_CLASSES];
2897 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2898 if (bytes > 16)
2899 return 0;
2901 for (i = 0; i < words; i++)
2902 classes[i] = X86_64_NO_CLASS;
2904 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
2905 signal the memory class, so handle it as a special case. */
2906 if (!words)
2908 classes[0] = X86_64_NO_CLASS;
2909 return 1;
2912 /* Classify each field of record and merge classes. */
2913 switch (TREE_CODE (type))
2915 case RECORD_TYPE:
2916 /* For C++ classes, first merge in the fields of the base classes. */
2917 if (TYPE_BINFO (type))
2919 tree binfo, base_binfo;
2920 int basenum;
2922 for (binfo = TYPE_BINFO (type), basenum = 0;
2923 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2925 int num;
2926 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2927 tree type = BINFO_TYPE (base_binfo);
2929 num = classify_argument (TYPE_MODE (type),
2930 type, subclasses,
2931 (offset + bit_offset) % 256);
2932 if (!num)
2933 return 0;
2934 for (i = 0; i < num; i++)
2936 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2937 classes[i + pos] =
2938 merge_classes (subclasses[i], classes[i + pos]);
2942 /* And now merge the fields of the structure. */
2943 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2945 if (TREE_CODE (field) == FIELD_DECL)
2947 int num;
2949 /* Bitfields are always classified as integer. Handle them
2950 early, since later code would consider them to be
2951 misaligned integers. */
2952 if (DECL_BIT_FIELD (field))
2954 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2955 i < ((int_bit_position (field) + (bit_offset % 64))
2956 + tree_low_cst (DECL_SIZE (field), 0)
2957 + 63) / 8 / 8; i++)
2958 classes[i] =
2959 merge_classes (X86_64_INTEGER_CLASS,
2960 classes[i]);
2962 else
2964 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2965 TREE_TYPE (field), subclasses,
2966 (int_bit_position (field)
2967 + bit_offset) % 256);
2968 if (!num)
2969 return 0;
2970 for (i = 0; i < num; i++)
2972 int pos =
2973 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2974 classes[i + pos] =
2975 merge_classes (subclasses[i], classes[i + pos]);
2980 break;
2982 case ARRAY_TYPE:
2983 /* Arrays are handled as small records. */
2985 int num;
2986 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2987 TREE_TYPE (type), subclasses, bit_offset);
2988 if (!num)
2989 return 0;
2991 /* The partial classes are now full classes. */
2992 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2993 subclasses[0] = X86_64_SSE_CLASS;
2994 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2995 subclasses[0] = X86_64_INTEGER_CLASS;
2997 for (i = 0; i < words; i++)
2998 classes[i] = subclasses[i % num];
3000 break;
3002 case UNION_TYPE:
3003 case QUAL_UNION_TYPE:
3004 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3007 /* Unions are not derived. */
3008 gcc_assert (!TYPE_BINFO (type)
3009 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
3010 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3012 if (TREE_CODE (field) == FIELD_DECL)
3014 int num;
3015 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3016 TREE_TYPE (field), subclasses,
3017 bit_offset);
3018 if (!num)
3019 return 0;
3020 for (i = 0; i < num; i++)
3021 classes[i] = merge_classes (subclasses[i], classes[i]);
3024 break;
3026 default:
3027 gcc_unreachable ();
3030 /* Final merger cleanup. */
3031 for (i = 0; i < words; i++)
3033 /* If one class is MEMORY, everything should be passed in
3034 memory. */
3035 if (classes[i] == X86_64_MEMORY_CLASS)
3036 return 0;
3038 /* The X86_64_SSEUP_CLASS should be always preceded by
3039 X86_64_SSE_CLASS. */
3040 if (classes[i] == X86_64_SSEUP_CLASS
3041 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3042 classes[i] = X86_64_SSE_CLASS;
3044 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3045 if (classes[i] == X86_64_X87UP_CLASS
3046 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3047 classes[i] = X86_64_SSE_CLASS;
3049 return words;
3052 /* Compute the alignment needed.  We align all types to natural boundaries with
3053 the exception of XFmode, which is aligned to 64 bits. */
3054 if (mode != VOIDmode && mode != BLKmode)
3056 int mode_alignment = GET_MODE_BITSIZE (mode);
3058 if (mode == XFmode)
3059 mode_alignment = 128;
3060 else if (mode == XCmode)
3061 mode_alignment = 256;
3062 if (COMPLEX_MODE_P (mode))
3063 mode_alignment /= 2;
3064 /* Misaligned fields are always returned in memory. */
3065 if (bit_offset % mode_alignment)
3066 return 0;
3069 /* For V1xx modes, just use the base mode. */
3070 if (VECTOR_MODE_P (mode)
3071 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3072 mode = GET_MODE_INNER (mode);
3074 /* Classification of atomic types. */
3075 switch (mode)
3077 case SDmode:
3078 case DDmode:
3079 classes[0] = X86_64_SSE_CLASS;
3080 return 1;
3081 case TDmode:
3082 classes[0] = X86_64_SSE_CLASS;
3083 classes[1] = X86_64_SSEUP_CLASS;
3084 return 2;
3085 case DImode:
3086 case SImode:
3087 case HImode:
3088 case QImode:
3089 case CSImode:
3090 case CHImode:
3091 case CQImode:
3092 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3093 classes[0] = X86_64_INTEGERSI_CLASS;
3094 else
3095 classes[0] = X86_64_INTEGER_CLASS;
3096 return 1;
3097 case CDImode:
3098 case TImode:
3099 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3100 return 2;
3101 case CTImode:
3102 return 0;
3103 case SFmode:
3104 if (!(bit_offset % 64))
3105 classes[0] = X86_64_SSESF_CLASS;
3106 else
3107 classes[0] = X86_64_SSE_CLASS;
3108 return 1;
3109 case DFmode:
3110 classes[0] = X86_64_SSEDF_CLASS;
3111 return 1;
3112 case XFmode:
3113 classes[0] = X86_64_X87_CLASS;
3114 classes[1] = X86_64_X87UP_CLASS;
3115 return 2;
3116 case TFmode:
3117 classes[0] = X86_64_SSE_CLASS;
3118 classes[1] = X86_64_SSEUP_CLASS;
3119 return 2;
3120 case SCmode:
3121 classes[0] = X86_64_SSE_CLASS;
3122 return 1;
3123 case DCmode:
3124 classes[0] = X86_64_SSEDF_CLASS;
3125 classes[1] = X86_64_SSEDF_CLASS;
3126 return 2;
3127 case XCmode:
3128 classes[0] = X86_64_COMPLEX_X87_CLASS;
3129 return 1;
3130 case TCmode:
3131 /* This mode is larger than 16 bytes. */
3132 return 0;
3133 case V4SFmode:
3134 case V4SImode:
3135 case V16QImode:
3136 case V8HImode:
3137 case V2DFmode:
3138 case V2DImode:
3139 classes[0] = X86_64_SSE_CLASS;
3140 classes[1] = X86_64_SSEUP_CLASS;
3141 return 2;
3142 case V2SFmode:
3143 case V2SImode:
3144 case V4HImode:
3145 case V8QImode:
3146 classes[0] = X86_64_SSE_CLASS;
3147 return 1;
3148 case BLKmode:
3149 case VOIDmode:
3150 return 0;
3151 default:
3152 gcc_assert (VECTOR_MODE_P (mode));
3154 if (bytes > 16)
3155 return 0;
3157 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3159 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3160 classes[0] = X86_64_INTEGERSI_CLASS;
3161 else
3162 classes[0] = X86_64_INTEGER_CLASS;
3163 classes[1] = X86_64_INTEGER_CLASS;
3164 return 1 + (bytes > 8);
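/* Worked example (illustrative note, not part of the original sources):
   for a 16-byte aggregate such as
       struct s { double d; long l; };
   the record case above classifies field by field and merges per eightbyte,
   ending with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, i.e. the value is split between an SSE
   register and a general-purpose register.  */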
3168 /* Examine the argument and set the number of registers required in each
3169 class.  Return 0 iff the parameter should be passed in memory. */
3170 static int
3171 examine_argument (enum machine_mode mode, tree type, int in_return,
3172 int *int_nregs, int *sse_nregs)
3174 enum x86_64_reg_class class[MAX_CLASSES];
3175 int n = classify_argument (mode, type, class, 0);
3177 *int_nregs = 0;
3178 *sse_nregs = 0;
3179 if (!n)
3180 return 0;
3181 for (n--; n >= 0; n--)
3182 switch (class[n])
3184 case X86_64_INTEGER_CLASS:
3185 case X86_64_INTEGERSI_CLASS:
3186 (*int_nregs)++;
3187 break;
3188 case X86_64_SSE_CLASS:
3189 case X86_64_SSESF_CLASS:
3190 case X86_64_SSEDF_CLASS:
3191 (*sse_nregs)++;
3192 break;
3193 case X86_64_NO_CLASS:
3194 case X86_64_SSEUP_CLASS:
3195 break;
3196 case X86_64_X87_CLASS:
3197 case X86_64_X87UP_CLASS:
3198 if (!in_return)
3199 return 0;
3200 break;
3201 case X86_64_COMPLEX_X87_CLASS:
3202 return in_return ? 2 : 0;
3203 case X86_64_MEMORY_CLASS:
3204 gcc_unreachable ();
3206 return 1;
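/* Illustrative note: for the struct used as an example above,
   examine_argument reports *int_nregs == 1 and *sse_nregs == 1; callers then
   compare both counts against the registers still available before deciding
   whether the argument can stay out of memory.  */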
3209 /* Construct container for the argument used by GCC interface. See
3210 FUNCTION_ARG for the detailed description. */
3212 static rtx
3213 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3214 tree type, int in_return, int nintregs, int nsseregs,
3215 const int *intreg, int sse_regno)
3217 enum machine_mode tmpmode;
3218 int bytes =
3219 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3220 enum x86_64_reg_class class[MAX_CLASSES];
3221 int n;
3222 int i;
3223 int nexps = 0;
3224 int needed_sseregs, needed_intregs;
3225 rtx exp[MAX_CLASSES];
3226 rtx ret;
3228 n = classify_argument (mode, type, class, 0);
3229 if (TARGET_DEBUG_ARG)
3231 if (!n)
3232 fprintf (stderr, "Memory class\n");
3233 else
3235 fprintf (stderr, "Classes:");
3236 for (i = 0; i < n; i++)
3238 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3240 fprintf (stderr, "\n");
3243 if (!n)
3244 return NULL;
3245 if (!examine_argument (mode, type, in_return, &needed_intregs,
3246 &needed_sseregs))
3247 return NULL;
3248 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3249 return NULL;
3251 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3252 some less clueful developer tries to use floating-point anyway. */
3253 if (needed_sseregs && !TARGET_SSE)
3255 static bool issued_error;
3256 if (!issued_error)
3258 issued_error = true;
3259 if (in_return)
3260 error ("SSE register return with SSE disabled");
3261 else
3262 error ("SSE register argument with SSE disabled");
3264 return NULL;
3267 /* First construct simple cases. Avoid SCmode, since we want to use a
3268 single register to pass this type. */
3269 if (n == 1 && mode != SCmode)
3270 switch (class[0])
3272 case X86_64_INTEGER_CLASS:
3273 case X86_64_INTEGERSI_CLASS:
3274 return gen_rtx_REG (mode, intreg[0]);
3275 case X86_64_SSE_CLASS:
3276 case X86_64_SSESF_CLASS:
3277 case X86_64_SSEDF_CLASS:
3278 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3279 case X86_64_X87_CLASS:
3280 case X86_64_COMPLEX_X87_CLASS:
3281 return gen_rtx_REG (mode, FIRST_STACK_REG);
3282 case X86_64_NO_CLASS:
3283 /* Zero sized array, struct or class. */
3284 return NULL;
3285 default:
3286 gcc_unreachable ();
3288 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3289 && mode != BLKmode)
3290 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3291 if (n == 2
3292 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3293 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3294 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3295 && class[1] == X86_64_INTEGER_CLASS
3296 && (mode == CDImode || mode == TImode || mode == TFmode)
3297 && intreg[0] + 1 == intreg[1])
3298 return gen_rtx_REG (mode, intreg[0]);
3300 /* Otherwise figure out the entries of the PARALLEL. */
3301 for (i = 0; i < n; i++)
3303 switch (class[i])
3305 case X86_64_NO_CLASS:
3306 break;
3307 case X86_64_INTEGER_CLASS:
3308 case X86_64_INTEGERSI_CLASS:
3309 /* Merge TImodes on aligned occasions here too. */
3310 if (i * 8 + 8 > bytes)
3311 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3312 else if (class[i] == X86_64_INTEGERSI_CLASS)
3313 tmpmode = SImode;
3314 else
3315 tmpmode = DImode;
3316 /* We've requested 24 bytes for which we have no mode.  Use DImode. */
3317 if (tmpmode == BLKmode)
3318 tmpmode = DImode;
3319 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3320 gen_rtx_REG (tmpmode, *intreg),
3321 GEN_INT (i*8));
3322 intreg++;
3323 break;
3324 case X86_64_SSESF_CLASS:
3325 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3326 gen_rtx_REG (SFmode,
3327 SSE_REGNO (sse_regno)),
3328 GEN_INT (i*8));
3329 sse_regno++;
3330 break;
3331 case X86_64_SSEDF_CLASS:
3332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3333 gen_rtx_REG (DFmode,
3334 SSE_REGNO (sse_regno)),
3335 GEN_INT (i*8));
3336 sse_regno++;
3337 break;
3338 case X86_64_SSE_CLASS:
3339 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3340 tmpmode = TImode;
3341 else
3342 tmpmode = DImode;
3343 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3344 gen_rtx_REG (tmpmode,
3345 SSE_REGNO (sse_regno)),
3346 GEN_INT (i*8));
3347 if (tmpmode == TImode)
3348 i++;
3349 sse_regno++;
3350 break;
3351 default:
3352 gcc_unreachable ();
3356 /* Empty aligned struct, union or class. */
3357 if (nexps == 0)
3358 return NULL;
3360 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3361 for (i = 0; i < nexps; i++)
3362 XVECEXP (ret, 0, i) = exp [i];
3363 return ret;
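/* Illustrative note: for an argument classified as
   { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, the loop above builds a
   PARALLEL whose EXPR_LISTs put a DFmode piece in the next SSE register at
   offset 0 and a DImode piece in the next integer register at offset 8,
   mirroring the eightbyte split computed by classify_argument.  */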
3366 /* Update the data in CUM to advance over an argument
3367 of mode MODE and data type TYPE.
3368 (TYPE is null for libcalls where that information may not be available.) */
3370 void
3371 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3372 tree type, int named)
3374 int bytes =
3375 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3376 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3378 if (type)
3379 mode = type_natural_mode (type);
3381 if (TARGET_DEBUG_ARG)
3382 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3383 "mode=%s, named=%d)\n\n",
3384 words, cum->words, cum->nregs, cum->sse_nregs,
3385 GET_MODE_NAME (mode), named);
3387 if (TARGET_64BIT)
3389 int int_nregs, sse_nregs;
3390 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3391 cum->words += words;
3392 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3394 cum->nregs -= int_nregs;
3395 cum->sse_nregs -= sse_nregs;
3396 cum->regno += int_nregs;
3397 cum->sse_regno += sse_nregs;
3399 else
3400 cum->words += words;
3402 else
3404 switch (mode)
3406 default:
3407 break;
3409 case BLKmode:
3410 if (bytes < 0)
3411 break;
3412 /* FALLTHRU */
3414 case DImode:
3415 case SImode:
3416 case HImode:
3417 case QImode:
3418 cum->words += words;
3419 cum->nregs -= words;
3420 cum->regno += words;
3422 if (cum->nregs <= 0)
3424 cum->nregs = 0;
3425 cum->regno = 0;
3427 break;
3429 case DFmode:
3430 if (cum->float_in_sse < 2)
3431 break;
3432 case SFmode:
3433 if (cum->float_in_sse < 1)
3434 break;
3435 /* FALLTHRU */
3437 case TImode:
3438 case V16QImode:
3439 case V8HImode:
3440 case V4SImode:
3441 case V2DImode:
3442 case V4SFmode:
3443 case V2DFmode:
3444 if (!type || !AGGREGATE_TYPE_P (type))
3446 cum->sse_words += words;
3447 cum->sse_nregs -= 1;
3448 cum->sse_regno += 1;
3449 if (cum->sse_nregs <= 0)
3451 cum->sse_nregs = 0;
3452 cum->sse_regno = 0;
3455 break;
3457 case V8QImode:
3458 case V4HImode:
3459 case V2SImode:
3460 case V2SFmode:
3461 if (!type || !AGGREGATE_TYPE_P (type))
3463 cum->mmx_words += words;
3464 cum->mmx_nregs -= 1;
3465 cum->mmx_regno += 1;
3466 if (cum->mmx_nregs <= 0)
3468 cum->mmx_nregs = 0;
3469 cum->mmx_regno = 0;
3472 break;
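/* Illustrative summary: on x86-64 each call to this function either consumes
   the registers reported by examine_argument (decrementing cum->nregs and
   cum->sse_nregs and advancing the matching regno counters) or, when the
   argument does not fit in the remaining registers, adds its size in words
   to cum->words, the stack area consumed so far.  */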
3477 /* Define where to put the arguments to a function.
3478 Value is zero to push the argument on the stack,
3479 or a hard register in which to store the argument.
3481 MODE is the argument's machine mode.
3482 TYPE is the data type of the argument (as a tree).
3483 This is null for libcalls where that information may
3484 not be available.
3485 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3486 the preceding args and about the function being called.
3487 NAMED is nonzero if this argument is a named parameter
3488 (otherwise it is an extra parameter matching an ellipsis). */
3491 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3492 tree type, int named)
3494 enum machine_mode mode = orig_mode;
3495 rtx ret = NULL_RTX;
3496 int bytes =
3497 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3498 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3499 static bool warnedsse, warnedmmx;
3501 /* To simplify the code below, represent vector types with a vector mode
3502 even if MMX/SSE are not active. */
3503 if (type && TREE_CODE (type) == VECTOR_TYPE)
3504 mode = type_natural_mode (type);
3506 /* Handle a hidden AL argument containing the number of SSE registers for
3507 varargs x86-64 functions.  For the i386 ABI just return constm1_rtx to
3508 avoid any AL settings. */
3509 if (mode == VOIDmode)
3511 if (TARGET_64BIT)
3512 return GEN_INT (cum->maybe_vaarg
3513 ? (cum->sse_nregs < 0
3514 ? SSE_REGPARM_MAX
3515 : cum->sse_regno)
3516 : -1);
3517 else
3518 return constm1_rtx;
3520 if (TARGET_64BIT)
3521 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3522 cum->sse_nregs,
3523 &x86_64_int_parameter_registers [cum->regno],
3524 cum->sse_regno);
3525 else
3526 switch (mode)
3528 /* For now, pass fp/complex values on the stack. */
3529 default:
3530 break;
3532 case BLKmode:
3533 if (bytes < 0)
3534 break;
3535 /* FALLTHRU */
3536 case DImode:
3537 case SImode:
3538 case HImode:
3539 case QImode:
3540 if (words <= cum->nregs)
3542 int regno = cum->regno;
3544 /* Fastcall allocates the first two DWORD-sized (SImode) or
3545 smaller arguments to ECX and EDX. */
3546 if (cum->fastcall)
3548 if (mode == BLKmode || mode == DImode)
3549 break;
3551 /* ECX, not EAX, is the first allocated register. */
3552 if (regno == 0)
3553 regno = 2;
3555 ret = gen_rtx_REG (mode, regno);
3557 break;
3558 case DFmode:
3559 if (cum->float_in_sse < 2)
3560 break;
3561 case SFmode:
3562 if (cum->float_in_sse < 1)
3563 break;
3564 /* FALLTHRU */
3565 case TImode:
3566 case V16QImode:
3567 case V8HImode:
3568 case V4SImode:
3569 case V2DImode:
3570 case V4SFmode:
3571 case V2DFmode:
3572 if (!type || !AGGREGATE_TYPE_P (type))
3574 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3576 warnedsse = true;
3577 warning (0, "SSE vector argument without SSE enabled "
3578 "changes the ABI");
3580 if (cum->sse_nregs)
3581 ret = gen_reg_or_parallel (mode, orig_mode,
3582 cum->sse_regno + FIRST_SSE_REG);
3584 break;
3585 case V8QImode:
3586 case V4HImode:
3587 case V2SImode:
3588 case V2SFmode:
3589 if (!type || !AGGREGATE_TYPE_P (type))
3591 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3593 warnedmmx = true;
3594 warning (0, "MMX vector argument without MMX enabled "
3595 "changes the ABI");
3597 if (cum->mmx_nregs)
3598 ret = gen_reg_or_parallel (mode, orig_mode,
3599 cum->mmx_regno + FIRST_MMX_REG);
3601 break;
3604 if (TARGET_DEBUG_ARG)
3606 fprintf (stderr,
3607 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3608 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3610 if (ret)
3611 print_simple_rtl (stderr, ret);
3612 else
3613 fprintf (stderr, ", stack");
3615 fprintf (stderr, " )\n");
3618 return ret;
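/* Illustrative examples (not from the original sources): with fastcall on
   ia32 the first SImode argument lands in %ecx (regno 0 is remapped to 2
   above) and the second in %edx, while plain regparm starts at %eax; on
   x86-64 a VOIDmode "argument" stands for the hidden AL value that tells a
   varargs callee how many SSE registers were used.  */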
3621 /* A C expression that indicates when an argument must be passed by
3622 reference. If nonzero for an argument, a copy of that argument is
3623 made in memory and a pointer to the argument is passed instead of
3624 the argument itself. The pointer is passed in whatever way is
3625 appropriate for passing a pointer to that type. */
3627 static bool
3628 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3629 enum machine_mode mode ATTRIBUTE_UNUSED,
3630 tree type, bool named ATTRIBUTE_UNUSED)
3632 if (!TARGET_64BIT)
3633 return 0;
3635 if (type && int_size_in_bytes (type) == -1)
3637 if (TARGET_DEBUG_ARG)
3638 fprintf (stderr, "function_arg_pass_by_reference\n");
3639 return 1;
3642 return 0;
3645 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3646 passing ABI.  Only called if TARGET_SSE. */
3647 static bool
3648 contains_128bit_aligned_vector_p (tree type)
3650 enum machine_mode mode = TYPE_MODE (type);
3651 if (SSE_REG_MODE_P (mode)
3652 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3653 return true;
3654 if (TYPE_ALIGN (type) < 128)
3655 return false;
3657 if (AGGREGATE_TYPE_P (type))
3659 /* Walk the aggregates recursively. */
3660 switch (TREE_CODE (type))
3662 case RECORD_TYPE:
3663 case UNION_TYPE:
3664 case QUAL_UNION_TYPE:
3666 tree field;
3668 if (TYPE_BINFO (type))
3670 tree binfo, base_binfo;
3671 int i;
3673 for (binfo = TYPE_BINFO (type), i = 0;
3674 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3675 if (contains_128bit_aligned_vector_p
3676 (BINFO_TYPE (base_binfo)))
3677 return true;
3679 /* And now merge the fields of the structure. */
3680 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3682 if (TREE_CODE (field) == FIELD_DECL
3683 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3684 return true;
3686 break;
3689 case ARRAY_TYPE:
3690 /* Just in case some languages pass arrays by value. */
3691 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3692 return true;
3693 break;
3695 default:
3696 gcc_unreachable ();
3699 return false;
3702 /* Gives the alignment boundary, in bits, of an argument with the
3703 specified mode and type. */
3706 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3708 int align;
3709 if (type)
3710 align = TYPE_ALIGN (type);
3711 else
3712 align = GET_MODE_ALIGNMENT (mode);
3713 if (align < PARM_BOUNDARY)
3714 align = PARM_BOUNDARY;
3715 if (!TARGET_64BIT)
3717 /* The i386 ABI defines all arguments to be 4-byte aligned.  We have to
3718 make an exception for SSE modes since these require 128-bit
3719 alignment.
3721 The handling here differs from field_alignment.  ICC aligns MMX
3722 arguments to 4-byte boundaries, while structure fields are aligned
3723 to 8-byte boundaries. */
3724 if (!TARGET_SSE)
3725 align = PARM_BOUNDARY;
3726 else if (!type)
3728 if (!SSE_REG_MODE_P (mode))
3729 align = PARM_BOUNDARY;
3731 else
3733 if (!contains_128bit_aligned_vector_p (type))
3734 align = PARM_BOUNDARY;
3737 if (align > 128)
3738 align = 128;
3739 return align;
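/* Example (illustrative): a __m128 argument, or an aggregate containing a
   128-bit aligned vector, keeps its 128-bit boundary on ia32; all other
   arguments fall back to PARM_BOUNDARY, and nothing is ever aligned beyond
   128 bits.  */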
3742 /* Return true if N is a possible register number of function value. */
3743 bool
3744 ix86_function_value_regno_p (int regno)
3746 if (regno == 0
3747 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3748 || (regno == FIRST_SSE_REG && TARGET_SSE))
3749 return true;
3751 if (!TARGET_64BIT
3752 && (regno == FIRST_MMX_REG && TARGET_MMX))
3753 return true;
3755 return false;
3758 /* Define how to find the value returned by a function.
3759 VALTYPE is the data type of the value (as a tree).
3760 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3761 otherwise, FUNC is 0. */
3763 ix86_function_value (tree valtype, tree fntype_or_decl,
3764 bool outgoing ATTRIBUTE_UNUSED)
3766 enum machine_mode natmode = type_natural_mode (valtype);
3768 if (TARGET_64BIT)
3770 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3771 1, REGPARM_MAX, SSE_REGPARM_MAX,
3772 x86_64_int_return_registers, 0);
3773 /* For zero sized structures, construct_container returns NULL, but we
3774 need to keep the rest of the compiler happy by returning a meaningful value. */
3775 if (!ret)
3776 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3777 return ret;
3779 else
3781 tree fn = NULL_TREE, fntype;
3782 if (fntype_or_decl
3783 && DECL_P (fntype_or_decl))
3784 fn = fntype_or_decl;
3785 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3786 return gen_rtx_REG (TYPE_MODE (valtype),
3787 ix86_value_regno (natmode, fn, fntype));
3791 /* Return true iff type is returned in memory. */
3793 ix86_return_in_memory (tree type)
3795 int needed_intregs, needed_sseregs, size;
3796 enum machine_mode mode = type_natural_mode (type);
3798 if (TARGET_64BIT)
3799 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3801 if (mode == BLKmode)
3802 return 1;
3804 size = int_size_in_bytes (type);
3806 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3807 return 0;
3809 if (VECTOR_MODE_P (mode) || mode == TImode)
3811 /* User-created vectors small enough to fit in EAX. */
3812 if (size < 8)
3813 return 0;
3815 /* MMX/3dNow values are returned in MM0,
3816 except when it doesn't exist. */
3817 if (size == 8)
3818 return (TARGET_MMX ? 0 : 1);
3820 /* SSE values are returned in XMM0, except when it doesn't exist. */
3821 if (size == 16)
3822 return (TARGET_SSE ? 0 : 1);
3825 if (mode == XFmode)
3826 return 0;
3828 if (mode == TDmode)
3829 return 1;
3831 if (size > 12)
3832 return 1;
3833 return 0;
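/* Illustrative examples of the ia32 rules above: an 8-byte MMX vector is
   returned in %mm0 only when MMX is enabled, a 16-byte SSE vector in %xmm0
   only when SSE is enabled, XFmode long double comes back in %st(0), and any
   other value wider than 12 bytes goes to memory.  */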
3836 /* When returning SSE vector types, we have a choice of either
3837 (1) being ABI incompatible with a -march switch, or
3838 (2) generating an error.
3839 Given no good solution, I think the safest thing is one warning.
3840 The user won't be able to use -Werror, but....
3842 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3843 called in response to actually generating a caller or callee that
3844 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3845 via aggregate_value_p for general type probing from tree-ssa. */
3847 static rtx
3848 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3850 static bool warnedsse, warnedmmx;
3852 if (type)
3854 /* Look at the return type of the function, not the function type. */
3855 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3857 if (!TARGET_SSE && !warnedsse)
3859 if (mode == TImode
3860 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3862 warnedsse = true;
3863 warning (0, "SSE vector return without SSE enabled "
3864 "changes the ABI");
3868 if (!TARGET_MMX && !warnedmmx)
3870 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3872 warnedmmx = true;
3873 warning (0, "MMX vector return without MMX enabled "
3874 "changes the ABI");
3879 return NULL;
3882 /* Define how to find the value returned by a library function
3883 assuming the value has mode MODE. */
3885 ix86_libcall_value (enum machine_mode mode)
3887 if (TARGET_64BIT)
3889 switch (mode)
3891 case SFmode:
3892 case SCmode:
3893 case DFmode:
3894 case DCmode:
3895 case TFmode:
3896 case SDmode:
3897 case DDmode:
3898 case TDmode:
3899 return gen_rtx_REG (mode, FIRST_SSE_REG);
3900 case XFmode:
3901 case XCmode:
3902 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3903 case TCmode:
3904 return NULL;
3905 default:
3906 return gen_rtx_REG (mode, 0);
3909 else
3910 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3913 /* Given a mode, return the register to use for a return value. */
3915 static int
3916 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3918 gcc_assert (!TARGET_64BIT);
3920 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3921 we prevent this case when mmx is not available. */
3922 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3923 return FIRST_MMX_REG;
3925 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3926 we prevent this case when sse is not available. */
3927 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3928 return FIRST_SSE_REG;
3930 /* Decimal floating point values can go in %eax, unlike other float modes. */
3931 if (DECIMAL_FLOAT_MODE_P (mode))
3932 return 0;
3934 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3935 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
3936 return 0;
3938 /* Floating point return values in %st(0), except for local functions when
3939 SSE math is enabled or for functions with sseregparm attribute. */
3940 if ((func || fntype)
3941 && (mode == SFmode || mode == DFmode))
3943 int sse_level = ix86_function_sseregparm (fntype, func);
3944 if ((sse_level >= 1 && mode == SFmode)
3945 || (sse_level == 2 && mode == DFmode))
3946 return FIRST_SSE_REG;
3949 return FIRST_FLOAT_REG;
3952 /* Create the va_list data type. */
3954 static tree
3955 ix86_build_builtin_va_list (void)
3957 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3959 /* For i386 we use a plain pointer to the argument area. */
3960 if (!TARGET_64BIT)
3961 return build_pointer_type (char_type_node);
3963 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3964 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3966 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3967 unsigned_type_node);
3968 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3969 unsigned_type_node);
3970 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3971 ptr_type_node);
3972 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3973 ptr_type_node);
3975 va_list_gpr_counter_field = f_gpr;
3976 va_list_fpr_counter_field = f_fpr;
3978 DECL_FIELD_CONTEXT (f_gpr) = record;
3979 DECL_FIELD_CONTEXT (f_fpr) = record;
3980 DECL_FIELD_CONTEXT (f_ovf) = record;
3981 DECL_FIELD_CONTEXT (f_sav) = record;
3983 TREE_CHAIN (record) = type_decl;
3984 TYPE_NAME (record) = type_decl;
3985 TYPE_FIELDS (record) = f_gpr;
3986 TREE_CHAIN (f_gpr) = f_fpr;
3987 TREE_CHAIN (f_fpr) = f_ovf;
3988 TREE_CHAIN (f_ovf) = f_sav;
3990 layout_type (record);
3992 /* The correct type is an array type of one element. */
3993 return build_array_type (record, build_index_type (size_zero_node));
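/* The record built above corresponds roughly to this C declaration
   (illustrative sketch only, using the field names created above):
       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */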
3996 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3998 static void
3999 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4000 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4001 int no_rtl)
4003 CUMULATIVE_ARGS next_cum;
4004 rtx save_area = NULL_RTX, mem;
4005 rtx label;
4006 rtx label_ref;
4007 rtx tmp_reg;
4008 rtx nsse_reg;
4009 int set;
4010 tree fntype;
4011 int stdarg_p;
4012 int i;
4014 if (!TARGET_64BIT)
4015 return;
4017 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4018 return;
4020 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4021 ix86_save_varrargs_registers = 1;
4023 cfun->stack_alignment_needed = 128;
4025 fntype = TREE_TYPE (current_function_decl);
4026 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4027 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4028 != void_type_node));
4030 /* For varargs, we do not want to skip the dummy va_dcl argument.
4031 For stdargs, we do want to skip the last named argument. */
4032 next_cum = *cum;
4033 if (stdarg_p)
4034 function_arg_advance (&next_cum, mode, type, 1);
4036 if (!no_rtl)
4037 save_area = frame_pointer_rtx;
4039 set = get_varargs_alias_set ();
4041 for (i = next_cum.regno;
4042 i < ix86_regparm
4043 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4044 i++)
4046 mem = gen_rtx_MEM (Pmode,
4047 plus_constant (save_area, i * UNITS_PER_WORD));
4048 MEM_NOTRAP_P (mem) = 1;
4049 set_mem_alias_set (mem, set);
4050 emit_move_insn (mem, gen_rtx_REG (Pmode,
4051 x86_64_int_parameter_registers[i]));
4054 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4056 /* Now emit code to save SSE registers.  The AX parameter contains the number
4057 of SSE parameter registers used to call this function.  We use the
4058 sse_prologue_save insn template, which produces a computed jump across
4059 the SSE saves.  We need some preparation work to get this working. */
4061 label = gen_label_rtx ();
4062 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4064 /* Compute the address to jump to:
4065 label - eax*4 + nnamed_sse_arguments*4 (the factor of 4 matches the multiplication below). */
4066 tmp_reg = gen_reg_rtx (Pmode);
4067 nsse_reg = gen_reg_rtx (Pmode);
4068 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4069 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4070 gen_rtx_MULT (Pmode, nsse_reg,
4071 GEN_INT (4))));
4072 if (next_cum.sse_regno)
4073 emit_move_insn
4074 (nsse_reg,
4075 gen_rtx_CONST (DImode,
4076 gen_rtx_PLUS (DImode,
4077 label_ref,
4078 GEN_INT (next_cum.sse_regno * 4))));
4079 else
4080 emit_move_insn (nsse_reg, label_ref);
4081 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4083 /* Compute the address of the memory block we save into.  We always use a
4084 pointer pointing 127 bytes after the first byte to store - this is needed
4085 to keep the instruction size limited to 4 bytes. */
4086 tmp_reg = gen_reg_rtx (Pmode);
4087 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4088 plus_constant (save_area,
4089 8 * REGPARM_MAX + 127)));
4090 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4091 MEM_NOTRAP_P (mem) = 1;
4092 set_mem_alias_set (mem, set);
4093 set_mem_align (mem, BITS_PER_WORD);
4095 /* And finally do the dirty job! */
4096 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4097 GEN_INT (next_cum.sse_regno), label));
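/* Illustrative layout of the varargs register save area set up above: the
   first 8 * REGPARM_MAX bytes hold the integer argument registers, saved one
   word apiece by the loop, and the SSE registers saved by sse_prologue_save
   follow in 16-byte slots.  This is why ix86_va_start below seeds fp_offset
   with n_fpr * 16 + 8 * REGPARM_MAX.  */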
4102 /* Implement va_start. */
4104 void
4105 ix86_va_start (tree valist, rtx nextarg)
4107 HOST_WIDE_INT words, n_gpr, n_fpr;
4108 tree f_gpr, f_fpr, f_ovf, f_sav;
4109 tree gpr, fpr, ovf, sav, t;
4111 /* Only the 64-bit target needs anything special. */
4112 if (!TARGET_64BIT)
4114 std_expand_builtin_va_start (valist, nextarg);
4115 return;
4118 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4119 f_fpr = TREE_CHAIN (f_gpr);
4120 f_ovf = TREE_CHAIN (f_fpr);
4121 f_sav = TREE_CHAIN (f_ovf);
4123 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4124 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4125 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4126 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4127 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4129 /* Count number of gp and fp argument registers used. */
4130 words = current_function_args_info.words;
4131 n_gpr = current_function_args_info.regno;
4132 n_fpr = current_function_args_info.sse_regno;
4134 if (TARGET_DEBUG_ARG)
4135 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4136 (int) words, (int) n_gpr, (int) n_fpr);
4138 if (cfun->va_list_gpr_size)
4140 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
4141 build_int_cst (NULL_TREE, n_gpr * 8));
4142 TREE_SIDE_EFFECTS (t) = 1;
4143 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4146 if (cfun->va_list_fpr_size)
4148 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
4149 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
4150 TREE_SIDE_EFFECTS (t) = 1;
4151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4154 /* Find the overflow area. */
4155 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
4156 if (words != 0)
4157 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
4158 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
4159 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4160 TREE_SIDE_EFFECTS (t) = 1;
4161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4163 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4165 /* Find the register save area.
4166 The function prologue saves it right above the stack frame. */
4167 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
4168 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
4169 TREE_SIDE_EFFECTS (t) = 1;
4170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4174 /* Implement va_arg. */
4176 tree
4177 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4179 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4180 tree f_gpr, f_fpr, f_ovf, f_sav;
4181 tree gpr, fpr, ovf, sav, t;
4182 int size, rsize;
4183 tree lab_false, lab_over = NULL_TREE;
4184 tree addr, t2;
4185 rtx container;
4186 int indirect_p = 0;
4187 tree ptrtype;
4188 enum machine_mode nat_mode;
4190 /* Only the 64-bit target needs anything special. */
4191 if (!TARGET_64BIT)
4192 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4194 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4195 f_fpr = TREE_CHAIN (f_gpr);
4196 f_ovf = TREE_CHAIN (f_fpr);
4197 f_sav = TREE_CHAIN (f_ovf);
4199 valist = build_va_arg_indirect_ref (valist);
4200 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4201 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4202 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4203 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4205 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4206 if (indirect_p)
4207 type = build_pointer_type (type);
4208 size = int_size_in_bytes (type);
4209 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4211 nat_mode = type_natural_mode (type);
4212 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4213 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4215 /* Pull the value out of the saved registers. */
4217 addr = create_tmp_var (ptr_type_node, "addr");
4218 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4220 if (container)
4222 int needed_intregs, needed_sseregs;
4223 bool need_temp;
4224 tree int_addr, sse_addr;
4226 lab_false = create_artificial_label ();
4227 lab_over = create_artificial_label ();
4229 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4231 need_temp = (!REG_P (container)
4232 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4233 || TYPE_ALIGN (type) > 128));
4235 /* In case we are passing a structure, verify that it is a consecutive
4236 block in the register save area.  If not, we need to do moves. */
4237 if (!need_temp && !REG_P (container))
4239 /* Verify that all registers are strictly consecutive */
4240 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4242 int i;
4244 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4246 rtx slot = XVECEXP (container, 0, i);
4247 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4248 || INTVAL (XEXP (slot, 1)) != i * 16)
4249 need_temp = 1;
4252 else
4254 int i;
4256 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4258 rtx slot = XVECEXP (container, 0, i);
4259 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4260 || INTVAL (XEXP (slot, 1)) != i * 8)
4261 need_temp = 1;
4265 if (!need_temp)
4267 int_addr = addr;
4268 sse_addr = addr;
4270 else
4272 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4273 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4274 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4275 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4278 /* First ensure that we fit completely in registers. */
4279 if (needed_intregs)
4281 t = build_int_cst (TREE_TYPE (gpr),
4282 (REGPARM_MAX - needed_intregs + 1) * 8);
4283 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4284 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4285 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4286 gimplify_and_add (t, pre_p);
4288 if (needed_sseregs)
4290 t = build_int_cst (TREE_TYPE (fpr),
4291 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4292 + REGPARM_MAX * 8);
4293 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4294 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4295 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4296 gimplify_and_add (t, pre_p);
4299 /* Compute index to start of area used for integer regs. */
4300 if (needed_intregs)
4302 /* int_addr = gpr + sav; */
4303 t = fold_convert (ptr_type_node, gpr);
4304 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4305 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4306 gimplify_and_add (t, pre_p);
4308 if (needed_sseregs)
4310 /* sse_addr = fpr + sav; */
4311 t = fold_convert (ptr_type_node, fpr);
4312 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4313 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4314 gimplify_and_add (t, pre_p);
4316 if (need_temp)
4318 int i;
4319 tree temp = create_tmp_var (type, "va_arg_tmp");
4321 /* addr = &temp; */
4322 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4323 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4324 gimplify_and_add (t, pre_p);
4326 for (i = 0; i < XVECLEN (container, 0); i++)
4328 rtx slot = XVECEXP (container, 0, i);
4329 rtx reg = XEXP (slot, 0);
4330 enum machine_mode mode = GET_MODE (reg);
4331 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4332 tree addr_type = build_pointer_type (piece_type);
4333 tree src_addr, src;
4334 int src_offset;
4335 tree dest_addr, dest;
4337 if (SSE_REGNO_P (REGNO (reg)))
4339 src_addr = sse_addr;
4340 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4342 else
4344 src_addr = int_addr;
4345 src_offset = REGNO (reg) * 8;
4347 src_addr = fold_convert (addr_type, src_addr);
4348 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4349 size_int (src_offset)));
4350 src = build_va_arg_indirect_ref (src_addr);
4352 dest_addr = fold_convert (addr_type, addr);
4353 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4354 size_int (INTVAL (XEXP (slot, 1)))));
4355 dest = build_va_arg_indirect_ref (dest_addr);
4357 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4358 gimplify_and_add (t, pre_p);
4362 if (needed_intregs)
4364 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4365 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4366 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4367 gimplify_and_add (t, pre_p);
4369 if (needed_sseregs)
4371 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4372 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4373 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4374 gimplify_and_add (t, pre_p);
4377 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4378 gimplify_and_add (t, pre_p);
4380 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4381 append_to_statement_list (t, pre_p);
4384 /* ... otherwise out of the overflow area. */
4386 /* Care for on-stack alignment if needed. */
4387 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4388 || integer_zerop (TYPE_SIZE (type)))
4389 t = ovf;
4390 else
4392 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4393 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4394 build_int_cst (TREE_TYPE (ovf), align - 1));
4395 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4396 build_int_cst (TREE_TYPE (t), -align));
4398 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4400 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4401 gimplify_and_add (t2, pre_p);
4403 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4404 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4405 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4406 gimplify_and_add (t, pre_p);
4408 if (container)
4410 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4411 append_to_statement_list (t, pre_p);
4414 ptrtype = build_pointer_type (type);
4415 addr = fold_convert (ptrtype, addr);
4417 if (indirect_p)
4418 addr = build_va_arg_indirect_ref (addr);
4419 return build_va_arg_indirect_ref (addr);
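/* Rough sketch (illustrative only, assuming REGPARM_MAX == 6) of what the
   code above gimplifies for `va_arg (ap, int)' on x86-64:

       if (ap->gp_offset >= 6 * 8) goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area = addr + 8;
     lab_over:
       result = *(int *) addr;  */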
4422 /* Return nonzero if OPNUM's MEM should be matched
4423 in movabs* patterns. */
4426 ix86_check_movabs (rtx insn, int opnum)
4428 rtx set, mem;
4430 set = PATTERN (insn);
4431 if (GET_CODE (set) == PARALLEL)
4432 set = XVECEXP (set, 0, 0);
4433 gcc_assert (GET_CODE (set) == SET);
4434 mem = XEXP (set, opnum);
4435 while (GET_CODE (mem) == SUBREG)
4436 mem = SUBREG_REG (mem);
4437 gcc_assert (GET_CODE (mem) == MEM);
4438 return (volatile_ok || !MEM_VOLATILE_P (mem));
4441 /* Initialize the table of extra 80387 mathematical constants. */
4443 static void
4444 init_ext_80387_constants (void)
4446 static const char * cst[5] =
4448 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4449 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4450 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4451 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4452 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4454 int i;
4456 for (i = 0; i < 5; i++)
4458 real_from_string (&ext_80387_constants_table[i], cst[i]);
4459 /* Ensure each constant is rounded to XFmode precision. */
4460 real_convert (&ext_80387_constants_table[i],
4461 XFmode, &ext_80387_constants_table[i]);
4464 ext_80387_constants_init = 1;
4467 /* Return true if the constant is something that can be loaded with
4468 a special instruction. */
4471 standard_80387_constant_p (rtx x)
4473 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4474 return -1;
4476 if (x == CONST0_RTX (GET_MODE (x)))
4477 return 1;
4478 if (x == CONST1_RTX (GET_MODE (x)))
4479 return 2;
4481 /* For XFmode constants, try to find a special 80387 instruction when
4482 optimizing for size or on those CPUs that benefit from them. */
4483 if (GET_MODE (x) == XFmode
4484 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4486 REAL_VALUE_TYPE r;
4487 int i;
4489 if (! ext_80387_constants_init)
4490 init_ext_80387_constants ();
4492 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4493 for (i = 0; i < 5; i++)
4494 if (real_identical (&r, &ext_80387_constants_table[i]))
4495 return i + 3;
4498 return 0;
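/* Return-value encoding, as consumed by standard_80387_constant_opcode and
   standard_80387_constant_rtx below: -1 means not a usable FP constant, 0
   means no special load insn exists, 1 is fldz, 2 is fld1, and 3..7 index
   the fldlg2/fldln2/fldl2e/fldl2t/fldpi table initialized above.  */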
4501 /* Return the opcode of the special instruction to be used to load
4502 the constant X. */
4504 const char *
4505 standard_80387_constant_opcode (rtx x)
4507 switch (standard_80387_constant_p (x))
4509 case 1:
4510 return "fldz";
4511 case 2:
4512 return "fld1";
4513 case 3:
4514 return "fldlg2";
4515 case 4:
4516 return "fldln2";
4517 case 5:
4518 return "fldl2e";
4519 case 6:
4520 return "fldl2t";
4521 case 7:
4522 return "fldpi";
4523 default:
4524 gcc_unreachable ();
4528 /* Return the CONST_DOUBLE representing the 80387 constant that is
4529 loaded by the specified special instruction. The argument IDX
4530 matches the return value from standard_80387_constant_p. */
4533 standard_80387_constant_rtx (int idx)
4535 int i;
4537 if (! ext_80387_constants_init)
4538 init_ext_80387_constants ();
4540 switch (idx)
4542 case 3:
4543 case 4:
4544 case 5:
4545 case 6:
4546 case 7:
4547 i = idx - 3;
4548 break;
4550 default:
4551 gcc_unreachable ();
4554 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4555 XFmode);
4558 /* Return 1 if X is an FP constant we can load to an SSE register without using memory.  */
4561 standard_sse_constant_p (rtx x)
4563 if (x == const0_rtx)
4564 return 1;
4565 return (x == CONST0_RTX (GET_MODE (x)));
4568 /* Returns 1 if OP contains a symbol reference */
4571 symbolic_reference_mentioned_p (rtx op)
4573 const char *fmt;
4574 int i;
4576 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4577 return 1;
4579 fmt = GET_RTX_FORMAT (GET_CODE (op));
4580 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4582 if (fmt[i] == 'E')
4584 int j;
4586 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4587 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4588 return 1;
4591 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4592 return 1;
4595 return 0;
4598 /* Return 1 if it is appropriate to emit `ret' instructions in the
4599 body of a function. Do this only if the epilogue is simple, needing a
4600 couple of insns. Prior to reloading, we can't tell how many registers
4601 must be saved, so return 0 then. Return 0 if there is no frame
4602 marker to de-allocate. */
4605 ix86_can_use_return_insn_p (void)
4607 struct ix86_frame frame;
4609 if (! reload_completed || frame_pointer_needed)
4610 return 0;
4612 /* Don't allow popping more than 32768 bytes of arguments, since that's
4613 all we can do with one instruction. */
4614 if (current_function_pops_args
4615 && current_function_args_size >= 32768)
4616 return 0;
4618 ix86_compute_frame_layout (&frame);
4619 return frame.to_allocate == 0 && frame.nregs == 0;
4622 /* Value should be nonzero if functions must have frame pointers.
4623 Zero means the frame pointer need not be set up (and parms may
4624 be accessed via the stack pointer) in functions that seem suitable. */
4627 ix86_frame_pointer_required (void)
4629 /* If we accessed previous frames, then the generated code expects
4630 to be able to access the saved ebp value in our frame. */
4631 if (cfun->machine->accesses_prev_frame)
4632 return 1;
4634 /* Several x86 OSes need a frame pointer for other reasons,
4635 usually pertaining to setjmp. */
4636 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4637 return 1;
4639 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4640 the frame pointer by default. Turn it back on now if we've not
4641 got a leaf function. */
4642 if (TARGET_OMIT_LEAF_FRAME_POINTER
4643 && (!current_function_is_leaf
4644 || ix86_current_function_calls_tls_descriptor))
4645 return 1;
4647 if (current_function_profile)
4648 return 1;
4650 return 0;
4653 /* Record that the current function accesses previous call frames. */
4655 void
4656 ix86_setup_frame_addresses (void)
4658 cfun->machine->accesses_prev_frame = 1;
4661 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
4662 # define USE_HIDDEN_LINKONCE 1
4663 #else
4664 # define USE_HIDDEN_LINKONCE 0
4665 #endif
4667 static int pic_labels_used;
4669 /* Fills in the label name that should be used for a pc thunk for
4670 the given register. */
4672 static void
4673 get_pc_thunk_name (char name[32], unsigned int regno)
4675 if (USE_HIDDEN_LINKONCE)
4676 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4677 else
4678 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4682 /* At the end of the file, generate the pc thunks used by -fpic code: each
4683 loads its register with the return address of the caller and then returns. */
4685 void
4686 ix86_file_end (void)
4688 rtx xops[2];
4689 int regno;
4691 for (regno = 0; regno < 8; ++regno)
4693 char name[32];
4695 if (! ((pic_labels_used >> regno) & 1))
4696 continue;
4698 get_pc_thunk_name (name, regno);
4700 #if TARGET_MACHO
4701 if (TARGET_MACHO)
4703 switch_to_section (darwin_sections[text_coal_section]);
4704 fputs ("\t.weak_definition\t", asm_out_file);
4705 assemble_name (asm_out_file, name);
4706 fputs ("\n\t.private_extern\t", asm_out_file);
4707 assemble_name (asm_out_file, name);
4708 fputs ("\n", asm_out_file);
4709 ASM_OUTPUT_LABEL (asm_out_file, name);
4711 else
4712 #endif
4713 if (USE_HIDDEN_LINKONCE)
4715 tree decl;
4717 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4718 error_mark_node);
4719 TREE_PUBLIC (decl) = 1;
4720 TREE_STATIC (decl) = 1;
4721 DECL_ONE_ONLY (decl) = 1;
4723 (*targetm.asm_out.unique_section) (decl, 0);
4724 switch_to_section (get_named_section (decl, NULL, 0));
4726 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4727 fputs ("\t.hidden\t", asm_out_file);
4728 assemble_name (asm_out_file, name);
4729 fputc ('\n', asm_out_file);
4730 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4732 else
4734 switch_to_section (text_section);
4735 ASM_OUTPUT_LABEL (asm_out_file, name);
4738 xops[0] = gen_rtx_REG (SImode, regno);
4739 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4740 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4741 output_asm_insn ("ret", xops);
4744 if (NEED_INDICATE_EXEC_STACK)
4745 file_end_indicate_exec_stack ();
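/* Each thunk emitted above amounts to (AT&T syntax, illustrative):

       __i686.get_pc_thunk.bx:
           movl (%esp), %ebx
           ret

   i.e. it copies the return address pushed by the call into the requested
   register and returns, which is how PIC code learns its own address.  */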
4748 /* Emit code for the SET_GOT patterns. */
4750 const char *
4751 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
4753 rtx xops[3];
4755 xops[0] = dest;
4756 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4758 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4760 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
4762 if (!flag_pic)
4763 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4764 else
4765 output_asm_insn ("call\t%a2", xops);
4767 #if TARGET_MACHO
4768 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4769 is what will be referenced by the Mach-O PIC subsystem. */
4770 if (!label)
4771 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4772 #endif
4774 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4775 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4777 if (flag_pic)
4778 output_asm_insn ("pop{l}\t%0", xops);
4780 else
4782 char name[32];
4783 get_pc_thunk_name (name, REGNO (dest));
4784 pic_labels_used |= 1 << REGNO (dest);
4786 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4787 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4788 output_asm_insn ("call\t%X2", xops);
4789 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
4790 is what will be referenced by the Mach-O PIC subsystem. */
4791 #if TARGET_MACHO
4792 if (!label)
4793 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4794 else
4795 targetm.asm_out.internal_label (asm_out_file, "L",
4796 CODE_LABEL_NUMBER (label));
4797 #endif
4800 if (TARGET_MACHO)
4801 return "";
4803 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4804 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4805 else
4806 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4808 return "";
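/* The PIC sequences produced above look roughly like this (illustrative):

     without deep branch prediction:
           call 1f
       1:  popl %reg
           addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %reg

     with deep branch prediction:
           call __i686.get_pc_thunk.reg
           addl $_GLOBAL_OFFSET_TABLE_, %reg  */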
4811 /* Generate a "push" pattern for input ARG. */
4813 static rtx
4814 gen_push (rtx arg)
4816 return gen_rtx_SET (VOIDmode,
4817 gen_rtx_MEM (Pmode,
4818 gen_rtx_PRE_DEC (Pmode,
4819 stack_pointer_rtx)),
4820 arg);
4823 /* Return >= 0 if there is an unused call-clobbered register available
4824 for the entire function. */
4826 static unsigned int
4827 ix86_select_alt_pic_regnum (void)
4829 if (current_function_is_leaf && !current_function_profile
4830 && !ix86_current_function_calls_tls_descriptor)
4832 int i;
4833 for (i = 2; i >= 0; --i)
4834 if (!regs_ever_live[i])
4835 return i;
4838 return INVALID_REGNUM;
4841 /* Return 1 if we need to save REGNO. */
4842 static int
4843 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4845 if (pic_offset_table_rtx
4846 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4847 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4848 || current_function_profile
4849 || current_function_calls_eh_return
4850 || current_function_uses_const_pool))
4852 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4853 return 0;
4854 return 1;
4857 if (current_function_calls_eh_return && maybe_eh_return)
4859 unsigned i;
4860 for (i = 0; ; i++)
4862 unsigned test = EH_RETURN_DATA_REGNO (i);
4863 if (test == INVALID_REGNUM)
4864 break;
4865 if (test == regno)
4866 return 1;
4870 if (cfun->machine->force_align_arg_pointer
4871 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4872 return 1;
4874 return (regs_ever_live[regno]
4875 && !call_used_regs[regno]
4876 && !fixed_regs[regno]
4877 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4880 /* Return number of registers to be saved on the stack. */
4882 static int
4883 ix86_nsaved_regs (void)
4885 int nregs = 0;
4886 int regno;
4888 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4889 if (ix86_save_reg (regno, true))
4890 nregs++;
4891 return nregs;
4894 /* Return the offset between two registers, one to be eliminated, and the other
4895 its replacement, at the start of a routine. */
4897 HOST_WIDE_INT
4898 ix86_initial_elimination_offset (int from, int to)
4900 struct ix86_frame frame;
4901 ix86_compute_frame_layout (&frame);
4903 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4904 return frame.hard_frame_pointer_offset;
4905 else if (from == FRAME_POINTER_REGNUM
4906 && to == HARD_FRAME_POINTER_REGNUM)
4907 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4908 else
4910 gcc_assert (to == STACK_POINTER_REGNUM);
4912 if (from == ARG_POINTER_REGNUM)
4913 return frame.stack_pointer_offset;
4915 gcc_assert (from == FRAME_POINTER_REGNUM);
4916 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4920 /* Fill structure ix86_frame about frame of currently computed function. */
4922 static void
4923 ix86_compute_frame_layout (struct ix86_frame *frame)
4925 HOST_WIDE_INT total_size;
4926 unsigned int stack_alignment_needed;
4927 HOST_WIDE_INT offset;
4928 unsigned int preferred_alignment;
4929 HOST_WIDE_INT size = get_frame_size ();
4931 frame->nregs = ix86_nsaved_regs ();
4932 total_size = size;
4934 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4935 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4937 /* During reload iterations the number of registers saved can change.
4938 Recompute the value as needed.  Do not recompute when the number of
4939 registers didn't change, as reload calls this function multiple times and
4940 does not expect the decision to change within a single iteration. */
4941 if (!optimize_size
4942 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4944 int count = frame->nregs;
4946 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4947 /* The fast prologue uses move instead of push to save registers.  This
4948 is significantly longer, but also executes faster, as modern hardware
4949 can execute the moves in parallel but can't do that for push/pop.
4951 Be careful about choosing what prologue to emit: when the function takes
4952 many instructions to execute we may as well use the slow version, and the
4953 same holds when the function is known to be outside any hot spot (this is
4954 known with feedback only).  Weight the size of the function by the number
4955 of registers to save, as it is cheap to use one or two push instructions
4956 but very slow to use many of them. */
4957 if (count)
4958 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4959 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4960 || (flag_branch_probabilities
4961 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4962 cfun->machine->use_fast_prologue_epilogue = false;
4963 else
4964 cfun->machine->use_fast_prologue_epilogue
4965 = !expensive_function_p (count);
4967 if (TARGET_PROLOGUE_USING_MOVE
4968 && cfun->machine->use_fast_prologue_epilogue)
4969 frame->save_regs_using_mov = true;
4970 else
4971 frame->save_regs_using_mov = false;
4974 /* Skip return address and saved base pointer. */
4975 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4977 frame->hard_frame_pointer_offset = offset;
4979 /* Do some sanity checking of stack_alignment_needed and
4980 preferred_alignment, since the i386 port is the only one using these
4981 features, and they may break easily. */
4983 gcc_assert (!size || stack_alignment_needed);
4984 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4985 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4986 gcc_assert (stack_alignment_needed
4987 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4989 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4990 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4992 /* Register save area */
4993 offset += frame->nregs * UNITS_PER_WORD;
4995 /* Va-arg area */
4996 if (ix86_save_varrargs_registers)
4998 offset += X86_64_VARARGS_SIZE;
4999 frame->va_arg_size = X86_64_VARARGS_SIZE;
5001 else
5002 frame->va_arg_size = 0;
5004 /* Align start of frame for local function. */
5005 frame->padding1 = ((offset + stack_alignment_needed - 1)
5006 & -stack_alignment_needed) - offset;
5008 offset += frame->padding1;
5010 /* Frame pointer points here. */
5011 frame->frame_pointer_offset = offset;
5013 offset += size;
5015 /* Add the outgoing arguments area.  It can be skipped if we eliminated
5016 all the function calls as dead code.
5017 Skipping is however impossible when the function calls alloca: the alloca
5018 expander assumes that the last current_function_outgoing_args_size bytes
5019 of the stack frame are unused. */
5020 if (ACCUMULATE_OUTGOING_ARGS
5021 && (!current_function_is_leaf || current_function_calls_alloca
5022 || ix86_current_function_calls_tls_descriptor))
5024 offset += current_function_outgoing_args_size;
5025 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5027 else
5028 frame->outgoing_arguments_size = 0;
5030 /* Align stack boundary. Only needed if we're calling another function
5031 or using alloca. */
5032 if (!current_function_is_leaf || current_function_calls_alloca
5033 || ix86_current_function_calls_tls_descriptor)
5034 frame->padding2 = ((offset + preferred_alignment - 1)
5035 & -preferred_alignment) - offset;
5036 else
5037 frame->padding2 = 0;
5039 offset += frame->padding2;
5041 /* We've reached end of stack frame. */
5042 frame->stack_pointer_offset = offset;
5044 /* Size prologue needs to allocate. */
5045 frame->to_allocate =
5046 (size + frame->padding1 + frame->padding2
5047 + frame->outgoing_arguments_size + frame->va_arg_size);
5049 if ((!frame->to_allocate && frame->nregs <= 1)
5050 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5051 frame->save_regs_using_mov = false;
5053 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5054 && current_function_is_leaf
5055 && !ix86_current_function_calls_tls_descriptor)
5057 frame->red_zone_size = frame->to_allocate;
5058 if (frame->save_regs_using_mov)
5059 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5060 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5061 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5063 else
5064 frame->red_zone_size = 0;
5065 frame->to_allocate -= frame->red_zone_size;
5066 frame->stack_pointer_offset -= frame->red_zone_size;
5067 #if 0
5068 fprintf (stderr, "nregs: %i\n", frame->nregs);
5069 fprintf (stderr, "size: %i\n", size);
5070 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5071 fprintf (stderr, "padding1: %i\n", frame->padding1);
5072 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5073 fprintf (stderr, "padding2: %i\n", frame->padding2);
5074 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5075 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5076 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5077 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5078 frame->hard_frame_pointer_offset);
5079 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5080 #endif
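/* Illustrative summary of the frame layout computed above, from higher to
   lower addresses:

       return address
       saved frame pointer            (if frame_pointer_needed)
       saved registers                (nregs words)
       varargs register save area     (va_arg_size)
       padding1                       <- frame_pointer_offset
       local variables                (size)
       outgoing argument area
       padding2                       <- stack_pointer_offset

   to_allocate covers everything below the saved registers, minus whatever
   fits in the red zone on x86-64.  */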
5083 /* Emit code to save registers in the prologue. */
5085 static void
5086 ix86_emit_save_regs (void)
5088 unsigned int regno;
5089 rtx insn;
5091 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5092 if (ix86_save_reg (regno, true))
5094 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5095 RTX_FRAME_RELATED_P (insn) = 1;
5099 /* Emit code to save registers using MOV insns.  The first register
5100 is saved at POINTER + OFFSET. */
5101 static void
5102 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5104 unsigned int regno;
5105 rtx insn;
5107 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5108 if (ix86_save_reg (regno, true))
5110 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5111 Pmode, offset),
5112 gen_rtx_REG (Pmode, regno));
5113 RTX_FRAME_RELATED_P (insn) = 1;
5114 offset += UNITS_PER_WORD;
5118 /* Expand prologue or epilogue stack adjustment.
5119 The pattern exists to put a dependency on all ebp-based memory accesses.
5120 STYLE should be negative if instructions should be marked as frame related,
5121 zero if the %r11 register is live and cannot be freely used, and positive
5122 otherwise. */
5124 static void
5125 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5127 rtx insn;
5129 if (! TARGET_64BIT)
5130 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5131 else if (x86_64_immediate_operand (offset, DImode))
5132 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5133 else
5135 rtx r11;
5136 /* r11 is used by indirect sibcall return as well, set before the
5137 epilogue and used after the epilogue. ATM indirect sibcall
5138 shouldn't be used together with huge frame sizes in one
5139 function because of the frame_size check in sibcall.c. */
5140 gcc_assert (style);
5141 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5142 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5143 if (style < 0)
5144 RTX_FRAME_RELATED_P (insn) = 1;
5145 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5146 offset));
5148 if (style < 0)
5149 RTX_FRAME_RELATED_P (insn) = 1;
5152 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5154 static rtx
5155 ix86_internal_arg_pointer (void)
5157 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5158 && DECL_NAME (current_function_decl)
5159 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5160 && DECL_FILE_SCOPE_P (current_function_decl))
5162 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5163 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5165 else
5166 return virtual_incoming_args_rtx;
5169 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5170 This is called from dwarf2out.c to emit call frame instructions
5171 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5172 static void
5173 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5175 rtx unspec = SET_SRC (pattern);
5176 gcc_assert (GET_CODE (unspec) == UNSPEC);
5178 switch (index)
5180 case UNSPEC_REG_SAVE:
5181 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5182 SET_DEST (pattern));
5183 break;
5184 case UNSPEC_DEF_CFA:
5185 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5186 INTVAL (XVECEXP (unspec, 0, 0)));
5187 break;
5188 default:
5189 gcc_unreachable ();
5193 /* Expand the prologue into a bunch of separate insns. */
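/* A typical frame-pointer prologue emitted below looks roughly like
   (illustrative):
   pushl %ebp
   movl %esp, %ebp
   pushl <call-saved regs>  (or MOV-based saves when save_regs_using_mov)
   subl $<frame.to_allocate>, %esp  */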
5195 void
5196 ix86_expand_prologue (void)
5198 rtx insn;
5199 bool pic_reg_used;
5200 struct ix86_frame frame;
5201 HOST_WIDE_INT allocate;
5203 ix86_compute_frame_layout (&frame);
5205 if (cfun->machine->force_align_arg_pointer)
5207 rtx x, y;
5209 /* Grab the argument pointer. */
5210 x = plus_constant (stack_pointer_rtx, 4);
5211 y = cfun->machine->force_align_arg_pointer;
5212 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5213 RTX_FRAME_RELATED_P (insn) = 1;
5215 /* The unwind info consists of two parts: install the fafp as the cfa,
5216 and record the fafp as the "save register" of the stack pointer.
5217 The latter is there so that the unwinder can see where it should
5218 restore the stack pointer across the AND insn that aligns the stack. */
5219 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5220 x = gen_rtx_SET (VOIDmode, y, x);
5221 RTX_FRAME_RELATED_P (x) = 1;
5222 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5223 UNSPEC_REG_SAVE);
5224 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5225 RTX_FRAME_RELATED_P (y) = 1;
5226 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5227 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5228 REG_NOTES (insn) = x;
5230 /* Align the stack. */
5231 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5232 GEN_INT (-16)));
5234 /* And here we cheat like madmen with the unwind info. We force the
5235 cfa register back to sp+4, which is exactly what it was at the
5236 start of the function. Re-pushing the return address results in
5237 the return at the same spot relative to the cfa, and thus is
5238 correct wrt the unwind info. */
5239 x = cfun->machine->force_align_arg_pointer;
5240 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5241 insn = emit_insn (gen_push (x));
5242 RTX_FRAME_RELATED_P (insn) = 1;
5244 x = GEN_INT (4);
5245 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5246 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5247 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5248 REG_NOTES (insn) = x;
5251 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5252 slower on all targets. Also sdb doesn't like it. */
5254 if (frame_pointer_needed)
5256 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5257 RTX_FRAME_RELATED_P (insn) = 1;
5259 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5260 RTX_FRAME_RELATED_P (insn) = 1;
5263 allocate = frame.to_allocate;
5265 if (!frame.save_regs_using_mov)
5266 ix86_emit_save_regs ();
5267 else
5268 allocate += frame.nregs * UNITS_PER_WORD;
5270 /* When using the red zone, we may start saving registers before
5271 allocating the stack frame, saving one cycle of the prologue. */
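/* (The red zone is the 128-byte area below the stack pointer that the
   x86-64 ABI guarantees is not clobbered by signal or interrupt handlers,
   so stores there are safe before the stack pointer is adjusted.) */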
5272 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5273 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5274 : stack_pointer_rtx,
5275 -frame.nregs * UNITS_PER_WORD);
5277 if (allocate == 0)
5278 ;
5279 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5280 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5281 GEN_INT (-allocate), -1);
5282 else
5284 /* Only valid for Win32. */
5285 rtx eax = gen_rtx_REG (SImode, 0);
5286 bool eax_live = ix86_eax_live_at_start_p ();
5287 rtx t;
5289 gcc_assert (!TARGET_64BIT);
5291 if (eax_live)
5293 emit_insn (gen_push (eax));
5294 allocate -= 4;
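/* The push of eax above already moved the stack down by 4 bytes, so ask
   the probe worker to allocate that much less. */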
5297 emit_move_insn (eax, GEN_INT (allocate));
5299 insn = emit_insn (gen_allocate_stack_worker (eax));
5300 RTX_FRAME_RELATED_P (insn) = 1;
5301 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5302 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5303 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5304 t, REG_NOTES (insn));
5306 if (eax_live)
5308 if (frame_pointer_needed)
5309 t = plus_constant (hard_frame_pointer_rtx,
5310 allocate
5311 - frame.to_allocate
5312 - frame.nregs * UNITS_PER_WORD);
5313 else
5314 t = plus_constant (stack_pointer_rtx, allocate);
5315 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5319 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5321 if (!frame_pointer_needed || !frame.to_allocate)
5322 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5323 else
5324 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5325 -frame.nregs * UNITS_PER_WORD);
5328 pic_reg_used = false;
5329 if (pic_offset_table_rtx
5330 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5331 || current_function_profile))
5333 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5335 if (alt_pic_reg_used != INVALID_REGNUM)
5336 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5338 pic_reg_used = true;
5341 if (pic_reg_used)
5343 if (TARGET_64BIT)
5344 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5345 else
5346 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5348 /* Even with accurate pre-reload life analysis, we can wind up
5349 deleting all references to the pic register after reload.
5350 Consider the case where cross-jumping unifies two sides of a branch
5351 controlled by a comparison vs the only read from a global.
5352 In that case, allow the set_got to be deleted, though we're
5353 too late to do anything about the ebx save in the prologue. */
5354 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5357 /* Prevent function calls from being scheduled before the call to mcount.
5358 In the pic_reg_used case, make sure that the got load isn't deleted. */
5359 if (current_function_profile)
5360 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5363 /* Emit code to restore saved registers using MOV insns. First register
5364 is restored from POINTER + OFFSET. */
5365 static void
5366 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5367 int maybe_eh_return)
5369 int regno;
5370 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5372 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5373 if (ix86_save_reg (regno, maybe_eh_return))
5375 /* Ensure that adjust_address won't be forced to produce a pointer
5376 outside the range allowed by the x86-64 instruction set. */
5377 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5379 rtx r11;
5381 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5382 emit_move_insn (r11, GEN_INT (offset));
5383 emit_insn (gen_adddi3 (r11, r11, pointer));
5384 base_address = gen_rtx_MEM (Pmode, r11);
5385 offset = 0;
5387 emit_move_insn (gen_rtx_REG (Pmode, regno),
5388 adjust_address (base_address, Pmode, offset));
5389 offset += UNITS_PER_WORD;
5393 /* Restore function stack, frame, and registers. */
5395 void
5396 ix86_expand_epilogue (int style)
5398 int regno;
5399 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5400 struct ix86_frame frame;
5401 HOST_WIDE_INT offset;
5403 ix86_compute_frame_layout (&frame);
5405 /* Calculate start of saved registers relative to ebp. Special care
5406 must be taken for the normal return case of a function using
5407 eh_return: the eax and edx registers are marked as saved, but not
5408 restored along this path. */
5409 offset = frame.nregs;
5410 if (current_function_calls_eh_return && style != 2)
5411 offset -= 2;
5412 offset *= -UNITS_PER_WORD;
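/* For example (illustrative), with three call-saved registers on a 32-bit
   target this yields -12: the block of register saves occupies the 12 bytes
   just below the saved frame pointer. */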
5414 /* If we're only restoring one register and sp is not valid then
5415 use a move instruction to restore the register, since it's
5416 less work than reloading sp and popping the register.
5418 The default code results in a stack adjustment using an add/lea
5419 instruction, while this code results in a LEAVE instruction (or its
5420 discrete equivalent), so it is profitable in some other cases as
5421 well, especially when there are no registers to restore. We also use
5422 this code when TARGET_USE_LEAVE and there is exactly one register to
5423 pop. This heuristic may need some tuning in the future. */
5424 if ((!sp_valid && frame.nregs <= 1)
5425 || (TARGET_EPILOGUE_USING_MOVE
5426 && cfun->machine->use_fast_prologue_epilogue
5427 && (frame.nregs > 1 || frame.to_allocate))
5428 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5429 || (frame_pointer_needed && TARGET_USE_LEAVE
5430 && cfun->machine->use_fast_prologue_epilogue
5431 && frame.nregs == 1)
5432 || current_function_calls_eh_return)
5434 /* Restore registers. We can use ebp or esp to address the memory
5435 locations. If both are available, default to ebp, since offsets
5436 are known to be small. The only exception is esp pointing directly to
5437 the end of the block of saved registers, where we may simplify the
5438 addressing mode. */
5440 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5441 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5442 frame.to_allocate, style == 2);
5443 else
5444 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5445 offset, style == 2);
5447 /* eh_return epilogues need %ecx added to the stack pointer. */
5448 if (style == 2)
5450 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5452 if (frame_pointer_needed)
5454 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5455 tmp = plus_constant (tmp, UNITS_PER_WORD);
5456 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5458 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5459 emit_move_insn (hard_frame_pointer_rtx, tmp);
5461 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5462 const0_rtx, style);
5464 else
5466 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5467 tmp = plus_constant (tmp, (frame.to_allocate
5468 + frame.nregs * UNITS_PER_WORD));
5469 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5472 else if (!frame_pointer_needed)
5473 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5474 GEN_INT (frame.to_allocate
5475 + frame.nregs * UNITS_PER_WORD),
5476 style);
5477 /* If not an i386, mov & pop is faster than "leave". */
5478 else if (TARGET_USE_LEAVE || optimize_size
5479 || !cfun->machine->use_fast_prologue_epilogue)
5480 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5481 else
5483 pro_epilogue_adjust_stack (stack_pointer_rtx,
5484 hard_frame_pointer_rtx,
5485 const0_rtx, style);
5486 if (TARGET_64BIT)
5487 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5488 else
5489 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5492 else
5494 /* First step is to deallocate the stack frame so that we can
5495 pop the registers. */
5496 if (!sp_valid)
5498 gcc_assert (frame_pointer_needed);
5499 pro_epilogue_adjust_stack (stack_pointer_rtx,
5500 hard_frame_pointer_rtx,
5501 GEN_INT (offset), style);
5503 else if (frame.to_allocate)
5504 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5505 GEN_INT (frame.to_allocate), style);
5507 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5508 if (ix86_save_reg (regno, false))
5510 if (TARGET_64BIT)
5511 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5512 else
5513 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5515 if (frame_pointer_needed)
5517 /* LEAVE results in shorter dependency chains on CPUs that are
5518 able to grok it fast. */
5519 if (TARGET_USE_LEAVE)
5520 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5521 else if (TARGET_64BIT)
5522 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5523 else
5524 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5528 if (cfun->machine->force_align_arg_pointer)
5530 emit_insn (gen_addsi3 (stack_pointer_rtx,
5531 cfun->machine->force_align_arg_pointer,
5532 GEN_INT (-4)));
5535 /* Sibcall epilogues don't want a return instruction. */
5536 if (style == 0)
5537 return;
5539 if (current_function_pops_args && current_function_args_size)
5541 rtx popc = GEN_INT (current_function_pops_args);
5543 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5544 return address, do an explicit add, and jump indirectly to the
5545 caller. */
5547 if (current_function_pops_args >= 65536)
5549 rtx ecx = gen_rtx_REG (SImode, 2);
5551 /* There is no "pascal" calling convention in 64bit ABI. */
5552 gcc_assert (!TARGET_64BIT);
5554 emit_insn (gen_popsi1 (ecx));
5555 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5556 emit_jump_insn (gen_return_indirect_internal (ecx));
5558 else
5559 emit_jump_insn (gen_return_pop_internal (popc));
5561 else
5562 emit_jump_insn (gen_return_internal ());
5565 /* Reset from the function's potential modifications. */
5567 static void
5568 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5569 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5571 if (pic_offset_table_rtx)
5572 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5575 /* Extract the parts of an RTL expression that is a valid memory address
5576 for an instruction. Return 0 if the structure of the address is
5577 grossly off. Return -1 if the address contains ASHIFT, so it is not
5578 strictly valid, but is still used for computing the length of a lea instruction. */
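/* For example (illustrative), the AT&T-syntax address 12(%ebx,%ecx,4)
   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12,
   seg = SEG_DEFAULT. */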
5580 int
5581 ix86_decompose_address (rtx addr, struct ix86_address *out)
5583 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5584 rtx base_reg, index_reg;
5585 HOST_WIDE_INT scale = 1;
5586 rtx scale_rtx = NULL_RTX;
5587 int retval = 1;
5588 enum ix86_address_seg seg = SEG_DEFAULT;
5590 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5591 base = addr;
5592 else if (GET_CODE (addr) == PLUS)
5594 rtx addends[4], op;
5595 int n = 0, i;
5597 op = addr;
5598 do
5599 {
5600 if (n >= 4)
5601 return 0;
5602 addends[n++] = XEXP (op, 1);
5603 op = XEXP (op, 0);
5604 }
5605 while (GET_CODE (op) == PLUS);
5606 if (n >= 4)
5607 return 0;
5608 addends[n] = op;
5610 for (i = n; i >= 0; --i)
5612 op = addends[i];
5613 switch (GET_CODE (op))
5615 case MULT:
5616 if (index)
5617 return 0;
5618 index = XEXP (op, 0);
5619 scale_rtx = XEXP (op, 1);
5620 break;
5622 case UNSPEC:
5623 if (XINT (op, 1) == UNSPEC_TP
5624 && TARGET_TLS_DIRECT_SEG_REFS
5625 && seg == SEG_DEFAULT)
5626 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5627 else
5628 return 0;
5629 break;
5631 case REG:
5632 case SUBREG:
5633 if (!base)
5634 base = op;
5635 else if (!index)
5636 index = op;
5637 else
5638 return 0;
5639 break;
5641 case CONST:
5642 case CONST_INT:
5643 case SYMBOL_REF:
5644 case LABEL_REF:
5645 if (disp)
5646 return 0;
5647 disp = op;
5648 break;
5650 default:
5651 return 0;
5655 else if (GET_CODE (addr) == MULT)
5657 index = XEXP (addr, 0); /* index*scale */
5658 scale_rtx = XEXP (addr, 1);
5660 else if (GET_CODE (addr) == ASHIFT)
5662 rtx tmp;
5664 /* We're called for lea too, which implements ashift on occasion. */
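/* E.g. (ashift (reg) (const_int 2)) is handled as the register scaled by 4. */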
5665 index = XEXP (addr, 0);
5666 tmp = XEXP (addr, 1);
5667 if (GET_CODE (tmp) != CONST_INT)
5668 return 0;
5669 scale = INTVAL (tmp);
5670 if ((unsigned HOST_WIDE_INT) scale > 3)
5671 return 0;
5672 scale = 1 << scale;
5673 retval = -1;
5675 else
5676 disp = addr; /* displacement */
5678 /* Extract the integral value of scale. */
5679 if (scale_rtx)
5681 if (GET_CODE (scale_rtx) != CONST_INT)
5682 return 0;
5683 scale = INTVAL (scale_rtx);
5686 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5687 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5689 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
5690 if (base_reg && index_reg && scale == 1
5691 && (index_reg == arg_pointer_rtx
5692 || index_reg == frame_pointer_rtx
5693 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5695 rtx tmp;
5696 tmp = base, base = index, index = tmp;
5697 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5700 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5701 if ((base_reg == hard_frame_pointer_rtx
5702 || base_reg == frame_pointer_rtx
5703 || base_reg == arg_pointer_rtx) && !disp)
5704 disp = const0_rtx;
5706 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
5707 Avoid this by transforming it to [%esi+0]. */
5708 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5709 && base_reg && !index_reg && !disp
5710 && REG_P (base_reg)
5711 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5712 disp = const0_rtx;
5714 /* Special case: encode reg+reg instead of reg*2. */
5715 if (!base && index && scale && scale == 2)
5716 base = index, base_reg = index_reg, scale = 1;
5718 /* Special case: scaling cannot be encoded without base or displacement. */
5719 if (!base && !disp && index && scale != 1)
5720 disp = const0_rtx;
5722 out->base = base;
5723 out->index = index;
5724 out->disp = disp;
5725 out->scale = scale;
5726 out->seg = seg;
5728 return retval;
5731 /* Return cost of the memory address x.
5732 For i386, it is better to use a complex address than let gcc copy
5733 the address into a reg and make a new pseudo. But not if the address
5734 requires two regs - that would mean more pseudos with longer
5735 lifetimes. */
5736 static int
5737 ix86_address_cost (rtx x)
5739 struct ix86_address parts;
5740 int cost = 1;
5741 int ok = ix86_decompose_address (x, &parts);
5743 gcc_assert (ok);
5745 if (parts.base && GET_CODE (parts.base) == SUBREG)
5746 parts.base = SUBREG_REG (parts.base);
5747 if (parts.index && GET_CODE (parts.index) == SUBREG)
5748 parts.index = SUBREG_REG (parts.index);
5750 /* More complex memory references are better. */
5751 if (parts.disp && parts.disp != const0_rtx)
5752 cost--;
5753 if (parts.seg != SEG_DEFAULT)
5754 cost--;
5756 /* Attempt to minimize the number of registers in the address. */
5757 if ((parts.base
5758 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5759 || (parts.index
5760 && (!REG_P (parts.index)
5761 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5762 cost++;
5764 if (parts.base
5765 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5766 && parts.index
5767 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5768 && parts.base != parts.index)
5769 cost++;
5771 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5772 since its predecode logic can't detect the length of such instructions
5773 and decoding degenerates to the vector decoder. Increase the cost of
5774 such addresses here. The penalty is minimally 2 cycles. It may be
5775 worthwhile to split such addresses or even refuse them altogether.
5777 The following addressing modes are affected:
5778 [base+scale*index]
5779 [scale*index+disp]
5780 [base+index]
5782 The first and last cases may be avoidable by explicitly coding a zero
5783 displacement in the memory address, but I don't have an AMD-K6 machine
5784 handy to check this theory. */
5786 if (TARGET_K6
5787 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5788 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5789 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5790 cost += 10;
5792 return cost;
5795 /* If X is a machine specific address (i.e. a symbol or label being
5796 referenced as a displacement from the GOT implemented using an
5797 UNSPEC), then return the base term. Otherwise return X. */
5799 rtx
5800 ix86_find_base_term (rtx x)
5802 rtx term;
5804 if (TARGET_64BIT)
5806 if (GET_CODE (x) != CONST)
5807 return x;
5808 term = XEXP (x, 0);
5809 if (GET_CODE (term) == PLUS
5810 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5811 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5812 term = XEXP (term, 0);
5813 if (GET_CODE (term) != UNSPEC
5814 || XINT (term, 1) != UNSPEC_GOTPCREL)
5815 return x;
5817 term = XVECEXP (term, 0, 0);
5819 if (GET_CODE (term) != SYMBOL_REF
5820 && GET_CODE (term) != LABEL_REF)
5821 return x;
5823 return term;
5826 term = ix86_delegitimize_address (x);
5828 if (GET_CODE (term) != SYMBOL_REF
5829 && GET_CODE (term) != LABEL_REF)
5830 return x;
5832 return term;
5835 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5836 this is used to form addresses to local data when -fPIC is in
5837 use. */
5839 static bool
5840 darwin_local_data_pic (rtx disp)
5842 if (GET_CODE (disp) == MINUS)
5844 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5845 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5846 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5848 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5849 if (! strcmp (sym_name, "<pic base>"))
5850 return true;
5854 return false;
5857 /* Determine if a given RTX is a valid constant. We already know this
5858 satisfies CONSTANT_P. */
5860 bool
5861 legitimate_constant_p (rtx x)
5863 switch (GET_CODE (x))
5865 case CONST:
5866 x = XEXP (x, 0);
5868 if (GET_CODE (x) == PLUS)
5870 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5871 return false;
5872 x = XEXP (x, 0);
5875 if (TARGET_MACHO && darwin_local_data_pic (x))
5876 return true;
5878 /* Only some unspecs are valid as "constants". */
5879 if (GET_CODE (x) == UNSPEC)
5880 switch (XINT (x, 1))
5882 case UNSPEC_GOTOFF:
5883 return TARGET_64BIT;
5884 case UNSPEC_TPOFF:
5885 case UNSPEC_NTPOFF:
5886 x = XVECEXP (x, 0, 0);
5887 return (GET_CODE (x) == SYMBOL_REF
5888 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5889 case UNSPEC_DTPOFF:
5890 x = XVECEXP (x, 0, 0);
5891 return (GET_CODE (x) == SYMBOL_REF
5892 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5893 default:
5894 return false;
5897 /* We must have drilled down to a symbol. */
5898 if (GET_CODE (x) == LABEL_REF)
5899 return true;
5900 if (GET_CODE (x) != SYMBOL_REF)
5901 return false;
5902 /* FALLTHRU */
5904 case SYMBOL_REF:
5905 /* TLS symbols are never valid. */
5906 if (SYMBOL_REF_TLS_MODEL (x))
5907 return false;
5908 break;
5910 default:
5911 break;
5914 /* Otherwise we handle everything else in the move patterns. */
5915 return true;
5918 /* Determine if it's legal to put X into the constant pool. This
5919 is not possible for the address of thread-local symbols, which
5920 is checked above. */
5922 static bool
5923 ix86_cannot_force_const_mem (rtx x)
5925 return !legitimate_constant_p (x);
5928 /* Determine if a given RTX is a valid constant address. */
5930 bool
5931 constant_address_p (rtx x)
5933 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5936 /* Nonzero if the constant value X is a legitimate general operand
5937 when generating PIC code. It is given that flag_pic is on and
5938 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5940 bool
5941 legitimate_pic_operand_p (rtx x)
5943 rtx inner;
5945 switch (GET_CODE (x))
5947 case CONST:
5948 inner = XEXP (x, 0);
5949 if (GET_CODE (inner) == PLUS
5950 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5951 inner = XEXP (inner, 0);
5953 /* Only some unspecs are valid as "constants". */
5954 if (GET_CODE (inner) == UNSPEC)
5955 switch (XINT (inner, 1))
5957 case UNSPEC_GOTOFF:
5958 return TARGET_64BIT;
5959 case UNSPEC_TPOFF:
5960 x = XVECEXP (inner, 0, 0);
5961 return (GET_CODE (x) == SYMBOL_REF
5962 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5963 default:
5964 return false;
5966 /* FALLTHRU */
5968 case SYMBOL_REF:
5969 case LABEL_REF:
5970 return legitimate_pic_address_disp_p (x);
5972 default:
5973 return true;
5977 /* Determine if a given CONST RTX is a valid memory displacement
5978 in PIC mode. */
5980 int
5981 legitimate_pic_address_disp_p (rtx disp)
5983 bool saw_plus;
5985 /* In 64bit mode we can allow direct addresses of symbols and labels
5986 when they are not dynamic symbols. */
5987 if (TARGET_64BIT)
5989 rtx op0 = disp, op1;
5991 switch (GET_CODE (disp))
5993 case LABEL_REF:
5994 return true;
5996 case CONST:
5997 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5998 break;
5999 op0 = XEXP (XEXP (disp, 0), 0);
6000 op1 = XEXP (XEXP (disp, 0), 1);
6001 if (GET_CODE (op1) != CONST_INT
6002 || INTVAL (op1) >= 16*1024*1024
6003 || INTVAL (op1) < -16*1024*1024)
6004 break;
6005 if (GET_CODE (op0) == LABEL_REF)
6006 return true;
6007 if (GET_CODE (op0) != SYMBOL_REF)
6008 break;
6009 /* FALLTHRU */
6011 case SYMBOL_REF:
6012 /* TLS references should always be enclosed in UNSPEC. */
6013 if (SYMBOL_REF_TLS_MODEL (op0))
6014 return false;
6015 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6016 return true;
6017 break;
6019 default:
6020 break;
6023 if (GET_CODE (disp) != CONST)
6024 return 0;
6025 disp = XEXP (disp, 0);
6027 if (TARGET_64BIT)
6029 /* It is unsafe to allow PLUS expressions here. This limits the allowed
6030 distance of GOT tables. We should not need these anyway. */
6031 if (GET_CODE (disp) != UNSPEC
6032 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6033 && XINT (disp, 1) != UNSPEC_GOTOFF))
6034 return 0;
6036 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6037 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6038 return 0;
6039 return 1;
6042 saw_plus = false;
6043 if (GET_CODE (disp) == PLUS)
6045 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6046 return 0;
6047 disp = XEXP (disp, 0);
6048 saw_plus = true;
6051 if (TARGET_MACHO && darwin_local_data_pic (disp))
6052 return 1;
6054 if (GET_CODE (disp) != UNSPEC)
6055 return 0;
6057 switch (XINT (disp, 1))
6059 case UNSPEC_GOT:
6060 if (saw_plus)
6061 return false;
6062 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6063 case UNSPEC_GOTOFF:
6064 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6065 While the ABI also specifies a 32bit relocation, we don't produce it in
6066 the small PIC model at all. */
6067 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6068 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6069 && !TARGET_64BIT)
6070 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6071 return false;
6072 case UNSPEC_GOTTPOFF:
6073 case UNSPEC_GOTNTPOFF:
6074 case UNSPEC_INDNTPOFF:
6075 if (saw_plus)
6076 return false;
6077 disp = XVECEXP (disp, 0, 0);
6078 return (GET_CODE (disp) == SYMBOL_REF
6079 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6080 case UNSPEC_NTPOFF:
6081 disp = XVECEXP (disp, 0, 0);
6082 return (GET_CODE (disp) == SYMBOL_REF
6083 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6084 case UNSPEC_DTPOFF:
6085 disp = XVECEXP (disp, 0, 0);
6086 return (GET_CODE (disp) == SYMBOL_REF
6087 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6090 return 0;
6093 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6094 memory address for an instruction. The MODE argument is the machine mode
6095 for the MEM expression that wants to use this address.
6097 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6098 convert common non-canonical forms to canonical form so that they will
6099 be recognized. */
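/* A legitimate address has the general form
   base + index*scale + disp (possibly with a segment override),
   where base and index are Pmode registers, scale is 1, 2, 4 or 8 and
   disp is a constant; the checks below validate each part in turn. */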
6101 int
6102 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6104 struct ix86_address parts;
6105 rtx base, index, disp;
6106 HOST_WIDE_INT scale;
6107 const char *reason = NULL;
6108 rtx reason_rtx = NULL_RTX;
6110 if (TARGET_DEBUG_ADDR)
6112 fprintf (stderr,
6113 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6114 GET_MODE_NAME (mode), strict);
6115 debug_rtx (addr);
6118 if (ix86_decompose_address (addr, &parts) <= 0)
6120 reason = "decomposition failed";
6121 goto report_error;
6124 base = parts.base;
6125 index = parts.index;
6126 disp = parts.disp;
6127 scale = parts.scale;
6129 /* Validate base register.
6131 Don't allow SUBREGs that span more than a word here. It can lead to spill
6132 failures when the base is one word out of a two word structure, which is
6133 represented internally as a DImode int. */
6135 if (base)
6137 rtx reg;
6138 reason_rtx = base;
6140 if (REG_P (base))
6141 reg = base;
6142 else if (GET_CODE (base) == SUBREG
6143 && REG_P (SUBREG_REG (base))
6144 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6145 <= UNITS_PER_WORD)
6146 reg = SUBREG_REG (base);
6147 else
6149 reason = "base is not a register";
6150 goto report_error;
6153 if (GET_MODE (base) != Pmode)
6155 reason = "base is not in Pmode";
6156 goto report_error;
6159 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6160 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6162 reason = "base is not valid";
6163 goto report_error;
6167 /* Validate index register.
6169 Don't allow SUBREGs that span more than a word here -- same as above. */
6171 if (index)
6173 rtx reg;
6174 reason_rtx = index;
6176 if (REG_P (index))
6177 reg = index;
6178 else if (GET_CODE (index) == SUBREG
6179 && REG_P (SUBREG_REG (index))
6180 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6181 <= UNITS_PER_WORD)
6182 reg = SUBREG_REG (index);
6183 else
6185 reason = "index is not a register";
6186 goto report_error;
6189 if (GET_MODE (index) != Pmode)
6191 reason = "index is not in Pmode";
6192 goto report_error;
6195 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6196 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6198 reason = "index is not valid";
6199 goto report_error;
6203 /* Validate scale factor. */
6204 if (scale != 1)
6206 reason_rtx = GEN_INT (scale);
6207 if (!index)
6209 reason = "scale without index";
6210 goto report_error;
6213 if (scale != 2 && scale != 4 && scale != 8)
6215 reason = "scale is not a valid multiplier";
6216 goto report_error;
6220 /* Validate displacement. */
6221 if (disp)
6223 reason_rtx = disp;
6225 if (GET_CODE (disp) == CONST
6226 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6227 switch (XINT (XEXP (disp, 0), 1))
6229 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6230 used. While the ABI also specifies 32bit relocations, we don't produce
6231 them at all and use IP-relative addressing instead. */
6232 case UNSPEC_GOT:
6233 case UNSPEC_GOTOFF:
6234 gcc_assert (flag_pic);
6235 if (!TARGET_64BIT)
6236 goto is_legitimate_pic;
6237 reason = "64bit address unspec";
6238 goto report_error;
6240 case UNSPEC_GOTPCREL:
6241 gcc_assert (flag_pic);
6242 goto is_legitimate_pic;
6244 case UNSPEC_GOTTPOFF:
6245 case UNSPEC_GOTNTPOFF:
6246 case UNSPEC_INDNTPOFF:
6247 case UNSPEC_NTPOFF:
6248 case UNSPEC_DTPOFF:
6249 break;
6251 default:
6252 reason = "invalid address unspec";
6253 goto report_error;
6256 else if (flag_pic && (SYMBOLIC_CONST (disp)
6257 #if TARGET_MACHO
6258 && !machopic_operand_p (disp)
6259 #endif
6262 is_legitimate_pic:
6263 if (TARGET_64BIT && (index || base))
6265 /* foo@dtpoff(%rX) is ok. */
6266 if (GET_CODE (disp) != CONST
6267 || GET_CODE (XEXP (disp, 0)) != PLUS
6268 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6269 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6270 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6271 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6273 reason = "non-constant pic memory reference";
6274 goto report_error;
6277 else if (! legitimate_pic_address_disp_p (disp))
6279 reason = "displacement is an invalid pic construct";
6280 goto report_error;
6283 /* This code used to verify that a symbolic pic displacement
6284 includes the pic_offset_table_rtx register.
6286 While this is a good idea, unfortunately these constructs may
6287 be created by the "adds using lea" optimization for incorrect
6288 code like:
6290 int a;
6291 int foo(int i)
6292 {
6293 return *(&a+i);
6294 }
6296 This code is nonsensical, but results in addressing the
6297 GOT table with a pic_offset_table_rtx base. We can't
6298 just refuse it easily, since it gets matched by the
6299 "addsi3" pattern, which later gets split to lea in case
6300 the output register differs from the input. While this
6301 could be handled by a separate addsi pattern for this case
6302 that never results in lea, disabling this test seems to be
6303 the easier and correct fix for the crash. */
6305 else if (GET_CODE (disp) != LABEL_REF
6306 && GET_CODE (disp) != CONST_INT
6307 && (GET_CODE (disp) != CONST
6308 || !legitimate_constant_p (disp))
6309 && (GET_CODE (disp) != SYMBOL_REF
6310 || !legitimate_constant_p (disp)))
6312 reason = "displacement is not constant";
6313 goto report_error;
6315 else if (TARGET_64BIT
6316 && !x86_64_immediate_operand (disp, VOIDmode))
6318 reason = "displacement is out of range";
6319 goto report_error;
6323 /* Everything looks valid. */
6324 if (TARGET_DEBUG_ADDR)
6325 fprintf (stderr, "Success.\n");
6326 return TRUE;
6328 report_error:
6329 if (TARGET_DEBUG_ADDR)
6331 fprintf (stderr, "Error: %s\n", reason);
6332 debug_rtx (reason_rtx);
6334 return FALSE;
6337 /* Return a unique alias set for the GOT. */
6339 static HOST_WIDE_INT
6340 ix86_GOT_alias_set (void)
6342 static HOST_WIDE_INT set = -1;
6343 if (set == -1)
6344 set = new_alias_set ();
6345 return set;
6348 /* Return a legitimate reference for ORIG (an address) using the
6349 register REG. If REG is 0, a new pseudo is generated.
6351 There are two types of references that must be handled:
6353 1. Global data references must load the address from the GOT, via
6354 the PIC reg. An insn is emitted to do this load, and the reg is
6355 returned.
6357 2. Static data references, constant pool addresses, and code labels
6358 compute the address as an offset from the GOT, whose base is in
6359 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6360 differentiate them from global data objects. The returned
6361 address is the PIC reg + an unspec constant.
6363 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6364 reg also appears in the address. */
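/* Illustrative 32-bit results: a global symbol "foo" becomes a load from
   foo@GOT(%ebx), while a local symbol becomes the address %ebx + foo@GOTOFF,
   %ebx being the PIC register that holds the GOT base. */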
6366 static rtx
6367 legitimize_pic_address (rtx orig, rtx reg)
6369 rtx addr = orig;
6370 rtx new = orig;
6371 rtx base;
6373 #if TARGET_MACHO
6374 if (reg == 0)
6375 reg = gen_reg_rtx (Pmode);
6376 /* Use the generic Mach-O PIC machinery. */
6377 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6378 #endif
6380 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6381 new = addr;
6382 else if (TARGET_64BIT
6383 && ix86_cmodel != CM_SMALL_PIC
6384 && local_symbolic_operand (addr, Pmode))
6386 rtx tmpreg;
6387 /* This symbol may be referenced via a displacement from the PIC
6388 base address (@GOTOFF). */
6390 if (reload_in_progress)
6391 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6392 if (GET_CODE (addr) == CONST)
6393 addr = XEXP (addr, 0);
6394 if (GET_CODE (addr) == PLUS)
6396 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6397 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6399 else
6400 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6401 new = gen_rtx_CONST (Pmode, new);
6402 if (!reg)
6403 tmpreg = gen_reg_rtx (Pmode);
6404 else
6405 tmpreg = reg;
6406 emit_move_insn (tmpreg, new);
6408 if (reg != 0)
6410 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6411 tmpreg, 1, OPTAB_DIRECT);
6412 new = reg;
6414 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6416 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6418 /* This symbol may be referenced via a displacement from the PIC
6419 base address (@GOTOFF). */
6421 if (reload_in_progress)
6422 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6423 if (GET_CODE (addr) == CONST)
6424 addr = XEXP (addr, 0);
6425 if (GET_CODE (addr) == PLUS)
6427 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6428 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6430 else
6431 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6432 new = gen_rtx_CONST (Pmode, new);
6433 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6435 if (reg != 0)
6437 emit_move_insn (reg, new);
6438 new = reg;
6441 else if (GET_CODE (addr) == SYMBOL_REF)
6443 if (TARGET_64BIT)
6445 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6446 new = gen_rtx_CONST (Pmode, new);
6447 new = gen_const_mem (Pmode, new);
6448 set_mem_alias_set (new, ix86_GOT_alias_set ());
6450 if (reg == 0)
6451 reg = gen_reg_rtx (Pmode);
6452 /* Use gen_movsi directly, otherwise the address is loaded
6453 into a register for CSE. We don't want to CSE these addresses;
6454 instead we CSE addresses from the GOT table, so skip this. */
6455 emit_insn (gen_movsi (reg, new));
6456 new = reg;
6458 else
6460 /* This symbol must be referenced via a load from the
6461 Global Offset Table (@GOT). */
6463 if (reload_in_progress)
6464 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6465 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6466 new = gen_rtx_CONST (Pmode, new);
6467 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6468 new = gen_const_mem (Pmode, new);
6469 set_mem_alias_set (new, ix86_GOT_alias_set ());
6471 if (reg == 0)
6472 reg = gen_reg_rtx (Pmode);
6473 emit_move_insn (reg, new);
6474 new = reg;
6477 else
6479 if (GET_CODE (addr) == CONST_INT
6480 && !x86_64_immediate_operand (addr, VOIDmode))
6482 if (reg)
6484 emit_move_insn (reg, addr);
6485 new = reg;
6487 else
6488 new = force_reg (Pmode, addr);
6490 else if (GET_CODE (addr) == CONST)
6492 addr = XEXP (addr, 0);
6494 /* We must match stuff we generate before. Assume the only
6495 unspecs that can get here are ours. Not that we could do
6496 anything with them anyway.... */
6497 if (GET_CODE (addr) == UNSPEC
6498 || (GET_CODE (addr) == PLUS
6499 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6500 return orig;
6501 gcc_assert (GET_CODE (addr) == PLUS);
6503 if (GET_CODE (addr) == PLUS)
6505 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6507 /* Check first to see if this is a constant offset from a @GOTOFF
6508 symbol reference. */
6509 if (local_symbolic_operand (op0, Pmode)
6510 && GET_CODE (op1) == CONST_INT)
6512 if (!TARGET_64BIT)
6514 if (reload_in_progress)
6515 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6516 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6517 UNSPEC_GOTOFF);
6518 new = gen_rtx_PLUS (Pmode, new, op1);
6519 new = gen_rtx_CONST (Pmode, new);
6520 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6522 if (reg != 0)
6524 emit_move_insn (reg, new);
6525 new = reg;
6528 else
6530 if (INTVAL (op1) < -16*1024*1024
6531 || INTVAL (op1) >= 16*1024*1024)
6533 if (!x86_64_immediate_operand (op1, Pmode))
6534 op1 = force_reg (Pmode, op1);
6535 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6539 else
6541 base = legitimize_pic_address (XEXP (addr, 0), reg);
6542 new = legitimize_pic_address (XEXP (addr, 1),
6543 base == reg ? NULL_RTX : reg);
6545 if (GET_CODE (new) == CONST_INT)
6546 new = plus_constant (base, INTVAL (new));
6547 else
6549 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6551 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6552 new = XEXP (new, 1);
6554 new = gen_rtx_PLUS (Pmode, base, new);
6559 return new;
6562 /* Load the thread pointer. If TO_REG is true, force it into a register. */
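/* On this target the thread pointer is the %gs (32-bit) or %fs (64-bit)
   segment base; UNSPEC_TP stands for that value (see the UNSPEC_TP handling
   in ix86_decompose_address). */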
6564 static rtx
6565 get_thread_pointer (int to_reg)
6567 rtx tp, reg, insn;
6569 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6570 if (!to_reg)
6571 return tp;
6573 reg = gen_reg_rtx (Pmode);
6574 insn = gen_rtx_SET (VOIDmode, reg, tp);
6575 insn = emit_insn (insn);
6577 return reg;
6580 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6581 false if we expect this to be used for a memory address and true if
6582 we expect to load the address into a register. */
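/* Roughly (see output_pic_addr_const for the relocation spellings):
   global- and local-dynamic go through __tls_get_addr and @DTPOFF offsets,
   initial-exec loads the offset from the GOT (@GOTTPOFF / @GOTNTPOFF /
   @INDNTPOFF), and local-exec adds a link-time constant (@TPOFF / @NTPOFF)
   to the thread pointer. */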
6584 static rtx
6585 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6587 rtx dest, base, off, pic, tp;
6588 int type;
6590 switch (model)
6592 case TLS_MODEL_GLOBAL_DYNAMIC:
6593 dest = gen_reg_rtx (Pmode);
6594 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6596 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6598 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6600 start_sequence ();
6601 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6602 insns = get_insns ();
6603 end_sequence ();
6605 emit_libcall_block (insns, dest, rax, x);
6607 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6608 emit_insn (gen_tls_global_dynamic_64 (dest, x));
6609 else
6610 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6612 if (TARGET_GNU2_TLS)
6614 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
6616 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6618 break;
6620 case TLS_MODEL_LOCAL_DYNAMIC:
6621 base = gen_reg_rtx (Pmode);
6622 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
6624 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
6626 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6628 start_sequence ();
6629 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6630 insns = get_insns ();
6631 end_sequence ();
6633 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6634 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6635 emit_libcall_block (insns, base, rax, note);
6637 else if (TARGET_64BIT && TARGET_GNU2_TLS)
6638 emit_insn (gen_tls_local_dynamic_base_64 (base));
6639 else
6640 emit_insn (gen_tls_local_dynamic_base_32 (base));
6642 if (TARGET_GNU2_TLS)
6644 rtx x = ix86_tls_module_base ();
6646 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
6648 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
6651 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6652 off = gen_rtx_CONST (Pmode, off);
6654 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
6655 break;
6657 case TLS_MODEL_INITIAL_EXEC:
6658 if (TARGET_64BIT)
6660 pic = NULL;
6661 type = UNSPEC_GOTNTPOFF;
6663 else if (flag_pic)
6665 if (reload_in_progress)
6666 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6667 pic = pic_offset_table_rtx;
6668 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6670 else if (!TARGET_ANY_GNU_TLS)
6672 pic = gen_reg_rtx (Pmode);
6673 emit_insn (gen_set_got (pic));
6674 type = UNSPEC_GOTTPOFF;
6676 else
6678 pic = NULL;
6679 type = UNSPEC_INDNTPOFF;
6682 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6683 off = gen_rtx_CONST (Pmode, off);
6684 if (pic)
6685 off = gen_rtx_PLUS (Pmode, pic, off);
6686 off = gen_const_mem (Pmode, off);
6687 set_mem_alias_set (off, ix86_GOT_alias_set ());
6689 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6691 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6692 off = force_reg (Pmode, off);
6693 return gen_rtx_PLUS (Pmode, base, off);
6695 else
6697 base = get_thread_pointer (true);
6698 dest = gen_reg_rtx (Pmode);
6699 emit_insn (gen_subsi3 (dest, base, off));
6701 break;
6703 case TLS_MODEL_LOCAL_EXEC:
6704 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6705 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6706 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6707 off = gen_rtx_CONST (Pmode, off);
6709 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
6711 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6712 return gen_rtx_PLUS (Pmode, base, off);
6714 else
6716 base = get_thread_pointer (true);
6717 dest = gen_reg_rtx (Pmode);
6718 emit_insn (gen_subsi3 (dest, base, off));
6720 break;
6722 default:
6723 gcc_unreachable ();
6726 return dest;
6729 /* Try machine-dependent ways of modifying an illegitimate address
6730 to be legitimate. If we find one, return the new, valid address.
6731 This macro is used in only one place: `memory_address' in explow.c.
6733 OLDX is the address as it was before break_out_memory_refs was called.
6734 In some cases it is useful to look at this to decide what needs to be done.
6736 MODE and WIN are passed so that this macro can use
6737 GO_IF_LEGITIMATE_ADDRESS.
6739 It is always safe for this macro to do nothing. It exists to recognize
6740 opportunities to optimize the output.
6742 For the 80386, we handle X+REG by loading X into a register R and
6743 using R+REG. R will go in a general reg and indexing will be used.
6744 However, if REG is a broken-out memory address or multiplication,
6745 nothing needs to be done because REG can certainly go in a general reg.
6747 When -fpic is used, special handling is needed for symbolic references.
6748 See comments by legitimize_pic_address in i386.c for details. */
6750 rtx
6751 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6753 int changed = 0;
6754 unsigned log;
6756 if (TARGET_DEBUG_ADDR)
6758 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6759 GET_MODE_NAME (mode));
6760 debug_rtx (x);
6763 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6764 if (log)
6765 return legitimize_tls_address (x, log, false);
6766 if (GET_CODE (x) == CONST
6767 && GET_CODE (XEXP (x, 0)) == PLUS
6768 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6769 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6771 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6772 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6775 if (flag_pic && SYMBOLIC_CONST (x))
6776 return legitimize_pic_address (x, 0);
6778 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
6779 if (GET_CODE (x) == ASHIFT
6780 && GET_CODE (XEXP (x, 1)) == CONST_INT
6781 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6783 changed = 1;
6784 log = INTVAL (XEXP (x, 1));
6785 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6786 GEN_INT (1 << log));
6789 if (GET_CODE (x) == PLUS)
6791 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6793 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6794 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6795 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6797 changed = 1;
6798 log = INTVAL (XEXP (XEXP (x, 0), 1));
6799 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6800 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6801 GEN_INT (1 << log));
6804 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6805 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6806 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6808 changed = 1;
6809 log = INTVAL (XEXP (XEXP (x, 1), 1));
6810 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6811 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6812 GEN_INT (1 << log));
6815 /* Put multiply first if it isn't already. */
6816 if (GET_CODE (XEXP (x, 1)) == MULT)
6818 rtx tmp = XEXP (x, 0);
6819 XEXP (x, 0) = XEXP (x, 1);
6820 XEXP (x, 1) = tmp;
6821 changed = 1;
6824 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6825 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6826 created by virtual register instantiation, register elimination, and
6827 similar optimizations. */
6828 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6830 changed = 1;
6831 x = gen_rtx_PLUS (Pmode,
6832 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6833 XEXP (XEXP (x, 1), 0)),
6834 XEXP (XEXP (x, 1), 1));
6837 /* Canonicalize
6838 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6839 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6840 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6841 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6842 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6843 && CONSTANT_P (XEXP (x, 1)))
6845 rtx constant;
6846 rtx other = NULL_RTX;
6848 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6850 constant = XEXP (x, 1);
6851 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6853 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6855 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6856 other = XEXP (x, 1);
6858 else
6859 constant = 0;
6861 if (constant)
6863 changed = 1;
6864 x = gen_rtx_PLUS (Pmode,
6865 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6866 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6867 plus_constant (other, INTVAL (constant)));
6871 if (changed && legitimate_address_p (mode, x, FALSE))
6872 return x;
6874 if (GET_CODE (XEXP (x, 0)) == MULT)
6876 changed = 1;
6877 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6880 if (GET_CODE (XEXP (x, 1)) == MULT)
6882 changed = 1;
6883 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6886 if (changed
6887 && GET_CODE (XEXP (x, 1)) == REG
6888 && GET_CODE (XEXP (x, 0)) == REG)
6889 return x;
6891 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6893 changed = 1;
6894 x = legitimize_pic_address (x, 0);
6897 if (changed && legitimate_address_p (mode, x, FALSE))
6898 return x;
6900 if (GET_CODE (XEXP (x, 0)) == REG)
6902 rtx temp = gen_reg_rtx (Pmode);
6903 rtx val = force_operand (XEXP (x, 1), temp);
6904 if (val != temp)
6905 emit_move_insn (temp, val);
6907 XEXP (x, 1) = temp;
6908 return x;
6911 else if (GET_CODE (XEXP (x, 1)) == REG)
6913 rtx temp = gen_reg_rtx (Pmode);
6914 rtx val = force_operand (XEXP (x, 0), temp);
6915 if (val != temp)
6916 emit_move_insn (temp, val);
6918 XEXP (x, 0) = temp;
6919 return x;
6923 return x;
6926 /* Print an integer constant expression in assembler syntax. Addition
6927 and subtraction are the only arithmetic that may appear in these
6928 expressions. FILE is the stdio stream to write to, X is the rtx, and
6929 CODE is the operand print code from the output string. */
6931 static void
6932 output_pic_addr_const (FILE *file, rtx x, int code)
6934 char buf[256];
6936 switch (GET_CODE (x))
6938 case PC:
6939 gcc_assert (flag_pic);
6940 putc ('.', file);
6941 break;
6943 case SYMBOL_REF:
6944 output_addr_const (file, x);
6945 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6946 fputs ("@PLT", file);
6947 break;
6949 case LABEL_REF:
6950 x = XEXP (x, 0);
6951 /* FALLTHRU */
6952 case CODE_LABEL:
6953 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6954 assemble_name (asm_out_file, buf);
6955 break;
6957 case CONST_INT:
6958 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6959 break;
6961 case CONST:
6962 /* This used to output parentheses around the expression,
6963 but that does not work on the 386 (either ATT or BSD assembler). */
6964 output_pic_addr_const (file, XEXP (x, 0), code);
6965 break;
6967 case CONST_DOUBLE:
6968 if (GET_MODE (x) == VOIDmode)
6970 /* We can use %d if the number is <32 bits and positive. */
6971 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6972 fprintf (file, "0x%lx%08lx",
6973 (unsigned long) CONST_DOUBLE_HIGH (x),
6974 (unsigned long) CONST_DOUBLE_LOW (x));
6975 else
6976 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6978 else
6979 /* We can't handle floating point constants;
6980 PRINT_OPERAND must handle them. */
6981 output_operand_lossage ("floating constant misused");
6982 break;
6984 case PLUS:
6985 /* Some assemblers need integer constants to appear first. */
6986 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6988 output_pic_addr_const (file, XEXP (x, 0), code);
6989 putc ('+', file);
6990 output_pic_addr_const (file, XEXP (x, 1), code);
6992 else
6994 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6995 output_pic_addr_const (file, XEXP (x, 1), code);
6996 putc ('+', file);
6997 output_pic_addr_const (file, XEXP (x, 0), code);
6999 break;
7001 case MINUS:
7002 if (!TARGET_MACHO)
7003 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7004 output_pic_addr_const (file, XEXP (x, 0), code);
7005 putc ('-', file);
7006 output_pic_addr_const (file, XEXP (x, 1), code);
7007 if (!TARGET_MACHO)
7008 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7009 break;
7011 case UNSPEC:
7012 gcc_assert (XVECLEN (x, 0) == 1);
7013 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7014 switch (XINT (x, 1))
7016 case UNSPEC_GOT:
7017 fputs ("@GOT", file);
7018 break;
7019 case UNSPEC_GOTOFF:
7020 fputs ("@GOTOFF", file);
7021 break;
7022 case UNSPEC_GOTPCREL:
7023 fputs ("@GOTPCREL(%rip)", file);
7024 break;
7025 case UNSPEC_GOTTPOFF:
7026 /* FIXME: This might be @TPOFF in Sun ld too. */
7027 fputs ("@GOTTPOFF", file);
7028 break;
7029 case UNSPEC_TPOFF:
7030 fputs ("@TPOFF", file);
7031 break;
7032 case UNSPEC_NTPOFF:
7033 if (TARGET_64BIT)
7034 fputs ("@TPOFF", file);
7035 else
7036 fputs ("@NTPOFF", file);
7037 break;
7038 case UNSPEC_DTPOFF:
7039 fputs ("@DTPOFF", file);
7040 break;
7041 case UNSPEC_GOTNTPOFF:
7042 if (TARGET_64BIT)
7043 fputs ("@GOTTPOFF(%rip)", file);
7044 else
7045 fputs ("@GOTNTPOFF", file);
7046 break;
7047 case UNSPEC_INDNTPOFF:
7048 fputs ("@INDNTPOFF", file);
7049 break;
7050 default:
7051 output_operand_lossage ("invalid UNSPEC as operand");
7052 break;
7054 break;
7056 default:
7057 output_operand_lossage ("invalid expression as operand");
7061 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7062 We need to emit DTP-relative relocations. */
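/* I.e. emit something like ".long foo@DTPOFF" for SIZE 4, appending ", 0"
   to pad the value out to 8 bytes for SIZE 8 (illustrative). */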
7064 static void
7065 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7067 fputs (ASM_LONG, file);
7068 output_addr_const (file, x);
7069 fputs ("@DTPOFF", file);
7070 switch (size)
7072 case 4:
7073 break;
7074 case 8:
7075 fputs (", 0", file);
7076 break;
7077 default:
7078 gcc_unreachable ();
7082 /* In the name of slightly smaller debug output, and to cater to
7083 general assembler lossage, recognize PIC+GOTOFF and turn it back
7084 into a direct symbol reference. */
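/* E.g. an address of the form %ebx + foo@GOTOFF, with %ebx holding the PIC
   base, is collapsed back to the plain SYMBOL_REF "foo" (illustrative). */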
7086 static rtx
7087 ix86_delegitimize_address (rtx orig_x)
7089 rtx x = orig_x, y;
7091 if (GET_CODE (x) == MEM)
7092 x = XEXP (x, 0);
7094 if (TARGET_64BIT)
7096 if (GET_CODE (x) != CONST
7097 || GET_CODE (XEXP (x, 0)) != UNSPEC
7098 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7099 || GET_CODE (orig_x) != MEM)
7100 return orig_x;
7101 return XVECEXP (XEXP (x, 0), 0, 0);
7104 if (GET_CODE (x) != PLUS
7105 || GET_CODE (XEXP (x, 1)) != CONST)
7106 return orig_x;
7108 if (GET_CODE (XEXP (x, 0)) == REG
7109 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7110 /* %ebx + GOT/GOTOFF */
7111 y = NULL;
7112 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7114 /* %ebx + %reg * scale + GOT/GOTOFF */
7115 y = XEXP (x, 0);
7116 if (GET_CODE (XEXP (y, 0)) == REG
7117 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
7118 y = XEXP (y, 1);
7119 else if (GET_CODE (XEXP (y, 1)) == REG
7120 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7121 y = XEXP (y, 0);
7122 else
7123 return orig_x;
7124 if (GET_CODE (y) != REG
7125 && GET_CODE (y) != MULT
7126 && GET_CODE (y) != ASHIFT)
7127 return orig_x;
7129 else
7130 return orig_x;
7132 x = XEXP (XEXP (x, 1), 0);
7133 if (GET_CODE (x) == UNSPEC
7134 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7135 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7137 if (y)
7138 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7139 return XVECEXP (x, 0, 0);
7142 if (GET_CODE (x) == PLUS
7143 && GET_CODE (XEXP (x, 0)) == UNSPEC
7144 && GET_CODE (XEXP (x, 1)) == CONST_INT
7145 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7146 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7147 && GET_CODE (orig_x) != MEM)))
7149 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7150 if (y)
7151 return gen_rtx_PLUS (Pmode, y, x);
7152 return x;
7155 if (TARGET_MACHO && darwin_local_data_pic (x)
7156 && GET_CODE (orig_x) != MEM)
7158 x = XEXP (x, 0);
7159 if (y)
7160 return gen_rtx_PLUS (Pmode, y, x);
7161 return x;
7163 return orig_x;
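/* Print to FILE the one- or two-letter condition suffix (e.g. "e", "ne",
   "g", "b") for comparison CODE in mode MODE. REVERSE inverts the
   condition; FP selects the fcmov-friendly spellings for some unsigned
   conditions (see the GTU/GEU cases below). */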
7166 static void
7167 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7168 int fp, FILE *file)
7170 const char *suffix;
7172 if (mode == CCFPmode || mode == CCFPUmode)
7174 enum rtx_code second_code, bypass_code;
7175 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7176 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7177 code = ix86_fp_compare_code_to_integer (code);
7178 mode = CCmode;
7180 if (reverse)
7181 code = reverse_condition (code);
7183 switch (code)
7185 case EQ:
7186 suffix = "e";
7187 break;
7188 case NE:
7189 suffix = "ne";
7190 break;
7191 case GT:
7192 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7193 suffix = "g";
7194 break;
7195 case GTU:
7196 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7197 Those same assemblers have the same but opposite lossage on cmov. */
7198 gcc_assert (mode == CCmode);
7199 suffix = fp ? "nbe" : "a";
7200 break;
7201 case LT:
7202 switch (mode)
7204 case CCNOmode:
7205 case CCGOCmode:
7206 suffix = "s";
7207 break;
7209 case CCmode:
7210 case CCGCmode:
7211 suffix = "l";
7212 break;
7214 default:
7215 gcc_unreachable ();
7217 break;
7218 case LTU:
7219 gcc_assert (mode == CCmode);
7220 suffix = "b";
7221 break;
7222 case GE:
7223 switch (mode)
7225 case CCNOmode:
7226 case CCGOCmode:
7227 suffix = "ns";
7228 break;
7230 case CCmode:
7231 case CCGCmode:
7232 suffix = "ge";
7233 break;
7235 default:
7236 gcc_unreachable ();
7238 break;
7239 case GEU:
7240 /* ??? As above. */
7241 gcc_assert (mode == CCmode);
7242 suffix = fp ? "nb" : "ae";
7243 break;
7244 case LE:
7245 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7246 suffix = "le";
7247 break;
7248 case LEU:
7249 gcc_assert (mode == CCmode);
7250 suffix = "be";
7251 break;
7252 case UNORDERED:
7253 suffix = fp ? "u" : "p";
7254 break;
7255 case ORDERED:
7256 suffix = fp ? "nu" : "np";
7257 break;
7258 default:
7259 gcc_unreachable ();
7261 fputs (suffix, file);
7264 /* Print the name of register X to FILE based on its machine mode and number.
7265 If CODE is 'w', pretend the mode is HImode.
7266 If CODE is 'b', pretend the mode is QImode.
7267 If CODE is 'k', pretend the mode is SImode.
7268 If CODE is 'q', pretend the mode is DImode.
7269 If CODE is 'h', pretend the reg is the 'high' byte register.
7270 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7272 void
7273 print_reg (rtx x, int code, FILE *file)
7275 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7276 && REGNO (x) != FRAME_POINTER_REGNUM
7277 && REGNO (x) != FLAGS_REG
7278 && REGNO (x) != FPSR_REG);
7280 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7281 putc ('%', file);
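/* From here on CODE is reused as the operand size in bytes: 1, 2, 4, 8
   (or 12/16 from GET_MODE_SIZE), with 0 meaning the high QImode half
   and 3 meaning an x87 stack register name.  */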
7283 if (code == 'w' || MMX_REG_P (x))
7284 code = 2;
7285 else if (code == 'b')
7286 code = 1;
7287 else if (code == 'k')
7288 code = 4;
7289 else if (code == 'q')
7290 code = 8;
7291 else if (code == 'y')
7292 code = 3;
7293 else if (code == 'h')
7294 code = 0;
7295 else
7296 code = GET_MODE_SIZE (GET_MODE (x));
7298 /* Irritatingly, AMD extended registers use a different naming convention
7299 from the normal registers. */
7300 if (REX_INT_REG_P (x))
7302 gcc_assert (TARGET_64BIT);
7303 switch (code)
7305 case 0:
7306 error ("extended registers have no high halves");
7307 break;
7308 case 1:
7309 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7310 break;
7311 case 2:
7312 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7313 break;
7314 case 4:
7315 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7316 break;
7317 case 8:
7318 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7319 break;
7320 default:
7321 error ("unsupported operand size for extended register");
7322 break;
7324 return;
7326 switch (code)
7328 case 3:
7329 if (STACK_TOP_P (x))
7331 fputs ("st(0)", file);
7332 break;
7334 /* FALLTHRU */
7335 case 8:
7336 case 4:
7337 case 12:
7338 if (! ANY_FP_REG_P (x))
7339 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7340 /* FALLTHRU */
7341 case 16:
7342 case 2:
7343 normal:
7344 fputs (hi_reg_name[REGNO (x)], file);
7345 break;
7346 case 1:
7347 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7348 goto normal;
7349 fputs (qi_reg_name[REGNO (x)], file);
7350 break;
7351 case 0:
7352 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7353 goto normal;
7354 fputs (qi_high_reg_name[REGNO (x)], file);
7355 break;
7356 default:
7357 gcc_unreachable ();
7361 /* Locate some local-dynamic symbol still in use by this function
7362 so that we can print its name in some tls_local_dynamic_base
7363 pattern. */
7365 static const char *
7366 get_some_local_dynamic_name (void)
7368 rtx insn;
7370 if (cfun->machine->some_ld_name)
7371 return cfun->machine->some_ld_name;
7373 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7374 if (INSN_P (insn)
7375 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7376 return cfun->machine->some_ld_name;
7378 gcc_unreachable ();
7381 static int
7382 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7384 rtx x = *px;
7386 if (GET_CODE (x) == SYMBOL_REF
7387 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7389 cfun->machine->some_ld_name = XSTR (x, 0);
7390 return 1;
7393 return 0;
7396 /* Meaning of CODE:
7397 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7398 C -- print opcode suffix for set/cmov insn.
7399 c -- like C, but print reversed condition
7400 F,f -- likewise, but for floating-point.
7401 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7402 otherwise nothing
7403 R -- print the prefix for register names.
7404 z -- print the opcode suffix for the size of the current operand.
7405 * -- print a star (in certain assembler syntax)
7406 A -- print an absolute memory reference.
7407 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7408 s -- print a shift double count, followed by the assembler's argument
7409 delimiter.
7410 b -- print the QImode name of the register for the indicated operand.
7411 %b0 would print %al if operands[0] is reg 0.
7412 w -- likewise, print the HImode name of the register.
7413 k -- likewise, print the SImode name of the register.
7414 q -- likewise, print the DImode name of the register.
7415 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7416 y -- print "st(0)" instead of "st" as a register.
7417 D -- print condition for SSE cmp instruction.
7418 P -- if PIC, print an @PLT suffix.
7419 X -- don't print any sort of PIC '@' suffix for a symbol.
7420 & -- print some in-use local-dynamic symbol name.
7421 H -- print a memory address offset by 8; used for sse high-parts
7424 void
7425 print_operand (FILE *file, rtx x, int code)
7427 if (code)
7429 switch (code)
7431 case '*':
7432 if (ASSEMBLER_DIALECT == ASM_ATT)
7433 putc ('*', file);
7434 return;
7436 case '&':
7437 assemble_name (file, get_some_local_dynamic_name ());
7438 return;
7440 case 'A':
7441 switch (ASSEMBLER_DIALECT)
7443 case ASM_ATT:
7444 putc ('*', file);
7445 break;
7447 case ASM_INTEL:
7448 /* Intel syntax. For absolute addresses, registers should not
7449 be surrounded by brackets. */
7450 if (GET_CODE (x) != REG)
7452 putc ('[', file);
7453 PRINT_OPERAND (file, x, 0);
7454 putc (']', file);
7455 return;
7457 break;
7459 default:
7460 gcc_unreachable ();
7463 PRINT_OPERAND (file, x, 0);
7464 return;
7467 case 'L':
7468 if (ASSEMBLER_DIALECT == ASM_ATT)
7469 putc ('l', file);
7470 return;
7472 case 'W':
7473 if (ASSEMBLER_DIALECT == ASM_ATT)
7474 putc ('w', file);
7475 return;
7477 case 'B':
7478 if (ASSEMBLER_DIALECT == ASM_ATT)
7479 putc ('b', file);
7480 return;
7482 case 'Q':
7483 if (ASSEMBLER_DIALECT == ASM_ATT)
7484 putc ('l', file);
7485 return;
7487 case 'S':
7488 if (ASSEMBLER_DIALECT == ASM_ATT)
7489 putc ('s', file);
7490 return;
7492 case 'T':
7493 if (ASSEMBLER_DIALECT == ASM_ATT)
7494 putc ('t', file);
7495 return;
7497 case 'z':
7498 /* 387 opcodes don't get size suffixes if the operands are
7499 registers. */
7500 if (STACK_REG_P (x))
7501 return;
7503 /* Likewise if using Intel opcodes. */
7504 if (ASSEMBLER_DIALECT == ASM_INTEL)
7505 return;
7507 /* Derive the opcode suffix from the size of the operand. */
7508 switch (GET_MODE_SIZE (GET_MODE (x)))
7510 case 2:
7511 #ifdef HAVE_GAS_FILDS_FISTS
7512 putc ('s', file);
7513 #endif
7514 return;
7516 case 4:
7517 if (GET_MODE (x) == SFmode)
7519 putc ('s', file);
7520 return;
7522 else
7523 putc ('l', file);
7524 return;
7526 case 12:
7527 case 16:
7528 putc ('t', file);
7529 return;
7531 case 8:
7532 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7534 #ifdef GAS_MNEMONICS
7535 putc ('q', file);
7536 #else
7537 putc ('l', file);
7538 putc ('l', file);
7539 #endif
7541 else
7542 putc ('l', file);
7543 return;
7545 default:
7546 gcc_unreachable ();
7549 case 'b':
7550 case 'w':
7551 case 'k':
7552 case 'q':
7553 case 'h':
7554 case 'y':
7555 case 'X':
7556 case 'P':
7557 break;
7559 case 's':
7560 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7562 PRINT_OPERAND (file, x, 0);
7563 putc (',', file);
7565 return;
7567 case 'D':
7568 /* Little bit of braindamage here. The SSE compare instructions
7569 use completely different names for the comparisons than the
7570 fp conditional moves do. */
7571 switch (GET_CODE (x))
7573 case EQ:
7574 case UNEQ:
7575 fputs ("eq", file);
7576 break;
7577 case LT:
7578 case UNLT:
7579 fputs ("lt", file);
7580 break;
7581 case LE:
7582 case UNLE:
7583 fputs ("le", file);
7584 break;
7585 case UNORDERED:
7586 fputs ("unord", file);
7587 break;
7588 case NE:
7589 case LTGT:
7590 fputs ("neq", file);
7591 break;
7592 case UNGE:
7593 case GE:
7594 fputs ("nlt", file);
7595 break;
7596 case UNGT:
7597 case GT:
7598 fputs ("nle", file);
7599 break;
7600 case ORDERED:
7601 fputs ("ord", file);
7602 break;
7603 default:
7604 gcc_unreachable ();
7606 return;
7607 case 'O':
7608 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7609 if (ASSEMBLER_DIALECT == ASM_ATT)
7611 switch (GET_MODE (x))
7613 case HImode: putc ('w', file); break;
7614 case SImode:
7615 case SFmode: putc ('l', file); break;
7616 case DImode:
7617 case DFmode: putc ('q', file); break;
7618 default: gcc_unreachable ();
7620 putc ('.', file);
7622 #endif
7623 return;
7624 case 'C':
7625 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7626 return;
7627 case 'F':
7628 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7629 if (ASSEMBLER_DIALECT == ASM_ATT)
7630 putc ('.', file);
7631 #endif
7632 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7633 return;
7635 /* Like above, but reverse condition */
7636 case 'c':
7637 /* Check to see if argument to %c is really a constant
7638 and not a condition code which needs to be reversed. */
7639 if (!COMPARISON_P (x))
7641 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7642 return;
7644 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7645 return;
7646 case 'f':
7647 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7648 if (ASSEMBLER_DIALECT == ASM_ATT)
7649 putc ('.', file);
7650 #endif
7651 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7652 return;
7654 case 'H':
7655 /* It doesn't actually matter what mode we use here, as we're
7656 only going to use this for printing. */
7657 x = adjust_address_nv (x, DImode, 8);
7658 break;
7660 case '+':
7662 rtx x;
7664 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7665 return;
7667 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7668 if (x)
7670 int pred_val = INTVAL (XEXP (x, 0));
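/* Only emit a hint when the predicted probability is clearly away
   from 50%, i.e. outside the 45%-55% band checked below.  */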
7672 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7673 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7675 int taken = pred_val > REG_BR_PROB_BASE / 2;
7676 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7678 /* Emit hints only when the default branch prediction
7679 heuristics would fail. */
7680 if (taken != cputaken)
7682 /* We use 3e (DS) prefix for taken branches and
7683 2e (CS) prefix for not taken branches. */
7684 if (taken)
7685 fputs ("ds ; ", file);
7686 else
7687 fputs ("cs ; ", file);
7691 return;
7693 default:
7694 output_operand_lossage ("invalid operand code '%c'", code);
7698 if (GET_CODE (x) == REG)
7699 print_reg (x, code, file);
7701 else if (GET_CODE (x) == MEM)
7703 /* No `byte ptr' prefix for call instructions. */
7704 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7706 const char * size;
7707 switch (GET_MODE_SIZE (GET_MODE (x)))
7709 case 1: size = "BYTE"; break;
7710 case 2: size = "WORD"; break;
7711 case 4: size = "DWORD"; break;
7712 case 8: size = "QWORD"; break;
7713 case 12: size = "XWORD"; break;
7714 case 16: size = "XMMWORD"; break;
7715 default:
7716 gcc_unreachable ();
7719 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7720 if (code == 'b')
7721 size = "BYTE";
7722 else if (code == 'w')
7723 size = "WORD";
7724 else if (code == 'k')
7725 size = "DWORD";
7727 fputs (size, file);
7728 fputs (" PTR ", file);
7731 x = XEXP (x, 0);
7732 /* Avoid (%rip) for call operands. */
7733 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7734 && GET_CODE (x) != CONST_INT)
7735 output_addr_const (file, x);
7736 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7737 output_operand_lossage ("invalid constraints for operand");
7738 else
7739 output_address (x);
7742 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7744 REAL_VALUE_TYPE r;
7745 long l;
7747 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7748 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7750 if (ASSEMBLER_DIALECT == ASM_ATT)
7751 putc ('$', file);
7752 fprintf (file, "0x%08lx", l);
7755 /* These float cases don't actually occur as immediate operands. */
7756 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7758 char dstr[30];
7760 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7761 fprintf (file, "%s", dstr);
7764 else if (GET_CODE (x) == CONST_DOUBLE
7765 && GET_MODE (x) == XFmode)
7767 char dstr[30];
7769 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7770 fprintf (file, "%s", dstr);
7773 else
7775 /* We have patterns that allow zero sets of memory, for instance.
7776 In 64-bit mode, we should probably support all 8-byte vectors,
7777 since we can in fact encode that into an immediate. */
7778 if (GET_CODE (x) == CONST_VECTOR)
7780 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7781 x = const0_rtx;
7784 if (code != 'P')
7786 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7788 if (ASSEMBLER_DIALECT == ASM_ATT)
7789 putc ('$', file);
7791 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7792 || GET_CODE (x) == LABEL_REF)
7794 if (ASSEMBLER_DIALECT == ASM_ATT)
7795 putc ('$', file);
7796 else
7797 fputs ("OFFSET FLAT:", file);
7800 if (GET_CODE (x) == CONST_INT)
7801 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7802 else if (flag_pic)
7803 output_pic_addr_const (file, x, code);
7804 else
7805 output_addr_const (file, x);
7809 /* Print a memory operand whose address is ADDR. */
7811 void
7812 print_operand_address (FILE *file, rtx addr)
7814 struct ix86_address parts;
7815 rtx base, index, disp;
7816 int scale;
7817 int ok = ix86_decompose_address (addr, &parts);
7819 gcc_assert (ok);
7821 base = parts.base;
7822 index = parts.index;
7823 disp = parts.disp;
7824 scale = parts.scale;
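/* Print an explicit segment override prefix when the address uses
   the %fs or %gs segment.  */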
7826 switch (parts.seg)
7828 case SEG_DEFAULT:
7829 break;
7830 case SEG_FS:
7831 case SEG_GS:
7832 if (USER_LABEL_PREFIX[0] == 0)
7833 putc ('%', file);
7834 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7835 break;
7836 default:
7837 gcc_unreachable ();
7840 if (!base && !index)
7842 /* A displacement-only address requires special attention. */
7844 if (GET_CODE (disp) == CONST_INT)
7846 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7848 if (USER_LABEL_PREFIX[0] == 0)
7849 putc ('%', file);
7850 fputs ("ds:", file);
7852 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7854 else if (flag_pic)
7855 output_pic_addr_const (file, disp, 0);
7856 else
7857 output_addr_const (file, disp);
7859 /* Use the one byte shorter RIP relative addressing for 64-bit mode. */
7860 if (TARGET_64BIT)
7862 if (GET_CODE (disp) == CONST
7863 && GET_CODE (XEXP (disp, 0)) == PLUS
7864 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7865 disp = XEXP (XEXP (disp, 0), 0);
7866 if (GET_CODE (disp) == LABEL_REF
7867 || (GET_CODE (disp) == SYMBOL_REF
7868 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7869 fputs ("(%rip)", file);
7872 else
7874 if (ASSEMBLER_DIALECT == ASM_ATT)
7876 if (disp)
7878 if (flag_pic)
7879 output_pic_addr_const (file, disp, 0);
7880 else if (GET_CODE (disp) == LABEL_REF)
7881 output_asm_label (disp);
7882 else
7883 output_addr_const (file, disp);
7886 putc ('(', file);
7887 if (base)
7888 print_reg (base, 0, file);
7889 if (index)
7891 putc (',', file);
7892 print_reg (index, 0, file);
7893 if (scale != 1)
7894 fprintf (file, ",%d", scale);
7896 putc (')', file);
7898 else
7900 rtx offset = NULL_RTX;
7902 if (disp)
7904 /* Pull out the offset of a symbol; print any symbol itself. */
7905 if (GET_CODE (disp) == CONST
7906 && GET_CODE (XEXP (disp, 0)) == PLUS
7907 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7909 offset = XEXP (XEXP (disp, 0), 1);
7910 disp = gen_rtx_CONST (VOIDmode,
7911 XEXP (XEXP (disp, 0), 0));
7914 if (flag_pic)
7915 output_pic_addr_const (file, disp, 0);
7916 else if (GET_CODE (disp) == LABEL_REF)
7917 output_asm_label (disp);
7918 else if (GET_CODE (disp) == CONST_INT)
7919 offset = disp;
7920 else
7921 output_addr_const (file, disp);
7924 putc ('[', file);
7925 if (base)
7927 print_reg (base, 0, file);
7928 if (offset)
7930 if (INTVAL (offset) >= 0)
7931 putc ('+', file);
7932 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7935 else if (offset)
7936 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7937 else
7938 putc ('0', file);
7940 if (index)
7942 putc ('+', file);
7943 print_reg (index, 0, file);
7944 if (scale != 1)
7945 fprintf (file, "*%d", scale);
7947 putc (']', file);
7952 bool
7953 output_addr_const_extra (FILE *file, rtx x)
7955 rtx op;
7957 if (GET_CODE (x) != UNSPEC)
7958 return false;
7960 op = XVECEXP (x, 0, 0);
7961 switch (XINT (x, 1))
7963 case UNSPEC_GOTTPOFF:
7964 output_addr_const (file, op);
7965 /* FIXME: This might be @TPOFF in Sun ld. */
7966 fputs ("@GOTTPOFF", file);
7967 break;
7968 case UNSPEC_TPOFF:
7969 output_addr_const (file, op);
7970 fputs ("@TPOFF", file);
7971 break;
7972 case UNSPEC_NTPOFF:
7973 output_addr_const (file, op);
7974 if (TARGET_64BIT)
7975 fputs ("@TPOFF", file);
7976 else
7977 fputs ("@NTPOFF", file);
7978 break;
7979 case UNSPEC_DTPOFF:
7980 output_addr_const (file, op);
7981 fputs ("@DTPOFF", file);
7982 break;
7983 case UNSPEC_GOTNTPOFF:
7984 output_addr_const (file, op);
7985 if (TARGET_64BIT)
7986 fputs ("@GOTTPOFF(%rip)", file);
7987 else
7988 fputs ("@GOTNTPOFF", file);
7989 break;
7990 case UNSPEC_INDNTPOFF:
7991 output_addr_const (file, op);
7992 fputs ("@INDNTPOFF", file);
7993 break;
7995 default:
7996 return false;
7999 return true;
8002 /* Split one or more DImode RTL references into pairs of SImode
8003 references. The RTL can be REG, offsettable MEM, integer constant, or
8004 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8005 split and "num" is its length. lo_half and hi_half are output arrays
8006 that parallel "operands". */
8008 void
8009 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8011 while (num--)
8013 rtx op = operands[num];
8015 /* simplify_subreg refuses to split volatile memory addresses,
8016 but we still have to handle them. */
8017 if (GET_CODE (op) == MEM)
8019 lo_half[num] = adjust_address (op, SImode, 0);
8020 hi_half[num] = adjust_address (op, SImode, 4);
8022 else
8024 lo_half[num] = simplify_gen_subreg (SImode, op,
8025 GET_MODE (op) == VOIDmode
8026 ? DImode : GET_MODE (op), 0);
8027 hi_half[num] = simplify_gen_subreg (SImode, op,
8028 GET_MODE (op) == VOIDmode
8029 ? DImode : GET_MODE (op), 4);
8033 /* Split one or more TImode RTL references into pairs of DImode
8034 references. The RTL can be REG, offsettable MEM, integer constant, or
8035 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8036 split and "num" is its length. lo_half and hi_half are output arrays
8037 that parallel "operands". */
8039 void
8040 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8042 while (num--)
8044 rtx op = operands[num];
8046 /* simplify_subreg refuses to split volatile memory addresses, but we
8047 still have to handle them. */
8048 if (GET_CODE (op) == MEM)
8050 lo_half[num] = adjust_address (op, DImode, 0);
8051 hi_half[num] = adjust_address (op, DImode, 8);
8053 else
8055 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8056 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8061 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8062 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8063 is the expression of the binary operation. The output may either be
8064 emitted here, or returned to the caller, like all output_* functions.
8066 There is no guarantee that the operands are the same mode, as they
8067 might be within FLOAT or FLOAT_EXTEND expressions. */
8069 #ifndef SYSV386_COMPAT
8070 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8071 wants to fix the assemblers because that causes incompatibility
8072 with gcc. No-one wants to fix gcc because that causes
8073 incompatibility with assemblers... You can use the option of
8074 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8075 #define SYSV386_COMPAT 1
8076 #endif
8078 const char *
8079 output_387_binary_op (rtx insn, rtx *operands)
8081 static char buf[30];
8082 const char *p;
8083 const char *ssep;
8084 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8086 #ifdef ENABLE_CHECKING
8087 /* Even if we do not want to check the inputs, this documents the input
8088 constraints, which helps in understanding the following code. */
8089 if (STACK_REG_P (operands[0])
8090 && ((REG_P (operands[1])
8091 && REGNO (operands[0]) == REGNO (operands[1])
8092 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8093 || (REG_P (operands[2])
8094 && REGNO (operands[0]) == REGNO (operands[2])
8095 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8096 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8097 ; /* ok */
8098 else
8099 gcc_assert (is_sse);
8100 #endif
8102 switch (GET_CODE (operands[3]))
8104 case PLUS:
8105 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8106 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8107 p = "fiadd";
8108 else
8109 p = "fadd";
8110 ssep = "add";
8111 break;
8113 case MINUS:
8114 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8115 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8116 p = "fisub";
8117 else
8118 p = "fsub";
8119 ssep = "sub";
8120 break;
8122 case MULT:
8123 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8124 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8125 p = "fimul";
8126 else
8127 p = "fmul";
8128 ssep = "mul";
8129 break;
8131 case DIV:
8132 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8133 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8134 p = "fidiv";
8135 else
8136 p = "fdiv";
8137 ssep = "div";
8138 break;
8140 default:
8141 gcc_unreachable ();
8144 if (is_sse)
8146 strcpy (buf, ssep);
8147 if (GET_MODE (operands[0]) == SFmode)
8148 strcat (buf, "ss\t{%2, %0|%0, %2}");
8149 else
8150 strcat (buf, "sd\t{%2, %0|%0, %2}");
8151 return buf;
8153 strcpy (buf, p);
8155 switch (GET_CODE (operands[3]))
8157 case MULT:
8158 case PLUS:
8159 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8161 rtx temp = operands[2];
8162 operands[2] = operands[1];
8163 operands[1] = temp;
8166 /* We now know operands[0] == operands[1]. */
8168 if (GET_CODE (operands[2]) == MEM)
8170 p = "%z2\t%2";
8171 break;
8174 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8176 if (STACK_TOP_P (operands[0]))
8177 /* How is it that we are storing to a dead operand[2]?
8178 Well, presumably operands[1] is dead too. We can't
8179 store the result to st(0) as st(0) gets popped on this
8180 instruction. Instead store to operands[2] (which I
8181 think has to be st(1)). st(1) will be popped later.
8182 gcc <= 2.8.1 didn't have this check and generated
8183 assembly code that the Unixware assembler rejected. */
8184 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8185 else
8186 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8187 break;
8190 if (STACK_TOP_P (operands[0]))
8191 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8192 else
8193 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8194 break;
8196 case MINUS:
8197 case DIV:
8198 if (GET_CODE (operands[1]) == MEM)
8200 p = "r%z1\t%1";
8201 break;
8204 if (GET_CODE (operands[2]) == MEM)
8206 p = "%z2\t%2";
8207 break;
8210 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8212 #if SYSV386_COMPAT
8213 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8214 derived assemblers, confusingly reverse the direction of
8215 the operation for fsub{r} and fdiv{r} when the
8216 destination register is not st(0). The Intel assembler
8217 doesn't have this brain damage. Read !SYSV386_COMPAT to
8218 figure out what the hardware really does. */
8219 if (STACK_TOP_P (operands[0]))
8220 p = "{p\t%0, %2|rp\t%2, %0}";
8221 else
8222 p = "{rp\t%2, %0|p\t%0, %2}";
8223 #else
8224 if (STACK_TOP_P (operands[0]))
8225 /* As above for fmul/fadd, we can't store to st(0). */
8226 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8227 else
8228 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8229 #endif
8230 break;
8233 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8235 #if SYSV386_COMPAT
8236 if (STACK_TOP_P (operands[0]))
8237 p = "{rp\t%0, %1|p\t%1, %0}";
8238 else
8239 p = "{p\t%1, %0|rp\t%0, %1}";
8240 #else
8241 if (STACK_TOP_P (operands[0]))
8242 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8243 else
8244 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8245 #endif
8246 break;
8249 if (STACK_TOP_P (operands[0]))
8251 if (STACK_TOP_P (operands[1]))
8252 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8253 else
8254 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8255 break;
8257 else if (STACK_TOP_P (operands[1]))
8259 #if SYSV386_COMPAT
8260 p = "{\t%1, %0|r\t%0, %1}";
8261 #else
8262 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8263 #endif
8265 else
8267 #if SYSV386_COMPAT
8268 p = "{r\t%2, %0|\t%0, %2}";
8269 #else
8270 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8271 #endif
8273 break;
8275 default:
8276 gcc_unreachable ();
8279 strcat (buf, p);
8280 return buf;
8283 /* Return needed mode for entity in optimize_mode_switching pass. */
8285 static int
8286 ix86_mode_needed (int entity, rtx insn)
8288 enum attr_i387_cw mode;
8290 /* The mode UNINITIALIZED is used to store the control word after a
8291 function call or ASM pattern. The mode ANY specifies that the function
8292 has no requirements on the control word and makes no changes in the
8293 bits we are interested in. */
8295 if (CALL_P (insn)
8296 || (NONJUMP_INSN_P (insn)
8297 && (asm_noperands (PATTERN (insn)) >= 0
8298 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8299 return I387_CW_UNINITIALIZED;
8301 if (recog_memoized (insn) < 0)
8302 return I387_CW_ANY;
8304 mode = get_attr_i387_cw (insn);
8306 switch (entity)
8308 case I387_TRUNC:
8309 if (mode == I387_CW_TRUNC)
8310 return mode;
8311 break;
8313 case I387_FLOOR:
8314 if (mode == I387_CW_FLOOR)
8315 return mode;
8316 break;
8318 case I387_CEIL:
8319 if (mode == I387_CW_CEIL)
8320 return mode;
8321 break;
8323 case I387_MASK_PM:
8324 if (mode == I387_CW_MASK_PM)
8325 return mode;
8326 break;
8328 default:
8329 gcc_unreachable ();
8332 return I387_CW_ANY;
8335 /* Output code to initialize control word copies used by trunc?f?i and
8336 rounding patterns. CURRENT_MODE is set to the current control word,
8337 while NEW_MODE is set to the new control word. */
8339 void
8340 emit_i387_cw_initialization (int mode)
8342 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8343 rtx new_mode;
8345 int slot;
8347 rtx reg = gen_reg_rtx (HImode);
8349 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8350 emit_move_insn (reg, stored_mode);
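/* In the 387 control word, bits 10-11 (mask 0x0c00) select the rounding
   mode and bit 5 (mask 0x0020) masks the precision exception.  */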
8352 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8354 switch (mode)
8356 case I387_CW_TRUNC:
8357 /* round toward zero (truncate) */
8358 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8359 slot = SLOT_CW_TRUNC;
8360 break;
8362 case I387_CW_FLOOR:
8363 /* round down toward -oo */
8364 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8365 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8366 slot = SLOT_CW_FLOOR;
8367 break;
8369 case I387_CW_CEIL:
8370 /* round up toward +oo */
8371 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8372 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8373 slot = SLOT_CW_CEIL;
8374 break;
8376 case I387_CW_MASK_PM:
8377 /* mask precision exception for nearbyint() */
8378 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8379 slot = SLOT_CW_MASK_PM;
8380 break;
8382 default:
8383 gcc_unreachable ();
8386 else
8388 switch (mode)
8390 case I387_CW_TRUNC:
8391 /* round toward zero (truncate) */
8392 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8393 slot = SLOT_CW_TRUNC;
8394 break;
8396 case I387_CW_FLOOR:
8397 /* round down toward -oo */
8398 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8399 slot = SLOT_CW_FLOOR;
8400 break;
8402 case I387_CW_CEIL:
8403 /* round up toward +oo */
8404 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8405 slot = SLOT_CW_CEIL;
8406 break;
8408 case I387_CW_MASK_PM:
8409 /* mask precision exception for nearbyint() */
8410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8411 slot = SLOT_CW_MASK_PM;
8412 break;
8414 default:
8415 gcc_unreachable ();
8419 gcc_assert (slot < MAX_386_STACK_LOCALS);
8421 new_mode = assign_386_stack_local (HImode, slot);
8422 emit_move_insn (new_mode, reg);
8425 /* Output code for INSN to convert a float to a signed int. OPERANDS
8426 are the insn operands. The output may be [HSD]Imode and the input
8427 operand may be [SDX]Fmode. */
8429 const char *
8430 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8432 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8433 int dimode_p = GET_MODE (operands[0]) == DImode;
8434 int round_mode = get_attr_i387_cw (insn);
8436 /* Jump through a hoop or two for DImode, since the hardware has no
8437 non-popping instruction. We used to do this a different way, but
8438 that was somewhat fragile and broke with post-reload splitters. */
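/* fistp and fisttp pop the 387 stack, so when the input value must
   stay live we duplicate st(0) first.  */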
8439 if ((dimode_p || fisttp) && !stack_top_dies)
8440 output_asm_insn ("fld\t%y1", operands);
8442 gcc_assert (STACK_TOP_P (operands[1]));
8443 gcc_assert (GET_CODE (operands[0]) == MEM);
8445 if (fisttp)
8446 output_asm_insn ("fisttp%z0\t%0", operands);
8447 else
8449 if (round_mode != I387_CW_ANY)
8450 output_asm_insn ("fldcw\t%3", operands);
8451 if (stack_top_dies || dimode_p)
8452 output_asm_insn ("fistp%z0\t%0", operands);
8453 else
8454 output_asm_insn ("fist%z0\t%0", operands);
8455 if (round_mode != I387_CW_ANY)
8456 output_asm_insn ("fldcw\t%2", operands);
8459 return "";
8462 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8463 should be used. UNORDERED_P is true when fucom should be used. */
8465 const char *
8466 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8468 int stack_top_dies;
8469 rtx cmp_op0, cmp_op1;
8470 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8472 if (eflags_p)
8474 cmp_op0 = operands[0];
8475 cmp_op1 = operands[1];
8477 else
8479 cmp_op0 = operands[1];
8480 cmp_op1 = operands[2];
8483 if (is_sse)
8485 if (GET_MODE (operands[0]) == SFmode)
8486 if (unordered_p)
8487 return "ucomiss\t{%1, %0|%0, %1}";
8488 else
8489 return "comiss\t{%1, %0|%0, %1}";
8490 else
8491 if (unordered_p)
8492 return "ucomisd\t{%1, %0|%0, %1}";
8493 else
8494 return "comisd\t{%1, %0|%0, %1}";
8497 gcc_assert (STACK_TOP_P (cmp_op0));
8499 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8501 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8503 if (stack_top_dies)
8505 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8506 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8508 else
8509 return "ftst\n\tfnstsw\t%0";
8512 if (STACK_REG_P (cmp_op1)
8513 && stack_top_dies
8514 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8515 && REGNO (cmp_op1) != FIRST_STACK_REG)
8517 /* If the top of the 387 stack dies, and the other operand is also
8518 a stack register that dies, then this must be a
8519 `fcompp' float compare. */
8521 if (eflags_p)
8523 /* There is no double popping fcomi variant. Fortunately,
8524 eflags is immune from the fstp's cc clobbering. */
8525 if (unordered_p)
8526 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8527 else
8528 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8529 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8531 else
8533 if (unordered_p)
8534 return "fucompp\n\tfnstsw\t%0";
8535 else
8536 return "fcompp\n\tfnstsw\t%0";
8539 else
8541 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8543 static const char * const alt[16] =
8545 "fcom%z2\t%y2\n\tfnstsw\t%0",
8546 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8547 "fucom%z2\t%y2\n\tfnstsw\t%0",
8548 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8550 "ficom%z2\t%y2\n\tfnstsw\t%0",
8551 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8552 NULL,
8553 NULL,
8555 "fcomi\t{%y1, %0|%0, %y1}",
8556 "fcomip\t{%y1, %0|%0, %y1}",
8557 "fucomi\t{%y1, %0|%0, %y1}",
8558 "fucomip\t{%y1, %0|%0, %y1}",
8560 NULL,
8561 NULL,
8562 NULL,
8563 NULL
8566 int mask;
8567 const char *ret;
8569 mask = eflags_p << 3;
8570 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8571 mask |= unordered_p << 1;
8572 mask |= stack_top_dies;
8574 gcc_assert (mask < 16);
8575 ret = alt[mask];
8576 gcc_assert (ret);
8578 return ret;
8582 void
8583 ix86_output_addr_vec_elt (FILE *file, int value)
8585 const char *directive = ASM_LONG;
8587 #ifdef ASM_QUAD
8588 if (TARGET_64BIT)
8589 directive = ASM_QUAD;
8590 #else
8591 gcc_assert (!TARGET_64BIT);
8592 #endif
8594 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8597 void
8598 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8600 if (TARGET_64BIT)
8601 fprintf (file, "%s%s%d-%s%d\n",
8602 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8603 else if (HAVE_AS_GOTOFF_IN_DATA)
8604 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8605 #if TARGET_MACHO
8606 else if (TARGET_MACHO)
8608 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8609 machopic_output_function_base_name (file);
8610 fprintf(file, "\n");
8612 #endif
8613 else
8614 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8615 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8618 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8619 for the target. */
8621 void
8622 ix86_expand_clear (rtx dest)
8624 rtx tmp;
8626 /* We play register width games, which are only valid after reload. */
8627 gcc_assert (reload_completed);
8629 /* Avoid HImode and its attendant prefix byte. */
8630 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8631 dest = gen_rtx_REG (SImode, REGNO (dest));
8633 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8635 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8636 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8638 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8639 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8642 emit_insn (tmp);
8645 /* X is an unchanging MEM. If it is a constant pool reference, return
8646 the constant pool rtx, else NULL. */
8648 rtx
8649 maybe_get_pool_constant (rtx x)
8651 x = ix86_delegitimize_address (XEXP (x, 0));
8653 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8654 return get_pool_constant (x);
8656 return NULL_RTX;
8659 void
8660 ix86_expand_move (enum machine_mode mode, rtx operands[])
8662 int strict = (reload_in_progress || reload_completed);
8663 rtx op0, op1;
8664 enum tls_model model;
8666 op0 = operands[0];
8667 op1 = operands[1];
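/* TLS symbol references must first be legitimized into the access
   sequence required by their TLS model; a plain move is not valid
   for them.  */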
8669 if (GET_CODE (op1) == SYMBOL_REF)
8671 model = SYMBOL_REF_TLS_MODEL (op1);
8672 if (model)
8674 op1 = legitimize_tls_address (op1, model, true);
8675 op1 = force_operand (op1, op0);
8676 if (op1 == op0)
8677 return;
8680 else if (GET_CODE (op1) == CONST
8681 && GET_CODE (XEXP (op1, 0)) == PLUS
8682 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8684 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8685 if (model)
8687 rtx addend = XEXP (XEXP (op1, 0), 1);
8688 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8689 op1 = force_operand (op1, NULL);
8690 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8691 op0, 1, OPTAB_DIRECT);
8692 if (op1 == op0)
8693 return;
8697 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8699 #if TARGET_MACHO
8700 if (MACHOPIC_PURE)
8702 rtx temp = ((reload_in_progress
8703 || ((op0 && GET_CODE (op0) == REG)
8704 && mode == Pmode))
8705 ? op0 : gen_reg_rtx (Pmode));
8706 op1 = machopic_indirect_data_reference (op1, temp);
8707 op1 = machopic_legitimize_pic_address (op1, mode,
8708 temp == op1 ? 0 : temp);
8710 else if (MACHOPIC_INDIRECT)
8711 op1 = machopic_indirect_data_reference (op1, 0);
8712 if (op0 == op1)
8713 return;
8714 #else
8715 if (GET_CODE (op0) == MEM)
8716 op1 = force_reg (Pmode, op1);
8717 else
8718 op1 = legitimize_address (op1, op1, Pmode);
8719 #endif /* TARGET_MACHO */
8721 else
8723 if (GET_CODE (op0) == MEM
8724 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8725 || !push_operand (op0, mode))
8726 && GET_CODE (op1) == MEM)
8727 op1 = force_reg (mode, op1);
8729 if (push_operand (op0, mode)
8730 && ! general_no_elim_operand (op1, mode))
8731 op1 = copy_to_mode_reg (mode, op1);
8733 /* Force large constants in 64-bit compilation into a register
8734 to get them CSEed. */
8735 if (TARGET_64BIT && mode == DImode
8736 && immediate_operand (op1, mode)
8737 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8738 && !register_operand (op0, mode)
8739 && optimize && !reload_completed && !reload_in_progress)
8740 op1 = copy_to_mode_reg (mode, op1);
8742 if (FLOAT_MODE_P (mode))
8744 /* If we are loading a floating point constant to a register,
8745 force the value to memory now, since we'll get better code
8746 out of the back end. */
8748 if (strict)
8750 else if (GET_CODE (op1) == CONST_DOUBLE)
8752 op1 = validize_mem (force_const_mem (mode, op1));
8753 if (!register_operand (op0, mode))
8755 rtx temp = gen_reg_rtx (mode);
8756 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8757 emit_move_insn (op0, temp);
8758 return;
8764 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8767 void
8768 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8770 rtx op0 = operands[0], op1 = operands[1];
8772 /* Force constants other than zero into memory. We do not know how
8773 the instructions used to build constants modify the upper 64 bits
8774 of the register; once we have that information we may be able
8775 to handle some of them more efficiently. */
8776 if ((reload_in_progress | reload_completed) == 0
8777 && register_operand (op0, mode)
8778 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8779 op1 = validize_mem (force_const_mem (mode, op1));
8781 /* Make operand1 a register if it isn't already. */
8782 if (!no_new_pseudos
8783 && !register_operand (op0, mode)
8784 && !register_operand (op1, mode))
8786 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8787 return;
8790 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8793 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8794 straight to ix86_expand_vector_move. */
8796 void
8797 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8799 rtx op0, op1, m;
8801 op0 = operands[0];
8802 op1 = operands[1];
8804 if (MEM_P (op1))
8806 /* If we're optimizing for size, movups is the smallest. */
8807 if (optimize_size)
8809 op0 = gen_lowpart (V4SFmode, op0);
8810 op1 = gen_lowpart (V4SFmode, op1);
8811 emit_insn (gen_sse_movups (op0, op1));
8812 return;
8815 /* ??? If we have typed data, then it would appear that using
8816 movdqu is the only way to get unaligned data loaded with
8817 integer type. */
8818 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8820 op0 = gen_lowpart (V16QImode, op0);
8821 op1 = gen_lowpart (V16QImode, op1);
8822 emit_insn (gen_sse2_movdqu (op0, op1));
8823 return;
8826 if (TARGET_SSE2 && mode == V2DFmode)
8828 rtx zero;
8830 /* When SSE registers are split into halves, we can avoid
8831 writing to the top half twice. */
8832 if (TARGET_SSE_SPLIT_REGS)
8834 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8835 zero = op0;
8837 else
8839 /* ??? Not sure about the best option for the Intel chips.
8840 The following would seem to satisfy; the register is
8841 entirely cleared, breaking the dependency chain. We
8842 then store to the upper half, with a dependency depth
8843 of one. A rumor has it that Intel recommends two movsd
8844 followed by an unpacklpd, but this is unconfirmed. And
8845 given that the dependency depth of the unpacklpd would
8846 still be one, I'm not sure why this would be better. */
8847 zero = CONST0_RTX (V2DFmode);
8850 m = adjust_address (op1, DFmode, 0);
8851 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8852 m = adjust_address (op1, DFmode, 8);
8853 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8855 else
8857 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8858 emit_move_insn (op0, CONST0_RTX (mode));
8859 else
8860 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8862 if (mode != V4SFmode)
8863 op0 = gen_lowpart (V4SFmode, op0);
8864 m = adjust_address (op1, V2SFmode, 0);
8865 emit_insn (gen_sse_loadlps (op0, op0, m));
8866 m = adjust_address (op1, V2SFmode, 8);
8867 emit_insn (gen_sse_loadhps (op0, op0, m));
8870 else if (MEM_P (op0))
8872 /* If we're optimizing for size, movups is the smallest. */
8873 if (optimize_size)
8875 op0 = gen_lowpart (V4SFmode, op0);
8876 op1 = gen_lowpart (V4SFmode, op1);
8877 emit_insn (gen_sse_movups (op0, op1));
8878 return;
8881 /* ??? Similar to above, only less clear because of quote
8882 typeless stores unquote. */
8883 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8884 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8886 op0 = gen_lowpart (V16QImode, op0);
8887 op1 = gen_lowpart (V16QImode, op1);
8888 emit_insn (gen_sse2_movdqu (op0, op1));
8889 return;
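/* For V2DFmode stores, write the low and high 64-bit halves of the
   register separately.  */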
8892 if (TARGET_SSE2 && mode == V2DFmode)
8894 m = adjust_address (op0, DFmode, 0);
8895 emit_insn (gen_sse2_storelpd (m, op1));
8896 m = adjust_address (op0, DFmode, 8);
8897 emit_insn (gen_sse2_storehpd (m, op1));
8899 else
8901 if (mode != V4SFmode)
8902 op1 = gen_lowpart (V4SFmode, op1);
8903 m = adjust_address (op0, V2SFmode, 0);
8904 emit_insn (gen_sse_storelps (m, op1));
8905 m = adjust_address (op0, V2SFmode, 8);
8906 emit_insn (gen_sse_storehps (m, op1));
8909 else
8910 gcc_unreachable ();
8913 /* Expand a push in MODE. This is some mode for which we do not support
8914 proper push instructions, at least from the registers that we expect
8915 the value to live in. */
8917 void
8918 ix86_expand_push (enum machine_mode mode, rtx x)
8920 rtx tmp;
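/* Decrement the stack pointer by the size of MODE by hand, then
   store X into the newly allocated stack slot.  */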
8922 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8923 GEN_INT (-GET_MODE_SIZE (mode)),
8924 stack_pointer_rtx, 1, OPTAB_DIRECT);
8925 if (tmp != stack_pointer_rtx)
8926 emit_move_insn (stack_pointer_rtx, tmp);
8928 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8929 emit_move_insn (tmp, x);
8932 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8933 destination to use for the operation. If different from the true
8934 destination in operands[0], a copy operation will be required. */
8936 rtx
8937 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8938 rtx operands[])
8940 int matching_memory;
8941 rtx src1, src2, dst;
8943 dst = operands[0];
8944 src1 = operands[1];
8945 src2 = operands[2];
8947 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8948 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8949 && (rtx_equal_p (dst, src2)
8950 || immediate_operand (src1, mode)))
8952 rtx temp = src1;
8953 src1 = src2;
8954 src2 = temp;
8957 /* If the destination is memory, and we do not have matching source
8958 operands, do things in registers. */
8959 matching_memory = 0;
8960 if (GET_CODE (dst) == MEM)
8962 if (rtx_equal_p (dst, src1))
8963 matching_memory = 1;
8964 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8965 && rtx_equal_p (dst, src2))
8966 matching_memory = 2;
8967 else
8968 dst = gen_reg_rtx (mode);
8971 /* Both source operands cannot be in memory. */
8972 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8974 if (matching_memory != 2)
8975 src2 = force_reg (mode, src2);
8976 else
8977 src1 = force_reg (mode, src1);
8980 /* If the operation is not commutative, source 1 cannot be a constant
8981 or non-matching memory. */
8982 if ((CONSTANT_P (src1)
8983 || (!matching_memory && GET_CODE (src1) == MEM))
8984 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8985 src1 = force_reg (mode, src1);
8987 src1 = operands[1] = src1;
8988 src2 = operands[2] = src2;
8989 return dst;
8992 /* Similarly, but assume that the destination has already been
8993 set up properly. */
8995 void
8996 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8997 enum machine_mode mode, rtx operands[])
8999 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9000 gcc_assert (dst == operands[0]);
9003 /* Attempt to expand a binary operator. Make the expansion closer to the
9004 actual machine than just general_operand, which would allow 3 separate
9005 memory references (one output, two input) in a single insn. */
9007 void
9008 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9009 rtx operands[])
9011 rtx src1, src2, dst, op, clob;
9013 dst = ix86_fixup_binary_operands (code, mode, operands);
9014 src1 = operands[1];
9015 src2 = operands[2];
9017 /* Emit the instruction. */
9019 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9020 if (reload_in_progress)
9022 /* Reload doesn't know about the flags register, and doesn't know that
9023 it doesn't want to clobber it. We can only do this with PLUS. */
9024 gcc_assert (code == PLUS);
9025 emit_insn (op);
9027 else
9029 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9030 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9033 /* Fix up the destination if needed. */
9034 if (dst != operands[0])
9035 emit_move_insn (operands[0], dst);
9038 /* Return TRUE or FALSE depending on whether the binary operator meets the
9039 appropriate constraints. */
9041 int
9042 ix86_binary_operator_ok (enum rtx_code code,
9043 enum machine_mode mode ATTRIBUTE_UNUSED,
9044 rtx operands[3])
9046 /* Both source operands cannot be in memory. */
9047 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9048 return 0;
9049 /* If the operation is not commutative, source 1 cannot be a constant. */
9050 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9051 return 0;
9052 /* If the destination is memory, we must have a matching source operand. */
9053 if (GET_CODE (operands[0]) == MEM
9054 && ! (rtx_equal_p (operands[0], operands[1])
9055 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9056 && rtx_equal_p (operands[0], operands[2]))))
9057 return 0;
9058 /* If the operation is not commutative and source 1 is memory, we must
9059 have a matching destination. */
9060 if (GET_CODE (operands[1]) == MEM
9061 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9062 && ! rtx_equal_p (operands[0], operands[1]))
9063 return 0;
9064 return 1;
9067 /* Attempt to expand a unary operator. Make the expansion closer to the
9068 actual machine than just general_operand, which would allow 2 separate
9069 memory references (one output, one input) in a single insn. */
9071 void
9072 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9073 rtx operands[])
9075 int matching_memory;
9076 rtx src, dst, op, clob;
9078 dst = operands[0];
9079 src = operands[1];
9081 /* If the destination is memory, and we do not have matching source
9082 operands, do things in registers. */
9083 matching_memory = 0;
9084 if (MEM_P (dst))
9086 if (rtx_equal_p (dst, src))
9087 matching_memory = 1;
9088 else
9089 dst = gen_reg_rtx (mode);
9092 /* When source operand is memory, destination must match. */
9093 if (MEM_P (src) && !matching_memory)
9094 src = force_reg (mode, src);
9096 /* Emit the instruction. */
9098 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9099 if (reload_in_progress || code == NOT)
9101 /* Reload doesn't know about the flags register, and doesn't know that
9102 it doesn't want to clobber it. */
9103 gcc_assert (code == NOT);
9104 emit_insn (op);
9106 else
9108 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9109 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9112 /* Fix up the destination if needed. */
9113 if (dst != operands[0])
9114 emit_move_insn (operands[0], dst);
9117 /* Return TRUE or FALSE depending on whether the unary operator meets the
9118 appropriate constraints. */
9120 int
9121 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9122 enum machine_mode mode ATTRIBUTE_UNUSED,
9123 rtx operands[2] ATTRIBUTE_UNUSED)
9125 /* If one of operands is memory, source and destination must match. */
9126 if ((GET_CODE (operands[0]) == MEM
9127 || GET_CODE (operands[1]) == MEM)
9128 && ! rtx_equal_p (operands[0], operands[1]))
9129 return FALSE;
9130 return TRUE;
9133 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9134 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9135 true, then replicate the mask for all elements of the vector register.
9136 If INVERT is true, then create a mask excluding the sign bit. */
9138 static rtx
9139 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9141 enum machine_mode vec_mode;
9142 HOST_WIDE_INT hi, lo;
9143 int shift = 63;
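/* Bit 63 is the sign bit of a DFmode value; the SFmode case below
   uses bit 31 (0x80000000) directly.  */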
9144 rtvec v;
9145 rtx mask;
9147 /* Find the sign bit, sign extended to 2*HWI. */
9148 if (mode == SFmode)
9149 lo = 0x80000000, hi = lo < 0;
9150 else if (HOST_BITS_PER_WIDE_INT >= 64)
9151 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9152 else
9153 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9155 if (invert)
9156 lo = ~lo, hi = ~hi;
9158 /* Force this value into the low part of a fp vector constant. */
9159 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9160 mask = gen_lowpart (mode, mask);
9162 if (mode == SFmode)
9164 if (vect)
9165 v = gen_rtvec (4, mask, mask, mask, mask);
9166 else
9167 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9168 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9169 vec_mode = V4SFmode;
9171 else
9173 if (vect)
9174 v = gen_rtvec (2, mask, mask);
9175 else
9176 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9177 vec_mode = V2DFmode;
9180 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9183 /* Generate code for floating point ABS or NEG. */
9185 void
9186 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9187 rtx operands[])
9189 rtx mask, set, use, clob, dst, src;
9190 bool matching_memory;
9191 bool use_sse = false;
9192 bool vector_mode = VECTOR_MODE_P (mode);
9193 enum machine_mode elt_mode = mode;
9195 if (vector_mode)
9197 elt_mode = GET_MODE_INNER (mode);
9198 use_sse = true;
9200 else if (TARGET_SSE_MATH)
9201 use_sse = SSE_FLOAT_MODE_P (mode);
9203 /* NEG and ABS performed with SSE use bitwise mask operations.
9204 Create the appropriate mask now. */
9205 if (use_sse)
9206 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9207 else
9209 /* When not using SSE, we don't use the mask, but prefer to keep the
9210 same general form of the insn pattern to reduce duplication when
9211 it comes time to split. */
9212 mask = const0_rtx;
9215 dst = operands[0];
9216 src = operands[1];
9218 /* If the destination is memory, and we don't have matching source
9219 operands, do things in registers. */
9220 matching_memory = false;
9221 if (MEM_P (dst))
9223 if (rtx_equal_p (dst, src))
9224 matching_memory = true;
9225 else
9226 dst = gen_reg_rtx (mode);
9228 if (MEM_P (src) && !matching_memory)
9229 src = force_reg (mode, src);
9231 if (vector_mode)
9233 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9234 set = gen_rtx_SET (VOIDmode, dst, set);
9235 emit_insn (set);
9237 else
9239 set = gen_rtx_fmt_e (code, mode, src);
9240 set = gen_rtx_SET (VOIDmode, dst, set);
9241 use = gen_rtx_USE (VOIDmode, mask);
9242 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9243 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
9246 if (dst != operands[0])
9247 emit_move_insn (operands[0], dst);
9250 /* Expand a copysign operation. Special case operand 0 being a constant. */
9252 void
9253 ix86_expand_copysign (rtx operands[])
9255 enum machine_mode mode, vmode;
9256 rtx dest, op0, op1, mask, nmask;
9258 dest = operands[0];
9259 op0 = operands[1];
9260 op1 = operands[2];
9262 mode = GET_MODE (dest);
9263 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9265 if (GET_CODE (op0) == CONST_DOUBLE)
9267 rtvec v;
9269 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9270 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9272 if (op0 == CONST0_RTX (mode))
9273 op0 = CONST0_RTX (vmode);
9274 else
9276 if (mode == SFmode)
9277 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9278 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9279 else
9280 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9281 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9284 mask = ix86_build_signbit_mask (mode, 0, 0);
9286 if (mode == SFmode)
9287 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9288 else
9289 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9291 else
9293 nmask = ix86_build_signbit_mask (mode, 0, 1);
9294 mask = ix86_build_signbit_mask (mode, 0, 0);
9296 if (mode == SFmode)
9297 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9298 else
9299 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9303 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9304 be a constant, and so has already been expanded into a vector constant. */
9306 void
9307 ix86_split_copysign_const (rtx operands[])
9309 enum machine_mode mode, vmode;
9310 rtx dest, op0, op1, mask, x;
9312 dest = operands[0];
9313 op0 = operands[1];
9314 op1 = operands[2];
9315 mask = operands[3];
9317 mode = GET_MODE (dest);
9318 vmode = GET_MODE (mask);
9320 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9321 x = gen_rtx_AND (vmode, dest, mask);
9322 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9324 if (op0 != CONST0_RTX (vmode))
9326 x = gen_rtx_IOR (vmode, dest, op0);
9327 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9331 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9332 so we have to do two masks. */
9334 void
9335 ix86_split_copysign_var (rtx operands[])
9337 enum machine_mode mode, vmode;
9338 rtx dest, scratch, op0, op1, mask, nmask, x;
9340 dest = operands[0];
9341 scratch = operands[1];
9342 op0 = operands[2];
9343 op1 = operands[3];
9344 nmask = operands[4];
9345 mask = operands[5];
9347 mode = GET_MODE (dest);
9348 vmode = GET_MODE (mask);
9350 if (rtx_equal_p (op0, op1))
9352 /* Shouldn't happen often (it's useless, obviously), but when it does
9353 we'd generate incorrect code if we continue below. */
9354 emit_move_insn (dest, op0);
9355 return;
9358 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9360 gcc_assert (REGNO (op1) == REGNO (scratch));
9362 x = gen_rtx_AND (vmode, scratch, mask);
9363 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9365 dest = mask;
9366 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9367 x = gen_rtx_NOT (vmode, dest);
9368 x = gen_rtx_AND (vmode, x, op0);
9369 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9371 else
9373 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9375 x = gen_rtx_AND (vmode, scratch, mask);
9377 else /* alternative 2,4 */
9379 gcc_assert (REGNO (mask) == REGNO (scratch));
9380 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9381 x = gen_rtx_AND (vmode, scratch, op1);
9383 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9385 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9387 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9388 x = gen_rtx_AND (vmode, dest, nmask);
9390 else /* alternative 3,4 */
9392 gcc_assert (REGNO (nmask) == REGNO (dest));
9393 dest = nmask;
9394 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9395 x = gen_rtx_AND (vmode, dest, op0);
9397 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9400 x = gen_rtx_IOR (vmode, dest, scratch);
9401 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9404 /* Return TRUE or FALSE depending on whether the first SET in INSN
9405 has source and destination with matching CC modes, and whether the
9406 CC mode is at least as constrained as REQ_MODE. */
9408 int
9409 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9411 rtx set;
9412 enum machine_mode set_mode;
9414 set = PATTERN (insn);
9415 if (GET_CODE (set) == PARALLEL)
9416 set = XVECEXP (set, 0, 0);
9417 gcc_assert (GET_CODE (set) == SET);
9418 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9420 set_mode = GET_MODE (SET_DEST (set));
9421 switch (set_mode)
9423 case CCNOmode:
9424 if (req_mode != CCNOmode
9425 && (req_mode != CCmode
9426 || XEXP (SET_SRC (set), 1) != const0_rtx))
9427 return 0;
9428 break;
9429 case CCmode:
9430 if (req_mode == CCGCmode)
9431 return 0;
9432 /* FALLTHRU */
9433 case CCGCmode:
9434 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9435 return 0;
9436 /* FALLTHRU */
9437 case CCGOCmode:
9438 if (req_mode == CCZmode)
9439 return 0;
9440 /* FALLTHRU */
9441 case CCZmode:
9442 break;
9444 default:
9445 gcc_unreachable ();
9448 return (GET_MODE (SET_SRC (set)) == set_mode);
9451 /* Generate insn patterns to do an integer compare of OPERANDS. */
9453 static rtx
9454 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9456 enum machine_mode cmpmode;
9457 rtx tmp, flags;
9459 cmpmode = SELECT_CC_MODE (code, op0, op1);
9460 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9462 /* This is very simple, but making the interface the same as in the
9463 FP case makes the rest of the code easier. */
9464 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9465 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9467 /* Return the test that should be put into the flags user, i.e.
9468 the bcc, scc, or cmov instruction. */
9469 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9472 /* Figure out whether to use ordered or unordered fp comparisons.
9473 Return the appropriate mode to use. */
9475 enum machine_mode
9476 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9478 /* ??? In order to make all comparisons reversible, we do all comparisons
9479 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9480 all forms of trapping and non-trapping comparisons, we can make inequality
9481 comparisons trapping again, since that results in better code when using
9482 FCOM based compares. */
9483 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9486 enum machine_mode
9487 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9489 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
9490 return ix86_fp_compare_mode (code);
9491 switch (code)
9493 /* Only zero flag is needed. */
9494 case EQ: /* ZF=0 */
9495 case NE: /* ZF!=0 */
9496 return CCZmode;
9497 /* Codes needing carry flag. */
9498 case GEU: /* CF=0 */
9499 case GTU: /* CF=0 & ZF=0 */
9500 case LTU: /* CF=1 */
9501 case LEU: /* CF=1 | ZF=1 */
9502 return CCmode;
9503 /* Codes possibly doable only with sign flag when
9504 comparing against zero. */
9505 case GE: /* SF=OF or SF=0 */
9506 case LT: /* SF<>OF or SF=1 */
9507 if (op1 == const0_rtx)
9508 return CCGOCmode;
9509 else
9510 /* For the other cases the carry flag is not required. */
9511 return CCGCmode;
9512 /* Codes doable only with the sign flag when comparing
9513 against zero, but there is no jump instruction for that,
9514 so we need to use relational tests against the overflow
9515 flag, which therefore must be zero. */
9516 case GT: /* ZF=0 & SF=OF */
9517 case LE: /* ZF=1 | SF<>OF */
9518 if (op1 == const0_rtx)
9519 return CCNOmode;
9520 else
9521 return CCGCmode;
9522 /* The strcmp pattern does (use flags), and combine may ask us for the
9523 proper mode. */
9524 case USE:
9525 return CCmode;
9526 default:
9527 gcc_unreachable ();
9531 /* Return the fixed registers used for condition codes. */
9533 static bool
9534 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9536 *p1 = FLAGS_REG;
9537 *p2 = FPSR_REG;
9538 return true;
9541 /* If two condition code modes are compatible, return a condition code
9542 mode which is compatible with both. Otherwise, return
9543 VOIDmode. */
9545 static enum machine_mode
9546 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9548 if (m1 == m2)
9549 return m1;
9551 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9552 return VOIDmode;
9554 if ((m1 == CCGCmode && m2 == CCGOCmode)
9555 || (m1 == CCGOCmode && m2 == CCGCmode))
9556 return CCGCmode;
9558 switch (m1)
9560 default:
9561 gcc_unreachable ();
9563 case CCmode:
9564 case CCGCmode:
9565 case CCGOCmode:
9566 case CCNOmode:
9567 case CCZmode:
9568 switch (m2)
9570 default:
9571 return VOIDmode;
9573 case CCmode:
9574 case CCGCmode:
9575 case CCGOCmode:
9576 case CCNOmode:
9577 case CCZmode:
9578 return CCmode;
9581 case CCFPmode:
9582 case CCFPUmode:
9583 /* These are only compatible with themselves, which we already
9584 checked above. */
9585 return VOIDmode;
9589 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9592 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9594 enum rtx_code swapped_code = swap_condition (code);
9595 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9596 || (ix86_fp_comparison_cost (swapped_code)
9597 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9600 /* Swap, force into registers, or otherwise massage the two operands
9601 to a fp comparison. The operands are updated in place; the new
9602 comparison code is returned. */
9604 static enum rtx_code
9605 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9607 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9608 rtx op0 = *pop0, op1 = *pop1;
9609 enum machine_mode op_mode = GET_MODE (op0);
9610 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9612 /* All of the unordered compare instructions only work on registers.
9613 The same is true of the fcomi compare instructions. The XFmode
9614 compare instructions require registers except when comparing
9615 against zero or when converting operand 1 from fixed point to
9616 floating point. */
9618 if (!is_sse
9619 && (fpcmp_mode == CCFPUmode
9620 || (op_mode == XFmode
9621 && ! (standard_80387_constant_p (op0) == 1
9622 || standard_80387_constant_p (op1) == 1)
9623 && GET_CODE (op1) != FLOAT)
9624 || ix86_use_fcomi_compare (code)))
9626 op0 = force_reg (op_mode, op0);
9627 op1 = force_reg (op_mode, op1);
9629 else
9631 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9632 things around if they appear profitable, otherwise force op0
9633 into a register. */
9635 if (standard_80387_constant_p (op0) == 0
9636 || (GET_CODE (op0) == MEM
9637 && ! (standard_80387_constant_p (op1) == 0
9638 || GET_CODE (op1) == MEM)))
9640 rtx tmp;
9641 tmp = op0, op0 = op1, op1 = tmp;
9642 code = swap_condition (code);
9645 if (GET_CODE (op0) != REG)
9646 op0 = force_reg (op_mode, op0);
9648 if (CONSTANT_P (op1))
9650 int tmp = standard_80387_constant_p (op1);
9651 if (tmp == 0)
9652 op1 = validize_mem (force_const_mem (op_mode, op1));
9653 else if (tmp == 1)
9655 if (TARGET_CMOVE)
9656 op1 = force_reg (op_mode, op1);
9658 else
9659 op1 = force_reg (op_mode, op1);
9663 /* Try to rearrange the comparison to make it cheaper. */
9664 if (ix86_fp_comparison_cost (code)
9665 > ix86_fp_comparison_cost (swap_condition (code))
9666 && (GET_CODE (op1) == REG || !no_new_pseudos))
9668 rtx tmp;
9669 tmp = op0, op0 = op1, op1 = tmp;
9670 code = swap_condition (code);
9671 if (GET_CODE (op0) != REG)
9672 op0 = force_reg (op_mode, op0);
9675 *pop0 = op0;
9676 *pop1 = op1;
9677 return code;
9680 /* Convert the comparison codes we use to represent FP comparisons to the
9681 integer codes that will result in a proper branch. Return UNKNOWN if no
9682 such code is available. */
9684 enum rtx_code
9685 ix86_fp_compare_code_to_integer (enum rtx_code code)
9687 switch (code)
9689 case GT:
9690 return GTU;
9691 case GE:
9692 return GEU;
9693 case ORDERED:
9694 case UNORDERED:
9695 return code;
9696 break;
9697 case UNEQ:
9698 return EQ;
9699 break;
9700 case UNLT:
9701 return LTU;
9702 break;
9703 case UNLE:
9704 return LEU;
9705 break;
9706 case LTGT:
9707 return NE;
9708 break;
9709 default:
9710 return UNKNOWN;
9714 /* Split comparison code CODE into comparisons we can do using branch
9715 instructions. BYPASS_CODE is the comparison code for a branch that will
9716 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9717 is not required, its code is set to UNKNOWN.
9718 We never require more than two branches. */
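/* For example, under TARGET_IEEE_FP an EQ test becomes FIRST_CODE = UNEQ
   with BYPASS_CODE = UNORDERED (branch around the equality test when the
   operands are unordered), while NE becomes FIRST_CODE = LTGT with
   SECOND_CODE = UNORDERED (also take the branch when unordered); see the
   switch below. */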
9720 void
9721 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9722 enum rtx_code *first_code,
9723 enum rtx_code *second_code)
9725 *first_code = code;
9726 *bypass_code = UNKNOWN;
9727 *second_code = UNKNOWN;
9729 /* The fcomi comparison sets flags as follows:
9731 cmp ZF PF CF
9732 > 0 0 0
9733 < 0 0 1
9734 = 1 0 0
9735 un 1 1 1 */
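/* The same condition bits (C0/C2/C3) are what FNSTSW stores in the FPU
   status word; the FNSTSW/SAHF path copies them into CF, PF and ZF
   respectively, so the codes computed here apply to both strategies. */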
9737 switch (code)
9739 case GT: /* GTU - CF=0 & ZF=0 */
9740 case GE: /* GEU - CF=0 */
9741 case ORDERED: /* PF=0 */
9742 case UNORDERED: /* PF=1 */
9743 case UNEQ: /* EQ - ZF=1 */
9744 case UNLT: /* LTU - CF=1 */
9745 case UNLE: /* LEU - CF=1 | ZF=1 */
9746 case LTGT: /* EQ - ZF=0 */
9747 break;
9748 case LT: /* LTU - CF=1 - fails on unordered */
9749 *first_code = UNLT;
9750 *bypass_code = UNORDERED;
9751 break;
9752 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9753 *first_code = UNLE;
9754 *bypass_code = UNORDERED;
9755 break;
9756 case EQ: /* EQ - ZF=1 - fails on unordered */
9757 *first_code = UNEQ;
9758 *bypass_code = UNORDERED;
9759 break;
9760 case NE: /* NE - ZF=0 - fails on unordered */
9761 *first_code = LTGT;
9762 *second_code = UNORDERED;
9763 break;
9764 case UNGE: /* GEU - CF=0 - fails on unordered */
9765 *first_code = GE;
9766 *second_code = UNORDERED;
9767 break;
9768 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9769 *first_code = GT;
9770 *second_code = UNORDERED;
9771 break;
9772 default:
9773 gcc_unreachable ();
9775 if (!TARGET_IEEE_FP)
9777 *second_code = UNKNOWN;
9778 *bypass_code = UNKNOWN;
9782 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9783 All of the following functions use the number of instructions as the cost metric.
9784 In the future this should be tweaked to compute bytes for optimize_size and
9785 take into account the performance of various instructions on various CPUs. */
9786 static int
9787 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9789 if (!TARGET_IEEE_FP)
9790 return 4;
9791 /* The cost of code output by ix86_expand_fp_compare. */
9792 switch (code)
9794 case UNLE:
9795 case UNLT:
9796 case LTGT:
9797 case GT:
9798 case GE:
9799 case UNORDERED:
9800 case ORDERED:
9801 case UNEQ:
9802 return 4;
9803 break;
9804 case LT:
9805 case NE:
9806 case EQ:
9807 case UNGE:
9808 return 5;
9809 break;
9810 case LE:
9811 case UNGT:
9812 return 6;
9813 break;
9814 default:
9815 gcc_unreachable ();
9819 /* Return cost of comparison done using fcomi operation.
9820 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9821 static int
9822 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9824 enum rtx_code bypass_code, first_code, second_code;
9825 /* Return an arbitrarily high cost when the instruction is not supported -
9826 this prevents gcc from using it. */
9827 if (!TARGET_CMOVE)
9828 return 1024;
9829 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9830 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9833 /* Return cost of comparison done using sahf operation.
9834 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9835 static int
9836 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9838 enum rtx_code bypass_code, first_code, second_code;
9839 /* Return an arbitrarily high cost when the instruction is not preferred -
9840 this keeps gcc from using it. */
9841 if (!TARGET_USE_SAHF && !optimize_size)
9842 return 1024;
9843 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9844 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9847 /* Compute cost of the comparison done using any method.
9848 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9849 static int
9850 ix86_fp_comparison_cost (enum rtx_code code)
9852 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9853 int min;
9855 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9856 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9858 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9859 if (min > sahf_cost)
9860 min = sahf_cost;
9861 if (min > fcomi_cost)
9862 min = fcomi_cost;
9863 return min;
9866 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9868 static rtx
9869 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9870 rtx *second_test, rtx *bypass_test)
9872 enum machine_mode fpcmp_mode, intcmp_mode;
9873 rtx tmp, tmp2;
9874 int cost = ix86_fp_comparison_cost (code);
9875 enum rtx_code bypass_code, first_code, second_code;
9877 fpcmp_mode = ix86_fp_compare_mode (code);
9878 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9880 if (second_test)
9881 *second_test = NULL_RTX;
9882 if (bypass_test)
9883 *bypass_test = NULL_RTX;
9885 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9887 /* Do fcomi/sahf based test when profitable. */
9888 if ((bypass_code == UNKNOWN || bypass_test)
9889 && (second_code == UNKNOWN || second_test)
9890 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9892 if (TARGET_CMOVE)
9894 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9895 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9896 tmp);
9897 emit_insn (tmp);
9899 else
9901 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9902 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9903 if (!scratch)
9904 scratch = gen_reg_rtx (HImode);
9905 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9906 emit_insn (gen_x86_sahf_1 (scratch));
9909 /* The FP codes work out to act like unsigned. */
9910 intcmp_mode = fpcmp_mode;
9911 code = first_code;
9912 if (bypass_code != UNKNOWN)
9913 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9914 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9915 const0_rtx);
9916 if (second_code != UNKNOWN)
9917 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9918 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9919 const0_rtx);
9921 else
9923 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9924 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9925 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9926 if (!scratch)
9927 scratch = gen_reg_rtx (HImode);
9928 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9930 /* In the unordered case, we have to check C2 for NaN's, which
9931 doesn't happen to work out to anything nice combination-wise.
9932 So do some bit twiddling on the value we've got in AH to come
9933 up with an appropriate set of condition codes. */
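/* In the high byte of the FNSTSW result, C0 is bit 0 (0x01), C2 is bit 2
   (0x04) and C3 is bit 6 (0x40); the masks 0x45, 0x44, 0x05 etc. used
   below select combinations of those condition bits. */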
9935 intcmp_mode = CCNOmode;
9936 switch (code)
9938 case GT:
9939 case UNGT:
9940 if (code == GT || !TARGET_IEEE_FP)
9942 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9943 code = EQ;
9945 else
9947 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9948 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9949 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9950 intcmp_mode = CCmode;
9951 code = GEU;
9953 break;
9954 case LT:
9955 case UNLT:
9956 if (code == LT && TARGET_IEEE_FP)
9958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9959 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9960 intcmp_mode = CCmode;
9961 code = EQ;
9963 else
9965 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9966 code = NE;
9968 break;
9969 case GE:
9970 case UNGE:
9971 if (code == GE || !TARGET_IEEE_FP)
9973 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9974 code = EQ;
9976 else
9978 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9979 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9980 GEN_INT (0x01)));
9981 code = NE;
9983 break;
9984 case LE:
9985 case UNLE:
9986 if (code == LE && TARGET_IEEE_FP)
9988 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9989 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9990 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9991 intcmp_mode = CCmode;
9992 code = LTU;
9994 else
9996 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9997 code = NE;
9999 break;
10000 case EQ:
10001 case UNEQ:
10002 if (code == EQ && TARGET_IEEE_FP)
10004 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10005 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10006 intcmp_mode = CCmode;
10007 code = EQ;
10009 else
10011 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10012 code = NE;
10013 break;
10015 break;
10016 case NE:
10017 case LTGT:
10018 if (code == NE && TARGET_IEEE_FP)
10020 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10021 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10022 GEN_INT (0x40)));
10023 code = NE;
10025 else
10027 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10028 code = EQ;
10030 break;
10032 case UNORDERED:
10033 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10034 code = NE;
10035 break;
10036 case ORDERED:
10037 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10038 code = EQ;
10039 break;
10041 default:
10042 gcc_unreachable ();
10046 /* Return the test that should be put into the flags user, i.e.
10047 the bcc, scc, or cmov instruction. */
10048 return gen_rtx_fmt_ee (code, VOIDmode,
10049 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10050 const0_rtx);
10054 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10056 rtx op0, op1, ret;
10057 op0 = ix86_compare_op0;
10058 op1 = ix86_compare_op1;
10060 if (second_test)
10061 *second_test = NULL_RTX;
10062 if (bypass_test)
10063 *bypass_test = NULL_RTX;
10065 if (ix86_compare_emitted)
10067 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10068 ix86_compare_emitted = NULL_RTX;
10070 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10071 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10072 second_test, bypass_test);
10073 else
10074 ret = ix86_expand_int_compare (code, op0, op1);
10076 return ret;
10079 /* Return true if the CODE will result in nontrivial jump sequence. */
10080 bool
10081 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10083 enum rtx_code bypass_code, first_code, second_code;
10084 if (!TARGET_CMOVE)
10085 return true;
10086 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10087 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10090 void
10091 ix86_expand_branch (enum rtx_code code, rtx label)
10093 rtx tmp;
10095 switch (GET_MODE (ix86_compare_op0))
10097 case QImode:
10098 case HImode:
10099 case SImode:
10100 simple:
10101 tmp = ix86_expand_compare (code, NULL, NULL);
10102 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10103 gen_rtx_LABEL_REF (VOIDmode, label),
10104 pc_rtx);
10105 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10106 return;
10108 case SFmode:
10109 case DFmode:
10110 case XFmode:
10112 rtvec vec;
10113 int use_fcomi;
10114 enum rtx_code bypass_code, first_code, second_code;
10116 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10117 &ix86_compare_op1);
10119 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10121 /* Check whether we will use the natural sequence with one jump. If
10122 so, we can expand the jump early. Otherwise delay expansion by
10123 creating a compound insn so as not to confuse the optimizers. */
10124 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10125 && TARGET_CMOVE)
10127 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10128 gen_rtx_LABEL_REF (VOIDmode, label),
10129 pc_rtx, NULL_RTX, NULL_RTX);
10131 else
10133 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10134 ix86_compare_op0, ix86_compare_op1);
10135 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10136 gen_rtx_LABEL_REF (VOIDmode, label),
10137 pc_rtx);
10138 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10140 use_fcomi = ix86_use_fcomi_compare (code);
10141 vec = rtvec_alloc (3 + !use_fcomi);
10142 RTVEC_ELT (vec, 0) = tmp;
10143 RTVEC_ELT (vec, 1)
10144 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10145 RTVEC_ELT (vec, 2)
10146 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10147 if (! use_fcomi)
10148 RTVEC_ELT (vec, 3)
10149 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10151 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10153 return;
10156 case DImode:
10157 if (TARGET_64BIT)
10158 goto simple;
10159 case TImode:
10160 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
10162 rtx lo[2], hi[2], label2;
10163 enum rtx_code code1, code2, code3;
10164 enum machine_mode submode;
10166 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10168 tmp = ix86_compare_op0;
10169 ix86_compare_op0 = ix86_compare_op1;
10170 ix86_compare_op1 = tmp;
10171 code = swap_condition (code);
10173 if (GET_MODE (ix86_compare_op0) == DImode)
10175 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10176 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10177 submode = SImode;
10179 else
10181 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10182 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10183 submode = DImode;
10186 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10187 avoid two branches. This costs one extra insn, so disable when
10188 optimizing for size. */
10190 if ((code == EQ || code == NE)
10191 && (!optimize_size
10192 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10194 rtx xor0, xor1;
10196 xor1 = hi[0];
10197 if (hi[1] != const0_rtx)
10198 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10199 NULL_RTX, 0, OPTAB_WIDEN);
10201 xor0 = lo[0];
10202 if (lo[1] != const0_rtx)
10203 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10204 NULL_RTX, 0, OPTAB_WIDEN);
10206 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10207 NULL_RTX, 0, OPTAB_WIDEN);
10209 ix86_compare_op0 = tmp;
10210 ix86_compare_op1 = const0_rtx;
10211 ix86_expand_branch (code, label);
10212 return;
10215 /* Otherwise, if we are doing a less-than or greater-or-equal-than
10216 comparison, op1 is a constant and the low word is zero, then we can
10217 just examine the high word. */
10219 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10220 switch (code)
10222 case LT: case LTU: case GE: case GEU:
10223 ix86_compare_op0 = hi[0];
10224 ix86_compare_op1 = hi[1];
10225 ix86_expand_branch (code, label);
10226 return;
10227 default:
10228 break;
10231 /* Otherwise, we need two or three jumps. */
10233 label2 = gen_label_rtx ();
10235 code1 = code;
10236 code2 = swap_condition (code);
10237 code3 = unsigned_condition (code);
10239 switch (code)
10241 case LT: case GT: case LTU: case GTU:
10242 break;
10244 case LE: code1 = LT; code2 = GT; break;
10245 case GE: code1 = GT; code2 = LT; break;
10246 case LEU: code1 = LTU; code2 = GTU; break;
10247 case GEU: code1 = GTU; code2 = LTU; break;
10249 case EQ: code1 = UNKNOWN; code2 = NE; break;
10250 case NE: code2 = UNKNOWN; break;
10252 default:
10253 gcc_unreachable ();
10257 * a < b =>
10258 * if (hi(a) < hi(b)) goto true;
10259 * if (hi(a) > hi(b)) goto false;
10260 * if (lo(a) < lo(b)) goto true;
10261 * false:
10264 ix86_compare_op0 = hi[0];
10265 ix86_compare_op1 = hi[1];
10267 if (code1 != UNKNOWN)
10268 ix86_expand_branch (code1, label);
10269 if (code2 != UNKNOWN)
10270 ix86_expand_branch (code2, label2);
10272 ix86_compare_op0 = lo[0];
10273 ix86_compare_op1 = lo[1];
10274 ix86_expand_branch (code3, label);
10276 if (code2 != UNKNOWN)
10277 emit_label (label2);
10278 return;
10281 default:
10282 gcc_unreachable ();
10286 /* Split branch based on floating point condition. */
10287 void
10288 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10289 rtx target1, rtx target2, rtx tmp, rtx pushed)
10291 rtx second, bypass;
10292 rtx label = NULL_RTX;
10293 rtx condition;
10294 int bypass_probability = -1, second_probability = -1, probability = -1;
10295 rtx i;
10297 if (target2 != pc_rtx)
10299 rtx tmp = target2;
10300 code = reverse_condition_maybe_unordered (code);
10301 target2 = target1;
10302 target1 = tmp;
10305 condition = ix86_expand_fp_compare (code, op1, op2,
10306 tmp, &second, &bypass);
10308 /* Remove pushed operand from stack. */
10309 if (pushed)
10310 ix86_free_from_memory (GET_MODE (pushed));
10312 if (split_branch_probability >= 0)
10314 /* Distribute the probabilities across the jumps.
10315 Assume that BYPASS and SECOND always test
10316 for UNORDERED. */
10317 probability = split_branch_probability;
10319 /* A value of 1 is low enough that the probability does not need
10320 to be updated. Later we may run some experiments and see
10321 whether unordered values are more frequent in practice. */
10322 if (bypass)
10323 bypass_probability = 1;
10324 if (second)
10325 second_probability = 1;
10327 if (bypass != NULL_RTX)
10329 label = gen_label_rtx ();
10330 i = emit_jump_insn (gen_rtx_SET
10331 (VOIDmode, pc_rtx,
10332 gen_rtx_IF_THEN_ELSE (VOIDmode,
10333 bypass,
10334 gen_rtx_LABEL_REF (VOIDmode,
10335 label),
10336 pc_rtx)));
10337 if (bypass_probability >= 0)
10338 REG_NOTES (i)
10339 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10340 GEN_INT (bypass_probability),
10341 REG_NOTES (i));
10343 i = emit_jump_insn (gen_rtx_SET
10344 (VOIDmode, pc_rtx,
10345 gen_rtx_IF_THEN_ELSE (VOIDmode,
10346 condition, target1, target2)));
10347 if (probability >= 0)
10348 REG_NOTES (i)
10349 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10350 GEN_INT (probability),
10351 REG_NOTES (i));
10352 if (second != NULL_RTX)
10354 i = emit_jump_insn (gen_rtx_SET
10355 (VOIDmode, pc_rtx,
10356 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10357 target2)));
10358 if (second_probability >= 0)
10359 REG_NOTES (i)
10360 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10361 GEN_INT (second_probability),
10362 REG_NOTES (i));
10364 if (label != NULL_RTX)
10365 emit_label (label);
10369 ix86_expand_setcc (enum rtx_code code, rtx dest)
10371 rtx ret, tmp, tmpreg, equiv;
10372 rtx second_test, bypass_test;
10374 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10375 return 0; /* FAIL */
10377 gcc_assert (GET_MODE (dest) == QImode);
10379 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10380 PUT_MODE (ret, QImode);
10382 tmp = dest;
10383 tmpreg = dest;
10385 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10386 if (bypass_test || second_test)
10388 rtx test = second_test;
10389 int bypass = 0;
10390 rtx tmp2 = gen_reg_rtx (QImode);
10391 if (bypass_test)
10393 gcc_assert (!second_test);
10394 test = bypass_test;
10395 bypass = 1;
10396 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10398 PUT_MODE (test, QImode);
10399 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10401 if (bypass)
10402 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10403 else
10404 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10407 /* Attach a REG_EQUAL note describing the comparison result. */
10408 if (ix86_compare_op0 && ix86_compare_op1)
10410 equiv = simplify_gen_relational (code, QImode,
10411 GET_MODE (ix86_compare_op0),
10412 ix86_compare_op0, ix86_compare_op1);
10413 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10416 return 1; /* DONE */
10419 /* Expand a comparison setting or clearing the carry flag. Return true
10420 when successful, and set *POP to the comparison operation. */
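/* On success *POP is an LTU or GEU test of the flags register, i.e. a
   pure carry-flag test that the sbb/adc based sequences can consume
   directly (see the gcc_assert at the end of this function). */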
10421 static bool
10422 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10424 enum machine_mode mode =
10425 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10427 /* Do not handle DImode (TImode on 64-bit) compares that go through the special
10428 path. We also can't deal with FP compares yet; that could be added. */
10429 if (mode == (TARGET_64BIT ? TImode : DImode))
10430 return false;
10431 if (FLOAT_MODE_P (mode))
10433 rtx second_test = NULL, bypass_test = NULL;
10434 rtx compare_op, compare_seq;
10436 /* Shortcut: following common codes never translate into carry flag compares. */
10437 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10438 || code == ORDERED || code == UNORDERED)
10439 return false;
10441 /* These comparisons require the zero flag; swap the operands so they no longer do. */
10442 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10443 && !TARGET_IEEE_FP)
10445 rtx tmp = op0;
10446 op0 = op1;
10447 op1 = tmp;
10448 code = swap_condition (code);
10451 /* Try to expand the comparison and verify that we end up with a carry flag
10452 based comparison. This fails to be true only when we decide to expand the
10453 comparison using arithmetic, which is not a common scenario. */
10454 start_sequence ();
10455 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10456 &second_test, &bypass_test);
10457 compare_seq = get_insns ();
10458 end_sequence ();
10460 if (second_test || bypass_test)
10461 return false;
10462 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10463 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10464 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10465 else
10466 code = GET_CODE (compare_op);
10467 if (code != LTU && code != GEU)
10468 return false;
10469 emit_insn (compare_seq);
10470 *pop = compare_op;
10471 return true;
10473 if (!INTEGRAL_MODE_P (mode))
10474 return false;
10475 switch (code)
10477 case LTU:
10478 case GEU:
10479 break;
10481 /* Convert a==0 into (unsigned)a<1. */
10482 case EQ:
10483 case NE:
10484 if (op1 != const0_rtx)
10485 return false;
10486 op1 = const1_rtx;
10487 code = (code == EQ ? LTU : GEU);
10488 break;
10490 /* Convert a>b into b<a or a>=b+1. */
10491 case GTU:
10492 case LEU:
10493 if (GET_CODE (op1) == CONST_INT)
10495 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10496 /* Bail out on overflow. We could still swap the operands, but that
10497 would force loading the constant into a register. */
10498 if (op1 == const0_rtx
10499 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10500 return false;
10501 code = (code == GTU ? GEU : LTU);
10503 else
10505 rtx tmp = op1;
10506 op1 = op0;
10507 op0 = tmp;
10508 code = (code == GTU ? LTU : GEU);
10510 break;
10512 /* Convert a>=0 into (unsigned)a<0x80000000. */
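/* A signed value is non-negative exactly when, viewed as unsigned, it is
   below 2^(bits-1), which is why LT/GE against zero turn into unsigned
   compares against the sign-bit constant generated below. */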
10513 case LT:
10514 case GE:
10515 if (mode == DImode || op1 != const0_rtx)
10516 return false;
10517 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10518 code = (code == LT ? GEU : LTU);
10519 break;
10520 case LE:
10521 case GT:
10522 if (mode == DImode || op1 != constm1_rtx)
10523 return false;
10524 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10525 code = (code == LE ? GEU : LTU);
10526 break;
10528 default:
10529 return false;
10531 /* Swapping operands may cause constant to appear as first operand. */
10532 if (!nonimmediate_operand (op0, VOIDmode))
10534 if (no_new_pseudos)
10535 return false;
10536 op0 = force_reg (mode, op0);
10538 ix86_compare_op0 = op0;
10539 ix86_compare_op1 = op1;
10540 *pop = ix86_expand_compare (code, NULL, NULL);
10541 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10542 return true;
10546 ix86_expand_int_movcc (rtx operands[])
10548 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10549 rtx compare_seq, compare_op;
10550 rtx second_test, bypass_test;
10551 enum machine_mode mode = GET_MODE (operands[0]);
10552 bool sign_bit_compare_p = false;
10554 start_sequence ();
10555 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10556 compare_seq = get_insns ();
10557 end_sequence ();
10559 compare_code = GET_CODE (compare_op);
10561 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10562 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10563 sign_bit_compare_p = true;
10565 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10566 HImode insns, we'd be swallowed in word prefix ops. */
10568 if ((mode != HImode || TARGET_FAST_PREFIX)
10569 && (mode != (TARGET_64BIT ? TImode : DImode))
10570 && GET_CODE (operands[2]) == CONST_INT
10571 && GET_CODE (operands[3]) == CONST_INT)
10573 rtx out = operands[0];
10574 HOST_WIDE_INT ct = INTVAL (operands[2]);
10575 HOST_WIDE_INT cf = INTVAL (operands[3]);
10576 HOST_WIDE_INT diff;
10578 diff = ct - cf;
10579 /* Sign bit compares are better done using shifts than by using
10580 sbb. */
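/* The sequences below rely on the fact that sbb reg,reg computes
   reg - reg - CF, leaving reg equal to -1 when the carry flag is set and
   0 when it is clear - exactly the all-ones/all-zeros mask that the
   cmpl/sbbl size comments further down illustrate. */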
10581 if (sign_bit_compare_p
10582 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10583 ix86_compare_op1, &compare_op))
10585 /* Detect overlap between destination and compare sources. */
10586 rtx tmp = out;
10588 if (!sign_bit_compare_p)
10590 bool fpcmp = false;
10592 compare_code = GET_CODE (compare_op);
10594 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10595 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10597 fpcmp = true;
10598 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10601 /* To simplify rest of code, restrict to the GEU case. */
10602 if (compare_code == LTU)
10604 HOST_WIDE_INT tmp = ct;
10605 ct = cf;
10606 cf = tmp;
10607 compare_code = reverse_condition (compare_code);
10608 code = reverse_condition (code);
10610 else
10612 if (fpcmp)
10613 PUT_CODE (compare_op,
10614 reverse_condition_maybe_unordered
10615 (GET_CODE (compare_op)));
10616 else
10617 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10619 diff = ct - cf;
10621 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10622 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10623 tmp = gen_reg_rtx (mode);
10625 if (mode == DImode)
10626 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10627 else
10628 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10630 else
10632 if (code == GT || code == GE)
10633 code = reverse_condition (code);
10634 else
10636 HOST_WIDE_INT tmp = ct;
10637 ct = cf;
10638 cf = tmp;
10639 diff = ct - cf;
10641 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10642 ix86_compare_op1, VOIDmode, 0, -1);
10645 if (diff == 1)
10648 * cmpl op0,op1
10649 * sbbl dest,dest
10650 * [addl dest, ct]
10652 * Size 5 - 8.
10654 if (ct)
10655 tmp = expand_simple_binop (mode, PLUS,
10656 tmp, GEN_INT (ct),
10657 copy_rtx (tmp), 1, OPTAB_DIRECT);
10659 else if (cf == -1)
10662 * cmpl op0,op1
10663 * sbbl dest,dest
10664 * orl $ct, dest
10666 * Size 8.
10668 tmp = expand_simple_binop (mode, IOR,
10669 tmp, GEN_INT (ct),
10670 copy_rtx (tmp), 1, OPTAB_DIRECT);
10672 else if (diff == -1 && ct)
10675 * cmpl op0,op1
10676 * sbbl dest,dest
10677 * notl dest
10678 * [addl dest, cf]
10680 * Size 8 - 11.
10682 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10683 if (cf)
10684 tmp = expand_simple_binop (mode, PLUS,
10685 copy_rtx (tmp), GEN_INT (cf),
10686 copy_rtx (tmp), 1, OPTAB_DIRECT);
10688 else
10691 * cmpl op0,op1
10692 * sbbl dest,dest
10693 * [notl dest]
10694 * andl cf - ct, dest
10695 * [addl dest, ct]
10697 * Size 8 - 11.
10700 if (cf == 0)
10702 cf = ct;
10703 ct = 0;
10704 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10707 tmp = expand_simple_binop (mode, AND,
10708 copy_rtx (tmp),
10709 gen_int_mode (cf - ct, mode),
10710 copy_rtx (tmp), 1, OPTAB_DIRECT);
10711 if (ct)
10712 tmp = expand_simple_binop (mode, PLUS,
10713 copy_rtx (tmp), GEN_INT (ct),
10714 copy_rtx (tmp), 1, OPTAB_DIRECT);
10717 if (!rtx_equal_p (tmp, out))
10718 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10720 return 1; /* DONE */
10723 if (diff < 0)
10725 HOST_WIDE_INT tmp;
10726 tmp = ct, ct = cf, cf = tmp;
10727 diff = -diff;
10728 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10730 /* We may be reversing an unordered compare to a normal compare, which
10731 is not valid in general (we may convert a non-trapping condition
10732 to a trapping one); however, on i386 we currently emit all
10733 comparisons unordered. */
10734 compare_code = reverse_condition_maybe_unordered (compare_code);
10735 code = reverse_condition_maybe_unordered (code);
10737 else
10739 compare_code = reverse_condition (compare_code);
10740 code = reverse_condition (code);
10744 compare_code = UNKNOWN;
10745 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10746 && GET_CODE (ix86_compare_op1) == CONST_INT)
10748 if (ix86_compare_op1 == const0_rtx
10749 && (code == LT || code == GE))
10750 compare_code = code;
10751 else if (ix86_compare_op1 == constm1_rtx)
10753 if (code == LE)
10754 compare_code = LT;
10755 else if (code == GT)
10756 compare_code = GE;
10760 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10761 if (compare_code != UNKNOWN
10762 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10763 && (cf == -1 || ct == -1))
10765 /* If lea code below could be used, only optimize
10766 if it results in a 2 insn sequence. */
10768 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10769 || diff == 3 || diff == 5 || diff == 9)
10770 || (compare_code == LT && ct == -1)
10771 || (compare_code == GE && cf == -1))
10774 * notl op1 (if necessary)
10775 * sarl $31, op1
10776 * orl cf, op1
10778 if (ct != -1)
10780 cf = ct;
10781 ct = -1;
10782 code = reverse_condition (code);
10785 out = emit_store_flag (out, code, ix86_compare_op0,
10786 ix86_compare_op1, VOIDmode, 0, -1);
10788 out = expand_simple_binop (mode, IOR,
10789 out, GEN_INT (cf),
10790 out, 1, OPTAB_DIRECT);
10791 if (out != operands[0])
10792 emit_move_insn (operands[0], out);
10794 return 1; /* DONE */
10799 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10800 || diff == 3 || diff == 5 || diff == 9)
10801 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10802 && (mode != DImode
10803 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10806 * xorl dest,dest
10807 * cmpl op1,op2
10808 * setcc dest
10809 * lea cf(dest*(ct-cf)),dest
10811 * Size 14.
10813 * This also catches the degenerate setcc-only case.
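*
* The allowed values of diff are exactly the multipliers a single lea
* can apply to the setcc result: scales of 1, 2, 4 or 8, optionally with
* the result added in again as the base register (giving 3, 5 and 9).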
10816 rtx tmp;
10817 int nops;
10819 out = emit_store_flag (out, code, ix86_compare_op0,
10820 ix86_compare_op1, VOIDmode, 0, 1);
10822 nops = 0;
10823 /* On x86_64 the lea instruction operates on Pmode, so we need
10824 to get the arithmetic done in the proper mode to match. */
10825 if (diff == 1)
10826 tmp = copy_rtx (out);
10827 else
10829 rtx out1;
10830 out1 = copy_rtx (out);
10831 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10832 nops++;
10833 if (diff & 1)
10835 tmp = gen_rtx_PLUS (mode, tmp, out1);
10836 nops++;
10839 if (cf != 0)
10841 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10842 nops++;
10844 if (!rtx_equal_p (tmp, out))
10846 if (nops == 1)
10847 out = force_operand (tmp, copy_rtx (out));
10848 else
10849 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10851 if (!rtx_equal_p (out, operands[0]))
10852 emit_move_insn (operands[0], copy_rtx (out));
10854 return 1; /* DONE */
10858 * General case: Jumpful:
10859 * xorl dest,dest cmpl op1, op2
10860 * cmpl op1, op2 movl ct, dest
10861 * setcc dest jcc 1f
10862 * decl dest movl cf, dest
10863 * andl (cf-ct),dest 1:
10864 * addl ct,dest
10866 * Size 20. Size 14.
10868 * This is reasonably steep, but branch mispredict costs are
10869 * high on modern cpus, so consider failing only if optimizing
10870 * for space.
10873 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10874 && BRANCH_COST >= 2)
10876 if (cf == 0)
10878 cf = ct;
10879 ct = 0;
10880 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10881 /* We may be reversing an unordered compare to a normal compare,
10882 which is not valid in general (we may convert a non-trapping
10883 condition to a trapping one); however, on i386 we currently
10884 emit all comparisons unordered. */
10885 code = reverse_condition_maybe_unordered (code);
10886 else
10888 code = reverse_condition (code);
10889 if (compare_code != UNKNOWN)
10890 compare_code = reverse_condition (compare_code);
10894 if (compare_code != UNKNOWN)
10896 /* notl op1 (if needed)
10897 sarl $31, op1
10898 andl (cf-ct), op1
10899 addl ct, op1
10901 For x < 0 (resp. x <= -1) there will be no notl,
10902 so if possible swap the constants to get rid of the
10903 complement.
10904 True/false will be -1/0 while code below (store flag
10905 followed by decrement) is 0/-1, so the constants need
10906 to be exchanged once more. */
10908 if (compare_code == GE || !cf)
10910 code = reverse_condition (code);
10911 compare_code = LT;
10913 else
10915 HOST_WIDE_INT tmp = cf;
10916 cf = ct;
10917 ct = tmp;
10920 out = emit_store_flag (out, code, ix86_compare_op0,
10921 ix86_compare_op1, VOIDmode, 0, -1);
10923 else
10925 out = emit_store_flag (out, code, ix86_compare_op0,
10926 ix86_compare_op1, VOIDmode, 0, 1);
10928 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10929 copy_rtx (out), 1, OPTAB_DIRECT);
10932 out = expand_simple_binop (mode, AND, copy_rtx (out),
10933 gen_int_mode (cf - ct, mode),
10934 copy_rtx (out), 1, OPTAB_DIRECT);
10935 if (ct)
10936 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10937 copy_rtx (out), 1, OPTAB_DIRECT);
10938 if (!rtx_equal_p (out, operands[0]))
10939 emit_move_insn (operands[0], copy_rtx (out));
10941 return 1; /* DONE */
10945 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10949 /* Try a few more things with specific constants and a variable. */
10949 optab op;
10950 rtx var, orig_out, out, tmp;
10952 if (BRANCH_COST <= 2)
10953 return 0; /* FAIL */
10955 /* If one of the two operands is an interesting constant, load a 0/-1
10956 constant with the code above and mask the variable in with a logical operation. */
10958 if (GET_CODE (operands[2]) == CONST_INT)
10960 var = operands[3];
10961 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10962 operands[3] = constm1_rtx, op = and_optab;
10963 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10964 operands[3] = const0_rtx, op = ior_optab;
10965 else
10966 return 0; /* FAIL */
10968 else if (GET_CODE (operands[3]) == CONST_INT)
10970 var = operands[2];
10971 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10972 operands[2] = constm1_rtx, op = and_optab;
10973 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10974 operands[2] = const0_rtx, op = ior_optab;
10975 else
10976 return 0; /* FAIL */
10978 else
10979 return 0; /* FAIL */
10981 orig_out = operands[0];
10982 tmp = gen_reg_rtx (mode);
10983 operands[0] = tmp;
10985 /* Recurse to get the constant loaded. */
10986 if (ix86_expand_int_movcc (operands) == 0)
10987 return 0; /* FAIL */
10989 /* Mask in the interesting variable. */
10990 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10991 OPTAB_WIDEN);
10992 if (!rtx_equal_p (out, orig_out))
10993 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10995 return 1; /* DONE */
10999 * For comparison with above,
11001 * movl cf,dest
11002 * movl ct,tmp
11003 * cmpl op1,op2
11004 * cmovcc tmp,dest
11006 * Size 15.
11009 if (! nonimmediate_operand (operands[2], mode))
11010 operands[2] = force_reg (mode, operands[2]);
11011 if (! nonimmediate_operand (operands[3], mode))
11012 operands[3] = force_reg (mode, operands[3]);
11014 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11016 rtx tmp = gen_reg_rtx (mode);
11017 emit_move_insn (tmp, operands[3]);
11018 operands[3] = tmp;
11020 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11022 rtx tmp = gen_reg_rtx (mode);
11023 emit_move_insn (tmp, operands[2]);
11024 operands[2] = tmp;
11027 if (! register_operand (operands[2], VOIDmode)
11028 && (mode == QImode
11029 || ! register_operand (operands[3], VOIDmode)))
11030 operands[2] = force_reg (mode, operands[2]);
11032 if (mode == QImode
11033 && ! register_operand (operands[3], VOIDmode))
11034 operands[3] = force_reg (mode, operands[3]);
11036 emit_insn (compare_seq);
11037 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11038 gen_rtx_IF_THEN_ELSE (mode,
11039 compare_op, operands[2],
11040 operands[3])));
11041 if (bypass_test)
11042 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11043 gen_rtx_IF_THEN_ELSE (mode,
11044 bypass_test,
11045 copy_rtx (operands[3]),
11046 copy_rtx (operands[0]))));
11047 if (second_test)
11048 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11049 gen_rtx_IF_THEN_ELSE (mode,
11050 second_test,
11051 copy_rtx (operands[2]),
11052 copy_rtx (operands[0]))));
11054 return 1; /* DONE */
11057 /* Swap, force into registers, or otherwise massage the two operands
11058 to an sse comparison with a mask result. Thus we differ a bit from
11059 ix86_prepare_fp_compare_args which expects to produce a flags result.
11061 The DEST operand exists to help determine whether to commute commutative
11062 operators. The POP0/POP1 operands are updated in place. The new
11063 comparison code is returned, or UNKNOWN if not implementable. */
11065 static enum rtx_code
11066 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11067 rtx *pop0, rtx *pop1)
11069 rtx tmp;
11071 switch (code)
11073 case LTGT:
11074 case UNEQ:
11075 /* We have no LTGT as an operator. We could implement it with
11076 NE & ORDERED, but this requires an extra temporary. It's
11077 not clear that it's worth it. */
11078 return UNKNOWN;
11080 case LT:
11081 case LE:
11082 case UNGT:
11083 case UNGE:
11084 /* These are supported directly. */
11085 break;
11087 case EQ:
11088 case NE:
11089 case UNORDERED:
11090 case ORDERED:
11091 /* For commutative operators, try to canonicalize the destination
11092 operand to be first in the comparison - this helps reload to
11093 avoid extra moves. */
11094 if (!dest || !rtx_equal_p (dest, *pop1))
11095 break;
11096 /* FALLTHRU */
11098 case GE:
11099 case GT:
11100 case UNLE:
11101 case UNLT:
11102 /* These are not supported directly. Swap the comparison operands
11103 to transform into something that is supported. */
11104 tmp = *pop0;
11105 *pop0 = *pop1;
11106 *pop1 = tmp;
11107 code = swap_condition (code);
11108 break;
11110 default:
11111 gcc_unreachable ();
11114 return code;
11117 /* Detect conditional moves that exactly match min/max operational
11118 semantics. Note that this is IEEE safe, as long as we don't
11119 interchange the operands.
11121 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11122 and TRUE if the operation is successful and instructions are emitted. */
11124 static bool
11125 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11126 rtx cmp_op1, rtx if_true, rtx if_false)
11128 enum machine_mode mode;
11129 bool is_min;
11130 rtx tmp;
11132 if (code == LT)
11134 else if (code == UNGE)
11136 tmp = if_true;
11137 if_true = if_false;
11138 if_false = tmp;
11140 else
11141 return false;
11143 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11144 is_min = true;
11145 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11146 is_min = false;
11147 else
11148 return false;
11150 mode = GET_MODE (dest);
11152 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11153 but MODE may be a vector mode and thus not appropriate. */
11154 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11156 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11157 rtvec v;
11159 if_true = force_reg (mode, if_true);
11160 v = gen_rtvec (2, if_true, if_false);
11161 tmp = gen_rtx_UNSPEC (mode, v, u);
11163 else
11165 code = is_min ? SMIN : SMAX;
11166 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11169 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11170 return true;
11173 /* Expand an sse vector comparison. Return the register with the result. */
11175 static rtx
11176 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11177 rtx op_true, rtx op_false)
11179 enum machine_mode mode = GET_MODE (dest);
11180 rtx x;
11182 cmp_op0 = force_reg (mode, cmp_op0);
11183 if (!nonimmediate_operand (cmp_op1, mode))
11184 cmp_op1 = force_reg (mode, cmp_op1);
11186 if (optimize
11187 || reg_overlap_mentioned_p (dest, op_true)
11188 || reg_overlap_mentioned_p (dest, op_false))
11189 dest = gen_reg_rtx (mode);
11191 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11192 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11194 return dest;
11197 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11198 operations. This is used for both scalar and vector conditional moves. */
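/* CMP is an all-ones/all-zeros mask (per element for vectors), so the
   general case computes DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE); the
   special cases below drop the unneeded half when one arm is zero. */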
11200 static void
11201 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11203 enum machine_mode mode = GET_MODE (dest);
11204 rtx t2, t3, x;
11206 if (op_false == CONST0_RTX (mode))
11208 op_true = force_reg (mode, op_true);
11209 x = gen_rtx_AND (mode, cmp, op_true);
11210 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11212 else if (op_true == CONST0_RTX (mode))
11214 op_false = force_reg (mode, op_false);
11215 x = gen_rtx_NOT (mode, cmp);
11216 x = gen_rtx_AND (mode, x, op_false);
11217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11219 else
11221 op_true = force_reg (mode, op_true);
11222 op_false = force_reg (mode, op_false);
11224 t2 = gen_reg_rtx (mode);
11225 if (optimize)
11226 t3 = gen_reg_rtx (mode);
11227 else
11228 t3 = dest;
11230 x = gen_rtx_AND (mode, op_true, cmp);
11231 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11233 x = gen_rtx_NOT (mode, cmp);
11234 x = gen_rtx_AND (mode, x, op_false);
11235 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11237 x = gen_rtx_IOR (mode, t3, t2);
11238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11242 /* Expand a floating-point conditional move. Return true if successful. */
11245 ix86_expand_fp_movcc (rtx operands[])
11247 enum machine_mode mode = GET_MODE (operands[0]);
11248 enum rtx_code code = GET_CODE (operands[1]);
11249 rtx tmp, compare_op, second_test, bypass_test;
11251 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11253 enum machine_mode cmode;
11255 /* Since we have no cmove for sse registers, don't force bad register
11256 allocation just to gain access to it. Deny movcc when the
11257 comparison mode doesn't match the move mode. */
11258 cmode = GET_MODE (ix86_compare_op0);
11259 if (cmode == VOIDmode)
11260 cmode = GET_MODE (ix86_compare_op1);
11261 if (cmode != mode)
11262 return 0;
11264 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11265 &ix86_compare_op0,
11266 &ix86_compare_op1);
11267 if (code == UNKNOWN)
11268 return 0;
11270 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11271 ix86_compare_op1, operands[2],
11272 operands[3]))
11273 return 1;
11275 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11276 ix86_compare_op1, operands[2], operands[3]);
11277 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11278 return 1;
11281 /* The floating point conditional move instructions don't directly
11282 support conditions resulting from a signed integer comparison. */
11284 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11289 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11291 gcc_assert (!second_test && !bypass_test);
11292 tmp = gen_reg_rtx (QImode);
11293 ix86_expand_setcc (code, tmp);
11294 code = NE;
11295 ix86_compare_op0 = tmp;
11296 ix86_compare_op1 = const0_rtx;
11297 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11299 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11301 tmp = gen_reg_rtx (mode);
11302 emit_move_insn (tmp, operands[3]);
11303 operands[3] = tmp;
11305 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11307 tmp = gen_reg_rtx (mode);
11308 emit_move_insn (tmp, operands[2]);
11309 operands[2] = tmp;
11312 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11313 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11314 operands[2], operands[3])));
11315 if (bypass_test)
11316 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11317 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11318 operands[3], operands[0])));
11319 if (second_test)
11320 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11321 gen_rtx_IF_THEN_ELSE (mode, second_test,
11322 operands[2], operands[0])));
11324 return 1;
11327 /* Expand a floating-point vector conditional move; a vcond operation
11328 rather than a movcc operation. */
11330 bool
11331 ix86_expand_fp_vcond (rtx operands[])
11333 enum rtx_code code = GET_CODE (operands[3]);
11334 rtx cmp;
11336 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11337 &operands[4], &operands[5]);
11338 if (code == UNKNOWN)
11339 return false;
11341 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11342 operands[5], operands[1], operands[2]))
11343 return true;
11345 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11346 operands[1], operands[2]);
11347 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11348 return true;
11351 /* Expand a signed integral vector conditional move. */
11353 bool
11354 ix86_expand_int_vcond (rtx operands[])
11356 enum machine_mode mode = GET_MODE (operands[0]);
11357 enum rtx_code code = GET_CODE (operands[3]);
11358 bool negate = false;
11359 rtx x, cop0, cop1;
11361 cop0 = operands[4];
11362 cop1 = operands[5];
11364 /* Canonicalize the comparison to EQ, GT, GTU. */
11365 switch (code)
11367 case EQ:
11368 case GT:
11369 case GTU:
11370 break;
11372 case NE:
11373 case LE:
11374 case LEU:
11375 code = reverse_condition (code);
11376 negate = true;
11377 break;
11379 case GE:
11380 case GEU:
11381 code = reverse_condition (code);
11382 negate = true;
11383 /* FALLTHRU */
11385 case LT:
11386 case LTU:
11387 code = swap_condition (code);
11388 x = cop0, cop0 = cop1, cop1 = x;
11389 break;
11391 default:
11392 gcc_unreachable ();
11395 /* Unsigned parallel compare is not supported by the hardware. Play some
11396 tricks to turn this into a signed comparison against 0. */
11397 if (code == GTU)
11399 switch (mode)
11401 case V4SImode:
11403 rtx t1, t2, mask;
11405 /* Perform a parallel modulo subtraction. */
11406 t1 = gen_reg_rtx (mode);
11407 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11409 /* Extract the original sign bit of op0. */
11410 mask = GEN_INT (-0x80000000);
11411 mask = gen_rtx_CONST_VECTOR (mode,
11412 gen_rtvec (4, mask, mask, mask, mask));
11413 mask = force_reg (mode, mask);
11414 t2 = gen_reg_rtx (mode);
11415 emit_insn (gen_andv4si3 (t2, cop0, mask));
11417 /* XOR it back into the result of the subtraction. This results
11418 in the sign bit set iff we saw unsigned underflow. */
11419 x = gen_reg_rtx (mode);
11420 emit_insn (gen_xorv4si3 (x, t1, t2));
11422 code = GT;
11424 break;
11426 case V16QImode:
11427 case V8HImode:
11428 /* Perform a parallel unsigned saturating subtraction. */
11429 x = gen_reg_rtx (mode);
11430 emit_insn (gen_rtx_SET (VOIDmode, x,
11431 gen_rtx_US_MINUS (mode, cop0, cop1)));
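/* The saturating difference is zero exactly when cop0 <= cop1 (unsigned),
   so comparing it for equality with zero and flipping NEGATE implements
   the GTU test. */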
11433 code = EQ;
11434 negate = !negate;
11435 break;
11437 default:
11438 gcc_unreachable ();
11441 cop0 = x;
11442 cop1 = CONST0_RTX (mode);
11445 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11446 operands[1+negate], operands[2-negate]);
11448 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11449 operands[2-negate]);
11450 return true;
11453 /* Expand conditional increment or decrement using adc/sbb instructions.
11454 The default case using setcc followed by the conditional move can be
11455 done by generic code. */
11457 ix86_expand_int_addcc (rtx operands[])
11459 enum rtx_code code = GET_CODE (operands[1]);
11460 rtx compare_op;
11461 rtx val = const0_rtx;
11462 bool fpcmp = false;
11463 enum machine_mode mode = GET_MODE (operands[0]);
11465 if (operands[3] != const1_rtx
11466 && operands[3] != constm1_rtx)
11467 return 0;
11468 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11469 ix86_compare_op1, &compare_op))
11470 return 0;
11471 code = GET_CODE (compare_op);
11473 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11474 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11476 fpcmp = true;
11477 code = ix86_fp_compare_code_to_integer (code);
11480 if (code != LTU)
11482 val = constm1_rtx;
11483 if (fpcmp)
11484 PUT_CODE (compare_op,
11485 reverse_condition_maybe_unordered
11486 (GET_CODE (compare_op)));
11487 else
11488 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11490 PUT_MODE (compare_op, mode);
11492 /* Construct either adc or sbb insn. */
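/* The *_carry patterns add or subtract (VAL + carry flag) to operands[2];
   with VAL of 0 or -1 and the carry-flag test prepared above, DEST ends
   up adjusted by 0 or +/-1 exactly when the condition holds, with no
   branch. */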
11493 if ((code == LTU) == (operands[3] == constm1_rtx))
11495 switch (GET_MODE (operands[0]))
11497 case QImode:
11498 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11499 break;
11500 case HImode:
11501 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11502 break;
11503 case SImode:
11504 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11505 break;
11506 case DImode:
11507 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11508 break;
11509 default:
11510 gcc_unreachable ();
11513 else
11515 switch (GET_MODE (operands[0]))
11517 case QImode:
11518 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11519 break;
11520 case HImode:
11521 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11522 break;
11523 case SImode:
11524 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11525 break;
11526 case DImode:
11527 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11528 break;
11529 default:
11530 gcc_unreachable ();
11533 return 1; /* DONE */
11537 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11538 works for floating point parameters and non-offsettable memories.
11539 For pushes, it returns just stack offsets; the values will be saved
11540 in the right order. At most three parts are generated. */
11542 static int
11543 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11545 int size;
11547 if (!TARGET_64BIT)
11548 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11549 else
11550 size = (GET_MODE_SIZE (mode) + 4) / 8;
11552 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11553 gcc_assert (size >= 2 && size <= 3);
11555 /* Optimize constant pool reference to immediates. This is used by fp
11556 moves that force all constants to memory to allow combining. */
11557 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11559 rtx tmp = maybe_get_pool_constant (operand);
11560 if (tmp)
11561 operand = tmp;
11564 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11566 /* The only non-offsettable memories we handle are pushes. */
11567 int ok = push_operand (operand, VOIDmode);
11569 gcc_assert (ok);
11571 operand = copy_rtx (operand);
11572 PUT_MODE (operand, Pmode);
11573 parts[0] = parts[1] = parts[2] = operand;
11574 return size;
11577 if (GET_CODE (operand) == CONST_VECTOR)
11579 enum machine_mode imode = int_mode_for_mode (mode);
11580 /* Caution: if we looked through a constant pool memory above,
11581 the operand may actually have a different mode now. That's
11582 ok, since we want to pun this all the way back to an integer. */
11583 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11584 gcc_assert (operand != NULL);
11585 mode = imode;
11588 if (!TARGET_64BIT)
11590 if (mode == DImode)
11591 split_di (&operand, 1, &parts[0], &parts[1]);
11592 else
11594 if (REG_P (operand))
11596 gcc_assert (reload_completed);
11597 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11598 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11599 if (size == 3)
11600 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11602 else if (offsettable_memref_p (operand))
11604 operand = adjust_address (operand, SImode, 0);
11605 parts[0] = operand;
11606 parts[1] = adjust_address (operand, SImode, 4);
11607 if (size == 3)
11608 parts[2] = adjust_address (operand, SImode, 8);
11610 else if (GET_CODE (operand) == CONST_DOUBLE)
11612 REAL_VALUE_TYPE r;
11613 long l[4];
11615 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11616 switch (mode)
11618 case XFmode:
11619 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11620 parts[2] = gen_int_mode (l[2], SImode);
11621 break;
11622 case DFmode:
11623 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11624 break;
11625 default:
11626 gcc_unreachable ();
11628 parts[1] = gen_int_mode (l[1], SImode);
11629 parts[0] = gen_int_mode (l[0], SImode);
11631 else
11632 gcc_unreachable ();
11635 else
11637 if (mode == TImode)
11638 split_ti (&operand, 1, &parts[0], &parts[1]);
11639 if (mode == XFmode || mode == TFmode)
11641 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11642 if (REG_P (operand))
11644 gcc_assert (reload_completed);
11645 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11646 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11648 else if (offsettable_memref_p (operand))
11650 operand = adjust_address (operand, DImode, 0);
11651 parts[0] = operand;
11652 parts[1] = adjust_address (operand, upper_mode, 8);
11654 else if (GET_CODE (operand) == CONST_DOUBLE)
11656 REAL_VALUE_TYPE r;
11657 long l[4];
11659 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11660 real_to_target (l, &r, mode);
11662 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11663 if (HOST_BITS_PER_WIDE_INT >= 64)
11664 parts[0]
11665 = gen_int_mode
11666 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11667 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11668 DImode);
11669 else
11670 parts[0] = immed_double_const (l[0], l[1], DImode);
11672 if (upper_mode == SImode)
11673 parts[1] = gen_int_mode (l[2], SImode);
11674 else if (HOST_BITS_PER_WIDE_INT >= 64)
11675 parts[1]
11676 = gen_int_mode
11677 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11678 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11679 DImode);
11680 else
11681 parts[1] = immed_double_const (l[2], l[3], DImode);
11683 else
11684 gcc_unreachable ();
11688 return size;
11691 /* Emit insns to perform a move or push of DI, DF, and XF values.
11692 Return false when normal moves are needed; true when all required
11693 insns have been emitted. Operands 2-4 contain the input values
11694 in the correct order; operands 5-7 contain the output values. */
11696 void
11697 ix86_split_long_move (rtx operands[])
11699 rtx part[2][3];
11700 int nparts;
11701 int push = 0;
11702 int collisions = 0;
11703 enum machine_mode mode = GET_MODE (operands[0]);
11705 /* The DFmode expanders may ask us to move a double.
11706 For a 64bit target this is a single move. By hiding the fact
11707 here we simplify the i386.md splitters. */
11708 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11710 /* Optimize constant pool reference to immediates. This is used by
11711 fp moves that force all constants to memory to allow combining. */
11713 if (GET_CODE (operands[1]) == MEM
11714 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11715 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11716 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11717 if (push_operand (operands[0], VOIDmode))
11719 operands[0] = copy_rtx (operands[0]);
11720 PUT_MODE (operands[0], Pmode);
11722 else
11723 operands[0] = gen_lowpart (DImode, operands[0]);
11724 operands[1] = gen_lowpart (DImode, operands[1]);
11725 emit_move_insn (operands[0], operands[1]);
11726 return;
11729 /* The only non-offsettable memory we handle is push. */
11730 if (push_operand (operands[0], VOIDmode))
11731 push = 1;
11732 else
11733 gcc_assert (GET_CODE (operands[0]) != MEM
11734 || offsettable_memref_p (operands[0]));
11736 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11737 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11739 /* When emitting a push, take care of source operands on the stack. */
11740 if (push && GET_CODE (operands[1]) == MEM
11741 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11743 if (nparts == 3)
11744 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11745 XEXP (part[1][2], 0));
11746 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11747 XEXP (part[1][1], 0));
11750 /* We need to do copy in the right order in case an address register
11751 of the source overlaps the destination. */
11752 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11754 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11755 collisions++;
11756 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11757 collisions++;
11758 if (nparts == 3
11759 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11760 collisions++;
11762 /* Collision in the middle part can be handled by reordering. */
11763 if (collisions == 1 && nparts == 3
11764 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11766 rtx tmp;
11767 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11768 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11771 /* If there are more collisions, we can't handle it by reordering.
11772 Do an lea to the last part and use only one colliding move. */
11773 else if (collisions > 1)
11775 rtx base;
11777 collisions = 1;
11779 base = part[0][nparts - 1];
11781 /* Handle the case when the last part isn't valid for lea.
11782 Happens in 64-bit mode storing the 12-byte XFmode. */
11783 if (GET_MODE (base) != Pmode)
11784 base = gen_rtx_REG (Pmode, REGNO (base));
11786 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11787 part[1][0] = replace_equiv_address (part[1][0], base);
11788 part[1][1] = replace_equiv_address (part[1][1],
11789 plus_constant (base, UNITS_PER_WORD));
11790 if (nparts == 3)
11791 part[1][2] = replace_equiv_address (part[1][2],
11792 plus_constant (base, 8));
11796 if (push)
11798 if (!TARGET_64BIT)
11800 if (nparts == 3)
11802 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11803 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11804 emit_move_insn (part[0][2], part[1][2]);
11807 else
11809 /* In 64bit mode we don't have a 32bit push available. In case this is
11810 a register, it is OK - we will just use the larger counterpart. We also
11811 retype the memory - this comes from an attempt to avoid a REX prefix on
11812 moving the second half of a TFmode value. */
11813 if (GET_MODE (part[1][1]) == SImode)
11815 switch (GET_CODE (part[1][1]))
11817 case MEM:
11818 part[1][1] = adjust_address (part[1][1], DImode, 0);
11819 break;
11821 case REG:
11822 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11823 break;
11825 default:
11826 gcc_unreachable ();
11829 if (GET_MODE (part[1][0]) == SImode)
11830 part[1][0] = part[1][1];
11833 emit_move_insn (part[0][1], part[1][1]);
11834 emit_move_insn (part[0][0], part[1][0]);
11835 return;
11838 /* Choose the correct order so we do not overwrite the source before it is copied. */
11839 if ((REG_P (part[0][0])
11840 && REG_P (part[1][1])
11841 && (REGNO (part[0][0]) == REGNO (part[1][1])
11842 || (nparts == 3
11843 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11844 || (collisions > 0
11845 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11847 if (nparts == 3)
11849 operands[2] = part[0][2];
11850 operands[3] = part[0][1];
11851 operands[4] = part[0][0];
11852 operands[5] = part[1][2];
11853 operands[6] = part[1][1];
11854 operands[7] = part[1][0];
11856 else
11858 operands[2] = part[0][1];
11859 operands[3] = part[0][0];
11860 operands[5] = part[1][1];
11861 operands[6] = part[1][0];
11864 else
11866 if (nparts == 3)
11868 operands[2] = part[0][0];
11869 operands[3] = part[0][1];
11870 operands[4] = part[0][2];
11871 operands[5] = part[1][0];
11872 operands[6] = part[1][1];
11873 operands[7] = part[1][2];
11875 else
11877 operands[2] = part[0][0];
11878 operands[3] = part[0][1];
11879 operands[5] = part[1][0];
11880 operands[6] = part[1][1];
11884 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11885 if (optimize_size)
11887 if (GET_CODE (operands[5]) == CONST_INT
11888 && operands[5] != const0_rtx
11889 && REG_P (operands[2]))
11891 if (GET_CODE (operands[6]) == CONST_INT
11892 && INTVAL (operands[6]) == INTVAL (operands[5]))
11893 operands[6] = operands[2];
11895 if (nparts == 3
11896 && GET_CODE (operands[7]) == CONST_INT
11897 && INTVAL (operands[7]) == INTVAL (operands[5]))
11898 operands[7] = operands[2];
11901 if (nparts == 3
11902 && GET_CODE (operands[6]) == CONST_INT
11903 && operands[6] != const0_rtx
11904 && REG_P (operands[3])
11905 && GET_CODE (operands[7]) == CONST_INT
11906 && INTVAL (operands[7]) == INTVAL (operands[6]))
11907 operands[7] = operands[3];
11910 emit_move_insn (operands[2], operands[5]);
11911 emit_move_insn (operands[3], operands[6]);
11912 if (nparts == 3)
11913 emit_move_insn (operands[4], operands[7]);
11915 return;
11918 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11919 left shift by a constant, either using a single shift or
11920 a sequence of add instructions. */
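/* Rough source-level picture of the trade-off made below (illustrative,
   not measured data): when not optimizing for size and COUNT additions
   are no more expensive than one constant shift per the active cost
   table, "x <<= 2" is emitted as two self-additions,

       addl %eax, %eax
       addl %eax, %eax

   instead of "sall $2, %eax"; otherwise a single shift is used.  */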
11922 static void
11923 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11925 if (count == 1)
11927 emit_insn ((mode == DImode
11928 ? gen_addsi3
11929 : gen_adddi3) (operand, operand, operand));
11931 else if (!optimize_size
11932 && count * ix86_cost->add <= ix86_cost->shift_const)
11934 int i;
11935 for (i=0; i<count; i++)
11937 emit_insn ((mode == DImode
11938 ? gen_addsi3
11939 : gen_adddi3) (operand, operand, operand));
11942 else
11943 emit_insn ((mode == DImode
11944 ? gen_ashlsi3
11945 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11948 void
11949 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11951 rtx low[2], high[2];
11952 int count;
11953 const int single_width = mode == DImode ? 32 : 64;
11955 if (GET_CODE (operands[2]) == CONST_INT)
11957 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11958 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11960 if (count >= single_width)
11962 emit_move_insn (high[0], low[1]);
11963 emit_move_insn (low[0], const0_rtx);
11965 if (count > single_width)
11966 ix86_expand_ashl_const (high[0], count - single_width, mode);
11968 else
11970 if (!rtx_equal_p (operands[0], operands[1]))
11971 emit_move_insn (operands[0], operands[1]);
11972 emit_insn ((mode == DImode
11973 ? gen_x86_shld_1
11974 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11975 ix86_expand_ashl_const (low[0], count, mode);
11977 return;
11980 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11982 if (operands[1] == const1_rtx)
11984 /* Assuming we've chosen QImode-capable registers, 1 << N
11985 can be done with two 32/64-bit shifts, no branches, no cmoves. */
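/* Approximate DImode-on-32-bit sequence produced by this branch
   (illustrative; register names are arbitrary):

       xorl  %eax, %eax        ; low  = 0
       xorl  %edx, %edx        ; high = 0
       testb $32, %cl          ; is the shift count >= 32?
       sete  %al               ; low  = (count & 32) == 0
       setne %dl               ; high = (count & 32) != 0
       sall  %cl, %eax         ; hardware masks the count mod 32
       sall  %cl, %edx                                             */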
11986 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11988 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11990 ix86_expand_clear (low[0]);
11991 ix86_expand_clear (high[0]);
11992 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11994 d = gen_lowpart (QImode, low[0]);
11995 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11996 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11997 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11999 d = gen_lowpart (QImode, high[0]);
12000 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12001 s = gen_rtx_NE (QImode, flags, const0_rtx);
12002 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12005 /* Otherwise, we can get the same results by manually performing
12006 a bit extract operation on bit 5/6, and then performing the two
12007 shifts. The two methods of getting 0/1 into low/high are exactly
12008 the same size. Avoiding the shift in the bit extract case helps
12009 pentium4 a bit; no one else seems to care much either way. */
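/* The bit-extract variant below computes, in C terms,
       high = (count >> 5) & 1;      (bit 6 in the TImode case)
       low  = high ^ 1;
   before both halves are shifted by the hardware-masked count.
   Illustrative only; the code first zero-extends the count when partial
   register stalls are a concern and we are not optimizing for size.  */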
12010 else
12012 rtx x;
12014 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12015 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12016 else
12017 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12018 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12020 emit_insn ((mode == DImode
12021 ? gen_lshrsi3
12022 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12023 emit_insn ((mode == DImode
12024 ? gen_andsi3
12025 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12026 emit_move_insn (low[0], high[0]);
12027 emit_insn ((mode == DImode
12028 ? gen_xorsi3
12029 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12032 emit_insn ((mode == DImode
12033 ? gen_ashlsi3
12034 : gen_ashldi3) (low[0], low[0], operands[2]));
12035 emit_insn ((mode == DImode
12036 ? gen_ashlsi3
12037 : gen_ashldi3) (high[0], high[0], operands[2]));
12038 return;
12041 if (operands[1] == constm1_rtx)
12043 /* For -1 << N, we can avoid the shld instruction, because we
12044 know that we're shifting 0...31/63 ones into a -1. */
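/* Worked example (illustrative): for a 32-bit "-1 << 10" the high word
   stays 0xffffffff no matter what shld would shift in, because only ones
   are shifted out of the low word; so it is enough to load both halves
   with -1 and shift just the low half, with the usual count >= 32
   adjustment still emitted further below.  */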
12045 emit_move_insn (low[0], constm1_rtx);
12046 if (optimize_size)
12047 emit_move_insn (high[0], low[0]);
12048 else
12049 emit_move_insn (high[0], constm1_rtx);
12051 else
12053 if (!rtx_equal_p (operands[0], operands[1]))
12054 emit_move_insn (operands[0], operands[1]);
12056 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12057 emit_insn ((mode == DImode
12058 ? gen_x86_shld_1
12059 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12062 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12064 if (TARGET_CMOVE && scratch)
12066 ix86_expand_clear (scratch);
12067 emit_insn ((mode == DImode
12068 ? gen_x86_shift_adj_1
12069 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12071 else
12072 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12075 void
12076 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12078 rtx low[2], high[2];
12079 int count;
12080 const int single_width = mode == DImode ? 32 : 64;
12082 if (GET_CODE (operands[2]) == CONST_INT)
12084 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12085 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12087 if (count == single_width * 2 - 1)
12089 emit_move_insn (high[0], high[1]);
12090 emit_insn ((mode == DImode
12091 ? gen_ashrsi3
12092 : gen_ashrdi3) (high[0], high[0],
12093 GEN_INT (single_width - 1)));
12094 emit_move_insn (low[0], high[0]);
12097 else if (count >= single_width)
12099 emit_move_insn (low[0], high[1]);
12100 emit_move_insn (high[0], low[0]);
12101 emit_insn ((mode == DImode
12102 ? gen_ashrsi3
12103 : gen_ashrdi3) (high[0], high[0],
12104 GEN_INT (single_width - 1)));
12105 if (count > single_width)
12106 emit_insn ((mode == DImode
12107 ? gen_ashrsi3
12108 : gen_ashrdi3) (low[0], low[0],
12109 GEN_INT (count - single_width)));
12111 else
12113 if (!rtx_equal_p (operands[0], operands[1]))
12114 emit_move_insn (operands[0], operands[1]);
12115 emit_insn ((mode == DImode
12116 ? gen_x86_shrd_1
12117 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12118 emit_insn ((mode == DImode
12119 ? gen_ashrsi3
12120 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12123 else
12125 if (!rtx_equal_p (operands[0], operands[1]))
12126 emit_move_insn (operands[0], operands[1]);
12128 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12130 emit_insn ((mode == DImode
12131 ? gen_x86_shrd_1
12132 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12133 emit_insn ((mode == DImode
12134 ? gen_ashrsi3
12135 : gen_ashrdi3) (high[0], high[0], operands[2]));
12137 if (TARGET_CMOVE && scratch)
12139 emit_move_insn (scratch, high[0]);
12140 emit_insn ((mode == DImode
12141 ? gen_ashrsi3
12142 : gen_ashrdi3) (scratch, scratch,
12143 GEN_INT (single_width - 1)));
12144 emit_insn ((mode == DImode
12145 ? gen_x86_shift_adj_1
12146 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12147 scratch));
12149 else
12150 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12154 void
12155 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12157 rtx low[2], high[2];
12158 int count;
12159 const int single_width = mode == DImode ? 32 : 64;
12161 if (GET_CODE (operands[2]) == CONST_INT)
12163 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12164 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12166 if (count >= single_width)
12168 emit_move_insn (low[0], high[1]);
12169 ix86_expand_clear (high[0]);
12171 if (count > single_width)
12172 emit_insn ((mode == DImode
12173 ? gen_lshrsi3
12174 : gen_lshrdi3) (low[0], low[0],
12175 GEN_INT (count - single_width)));
12177 else
12179 if (!rtx_equal_p (operands[0], operands[1]))
12180 emit_move_insn (operands[0], operands[1]);
12181 emit_insn ((mode == DImode
12182 ? gen_x86_shrd_1
12183 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12184 emit_insn ((mode == DImode
12185 ? gen_lshrsi3
12186 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12189 else
12191 if (!rtx_equal_p (operands[0], operands[1]))
12192 emit_move_insn (operands[0], operands[1]);
12194 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12196 emit_insn ((mode == DImode
12197 ? gen_x86_shrd_1
12198 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12199 emit_insn ((mode == DImode
12200 ? gen_lshrsi3
12201 : gen_lshrdi3) (high[0], high[0], operands[2]));
12203 /* Heh. By reversing the arguments, we can reuse this pattern. */
12204 if (TARGET_CMOVE && scratch)
12206 ix86_expand_clear (scratch);
12207 emit_insn ((mode == DImode
12208 ? gen_x86_shift_adj_1
12209 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12210 scratch));
12212 else
12213 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12217 /* Helper function for the string operations below. Test whether VARIABLE
12218 is aligned to VALUE bytes; if so, jump to the returned label. */
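/* Typical use (illustrative): ix86_expand_aligntest (destreg, 2) emits
   roughly "testl $2, %edi ; je .Lskip" and returns the .Lskip label, so
   the fix-up code the caller emits next runs only when that alignment
   bit is set, and the caller then emits the label itself.  */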
12219 static rtx
12220 ix86_expand_aligntest (rtx variable, int value)
12222 rtx label = gen_label_rtx ();
12223 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12224 if (GET_MODE (variable) == DImode)
12225 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12226 else
12227 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12228 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12229 1, label);
12230 return label;
12233 /* Subtract VALUE from COUNTREG. */
12234 static void
12235 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12237 if (GET_MODE (countreg) == DImode)
12238 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12239 else
12240 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12243 /* Zero extend possibly SImode EXP to Pmode register. */
12245 ix86_zero_extend_to_Pmode (rtx exp)
12247 rtx r;
12248 if (GET_MODE (exp) == VOIDmode)
12249 return force_reg (Pmode, exp);
12250 if (GET_MODE (exp) == Pmode)
12251 return copy_to_mode_reg (Pmode, exp);
12252 r = gen_reg_rtx (Pmode);
12253 emit_insn (gen_zero_extendsidi2 (r, exp));
12254 return r;
12257 /* Expand string move (memcpy) operation. Use i386 string operations when
12258 profitable. expand_clrmem contains similar code. */
12260 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
12262 rtx srcreg, destreg, countreg, srcexp, destexp;
12263 enum machine_mode counter_mode;
12264 HOST_WIDE_INT align = 0;
12265 unsigned HOST_WIDE_INT count = 0;
12267 if (GET_CODE (align_exp) == CONST_INT)
12268 align = INTVAL (align_exp);
12270 /* Can't use any of this if the user has appropriated esi or edi. */
12271 if (global_regs[4] || global_regs[5])
12272 return 0;
12274 /* This simple hack avoids all inlining code and simplifies code below. */
12275 if (!TARGET_ALIGN_STRINGOPS)
12276 align = 64;
12278 if (GET_CODE (count_exp) == CONST_INT)
12280 count = INTVAL (count_exp);
12281 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12282 return 0;
12285 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
12286 for 64 bits use SImode when possible, otherwise DImode.
12287 Set count to the number of bytes copied when known at compile time. */
12288 if (!TARGET_64BIT
12289 || GET_MODE (count_exp) == SImode
12290 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12291 counter_mode = SImode;
12292 else
12293 counter_mode = DImode;
12295 gcc_assert (counter_mode == SImode || counter_mode == DImode);
12297 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12298 if (destreg != XEXP (dst, 0))
12299 dst = replace_equiv_address_nv (dst, destreg);
12300 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
12301 if (srcreg != XEXP (src, 0))
12302 src = replace_equiv_address_nv (src, srcreg);
12304 /* When optimizing for size, emit a simple rep ; movsb instruction for
12305 counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
12306 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
12307 The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
12308 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
12309 but we don't know whether the upper 24 (resp. 56) bits of %ecx are
12310 known to be zero or not. The rep; movsb sequence causes higher
12311 register pressure though, so take that into account. */
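/* Size arithmetic example for the test below (rough estimate, not taken
   from the sources): for count = 9 the unrolled form costs about
   9/4 + (9 & 3) = 3 bytes, beating the 4- or 7-byte rep; movsb setup,
   so control falls through to the unrolled path; for count = 30 the
   estimate is 7 + 2 = 9 bytes and count exceeds 5*4, so the rep; movsb
   branch below is used instead.  */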
12313 if ((!optimize || optimize_size)
12314 && (count == 0
12315 || ((count & 0x03)
12316 && (!optimize_size
12317 || count > 5 * 4
12318 || (count & 3) + count / 4 > 6))))
12320 emit_insn (gen_cld ());
12321 countreg = ix86_zero_extend_to_Pmode (count_exp);
12322 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12323 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12324 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12325 destexp, srcexp));
12328 /* For constant aligned (or small unaligned) copies use rep movsl
12329 followed by code copying the rest. For PentiumPro ensure 8 byte
12330 alignment to allow rep movsl acceleration. */
12332 else if (count != 0
12333 && (align >= 8
12334 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12335 || optimize_size || count < (unsigned int) 64))
12337 unsigned HOST_WIDE_INT offset = 0;
12338 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12339 rtx srcmem, dstmem;
12341 emit_insn (gen_cld ());
12342 if (count & ~(size - 1))
12344 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12346 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12348 while (offset < (count & ~(size - 1)))
12350 srcmem = adjust_automodify_address_nv (src, movs_mode,
12351 srcreg, offset);
12352 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12353 destreg, offset);
12354 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12355 offset += size;
12358 else
12360 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12361 & (TARGET_64BIT ? -1 : 0x3fffffff));
12362 countreg = copy_to_mode_reg (counter_mode, countreg);
12363 countreg = ix86_zero_extend_to_Pmode (countreg);
12365 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12366 GEN_INT (size == 4 ? 2 : 3));
12367 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12368 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12370 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12371 countreg, destexp, srcexp));
12372 offset = count & ~(size - 1);
12375 if (size == 8 && (count & 0x04))
12377 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12378 offset);
12379 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12380 offset);
12381 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12382 offset += 4;
12384 if (count & 0x02)
12386 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12387 offset);
12388 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12389 offset);
12390 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12391 offset += 2;
12393 if (count & 0x01)
12395 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12396 offset);
12397 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12398 offset);
12399 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12402 /* The generic code based on the glibc implementation:
12403 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12404 allowing accelerated copying there)
12405 - copy the data using rep movsl
12406 - copy the rest. */
12407 else
12409 rtx countreg2;
12410 rtx label = NULL;
12411 rtx srcmem, dstmem;
12412 int desired_alignment = (TARGET_PENTIUMPRO
12413 && (count == 0 || count >= (unsigned int) 260)
12414 ? 8 : UNITS_PER_WORD);
12415 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12416 dst = change_address (dst, BLKmode, destreg);
12417 src = change_address (src, BLKmode, srcreg);
12419 /* In case we don't know anything about the alignment, default to
12420 the library version, since it is usually equally fast and results in
12421 shorter code.
12423 Also emit a call when we know that the count is large and the call overhead
12424 will not be important. */
12425 if (!TARGET_INLINE_ALL_STRINGOPS
12426 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12427 return 0;
12429 if (TARGET_SINGLE_STRINGOP)
12430 emit_insn (gen_cld ());
12432 countreg2 = gen_reg_rtx (Pmode);
12433 countreg = copy_to_mode_reg (counter_mode, count_exp);
12435 /* We don't use loops to align the destination or to copy parts smaller
12436 than 4 bytes, because gcc is able to optimize such code better (in
12437 the case the destination or the count really is aligned, gcc is often
12438 able to predict the branches) and also it is friendlier to the
12439 hardware branch prediction.
12441 Using loops is beneficial for the generic case, because we can
12442 handle small counts using the loops. Many CPUs (such as Athlon)
12443 have large REP prefix setup costs.
12445 This is quite costly. Maybe we can revisit this decision later or
12446 add some customizability to this code. */
12448 if (count == 0 && align < desired_alignment)
12450 label = gen_label_rtx ();
12451 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12452 LEU, 0, counter_mode, 1, label);
12454 if (align <= 1)
12456 rtx label = ix86_expand_aligntest (destreg, 1);
12457 srcmem = change_address (src, QImode, srcreg);
12458 dstmem = change_address (dst, QImode, destreg);
12459 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12460 ix86_adjust_counter (countreg, 1);
12461 emit_label (label);
12462 LABEL_NUSES (label) = 1;
12464 if (align <= 2)
12466 rtx label = ix86_expand_aligntest (destreg, 2);
12467 srcmem = change_address (src, HImode, srcreg);
12468 dstmem = change_address (dst, HImode, destreg);
12469 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12470 ix86_adjust_counter (countreg, 2);
12471 emit_label (label);
12472 LABEL_NUSES (label) = 1;
12474 if (align <= 4 && desired_alignment > 4)
12476 rtx label = ix86_expand_aligntest (destreg, 4);
12477 srcmem = change_address (src, SImode, srcreg);
12478 dstmem = change_address (dst, SImode, destreg);
12479 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12480 ix86_adjust_counter (countreg, 4);
12481 emit_label (label);
12482 LABEL_NUSES (label) = 1;
12485 if (label && desired_alignment > 4 && !TARGET_64BIT)
12487 emit_label (label);
12488 LABEL_NUSES (label) = 1;
12489 label = NULL_RTX;
12491 if (!TARGET_SINGLE_STRINGOP)
12492 emit_insn (gen_cld ());
12493 if (TARGET_64BIT)
12495 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12496 GEN_INT (3)));
12497 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12499 else
12501 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12502 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12504 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12505 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12506 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12507 countreg2, destexp, srcexp));
12509 if (label)
12511 emit_label (label);
12512 LABEL_NUSES (label) = 1;
12514 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12516 srcmem = change_address (src, SImode, srcreg);
12517 dstmem = change_address (dst, SImode, destreg);
12518 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12520 if ((align <= 4 || count == 0) && TARGET_64BIT)
12522 rtx label = ix86_expand_aligntest (countreg, 4);
12523 srcmem = change_address (src, SImode, srcreg);
12524 dstmem = change_address (dst, SImode, destreg);
12525 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12526 emit_label (label);
12527 LABEL_NUSES (label) = 1;
12529 if (align > 2 && count != 0 && (count & 2))
12531 srcmem = change_address (src, HImode, srcreg);
12532 dstmem = change_address (dst, HImode, destreg);
12533 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12535 if (align <= 2 || count == 0)
12537 rtx label = ix86_expand_aligntest (countreg, 2);
12538 srcmem = change_address (src, HImode, srcreg);
12539 dstmem = change_address (dst, HImode, destreg);
12540 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12541 emit_label (label);
12542 LABEL_NUSES (label) = 1;
12544 if (align > 1 && count != 0 && (count & 1))
12546 srcmem = change_address (src, QImode, srcreg);
12547 dstmem = change_address (dst, QImode, destreg);
12548 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12550 if (align <= 1 || count == 0)
12552 rtx label = ix86_expand_aligntest (countreg, 1);
12553 srcmem = change_address (src, QImode, srcreg);
12554 dstmem = change_address (dst, QImode, destreg);
12555 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12556 emit_label (label);
12557 LABEL_NUSES (label) = 1;
12561 return 1;
12564 /* Expand string clear operation (bzero). Use i386 string operations when
12565 profitable. expand_movmem contains similar code. */
12567 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12569 rtx destreg, zeroreg, countreg, destexp;
12570 enum machine_mode counter_mode;
12571 HOST_WIDE_INT align = 0;
12572 unsigned HOST_WIDE_INT count = 0;
12574 if (GET_CODE (align_exp) == CONST_INT)
12575 align = INTVAL (align_exp);
12577 /* Can't use any of this if the user has appropriated esi. */
12578 if (global_regs[4])
12579 return 0;
12581 /* This simple hack avoids all inlining code and simplifies code below. */
12582 if (!TARGET_ALIGN_STRINGOPS)
12583 align = 32;
12585 if (GET_CODE (count_exp) == CONST_INT)
12587 count = INTVAL (count_exp);
12588 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12589 return 0;
12591 /* Figure out the proper mode for the counter. For 32 bits it is always SImode,
12592 for 64 bits use SImode when possible, otherwise DImode.
12593 Set count to the number of bytes cleared when known at compile time. */
12594 if (!TARGET_64BIT
12595 || GET_MODE (count_exp) == SImode
12596 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12597 counter_mode = SImode;
12598 else
12599 counter_mode = DImode;
12601 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12602 if (destreg != XEXP (dst, 0))
12603 dst = replace_equiv_address_nv (dst, destreg);
12606 /* When optimizing for size, emit a simple rep ; stosb instruction for
12607 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12608 sequence is 7 bytes long, so if optimizing for size and the count is
12609 small enough that some stosl, stosw and stosb instructions without
12610 rep are shorter, fall back into the next if. */
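/* Illustrative numbers for the -Os test below: clearing count = 6 bytes
   takes about (6 & 3) + (6 >> 2) = 3 bytes of inline stosl/stosw/stosb,
   cheaper than the 7-byte "movl $6, %ecx; rep; stosb", so control falls
   into the next branch; for count = 50 the estimate is 2 + 12 = 14 > 7
   and the rep; stosb form is used instead.  */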
12612 if ((!optimize || optimize_size)
12613 && (count == 0
12614 || ((count & 0x03)
12615 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12617 emit_insn (gen_cld ());
12619 countreg = ix86_zero_extend_to_Pmode (count_exp);
12620 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12621 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12622 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12624 else if (count != 0
12625 && (align >= 8
12626 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12627 || optimize_size || count < (unsigned int) 64))
12629 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12630 unsigned HOST_WIDE_INT offset = 0;
12632 emit_insn (gen_cld ());
12634 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12635 if (count & ~(size - 1))
12637 unsigned HOST_WIDE_INT repcount;
12638 unsigned int max_nonrep;
12640 repcount = count >> (size == 4 ? 2 : 3);
12641 if (!TARGET_64BIT)
12642 repcount &= 0x3fffffff;
12644 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12645 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12646 bytes. In both cases the latter seems to be faster for small
12647 values of N. */
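/* Concrete reading of the numbers above (illustrative): clearing three
   words with three 1-byte stosl instructions costs 3 bytes versus 7 for
   "movl $3, %ecx; rep; stosl", hence the max_nonrep limit of 7 chosen
   below for SImode stores (4 for the 2-byte stosq case), lowered to 3
   on Pentium 4 / Nocona when not optimizing for size.  */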
12648 max_nonrep = size == 4 ? 7 : 4;
12649 if (!optimize_size)
12650 switch (ix86_tune)
12652 case PROCESSOR_PENTIUM4:
12653 case PROCESSOR_NOCONA:
12654 max_nonrep = 3;
12655 break;
12656 default:
12657 break;
12660 if (repcount <= max_nonrep)
12661 while (repcount-- > 0)
12663 rtx mem = adjust_automodify_address_nv (dst,
12664 GET_MODE (zeroreg),
12665 destreg, offset);
12666 emit_insn (gen_strset (destreg, mem, zeroreg));
12667 offset += size;
12669 else
12671 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12672 countreg = ix86_zero_extend_to_Pmode (countreg);
12673 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12674 GEN_INT (size == 4 ? 2 : 3));
12675 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12676 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12677 destexp));
12678 offset = count & ~(size - 1);
12681 if (size == 8 && (count & 0x04))
12683 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12684 offset);
12685 emit_insn (gen_strset (destreg, mem,
12686 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12687 offset += 4;
12689 if (count & 0x02)
12691 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12692 offset);
12693 emit_insn (gen_strset (destreg, mem,
12694 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12695 offset += 2;
12697 if (count & 0x01)
12699 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12700 offset);
12701 emit_insn (gen_strset (destreg, mem,
12702 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12705 else
12707 rtx countreg2;
12708 rtx label = NULL;
12709 /* Compute desired alignment of the string operation. */
12710 int desired_alignment = (TARGET_PENTIUMPRO
12711 && (count == 0 || count >= (unsigned int) 260)
12712 ? 8 : UNITS_PER_WORD);
12714 /* In case we don't know anything about the alignment, default to
12715 the library version, since it is usually equally fast and results in
12716 shorter code.
12718 Also emit a call when we know that the count is large and the call overhead
12719 will not be important. */
12720 if (!TARGET_INLINE_ALL_STRINGOPS
12721 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12722 return 0;
12724 if (TARGET_SINGLE_STRINGOP)
12725 emit_insn (gen_cld ());
12727 countreg2 = gen_reg_rtx (Pmode);
12728 countreg = copy_to_mode_reg (counter_mode, count_exp);
12729 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12730 /* Get rid of MEM_OFFSET, it won't be accurate. */
12731 dst = change_address (dst, BLKmode, destreg);
12733 if (count == 0 && align < desired_alignment)
12735 label = gen_label_rtx ();
12736 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12737 LEU, 0, counter_mode, 1, label);
12739 if (align <= 1)
12741 rtx label = ix86_expand_aligntest (destreg, 1);
12742 emit_insn (gen_strset (destreg, dst,
12743 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12744 ix86_adjust_counter (countreg, 1);
12745 emit_label (label);
12746 LABEL_NUSES (label) = 1;
12748 if (align <= 2)
12750 rtx label = ix86_expand_aligntest (destreg, 2);
12751 emit_insn (gen_strset (destreg, dst,
12752 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12753 ix86_adjust_counter (countreg, 2);
12754 emit_label (label);
12755 LABEL_NUSES (label) = 1;
12757 if (align <= 4 && desired_alignment > 4)
12759 rtx label = ix86_expand_aligntest (destreg, 4);
12760 emit_insn (gen_strset (destreg, dst,
12761 (TARGET_64BIT
12762 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12763 : zeroreg)));
12764 ix86_adjust_counter (countreg, 4);
12765 emit_label (label);
12766 LABEL_NUSES (label) = 1;
12769 if (label && desired_alignment > 4 && !TARGET_64BIT)
12771 emit_label (label);
12772 LABEL_NUSES (label) = 1;
12773 label = NULL_RTX;
12776 if (!TARGET_SINGLE_STRINGOP)
12777 emit_insn (gen_cld ());
12778 if (TARGET_64BIT)
12780 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12781 GEN_INT (3)));
12782 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12784 else
12786 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12787 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12789 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12790 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12792 if (label)
12794 emit_label (label);
12795 LABEL_NUSES (label) = 1;
12798 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12799 emit_insn (gen_strset (destreg, dst,
12800 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12801 if (TARGET_64BIT && (align <= 4 || count == 0))
12803 rtx label = ix86_expand_aligntest (countreg, 4);
12804 emit_insn (gen_strset (destreg, dst,
12805 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12806 emit_label (label);
12807 LABEL_NUSES (label) = 1;
12809 if (align > 2 && count != 0 && (count & 2))
12810 emit_insn (gen_strset (destreg, dst,
12811 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12812 if (align <= 2 || count == 0)
12814 rtx label = ix86_expand_aligntest (countreg, 2);
12815 emit_insn (gen_strset (destreg, dst,
12816 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12817 emit_label (label);
12818 LABEL_NUSES (label) = 1;
12820 if (align > 1 && count != 0 && (count & 1))
12821 emit_insn (gen_strset (destreg, dst,
12822 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12823 if (align <= 1 || count == 0)
12825 rtx label = ix86_expand_aligntest (countreg, 1);
12826 emit_insn (gen_strset (destreg, dst,
12827 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12828 emit_label (label);
12829 LABEL_NUSES (label) = 1;
12832 return 1;
12835 /* Expand strlen. */
12837 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12839 rtx addr, scratch1, scratch2, scratch3, scratch4;
12841 /* The generic case of the strlen expander is long. Avoid expanding
12842 it unless TARGET_INLINE_ALL_STRINGOPS. */
12844 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12845 && !TARGET_INLINE_ALL_STRINGOPS
12846 && !optimize_size
12847 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12848 return 0;
12850 addr = force_reg (Pmode, XEXP (src, 0));
12851 scratch1 = gen_reg_rtx (Pmode);
12853 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12854 && !optimize_size)
12856 /* Well it seems that some optimizer does not combine a call like
12857 foo(strlen(bar), strlen(bar));
12858 when the move and the subtraction are done here. It does calculate
12859 the length just once when these instructions are done inside of
12860 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
12861 often used and I use one fewer register for the lifetime of
12862 output_strlen_unroll() this is better. */
12864 emit_move_insn (out, addr);
12866 ix86_expand_strlensi_unroll_1 (out, src, align);
12868 /* strlensi_unroll_1 returns the address of the zero at the end of
12869 the string, like memchr(), so compute the length by subtracting
12870 the start address. */
12871 if (TARGET_64BIT)
12872 emit_insn (gen_subdi3 (out, out, addr));
12873 else
12874 emit_insn (gen_subsi3 (out, out, addr));
12876 else
12878 rtx unspec;
12879 scratch2 = gen_reg_rtx (Pmode);
12880 scratch3 = gen_reg_rtx (Pmode);
12881 scratch4 = force_reg (Pmode, constm1_rtx);
12883 emit_move_insn (scratch3, addr);
12884 eoschar = force_reg (QImode, eoschar);
12886 emit_insn (gen_cld ());
12887 src = replace_equiv_address_nv (src, scratch3);
12889 /* If .md starts supporting :P, this can be done in .md. */
12890 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12891 scratch4), UNSPEC_SCAS);
12892 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12893 if (TARGET_64BIT)
12895 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12896 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12898 else
12900 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12901 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12904 return 1;
12907 /* Expand the appropriate insns for doing strlen if not just doing
12908 repnz; scasb
12910 out = result, initialized with the start address
12911 align_rtx = alignment of the address.
12912 scratch = scratch register, initialized with the start address when
12913 not aligned, otherwise undefined
12915 This is just the body. It needs the initializations mentioned above and
12916 some address computing at the end. These things are done in i386.md. */
12918 static void
12919 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12921 int align;
12922 rtx tmp;
12923 rtx align_2_label = NULL_RTX;
12924 rtx align_3_label = NULL_RTX;
12925 rtx align_4_label = gen_label_rtx ();
12926 rtx end_0_label = gen_label_rtx ();
12927 rtx mem;
12928 rtx tmpreg = gen_reg_rtx (SImode);
12929 rtx scratch = gen_reg_rtx (SImode);
12930 rtx cmp;
12932 align = 0;
12933 if (GET_CODE (align_rtx) == CONST_INT)
12934 align = INTVAL (align_rtx);
12936 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12938 /* Is there a known alignment and is it less than 4? */
12939 if (align < 4)
12941 rtx scratch1 = gen_reg_rtx (Pmode);
12942 emit_move_insn (scratch1, out);
12943 /* Is there a known alignment and is it not 2? */
12944 if (align != 2)
12946 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12947 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12949 /* Leave just the 3 lower bits. */
12950 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12951 NULL_RTX, 0, OPTAB_WIDEN);
12953 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12954 Pmode, 1, align_4_label);
12955 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12956 Pmode, 1, align_2_label);
12957 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12958 Pmode, 1, align_3_label);
12960 else
12962 /* Since the alignment is 2, we have to check 2 or 0 bytes;
12963 check whether it is aligned to a 4-byte boundary. */
12965 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12966 NULL_RTX, 0, OPTAB_WIDEN);
12968 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12969 Pmode, 1, align_4_label);
12972 mem = change_address (src, QImode, out);
12974 /* Now compare the bytes. */
12976 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
12977 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12978 QImode, 1, end_0_label);
12980 /* Increment the address. */
12981 if (TARGET_64BIT)
12982 emit_insn (gen_adddi3 (out, out, const1_rtx));
12983 else
12984 emit_insn (gen_addsi3 (out, out, const1_rtx));
12986 /* Not needed with an alignment of 2 */
12987 if (align != 2)
12989 emit_label (align_2_label);
12991 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12992 end_0_label);
12994 if (TARGET_64BIT)
12995 emit_insn (gen_adddi3 (out, out, const1_rtx));
12996 else
12997 emit_insn (gen_addsi3 (out, out, const1_rtx));
12999 emit_label (align_3_label);
13002 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
13003 end_0_label);
13005 if (TARGET_64BIT)
13006 emit_insn (gen_adddi3 (out, out, const1_rtx));
13007 else
13008 emit_insn (gen_addsi3 (out, out, const1_rtx));
13011 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
13012 align this loop; it only bloats the program without helping to
13013 speed it up. */
13014 emit_label (align_4_label);
13016 mem = change_address (src, SImode, out);
13017 emit_move_insn (scratch, mem);
13018 if (TARGET_64BIT)
13019 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
13020 else
13021 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
13023 /* This formula yields a nonzero result iff one of the bytes is zero.
13024 This saves three branches inside the loop and many cycles. */
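/* Worked example of the formula emitted below (illustrative): for the
   word w = 0x41004242 the sequence computes
       (w - 0x01010101) & ~w & 0x80808080
     = 0x3fff4141 & 0xbeffbdbd & 0x80808080
     = 0x00800000,
   which is nonzero exactly because w contains a zero byte; for a word
   with no zero byte the result is 0 and the loop continues.  */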
13026 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
13027 emit_insn (gen_one_cmplsi2 (scratch, scratch));
13028 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
13029 emit_insn (gen_andsi3 (tmpreg, tmpreg,
13030 gen_int_mode (0x80808080, SImode)));
13031 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
13032 align_4_label);
13034 if (TARGET_CMOVE)
13036 rtx reg = gen_reg_rtx (SImode);
13037 rtx reg2 = gen_reg_rtx (Pmode);
13038 emit_move_insn (reg, tmpreg);
13039 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
13041 /* If zero is not in the first two bytes, move two bytes forward. */
13042 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13043 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13044 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13045 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
13046 gen_rtx_IF_THEN_ELSE (SImode, tmp,
13047 reg,
13048 tmpreg)));
13049 /* Emit lea manually to avoid clobbering of flags. */
13050 emit_insn (gen_rtx_SET (SImode, reg2,
13051 gen_rtx_PLUS (Pmode, out, const2_rtx)));
13053 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13054 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
13055 emit_insn (gen_rtx_SET (VOIDmode, out,
13056 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
13057 reg2,
13058 out)));
13061 else
13063 rtx end_2_label = gen_label_rtx ();
13064 /* Is zero in the first two bytes? */
13066 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
13067 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
13068 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
13069 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13070 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
13071 pc_rtx);
13072 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13073 JUMP_LABEL (tmp) = end_2_label;
13075 /* Not in the first two. Move two bytes forward. */
13076 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
13077 if (TARGET_64BIT)
13078 emit_insn (gen_adddi3 (out, out, const2_rtx));
13079 else
13080 emit_insn (gen_addsi3 (out, out, const2_rtx));
13082 emit_label (end_2_label);
13086 /* Avoid branch in fixing the byte. */
13087 tmpreg = gen_lowpart (QImode, tmpreg);
13088 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
13089 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
13090 if (TARGET_64BIT)
13091 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
13092 else
13093 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
13095 emit_label (end_0_label);
13098 void
13099 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
13100 rtx callarg2 ATTRIBUTE_UNUSED,
13101 rtx pop, int sibcall)
13103 rtx use = NULL, call;
13105 if (pop == const0_rtx)
13106 pop = NULL;
13107 gcc_assert (!TARGET_64BIT || !pop);
13109 #if TARGET_MACHO
13110 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
13111 fnaddr = machopic_indirect_call_target (fnaddr);
13112 #else
13113 /* Static functions and indirect calls don't need the pic register. */
13114 if (! TARGET_64BIT && flag_pic
13115 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
13116 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
13117 use_reg (&use, pic_offset_table_rtx);
13119 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
13121 rtx al = gen_rtx_REG (QImode, 0);
13122 emit_move_insn (al, callarg2);
13123 use_reg (&use, al);
13125 #endif /* TARGET_MACHO */
13127 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
13129 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13130 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13132 if (sibcall && TARGET_64BIT
13133 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
13135 rtx addr;
13136 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
13137 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
13138 emit_move_insn (fnaddr, addr);
13139 fnaddr = gen_rtx_MEM (QImode, fnaddr);
13142 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
13143 if (retval)
13144 call = gen_rtx_SET (VOIDmode, retval, call);
13145 if (pop)
13147 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
13148 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
13149 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
13152 call = emit_call_insn (call);
13153 if (use)
13154 CALL_INSN_FUNCTION_USAGE (call) = use;
13158 /* Clear stack slot assignments remembered from previous functions.
13159 This is called from INIT_EXPANDERS once before RTL is emitted for each
13160 function. */
13162 static struct machine_function *
13163 ix86_init_machine_status (void)
13165 struct machine_function *f;
13167 f = ggc_alloc_cleared (sizeof (struct machine_function));
13168 f->use_fast_prologue_epilogue_nregs = -1;
13169 f->tls_descriptor_call_expanded_p = 0;
13171 return f;
13174 /* Return a MEM corresponding to a stack slot with mode MODE.
13175 Allocate a new slot if necessary.
13177 The RTL for a function can have several slots available: N is
13178 which slot to use. */
13181 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
13183 struct stack_local_entry *s;
13185 gcc_assert (n < MAX_386_STACK_LOCALS);
13187 for (s = ix86_stack_locals; s; s = s->next)
13188 if (s->mode == mode && s->n == n)
13189 return s->rtl;
13191 s = (struct stack_local_entry *)
13192 ggc_alloc (sizeof (struct stack_local_entry));
13193 s->n = n;
13194 s->mode = mode;
13195 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
13197 s->next = ix86_stack_locals;
13198 ix86_stack_locals = s;
13199 return s->rtl;
13202 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13204 static GTY(()) rtx ix86_tls_symbol;
13206 ix86_tls_get_addr (void)
13209 if (!ix86_tls_symbol)
13211 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
13212 (TARGET_ANY_GNU_TLS
13213 && !TARGET_64BIT)
13214 ? "___tls_get_addr"
13215 : "__tls_get_addr");
13218 return ix86_tls_symbol;
13221 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13223 static GTY(()) rtx ix86_tls_module_base_symbol;
13225 ix86_tls_module_base (void)
13228 if (!ix86_tls_module_base_symbol)
13230 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
13231 "_TLS_MODULE_BASE_");
13232 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13233 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13236 return ix86_tls_module_base_symbol;
13239 /* Calculate the length of the memory address in the instruction
13240 encoding. Does not include the one-byte modrm, opcode, or prefix. */
13243 memory_address_length (rtx addr)
13245 struct ix86_address parts;
13246 rtx base, index, disp;
13247 int len;
13248 int ok;
13250 if (GET_CODE (addr) == PRE_DEC
13251 || GET_CODE (addr) == POST_INC
13252 || GET_CODE (addr) == PRE_MODIFY
13253 || GET_CODE (addr) == POST_MODIFY)
13254 return 0;
13256 ok = ix86_decompose_address (addr, &parts);
13257 gcc_assert (ok);
13259 if (parts.base && GET_CODE (parts.base) == SUBREG)
13260 parts.base = SUBREG_REG (parts.base);
13261 if (parts.index && GET_CODE (parts.index) == SUBREG)
13262 parts.index = SUBREG_REG (parts.index);
13264 base = parts.base;
13265 index = parts.index;
13266 disp = parts.disp;
13267 len = 0;
13269 /* Rule of thumb:
13270 - esp as the base always wants an index,
13271 - ebp as the base always wants a displacement. */
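/* Illustrative encodings for the rules below (lengths exclude the modrm
   byte itself, as stated above): "(%eax)" adds 0 bytes, "8(%ebp)" adds a
   1-byte displacement, "(%eax,%ecx,4)" adds the 1-byte SIB, and an
   absolute "0x1000" or "0x1000(%eax)" displacement adds 4 bytes.  */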
13273 /* Register Indirect. */
13274 if (base && !index && !disp)
13276 /* esp (for its index) and ebp (for its displacement) need
13277 the two-byte modrm form. */
13278 if (addr == stack_pointer_rtx
13279 || addr == arg_pointer_rtx
13280 || addr == frame_pointer_rtx
13281 || addr == hard_frame_pointer_rtx)
13282 len = 1;
13285 /* Direct Addressing. */
13286 else if (disp && !base && !index)
13287 len = 4;
13289 else
13291 /* Find the length of the displacement constant. */
13292 if (disp)
13294 if (GET_CODE (disp) == CONST_INT
13295 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
13296 && base)
13297 len = 1;
13298 else
13299 len = 4;
13301 /* ebp always wants a displacement. */
13302 else if (base == hard_frame_pointer_rtx)
13303 len = 1;
13305 /* An index requires the two-byte modrm form.... */
13306 if (index
13307 /* ...like esp, which always wants an index. */
13308 || base == stack_pointer_rtx
13309 || base == arg_pointer_rtx
13310 || base == frame_pointer_rtx)
13311 len += 1;
13314 return len;
13317 /* Compute default value for "length_immediate" attribute. When SHORTFORM
13318 is set, expect that the insn has an 8bit immediate alternative. */
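/* Example of the resulting attribute value (illustrative): with SHORTFORM
   set, "addl $3, %eax" has a constant satisfying the 'K' constraint
   (signed 8-bit) and gets length 1, while "addl $1000, %eax" gets 4; a
   DImode immediate also counts as 4, since it is encoded as a 32-bit
   sign-extended value.  */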
13320 ix86_attr_length_immediate_default (rtx insn, int shortform)
13322 int len = 0;
13323 int i;
13324 extract_insn_cached (insn);
13325 for (i = recog_data.n_operands - 1; i >= 0; --i)
13326 if (CONSTANT_P (recog_data.operand[i]))
13328 gcc_assert (!len);
13329 if (shortform
13330 && GET_CODE (recog_data.operand[i]) == CONST_INT
13331 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
13332 len = 1;
13333 else
13335 switch (get_attr_mode (insn))
13337 case MODE_QI:
13338 len+=1;
13339 break;
13340 case MODE_HI:
13341 len+=2;
13342 break;
13343 case MODE_SI:
13344 len+=4;
13345 break;
13346 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
13347 case MODE_DI:
13348 len+=4;
13349 break;
13350 default:
13351 fatal_insn ("unknown insn mode", insn);
13355 return len;
13357 /* Compute default value for "length_address" attribute. */
13359 ix86_attr_length_address_default (rtx insn)
13361 int i;
13363 if (get_attr_type (insn) == TYPE_LEA)
13365 rtx set = PATTERN (insn);
13367 if (GET_CODE (set) == PARALLEL)
13368 set = XVECEXP (set, 0, 0);
13370 gcc_assert (GET_CODE (set) == SET);
13372 return memory_address_length (SET_SRC (set));
13375 extract_insn_cached (insn);
13376 for (i = recog_data.n_operands - 1; i >= 0; --i)
13377 if (GET_CODE (recog_data.operand[i]) == MEM)
13379 return memory_address_length (XEXP (recog_data.operand[i], 0));
13380 break;
13382 return 0;
13385 /* Return the maximum number of instructions a cpu can issue. */
13387 static int
13388 ix86_issue_rate (void)
13390 switch (ix86_tune)
13392 case PROCESSOR_PENTIUM:
13393 case PROCESSOR_K6:
13394 return 2;
13396 case PROCESSOR_PENTIUMPRO:
13397 case PROCESSOR_PENTIUM4:
13398 case PROCESSOR_ATHLON:
13399 case PROCESSOR_K8:
13400 case PROCESSOR_NOCONA:
13401 case PROCESSOR_GENERIC32:
13402 case PROCESSOR_GENERIC64:
13403 return 3;
13405 default:
13406 return 1;
13410 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13411 by DEP_INSN and nothing else set by DEP_INSN. */
13413 static int
13414 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13416 rtx set, set2;
13418 /* Simplify the test for uninteresting insns. */
13419 if (insn_type != TYPE_SETCC
13420 && insn_type != TYPE_ICMOV
13421 && insn_type != TYPE_FCMOV
13422 && insn_type != TYPE_IBR)
13423 return 0;
13425 if ((set = single_set (dep_insn)) != 0)
13427 set = SET_DEST (set);
13428 set2 = NULL_RTX;
13430 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13431 && XVECLEN (PATTERN (dep_insn), 0) == 2
13432 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13433 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13435 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13436 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13438 else
13439 return 0;
13441 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13442 return 0;
13444 /* This test is true if the dependent insn reads the flags but
13445 not any other potentially set register. */
13446 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13447 return 0;
13449 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13450 return 0;
13452 return 1;
13455 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13456 address with operands set by DEP_INSN. */
13458 static int
13459 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13461 rtx addr;
13463 if (insn_type == TYPE_LEA
13464 && TARGET_PENTIUM)
13466 addr = PATTERN (insn);
13468 if (GET_CODE (addr) == PARALLEL)
13469 addr = XVECEXP (addr, 0, 0);
13471 gcc_assert (GET_CODE (addr) == SET);
13473 addr = SET_SRC (addr);
13475 else
13477 int i;
13478 extract_insn_cached (insn);
13479 for (i = recog_data.n_operands - 1; i >= 0; --i)
13480 if (GET_CODE (recog_data.operand[i]) == MEM)
13482 addr = XEXP (recog_data.operand[i], 0);
13483 goto found;
13485 return 0;
13486 found:;
13489 return modified_in_p (addr, dep_insn);
13492 static int
13493 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13495 enum attr_type insn_type, dep_insn_type;
13496 enum attr_memory memory;
13497 rtx set, set2;
13498 int dep_insn_code_number;
13500 /* Anti and output dependencies have zero cost on all CPUs. */
13501 if (REG_NOTE_KIND (link) != 0)
13502 return 0;
13504 dep_insn_code_number = recog_memoized (dep_insn);
13506 /* If we can't recognize the insns, we can't really do anything. */
13507 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13508 return cost;
13510 insn_type = get_attr_type (insn);
13511 dep_insn_type = get_attr_type (dep_insn);
13513 switch (ix86_tune)
13515 case PROCESSOR_PENTIUM:
13516 /* Address Generation Interlock adds a cycle of latency. */
13517 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13518 cost += 1;
13520 /* ??? Compares pair with jump/setcc. */
13521 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13522 cost = 0;
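      /* Editorial note (interpretation, not original source text): the zero
         cost above models Pentium U/V-pipe pairing -- a compare or test can
         issue in the same cycle as the dependent conditional jump or setcc,
         as the "??? Compares pair with jump/setcc" remark above hints.  */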
13524 /* Floating point stores require the value to be ready one cycle earlier. */
13525 if (insn_type == TYPE_FMOV
13526 && get_attr_memory (insn) == MEMORY_STORE
13527 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13528 cost += 1;
13529 break;
13531 case PROCESSOR_PENTIUMPRO:
13532 memory = get_attr_memory (insn);
13534 /* INT->FP conversion is expensive. */
13535 if (get_attr_fp_int_src (dep_insn))
13536 cost += 5;
13538 /* There is one cycle extra latency between an FP op and a store. */
13539 if (insn_type == TYPE_FMOV
13540 && (set = single_set (dep_insn)) != NULL_RTX
13541 && (set2 = single_set (insn)) != NULL_RTX
13542 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13543 && GET_CODE (SET_DEST (set2)) == MEM)
13544 cost += 1;
13546 /* Model the reorder buffer's ability to hide the latency of a load by
13547 executing it in parallel with the previous instruction, provided the
13548 previous instruction is not needed to compute the address. */
13549 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13550 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13552 /* Claim that moves take one cycle, as the core can issue one load
13553 at a time and the next load can start a cycle later. */
13554 if (dep_insn_type == TYPE_IMOV
13555 || dep_insn_type == TYPE_FMOV)
13556 cost = 1;
13557 else if (cost > 1)
13558 cost--;
13560 break;
13562 case PROCESSOR_K6:
13563 memory = get_attr_memory (insn);
13565 /* The esp dependency is resolved before the instruction is really
13566 finished. */
13567 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13568 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13569 return 1;
13571 /* INT->FP conversion is expensive. */
13572 if (get_attr_fp_int_src (dep_insn))
13573 cost += 5;
13575 /* Model the reorder buffer's ability to hide the latency of a load by
13576 executing it in parallel with the previous instruction, provided the
13577 previous instruction is not needed to compute the address. */
13578 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13579 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13581 /* Claim that moves take one cycle, as the core can issue one load
13582 at a time and the next load can start a cycle later. */
13583 if (dep_insn_type == TYPE_IMOV
13584 || dep_insn_type == TYPE_FMOV)
13585 cost = 1;
13586 else if (cost > 2)
13587 cost -= 2;
13588 else
13589 cost = 1;
13591 break;
13593 case PROCESSOR_ATHLON:
13594 case PROCESSOR_K8:
13595 case PROCESSOR_GENERIC32:
13596 case PROCESSOR_GENERIC64:
13597 memory = get_attr_memory (insn);
13599 /* Model the reorder buffer's ability to hide the latency of a load by
13600 executing it in parallel with the previous instruction, provided the
13601 previous instruction is not needed to compute the address. */
13602 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13603 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13605 enum attr_unit unit = get_attr_unit (insn);
13606 int loadcost = 3;
13608 /* Because of the difference between the length of the integer and
13609 floating unit pipeline preparation stages, the memory operands
13610 for floating point are cheaper.
13612 ??? For Athlon the difference is most probably 2. */
13613 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13614 loadcost = 3;
13615 else
13616 loadcost = TARGET_ATHLON ? 2 : 0;
13618 if (cost >= loadcost)
13619 cost -= loadcost;
13620 else
13621 cost = 0;
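          /* Editorial arithmetic example (not original source text): if the
             recorded dependence cost is 5 and the consuming insn runs in the
             integer unit, it becomes 5 - 3 = 2 here; an FP/SSE consumer
             subtracts only 2 on Athlon and nothing on K8/generic.  */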
13624 default:
13625 break;
13628 return cost;
13631 /* How many alternative schedules to try. This should be as wide as the
13632 scheduling freedom in the DFA, but no wider. Making this value too
13633 large results in extra work for the scheduler. */
13635 static int
13636 ia32_multipass_dfa_lookahead (void)
13638 if (ix86_tune == PROCESSOR_PENTIUM)
13639 return 2;
13641 if (ix86_tune == PROCESSOR_PENTIUMPRO
13642 || ix86_tune == PROCESSOR_K6)
13643 return 1;
13645 else
13646 return 0;
13650 /* Compute the alignment given to a constant that is being placed in memory.
13651 EXP is the constant and ALIGN is the alignment that the object would
13652 ordinarily have.
13653 The value of this function is used instead of that alignment to align
13654 the object. */
13657 ix86_constant_alignment (tree exp, int align)
13659 if (TREE_CODE (exp) == REAL_CST)
13661 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13662 return 64;
13663 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13664 return 128;
13666 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13667 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13668 return BITS_PER_WORD;
13670 return align;
13673 /* Compute the alignment for a static variable.
13674 TYPE is the data type, and ALIGN is the alignment that
13675 the object would ordinarily have. The value of this function is used
13676 instead of that alignment to align the object. */
13679 ix86_data_alignment (tree type, int align)
13681 int max_align = optimize_size ? BITS_PER_WORD : 256;
13683 if (AGGREGATE_TYPE_P (type)
13684 && TYPE_SIZE (type)
13685 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13686 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
13687 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
13688 && align < max_align)
13689 align = max_align;
13691 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
13692 to a 16-byte boundary. */
13693 if (TARGET_64BIT)
13695 if (AGGREGATE_TYPE_P (type)
13696 && TYPE_SIZE (type)
13697 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13698 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13699 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13700 return 128;
13703 if (TREE_CODE (type) == ARRAY_TYPE)
13705 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13706 return 64;
13707 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13708 return 128;
13710 else if (TREE_CODE (type) == COMPLEX_TYPE)
13713 if (TYPE_MODE (type) == DCmode && align < 64)
13714 return 64;
13715 if (TYPE_MODE (type) == XCmode && align < 128)
13716 return 128;
13718 else if ((TREE_CODE (type) == RECORD_TYPE
13719 || TREE_CODE (type) == UNION_TYPE
13720 || TREE_CODE (type) == QUAL_UNION_TYPE)
13721 && TYPE_FIELDS (type))
13723 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13724 return 64;
13725 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13726 return 128;
13728 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13729 || TREE_CODE (type) == INTEGER_TYPE)
13731 if (TYPE_MODE (type) == DFmode && align < 64)
13732 return 64;
13733 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13734 return 128;
13737 return align;
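/* Editorial examples for ix86_data_alignment above (illustrative, not
   original source text): when not optimizing for size, a static aggregate of
   at least 32 bytes (256 bits), e.g. "static char buf[64];", is bumped to
   max_align (256-bit) alignment; an array of doubles gets at least 64-bit
   alignment; and types whose mode satisfies ALIGN_MODE_128 are bumped to
   128-bit alignment.  */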
13740 /* Compute the alignment for a local variable.
13741 TYPE is the data type, and ALIGN is the alignment that
13742 the object would ordinarily have. The value of this macro is used
13743 instead of that alignment to align the object. */
13746 ix86_local_alignment (tree type, int align)
13748 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
13749 to a 16-byte boundary. */
13750 if (TARGET_64BIT)
13752 if (AGGREGATE_TYPE_P (type)
13753 && TYPE_SIZE (type)
13754 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13755 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13756 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13757 return 128;
13759 if (TREE_CODE (type) == ARRAY_TYPE)
13761 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13762 return 64;
13763 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13764 return 128;
13766 else if (TREE_CODE (type) == COMPLEX_TYPE)
13768 if (TYPE_MODE (type) == DCmode && align < 64)
13769 return 64;
13770 if (TYPE_MODE (type) == XCmode && align < 128)
13771 return 128;
13773 else if ((TREE_CODE (type) == RECORD_TYPE
13774 || TREE_CODE (type) == UNION_TYPE
13775 || TREE_CODE (type) == QUAL_UNION_TYPE)
13776 && TYPE_FIELDS (type))
13778 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13779 return 64;
13780 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13781 return 128;
13783 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13784 || TREE_CODE (type) == INTEGER_TYPE)
13787 if (TYPE_MODE (type) == DFmode && align < 64)
13788 return 64;
13789 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13790 return 128;
13792 return align;
13795 /* Emit RTL insns to initialize the variable parts of a trampoline.
13796 FNADDR is an RTX for the address of the function's pure code.
13797 CXT is an RTX for the static chain value for the function. */
13798 void
13799 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13801 if (!TARGET_64BIT)
13803 /* Compute offset from the end of the jmp to the target function. */
13804 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13805 plus_constant (tramp, 10),
13806 NULL_RTX, 1, OPTAB_DIRECT);
13807 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13808 gen_int_mode (0xb9, QImode));
13809 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13810 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13811 gen_int_mode (0xe9, QImode));
13812 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13814 else
13816 int offset = 0;
13817 /* Try to load the address using the shorter movl instead of movabs.
13818 We may want to support movq for kernel mode, but the kernel does not use
13819 trampolines at the moment. */
13820 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13822 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13823 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13824 gen_int_mode (0xbb41, HImode));
13825 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13826 gen_lowpart (SImode, fnaddr));
13827 offset += 6;
13829 else
13831 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13832 gen_int_mode (0xbb49, HImode));
13833 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13834 fnaddr);
13835 offset += 10;
13837 /* Load static chain using movabs to r10. */
13838 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13839 gen_int_mode (0xba49, HImode));
13840 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13841 cxt);
13842 offset += 10;
13843 /* Jump to r11. */
13844 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13845 gen_int_mode (0xff49, HImode));
13846 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13847 gen_int_mode (0xe3, QImode));
13848 offset += 3;
13849 gcc_assert (offset <= TRAMPOLINE_SIZE);
13852 #ifdef ENABLE_EXECUTE_STACK
13853 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13854 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13855 #endif
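/* Editorial sketch of the trampolines emitted above (illustrative; the exact
   bytes come from the gen_int_mode calls in x86_initialize_trampoline):

   32-bit, 10 bytes:
     b9 <cxt:4>              movl  $CXT, %ecx
     e9 <disp:4>             jmp   FNADDR        (disp relative to tramp+10)

   64-bit, 19 or 23 bytes:
     41 bb <fnaddr:4>        movl  $FNADDR, %r11d   (zero-extendable address)
       -or-
     49 bb <fnaddr:8>        movabs $FNADDR, %r11
     49 ba <cxt:8>           movabs $CXT, %r10
     49 ff e3                jmp   *%r11  */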
13858 /* Codes for all the SSE/MMX builtins. */
13859 enum ix86_builtins
13861 IX86_BUILTIN_ADDPS,
13862 IX86_BUILTIN_ADDSS,
13863 IX86_BUILTIN_DIVPS,
13864 IX86_BUILTIN_DIVSS,
13865 IX86_BUILTIN_MULPS,
13866 IX86_BUILTIN_MULSS,
13867 IX86_BUILTIN_SUBPS,
13868 IX86_BUILTIN_SUBSS,
13870 IX86_BUILTIN_CMPEQPS,
13871 IX86_BUILTIN_CMPLTPS,
13872 IX86_BUILTIN_CMPLEPS,
13873 IX86_BUILTIN_CMPGTPS,
13874 IX86_BUILTIN_CMPGEPS,
13875 IX86_BUILTIN_CMPNEQPS,
13876 IX86_BUILTIN_CMPNLTPS,
13877 IX86_BUILTIN_CMPNLEPS,
13878 IX86_BUILTIN_CMPNGTPS,
13879 IX86_BUILTIN_CMPNGEPS,
13880 IX86_BUILTIN_CMPORDPS,
13881 IX86_BUILTIN_CMPUNORDPS,
13882 IX86_BUILTIN_CMPEQSS,
13883 IX86_BUILTIN_CMPLTSS,
13884 IX86_BUILTIN_CMPLESS,
13885 IX86_BUILTIN_CMPNEQSS,
13886 IX86_BUILTIN_CMPNLTSS,
13887 IX86_BUILTIN_CMPNLESS,
13888 IX86_BUILTIN_CMPNGTSS,
13889 IX86_BUILTIN_CMPNGESS,
13890 IX86_BUILTIN_CMPORDSS,
13891 IX86_BUILTIN_CMPUNORDSS,
13893 IX86_BUILTIN_COMIEQSS,
13894 IX86_BUILTIN_COMILTSS,
13895 IX86_BUILTIN_COMILESS,
13896 IX86_BUILTIN_COMIGTSS,
13897 IX86_BUILTIN_COMIGESS,
13898 IX86_BUILTIN_COMINEQSS,
13899 IX86_BUILTIN_UCOMIEQSS,
13900 IX86_BUILTIN_UCOMILTSS,
13901 IX86_BUILTIN_UCOMILESS,
13902 IX86_BUILTIN_UCOMIGTSS,
13903 IX86_BUILTIN_UCOMIGESS,
13904 IX86_BUILTIN_UCOMINEQSS,
13906 IX86_BUILTIN_CVTPI2PS,
13907 IX86_BUILTIN_CVTPS2PI,
13908 IX86_BUILTIN_CVTSI2SS,
13909 IX86_BUILTIN_CVTSI642SS,
13910 IX86_BUILTIN_CVTSS2SI,
13911 IX86_BUILTIN_CVTSS2SI64,
13912 IX86_BUILTIN_CVTTPS2PI,
13913 IX86_BUILTIN_CVTTSS2SI,
13914 IX86_BUILTIN_CVTTSS2SI64,
13916 IX86_BUILTIN_MAXPS,
13917 IX86_BUILTIN_MAXSS,
13918 IX86_BUILTIN_MINPS,
13919 IX86_BUILTIN_MINSS,
13921 IX86_BUILTIN_LOADUPS,
13922 IX86_BUILTIN_STOREUPS,
13923 IX86_BUILTIN_MOVSS,
13925 IX86_BUILTIN_MOVHLPS,
13926 IX86_BUILTIN_MOVLHPS,
13927 IX86_BUILTIN_LOADHPS,
13928 IX86_BUILTIN_LOADLPS,
13929 IX86_BUILTIN_STOREHPS,
13930 IX86_BUILTIN_STORELPS,
13932 IX86_BUILTIN_MASKMOVQ,
13933 IX86_BUILTIN_MOVMSKPS,
13934 IX86_BUILTIN_PMOVMSKB,
13936 IX86_BUILTIN_MOVNTPS,
13937 IX86_BUILTIN_MOVNTQ,
13939 IX86_BUILTIN_LOADDQU,
13940 IX86_BUILTIN_STOREDQU,
13942 IX86_BUILTIN_PACKSSWB,
13943 IX86_BUILTIN_PACKSSDW,
13944 IX86_BUILTIN_PACKUSWB,
13946 IX86_BUILTIN_PADDB,
13947 IX86_BUILTIN_PADDW,
13948 IX86_BUILTIN_PADDD,
13949 IX86_BUILTIN_PADDQ,
13950 IX86_BUILTIN_PADDSB,
13951 IX86_BUILTIN_PADDSW,
13952 IX86_BUILTIN_PADDUSB,
13953 IX86_BUILTIN_PADDUSW,
13954 IX86_BUILTIN_PSUBB,
13955 IX86_BUILTIN_PSUBW,
13956 IX86_BUILTIN_PSUBD,
13957 IX86_BUILTIN_PSUBQ,
13958 IX86_BUILTIN_PSUBSB,
13959 IX86_BUILTIN_PSUBSW,
13960 IX86_BUILTIN_PSUBUSB,
13961 IX86_BUILTIN_PSUBUSW,
13963 IX86_BUILTIN_PAND,
13964 IX86_BUILTIN_PANDN,
13965 IX86_BUILTIN_POR,
13966 IX86_BUILTIN_PXOR,
13968 IX86_BUILTIN_PAVGB,
13969 IX86_BUILTIN_PAVGW,
13971 IX86_BUILTIN_PCMPEQB,
13972 IX86_BUILTIN_PCMPEQW,
13973 IX86_BUILTIN_PCMPEQD,
13974 IX86_BUILTIN_PCMPGTB,
13975 IX86_BUILTIN_PCMPGTW,
13976 IX86_BUILTIN_PCMPGTD,
13978 IX86_BUILTIN_PMADDWD,
13980 IX86_BUILTIN_PMAXSW,
13981 IX86_BUILTIN_PMAXUB,
13982 IX86_BUILTIN_PMINSW,
13983 IX86_BUILTIN_PMINUB,
13985 IX86_BUILTIN_PMULHUW,
13986 IX86_BUILTIN_PMULHW,
13987 IX86_BUILTIN_PMULLW,
13989 IX86_BUILTIN_PSADBW,
13990 IX86_BUILTIN_PSHUFW,
13992 IX86_BUILTIN_PSLLW,
13993 IX86_BUILTIN_PSLLD,
13994 IX86_BUILTIN_PSLLQ,
13995 IX86_BUILTIN_PSRAW,
13996 IX86_BUILTIN_PSRAD,
13997 IX86_BUILTIN_PSRLW,
13998 IX86_BUILTIN_PSRLD,
13999 IX86_BUILTIN_PSRLQ,
14000 IX86_BUILTIN_PSLLWI,
14001 IX86_BUILTIN_PSLLDI,
14002 IX86_BUILTIN_PSLLQI,
14003 IX86_BUILTIN_PSRAWI,
14004 IX86_BUILTIN_PSRADI,
14005 IX86_BUILTIN_PSRLWI,
14006 IX86_BUILTIN_PSRLDI,
14007 IX86_BUILTIN_PSRLQI,
14009 IX86_BUILTIN_PUNPCKHBW,
14010 IX86_BUILTIN_PUNPCKHWD,
14011 IX86_BUILTIN_PUNPCKHDQ,
14012 IX86_BUILTIN_PUNPCKLBW,
14013 IX86_BUILTIN_PUNPCKLWD,
14014 IX86_BUILTIN_PUNPCKLDQ,
14016 IX86_BUILTIN_SHUFPS,
14018 IX86_BUILTIN_RCPPS,
14019 IX86_BUILTIN_RCPSS,
14020 IX86_BUILTIN_RSQRTPS,
14021 IX86_BUILTIN_RSQRTSS,
14022 IX86_BUILTIN_SQRTPS,
14023 IX86_BUILTIN_SQRTSS,
14025 IX86_BUILTIN_UNPCKHPS,
14026 IX86_BUILTIN_UNPCKLPS,
14028 IX86_BUILTIN_ANDPS,
14029 IX86_BUILTIN_ANDNPS,
14030 IX86_BUILTIN_ORPS,
14031 IX86_BUILTIN_XORPS,
14033 IX86_BUILTIN_EMMS,
14034 IX86_BUILTIN_LDMXCSR,
14035 IX86_BUILTIN_STMXCSR,
14036 IX86_BUILTIN_SFENCE,
14038 /* 3DNow! Original */
14039 IX86_BUILTIN_FEMMS,
14040 IX86_BUILTIN_PAVGUSB,
14041 IX86_BUILTIN_PF2ID,
14042 IX86_BUILTIN_PFACC,
14043 IX86_BUILTIN_PFADD,
14044 IX86_BUILTIN_PFCMPEQ,
14045 IX86_BUILTIN_PFCMPGE,
14046 IX86_BUILTIN_PFCMPGT,
14047 IX86_BUILTIN_PFMAX,
14048 IX86_BUILTIN_PFMIN,
14049 IX86_BUILTIN_PFMUL,
14050 IX86_BUILTIN_PFRCP,
14051 IX86_BUILTIN_PFRCPIT1,
14052 IX86_BUILTIN_PFRCPIT2,
14053 IX86_BUILTIN_PFRSQIT1,
14054 IX86_BUILTIN_PFRSQRT,
14055 IX86_BUILTIN_PFSUB,
14056 IX86_BUILTIN_PFSUBR,
14057 IX86_BUILTIN_PI2FD,
14058 IX86_BUILTIN_PMULHRW,
14060 /* 3DNow! Athlon Extensions */
14061 IX86_BUILTIN_PF2IW,
14062 IX86_BUILTIN_PFNACC,
14063 IX86_BUILTIN_PFPNACC,
14064 IX86_BUILTIN_PI2FW,
14065 IX86_BUILTIN_PSWAPDSI,
14066 IX86_BUILTIN_PSWAPDSF,
14068 /* SSE2 */
14069 IX86_BUILTIN_ADDPD,
14070 IX86_BUILTIN_ADDSD,
14071 IX86_BUILTIN_DIVPD,
14072 IX86_BUILTIN_DIVSD,
14073 IX86_BUILTIN_MULPD,
14074 IX86_BUILTIN_MULSD,
14075 IX86_BUILTIN_SUBPD,
14076 IX86_BUILTIN_SUBSD,
14078 IX86_BUILTIN_CMPEQPD,
14079 IX86_BUILTIN_CMPLTPD,
14080 IX86_BUILTIN_CMPLEPD,
14081 IX86_BUILTIN_CMPGTPD,
14082 IX86_BUILTIN_CMPGEPD,
14083 IX86_BUILTIN_CMPNEQPD,
14084 IX86_BUILTIN_CMPNLTPD,
14085 IX86_BUILTIN_CMPNLEPD,
14086 IX86_BUILTIN_CMPNGTPD,
14087 IX86_BUILTIN_CMPNGEPD,
14088 IX86_BUILTIN_CMPORDPD,
14089 IX86_BUILTIN_CMPUNORDPD,
14090 IX86_BUILTIN_CMPNEPD,
14091 IX86_BUILTIN_CMPEQSD,
14092 IX86_BUILTIN_CMPLTSD,
14093 IX86_BUILTIN_CMPLESD,
14094 IX86_BUILTIN_CMPNEQSD,
14095 IX86_BUILTIN_CMPNLTSD,
14096 IX86_BUILTIN_CMPNLESD,
14097 IX86_BUILTIN_CMPORDSD,
14098 IX86_BUILTIN_CMPUNORDSD,
14099 IX86_BUILTIN_CMPNESD,
14101 IX86_BUILTIN_COMIEQSD,
14102 IX86_BUILTIN_COMILTSD,
14103 IX86_BUILTIN_COMILESD,
14104 IX86_BUILTIN_COMIGTSD,
14105 IX86_BUILTIN_COMIGESD,
14106 IX86_BUILTIN_COMINEQSD,
14107 IX86_BUILTIN_UCOMIEQSD,
14108 IX86_BUILTIN_UCOMILTSD,
14109 IX86_BUILTIN_UCOMILESD,
14110 IX86_BUILTIN_UCOMIGTSD,
14111 IX86_BUILTIN_UCOMIGESD,
14112 IX86_BUILTIN_UCOMINEQSD,
14114 IX86_BUILTIN_MAXPD,
14115 IX86_BUILTIN_MAXSD,
14116 IX86_BUILTIN_MINPD,
14117 IX86_BUILTIN_MINSD,
14119 IX86_BUILTIN_ANDPD,
14120 IX86_BUILTIN_ANDNPD,
14121 IX86_BUILTIN_ORPD,
14122 IX86_BUILTIN_XORPD,
14124 IX86_BUILTIN_SQRTPD,
14125 IX86_BUILTIN_SQRTSD,
14127 IX86_BUILTIN_UNPCKHPD,
14128 IX86_BUILTIN_UNPCKLPD,
14130 IX86_BUILTIN_SHUFPD,
14132 IX86_BUILTIN_LOADUPD,
14133 IX86_BUILTIN_STOREUPD,
14134 IX86_BUILTIN_MOVSD,
14136 IX86_BUILTIN_LOADHPD,
14137 IX86_BUILTIN_LOADLPD,
14139 IX86_BUILTIN_CVTDQ2PD,
14140 IX86_BUILTIN_CVTDQ2PS,
14142 IX86_BUILTIN_CVTPD2DQ,
14143 IX86_BUILTIN_CVTPD2PI,
14144 IX86_BUILTIN_CVTPD2PS,
14145 IX86_BUILTIN_CVTTPD2DQ,
14146 IX86_BUILTIN_CVTTPD2PI,
14148 IX86_BUILTIN_CVTPI2PD,
14149 IX86_BUILTIN_CVTSI2SD,
14150 IX86_BUILTIN_CVTSI642SD,
14152 IX86_BUILTIN_CVTSD2SI,
14153 IX86_BUILTIN_CVTSD2SI64,
14154 IX86_BUILTIN_CVTSD2SS,
14155 IX86_BUILTIN_CVTSS2SD,
14156 IX86_BUILTIN_CVTTSD2SI,
14157 IX86_BUILTIN_CVTTSD2SI64,
14159 IX86_BUILTIN_CVTPS2DQ,
14160 IX86_BUILTIN_CVTPS2PD,
14161 IX86_BUILTIN_CVTTPS2DQ,
14163 IX86_BUILTIN_MOVNTI,
14164 IX86_BUILTIN_MOVNTPD,
14165 IX86_BUILTIN_MOVNTDQ,
14167 /* SSE2 MMX */
14168 IX86_BUILTIN_MASKMOVDQU,
14169 IX86_BUILTIN_MOVMSKPD,
14170 IX86_BUILTIN_PMOVMSKB128,
14172 IX86_BUILTIN_PACKSSWB128,
14173 IX86_BUILTIN_PACKSSDW128,
14174 IX86_BUILTIN_PACKUSWB128,
14176 IX86_BUILTIN_PADDB128,
14177 IX86_BUILTIN_PADDW128,
14178 IX86_BUILTIN_PADDD128,
14179 IX86_BUILTIN_PADDQ128,
14180 IX86_BUILTIN_PADDSB128,
14181 IX86_BUILTIN_PADDSW128,
14182 IX86_BUILTIN_PADDUSB128,
14183 IX86_BUILTIN_PADDUSW128,
14184 IX86_BUILTIN_PSUBB128,
14185 IX86_BUILTIN_PSUBW128,
14186 IX86_BUILTIN_PSUBD128,
14187 IX86_BUILTIN_PSUBQ128,
14188 IX86_BUILTIN_PSUBSB128,
14189 IX86_BUILTIN_PSUBSW128,
14190 IX86_BUILTIN_PSUBUSB128,
14191 IX86_BUILTIN_PSUBUSW128,
14193 IX86_BUILTIN_PAND128,
14194 IX86_BUILTIN_PANDN128,
14195 IX86_BUILTIN_POR128,
14196 IX86_BUILTIN_PXOR128,
14198 IX86_BUILTIN_PAVGB128,
14199 IX86_BUILTIN_PAVGW128,
14201 IX86_BUILTIN_PCMPEQB128,
14202 IX86_BUILTIN_PCMPEQW128,
14203 IX86_BUILTIN_PCMPEQD128,
14204 IX86_BUILTIN_PCMPGTB128,
14205 IX86_BUILTIN_PCMPGTW128,
14206 IX86_BUILTIN_PCMPGTD128,
14208 IX86_BUILTIN_PMADDWD128,
14210 IX86_BUILTIN_PMAXSW128,
14211 IX86_BUILTIN_PMAXUB128,
14212 IX86_BUILTIN_PMINSW128,
14213 IX86_BUILTIN_PMINUB128,
14215 IX86_BUILTIN_PMULUDQ,
14216 IX86_BUILTIN_PMULUDQ128,
14217 IX86_BUILTIN_PMULHUW128,
14218 IX86_BUILTIN_PMULHW128,
14219 IX86_BUILTIN_PMULLW128,
14221 IX86_BUILTIN_PSADBW128,
14222 IX86_BUILTIN_PSHUFHW,
14223 IX86_BUILTIN_PSHUFLW,
14224 IX86_BUILTIN_PSHUFD,
14226 IX86_BUILTIN_PSLLW128,
14227 IX86_BUILTIN_PSLLD128,
14228 IX86_BUILTIN_PSLLQ128,
14229 IX86_BUILTIN_PSRAW128,
14230 IX86_BUILTIN_PSRAD128,
14231 IX86_BUILTIN_PSRLW128,
14232 IX86_BUILTIN_PSRLD128,
14233 IX86_BUILTIN_PSRLQ128,
14234 IX86_BUILTIN_PSLLDQI128,
14235 IX86_BUILTIN_PSLLWI128,
14236 IX86_BUILTIN_PSLLDI128,
14237 IX86_BUILTIN_PSLLQI128,
14238 IX86_BUILTIN_PSRAWI128,
14239 IX86_BUILTIN_PSRADI128,
14240 IX86_BUILTIN_PSRLDQI128,
14241 IX86_BUILTIN_PSRLWI128,
14242 IX86_BUILTIN_PSRLDI128,
14243 IX86_BUILTIN_PSRLQI128,
14245 IX86_BUILTIN_PUNPCKHBW128,
14246 IX86_BUILTIN_PUNPCKHWD128,
14247 IX86_BUILTIN_PUNPCKHDQ128,
14248 IX86_BUILTIN_PUNPCKHQDQ128,
14249 IX86_BUILTIN_PUNPCKLBW128,
14250 IX86_BUILTIN_PUNPCKLWD128,
14251 IX86_BUILTIN_PUNPCKLDQ128,
14252 IX86_BUILTIN_PUNPCKLQDQ128,
14254 IX86_BUILTIN_CLFLUSH,
14255 IX86_BUILTIN_MFENCE,
14256 IX86_BUILTIN_LFENCE,
14258 /* Prescott New Instructions (SSE3). */
14259 IX86_BUILTIN_ADDSUBPS,
14260 IX86_BUILTIN_HADDPS,
14261 IX86_BUILTIN_HSUBPS,
14262 IX86_BUILTIN_MOVSHDUP,
14263 IX86_BUILTIN_MOVSLDUP,
14264 IX86_BUILTIN_ADDSUBPD,
14265 IX86_BUILTIN_HADDPD,
14266 IX86_BUILTIN_HSUBPD,
14267 IX86_BUILTIN_LDDQU,
14269 IX86_BUILTIN_MONITOR,
14270 IX86_BUILTIN_MWAIT,
14272 IX86_BUILTIN_VEC_INIT_V2SI,
14273 IX86_BUILTIN_VEC_INIT_V4HI,
14274 IX86_BUILTIN_VEC_INIT_V8QI,
14275 IX86_BUILTIN_VEC_EXT_V2DF,
14276 IX86_BUILTIN_VEC_EXT_V2DI,
14277 IX86_BUILTIN_VEC_EXT_V4SF,
14278 IX86_BUILTIN_VEC_EXT_V4SI,
14279 IX86_BUILTIN_VEC_EXT_V8HI,
14280 IX86_BUILTIN_VEC_EXT_V2SI,
14281 IX86_BUILTIN_VEC_EXT_V4HI,
14282 IX86_BUILTIN_VEC_SET_V8HI,
14283 IX86_BUILTIN_VEC_SET_V4HI,
14285 /* SSE2 ABI functions. */
14286 IX86_BUILTIN_SSE2_ACOS,
14287 IX86_BUILTIN_SSE2_ACOSF,
14288 IX86_BUILTIN_SSE2_ASIN,
14289 IX86_BUILTIN_SSE2_ASINF,
14290 IX86_BUILTIN_SSE2_ATAN,
14291 IX86_BUILTIN_SSE2_ATANF,
14292 IX86_BUILTIN_SSE2_ATAN2,
14293 IX86_BUILTIN_SSE2_ATAN2F,
14294 IX86_BUILTIN_SSE2_COS,
14295 IX86_BUILTIN_SSE2_COSF,
14296 IX86_BUILTIN_SSE2_EXP,
14297 IX86_BUILTIN_SSE2_EXPF,
14298 IX86_BUILTIN_SSE2_LOG10,
14299 IX86_BUILTIN_SSE2_LOG10F,
14300 IX86_BUILTIN_SSE2_LOG,
14301 IX86_BUILTIN_SSE2_LOGF,
14302 IX86_BUILTIN_SSE2_SIN,
14303 IX86_BUILTIN_SSE2_SINF,
14304 IX86_BUILTIN_SSE2_TAN,
14305 IX86_BUILTIN_SSE2_TANF,
14307 IX86_BUILTIN_MAX
14310 #define def_builtin(MASK, NAME, TYPE, CODE) \
14311 do { \
14312 if ((MASK) & target_flags \
14313 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
14314 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
14315 NULL, NULL_TREE); \
14316 } while (0)
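/* Editorial note: a hypothetical use of the macro above would be

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The builtin is registered only when the MASK bits are enabled in
   target_flags, and a builtin carrying MASK_64BIT additionally requires
   TARGET_64BIT.  (In this file, many registrations are driven by the
   bdesc_* tables below rather than written out by hand.)  */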
14318 /* Bits for builtin_description.flag. */
14320 /* Set when we don't support the comparison natively, and should
14321 swap the comparison operands in order to support it. */
14322 #define BUILTIN_DESC_SWAP_OPERANDS 1
14324 struct builtin_description
14326 const unsigned int mask;
14327 const enum insn_code icode;
14328 const char *const name;
14329 const enum ix86_builtins code;
14330 const enum rtx_code comparison;
14331 const unsigned int flag;
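/* Editorial summary of the fields above (inferred from their use in the
   tables that follow): MASK is the target_flags bits the builtin requires,
   ICODE the insn pattern used to expand it, NAME the user-visible
   __builtin_ia32_* name (0 for entries registered elsewhere), CODE the
   ix86_builtins value, and COMPARISON/FLAG the rtx comparison code and
   BUILTIN_DESC_* flags used by the comparison builtins.  */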
14334 static const struct builtin_description bdesc_comi[] =
14336 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
14337 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
14338 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
14339 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
14340 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
14341 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
14342 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
14343 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
14344 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
14345 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
14346 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
14347 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
14348 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
14349 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
14350 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
14351 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
14352 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
14353 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
14354 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
14355 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
14356 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
14357 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
14358 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
14359 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14362 static const struct builtin_description bdesc_2arg[] =
14364 /* SSE */
14365 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14366 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14367 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14368 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14369 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14370 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14371 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14372 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14374 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14375 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14376 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14377 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14378 BUILTIN_DESC_SWAP_OPERANDS },
14379 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14380 BUILTIN_DESC_SWAP_OPERANDS },
14381 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14382 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14383 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14384 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14385 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14386 BUILTIN_DESC_SWAP_OPERANDS },
14387 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14388 BUILTIN_DESC_SWAP_OPERANDS },
14389 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14390 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14391 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14392 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14393 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14394 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14395 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14396 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14397 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14398 BUILTIN_DESC_SWAP_OPERANDS },
14399 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14400 BUILTIN_DESC_SWAP_OPERANDS },
14401 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
14403 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14404 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14405 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14406 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14408 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14409 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14410 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14411 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14413 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14414 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14415 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14416 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14417 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14419 /* MMX */
14420 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14421 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14422 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14423 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14424 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14425 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14426 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14427 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14429 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14430 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14431 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14432 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14433 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14434 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14435 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14436 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14438 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14439 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14440 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14442 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14443 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14444 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14445 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14447 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14448 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14450 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14451 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14452 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14453 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14454 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14455 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14457 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14458 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14459 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14460 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14462 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14463 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14464 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14465 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14466 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14467 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14469 /* Special. */
14470 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14471 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14472 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14474 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14475 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14476 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14478 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14479 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14480 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14481 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14482 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14483 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14485 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14486 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14487 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14488 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14489 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14490 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14492 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14493 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14494 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14495 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14497 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14498 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14500 /* SSE2 */
14501 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14502 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14503 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14504 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14505 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14506 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14507 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14508 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14510 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14511 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14512 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14513 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14514 BUILTIN_DESC_SWAP_OPERANDS },
14515 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14516 BUILTIN_DESC_SWAP_OPERANDS },
14517 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14518 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14519 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14520 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14521 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14522 BUILTIN_DESC_SWAP_OPERANDS },
14523 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14524 BUILTIN_DESC_SWAP_OPERANDS },
14525 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14526 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14527 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14528 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14529 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14530 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14531 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14532 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14533 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14535 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14536 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14537 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14538 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14540 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14541 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14542 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14543 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14545 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14546 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14547 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14549 /* SSE2 MMX */
14550 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14551 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14552 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14553 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14554 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14555 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14556 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14557 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14559 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14560 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14561 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14562 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14563 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14564 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14565 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14566 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14568 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14569 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14571 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14572 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14573 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14574 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14576 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14577 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14579 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14580 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14581 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14582 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14583 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14584 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14586 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14587 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14588 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14589 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14591 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14592 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14593 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14594 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14595 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14596 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14597 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14598 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14600 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14601 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14602 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14604 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14605 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14607 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14608 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14610 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14611 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14612 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14614 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14615 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14616 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14618 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14619 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14621 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14623 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14624 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14625 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14626 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14628 /* SSE3 MMX */
14629 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14630 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14631 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14632 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14633 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14634 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14637 static const struct builtin_description bdesc_1arg[] =
14639 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14640 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14642 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14643 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14644 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14646 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14647 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14648 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14649 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14650 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14651 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14653 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14654 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14656 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14658 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14659 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14661 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14662 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14663 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14664 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14665 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14667 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14669 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14670 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14671 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14672 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14674 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14675 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14676 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14678 /* SSE3 */
14679 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14680 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14683 static void
14684 ix86_init_builtins (void)
14686 if (TARGET_MMX)
14687 ix86_init_mmx_sse_builtins ();
14688 if (TARGET_SSE2)
14689 ix86_init_sse_abi_builtins ();
14692 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14693 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
14694 builtins are set up. */
14695 static void
14696 ix86_init_mmx_sse_builtins (void)
14698 const struct builtin_description * d;
14699 size_t i;
14701 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14702 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14703 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14704 tree V2DI_type_node
14705 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14706 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14707 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14708 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14709 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14710 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14711 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14713 tree pchar_type_node = build_pointer_type (char_type_node);
14714 tree pcchar_type_node = build_pointer_type (
14715 build_type_variant (char_type_node, 1, 0));
14716 tree pfloat_type_node = build_pointer_type (float_type_node);
14717 tree pcfloat_type_node = build_pointer_type (
14718 build_type_variant (float_type_node, 1, 0));
14719 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14720 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14721 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14723 /* Comparisons. */
14724 tree int_ftype_v4sf_v4sf
14725 = build_function_type_list (integer_type_node,
14726 V4SF_type_node, V4SF_type_node, NULL_TREE);
14727 tree v4si_ftype_v4sf_v4sf
14728 = build_function_type_list (V4SI_type_node,
14729 V4SF_type_node, V4SF_type_node, NULL_TREE);
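/* Editorial note (illustrative): each of these *_ftype_* trees describes a
   builtin signature; e.g. int_ftype_v4sf_v4sf above corresponds to a
   function taking two V4SF vectors and returning int, matching the
   __builtin_ia32_comi*/ucomi* comparison builtins, which take two __m128
   operands and return an int flag.  */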
14730 /* MMX/SSE/integer conversions. */
14731 tree int_ftype_v4sf
14732 = build_function_type_list (integer_type_node,
14733 V4SF_type_node, NULL_TREE);
14734 tree int64_ftype_v4sf
14735 = build_function_type_list (long_long_integer_type_node,
14736 V4SF_type_node, NULL_TREE);
14737 tree int_ftype_v8qi
14738 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14739 tree v4sf_ftype_v4sf_int
14740 = build_function_type_list (V4SF_type_node,
14741 V4SF_type_node, integer_type_node, NULL_TREE);
14742 tree v4sf_ftype_v4sf_int64
14743 = build_function_type_list (V4SF_type_node,
14744 V4SF_type_node, long_long_integer_type_node,
14745 NULL_TREE);
14746 tree v4sf_ftype_v4sf_v2si
14747 = build_function_type_list (V4SF_type_node,
14748 V4SF_type_node, V2SI_type_node, NULL_TREE);
14750 /* Miscellaneous. */
14751 tree v8qi_ftype_v4hi_v4hi
14752 = build_function_type_list (V8QI_type_node,
14753 V4HI_type_node, V4HI_type_node, NULL_TREE);
14754 tree v4hi_ftype_v2si_v2si
14755 = build_function_type_list (V4HI_type_node,
14756 V2SI_type_node, V2SI_type_node, NULL_TREE);
14757 tree v4sf_ftype_v4sf_v4sf_int
14758 = build_function_type_list (V4SF_type_node,
14759 V4SF_type_node, V4SF_type_node,
14760 integer_type_node, NULL_TREE);
14761 tree v2si_ftype_v4hi_v4hi
14762 = build_function_type_list (V2SI_type_node,
14763 V4HI_type_node, V4HI_type_node, NULL_TREE);
14764 tree v4hi_ftype_v4hi_int
14765 = build_function_type_list (V4HI_type_node,
14766 V4HI_type_node, integer_type_node, NULL_TREE);
14767 tree v4hi_ftype_v4hi_di
14768 = build_function_type_list (V4HI_type_node,
14769 V4HI_type_node, long_long_unsigned_type_node,
14770 NULL_TREE);
14771 tree v2si_ftype_v2si_di
14772 = build_function_type_list (V2SI_type_node,
14773 V2SI_type_node, long_long_unsigned_type_node,
14774 NULL_TREE);
14775 tree void_ftype_void
14776 = build_function_type (void_type_node, void_list_node);
14777 tree void_ftype_unsigned
14778 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14779 tree void_ftype_unsigned_unsigned
14780 = build_function_type_list (void_type_node, unsigned_type_node,
14781 unsigned_type_node, NULL_TREE);
14782 tree void_ftype_pcvoid_unsigned_unsigned
14783 = build_function_type_list (void_type_node, const_ptr_type_node,
14784 unsigned_type_node, unsigned_type_node,
14785 NULL_TREE);
14786 tree unsigned_ftype_void
14787 = build_function_type (unsigned_type_node, void_list_node);
14788 tree v2si_ftype_v4sf
14789 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14790 /* Loads/stores. */
14791 tree void_ftype_v8qi_v8qi_pchar
14792 = build_function_type_list (void_type_node,
14793 V8QI_type_node, V8QI_type_node,
14794 pchar_type_node, NULL_TREE);
14795 tree v4sf_ftype_pcfloat
14796 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14797 /* @@@ the type is bogus */
14798 tree v4sf_ftype_v4sf_pv2si
14799 = build_function_type_list (V4SF_type_node,
14800 V4SF_type_node, pv2si_type_node, NULL_TREE);
14801 tree void_ftype_pv2si_v4sf
14802 = build_function_type_list (void_type_node,
14803 pv2si_type_node, V4SF_type_node, NULL_TREE);
14804 tree void_ftype_pfloat_v4sf
14805 = build_function_type_list (void_type_node,
14806 pfloat_type_node, V4SF_type_node, NULL_TREE);
14807 tree void_ftype_pdi_di
14808 = build_function_type_list (void_type_node,
14809 pdi_type_node, long_long_unsigned_type_node,
14810 NULL_TREE);
14811 tree void_ftype_pv2di_v2di
14812 = build_function_type_list (void_type_node,
14813 pv2di_type_node, V2DI_type_node, NULL_TREE);
14814 /* Normal vector unops. */
14815 tree v4sf_ftype_v4sf
14816 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14818 /* Normal vector binops. */
14819 tree v4sf_ftype_v4sf_v4sf
14820 = build_function_type_list (V4SF_type_node,
14821 V4SF_type_node, V4SF_type_node, NULL_TREE);
14822 tree v8qi_ftype_v8qi_v8qi
14823 = build_function_type_list (V8QI_type_node,
14824 V8QI_type_node, V8QI_type_node, NULL_TREE);
14825 tree v4hi_ftype_v4hi_v4hi
14826 = build_function_type_list (V4HI_type_node,
14827 V4HI_type_node, V4HI_type_node, NULL_TREE);
14828 tree v2si_ftype_v2si_v2si
14829 = build_function_type_list (V2SI_type_node,
14830 V2SI_type_node, V2SI_type_node, NULL_TREE);
14831 tree di_ftype_di_di
14832 = build_function_type_list (long_long_unsigned_type_node,
14833 long_long_unsigned_type_node,
14834 long_long_unsigned_type_node, NULL_TREE);
14836 tree v2si_ftype_v2sf
14837 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14838 tree v2sf_ftype_v2si
14839 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14840 tree v2si_ftype_v2si
14841 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14842 tree v2sf_ftype_v2sf
14843 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14844 tree v2sf_ftype_v2sf_v2sf
14845 = build_function_type_list (V2SF_type_node,
14846 V2SF_type_node, V2SF_type_node, NULL_TREE);
14847 tree v2si_ftype_v2sf_v2sf
14848 = build_function_type_list (V2SI_type_node,
14849 V2SF_type_node, V2SF_type_node, NULL_TREE);
14850 tree pint_type_node = build_pointer_type (integer_type_node);
14851 tree pdouble_type_node = build_pointer_type (double_type_node);
14852 tree pcdouble_type_node = build_pointer_type (
14853 build_type_variant (double_type_node, 1, 0));
14854 tree int_ftype_v2df_v2df
14855 = build_function_type_list (integer_type_node,
14856 V2DF_type_node, V2DF_type_node, NULL_TREE);
14858 tree void_ftype_pcvoid
14859 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14860 tree v4sf_ftype_v4si
14861 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14862 tree v4si_ftype_v4sf
14863 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14864 tree v2df_ftype_v4si
14865 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14866 tree v4si_ftype_v2df
14867 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14868 tree v2si_ftype_v2df
14869 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14870 tree v4sf_ftype_v2df
14871 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14872 tree v2df_ftype_v2si
14873 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14874 tree v2df_ftype_v4sf
14875 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14876 tree int_ftype_v2df
14877 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14878 tree int64_ftype_v2df
14879 = build_function_type_list (long_long_integer_type_node,
14880 V2DF_type_node, NULL_TREE);
14881 tree v2df_ftype_v2df_int
14882 = build_function_type_list (V2DF_type_node,
14883 V2DF_type_node, integer_type_node, NULL_TREE);
14884 tree v2df_ftype_v2df_int64
14885 = build_function_type_list (V2DF_type_node,
14886 V2DF_type_node, long_long_integer_type_node,
14887 NULL_TREE);
14888 tree v4sf_ftype_v4sf_v2df
14889 = build_function_type_list (V4SF_type_node,
14890 V4SF_type_node, V2DF_type_node, NULL_TREE);
14891 tree v2df_ftype_v2df_v4sf
14892 = build_function_type_list (V2DF_type_node,
14893 V2DF_type_node, V4SF_type_node, NULL_TREE);
14894 tree v2df_ftype_v2df_v2df_int
14895 = build_function_type_list (V2DF_type_node,
14896 V2DF_type_node, V2DF_type_node,
14897 integer_type_node,
14898 NULL_TREE);
14899 tree v2df_ftype_v2df_pcdouble
14900 = build_function_type_list (V2DF_type_node,
14901 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14902 tree void_ftype_pdouble_v2df
14903 = build_function_type_list (void_type_node,
14904 pdouble_type_node, V2DF_type_node, NULL_TREE);
14905 tree void_ftype_pint_int
14906 = build_function_type_list (void_type_node,
14907 pint_type_node, integer_type_node, NULL_TREE);
14908 tree void_ftype_v16qi_v16qi_pchar
14909 = build_function_type_list (void_type_node,
14910 V16QI_type_node, V16QI_type_node,
14911 pchar_type_node, NULL_TREE);
14912 tree v2df_ftype_pcdouble
14913 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14914 tree v2df_ftype_v2df_v2df
14915 = build_function_type_list (V2DF_type_node,
14916 V2DF_type_node, V2DF_type_node, NULL_TREE);
14917 tree v16qi_ftype_v16qi_v16qi
14918 = build_function_type_list (V16QI_type_node,
14919 V16QI_type_node, V16QI_type_node, NULL_TREE);
14920 tree v8hi_ftype_v8hi_v8hi
14921 = build_function_type_list (V8HI_type_node,
14922 V8HI_type_node, V8HI_type_node, NULL_TREE);
14923 tree v4si_ftype_v4si_v4si
14924 = build_function_type_list (V4SI_type_node,
14925 V4SI_type_node, V4SI_type_node, NULL_TREE);
14926 tree v2di_ftype_v2di_v2di
14927 = build_function_type_list (V2DI_type_node,
14928 V2DI_type_node, V2DI_type_node, NULL_TREE);
14929 tree v2di_ftype_v2df_v2df
14930 = build_function_type_list (V2DI_type_node,
14931 V2DF_type_node, V2DF_type_node, NULL_TREE);
14932 tree v2df_ftype_v2df
14933 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14934 tree v2di_ftype_v2di_int
14935 = build_function_type_list (V2DI_type_node,
14936 V2DI_type_node, integer_type_node, NULL_TREE);
14937 tree v4si_ftype_v4si_int
14938 = build_function_type_list (V4SI_type_node,
14939 V4SI_type_node, integer_type_node, NULL_TREE);
14940 tree v8hi_ftype_v8hi_int
14941 = build_function_type_list (V8HI_type_node,
14942 V8HI_type_node, integer_type_node, NULL_TREE);
14943 tree v8hi_ftype_v8hi_v2di
14944 = build_function_type_list (V8HI_type_node,
14945 V8HI_type_node, V2DI_type_node, NULL_TREE);
14946 tree v4si_ftype_v4si_v2di
14947 = build_function_type_list (V4SI_type_node,
14948 V4SI_type_node, V2DI_type_node, NULL_TREE);
14949 tree v4si_ftype_v8hi_v8hi
14950 = build_function_type_list (V4SI_type_node,
14951 V8HI_type_node, V8HI_type_node, NULL_TREE);
14952 tree di_ftype_v8qi_v8qi
14953 = build_function_type_list (long_long_unsigned_type_node,
14954 V8QI_type_node, V8QI_type_node, NULL_TREE);
14955 tree di_ftype_v2si_v2si
14956 = build_function_type_list (long_long_unsigned_type_node,
14957 V2SI_type_node, V2SI_type_node, NULL_TREE);
14958 tree v2di_ftype_v16qi_v16qi
14959 = build_function_type_list (V2DI_type_node,
14960 V16QI_type_node, V16QI_type_node, NULL_TREE);
14961 tree v2di_ftype_v4si_v4si
14962 = build_function_type_list (V2DI_type_node,
14963 V4SI_type_node, V4SI_type_node, NULL_TREE);
14964 tree int_ftype_v16qi
14965 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14966 tree v16qi_ftype_pcchar
14967 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14968 tree void_ftype_pchar_v16qi
14969 = build_function_type_list (void_type_node,
14970 pchar_type_node, V16QI_type_node, NULL_TREE);
14972 tree float80_type;
14973 tree float128_type;
14974 tree ftype;
14976 /* The __float80 type. */
14977 if (TYPE_MODE (long_double_type_node) == XFmode)
14978 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14979 "__float80");
14980 else
14982 /* The __float80 type. */
14983 float80_type = make_node (REAL_TYPE);
14984 TYPE_PRECISION (float80_type) = 80;
14985 layout_type (float80_type);
14986 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14989 if (TARGET_64BIT)
14991 float128_type = make_node (REAL_TYPE);
14992 TYPE_PRECISION (float128_type) = 128;
14993 layout_type (float128_type);
14994 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
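/* Editorial note (illustrative, not from the original source): when long
   double already has XFmode, "__float80" is simply another name for it;
   otherwise a separate 80-bit REAL_TYPE is laid out and registered.  The
   128-bit "__float128" type is only provided on 64-bit targets.  */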
14997 /* Add all builtins that are more or less simple operations on two
14998 operands. */
14999 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15001 /* Use one of the operands; the target can have a different mode for
15002 mask-generating compares. */
15003 enum machine_mode mode;
15004 tree type;
15006 if (d->name == 0)
15007 continue;
15008 mode = insn_data[d->icode].operand[1].mode;
15010 switch (mode)
15012 case V16QImode:
15013 type = v16qi_ftype_v16qi_v16qi;
15014 break;
15015 case V8HImode:
15016 type = v8hi_ftype_v8hi_v8hi;
15017 break;
15018 case V4SImode:
15019 type = v4si_ftype_v4si_v4si;
15020 break;
15021 case V2DImode:
15022 type = v2di_ftype_v2di_v2di;
15023 break;
15024 case V2DFmode:
15025 type = v2df_ftype_v2df_v2df;
15026 break;
15027 case V4SFmode:
15028 type = v4sf_ftype_v4sf_v4sf;
15029 break;
15030 case V8QImode:
15031 type = v8qi_ftype_v8qi_v8qi;
15032 break;
15033 case V4HImode:
15034 type = v4hi_ftype_v4hi_v4hi;
15035 break;
15036 case V2SImode:
15037 type = v2si_ftype_v2si_v2si;
15038 break;
15039 case DImode:
15040 type = di_ftype_di_di;
15041 break;
15043 default:
15044 gcc_unreachable ();
15047 /* Override for comparisons. */
15048 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15049 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
15050 type = v4si_ftype_v4sf_v4sf;
15052 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
15053 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15054 type = v2di_ftype_v2df_v2df;
15056 def_builtin (d->mask, d->name, type, d->code);
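/* Editorial note (illustrative, not from the original source): for a
   two-operand pattern whose input operands are V4SFmode, such as the SSE
   addition insn behind "__builtin_ia32_addps", the switch above selects
   v4sf_ftype_v4sf_v4sf; the maskcmp overrides handle the compares whose
   result mode (V4SI/V2DI) differs from the operand mode.  */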
15059 /* Add the remaining MMX insns with somewhat more complicated types. */
15060 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
15061 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
15062 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
15063 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
15065 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
15066 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
15067 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
15069 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
15070 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
15072 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
15073 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
15075 /* comi/ucomi insns. */
15076 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15077 if (d->mask == MASK_SSE2)
15078 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
15079 else
15080 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
15082 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
15083 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
15084 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
15086 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
15087 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
15088 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
15089 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
15090 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
15091 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
15092 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
15093 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
15094 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
15095 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
15096 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
15098 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
15100 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
15101 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
15103 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
15104 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
15105 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
15106 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
15108 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
15109 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
15110 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
15111 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
15113 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
15115 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
15117 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
15118 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
15119 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
15120 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
15121 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
15122 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
15124 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
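/* Editorial note (illustrative, not from the original source): these
   registrations back the <xmmintrin.h> intrinsics; _mm_shuffle_ps, for
   example, is expected to expand to __builtin_ia32_shufps with an 8-bit
   immediate selector, which is why v4sf_ftype_v4sf_v4sf_int carries a
   trailing integer_type_node argument.  */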
15126 /* Original 3DNow! */
15127 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
15128 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
15129 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
15130 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
15131 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
15132 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
15133 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
15134 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
15135 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
15136 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
15137 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
15138 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
15139 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
15140 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
15141 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
15142 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
15143 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
15144 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
15145 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
15146 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
15148 /* 3DNow! extension as used in the Athlon CPU. */
15149 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
15150 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
15151 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
15152 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
15153 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
15154 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
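/* Editorial note (illustrative, not from the original source): every
   def_builtin call above passes MASK_3DNOW or MASK_3DNOW_A, so these
   builtins should only be registered when the corresponding -m3dnow (or
   Athlon-extension) bits are set in target_flags; the mask test lives in
   the def_builtin macro defined earlier in this file.  */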
15156 /* SSE2 */
15157 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
15159 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
15160 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
15162 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
15163 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
15165 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
15166 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
15167 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
15168 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
15169 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
15171 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
15172 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
15173 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
15174 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
15176 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
15177 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
15179 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
15181 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
15182 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
15184 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
15185 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
15186 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
15187 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
15188 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
15190 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
15192 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
15193 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
15194 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
15195 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
15197 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
15198 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
15199 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
15201 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
15202 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
15203 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
15204 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
15206 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
15207 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
15208 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
15210 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
15211 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
15213 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
15214 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
15216 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
15217 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
15218 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
15220 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
15221 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
15222 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
15224 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
15225 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
15227 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
15228 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
15229 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
15230 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
15232 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
15233 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
15234 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
15235 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
15237 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
15238 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
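/* Editorial note (illustrative, not from the original source): the *i128
   variants above take an immediate shift count (v8hi_ftype_v8hi_int and
   friends), while the non-"i" forms take the count in the low quadword of
   an XMM register (v8hi_ftype_v8hi_v2di), mirroring the two encodings of
   the SSE2 shift instructions.  */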
15240 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
15242 /* Prescott New Instructions. */
15243 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
15244 void_ftype_pcvoid_unsigned_unsigned,
15245 IX86_BUILTIN_MONITOR);
15246 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
15247 void_ftype_unsigned_unsigned,
15248 IX86_BUILTIN_MWAIT);
15249 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
15250 v4sf_ftype_v4sf,
15251 IX86_BUILTIN_MOVSHDUP);
15252 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
15253 v4sf_ftype_v4sf,
15254 IX86_BUILTIN_MOVSLDUP);
15255 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
15256 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
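/* Editorial note (illustrative, not from the original source): the
   monitor/mwait builtins correspond to the _mm_monitor and _mm_mwait
   intrinsics in <pmmintrin.h>; e.g. _mm_monitor (p, ext, hints) is expected
   to call __builtin_ia32_monitor (p, ext, hints), matching the
   void_ftype_pcvoid_unsigned_unsigned signature registered above.  */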
15258 /* Access to the vec_init patterns. */
15259 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
15260 integer_type_node, NULL_TREE);
15261 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
15262 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
15264 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
15265 short_integer_type_node,
15266 short_integer_type_node,
15267 short_integer_type_node, NULL_TREE);
15268 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
15269 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
15271 ftype = build_function_type_list (V8QI_type_node, char_type_node,
15272 char_type_node, char_type_node,
15273 char_type_node, char_type_node,
15274 char_type_node, char_type_node,
15275 char_type_node, NULL_TREE);
15276 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
15277 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
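/* Editorial note (illustrative, not from the original source): the vec_init
   builtins let <mmintrin.h> build MMX vectors without the compiler emitting
   MMX code on its own; _mm_set_pi16, for instance, is expected to call
   __builtin_ia32_vec_init_v4hi with its four shorts, which
   ix86_expand_vec_init_builtin (below) lowers via ix86_expand_vector_init.  */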
15279 /* Access to the vec_extract patterns. */
15280 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15281 integer_type_node, NULL_TREE);
15282 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
15283 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
15285 ftype = build_function_type_list (long_long_integer_type_node,
15286 V2DI_type_node, integer_type_node,
15287 NULL_TREE);
15288 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
15289 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
15291 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15292 integer_type_node, NULL_TREE);
15293 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
15294 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
15296 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15297 integer_type_node, NULL_TREE);
15298 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
15299 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
15301 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15302 integer_type_node, NULL_TREE);
15303 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
15304 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
15306 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
15307 integer_type_node, NULL_TREE);
15308 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
15309 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
15311 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
15312 integer_type_node, NULL_TREE);
15313 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
15314 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
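/* Editorial note (illustrative, not from the original source): the vec_ext
   builtins back element extraction such as _mm_extract_pi16, which is
   expected to call __builtin_ia32_vec_ext_v4hi (vec, idx); the index must
   be a constant in range, as enforced by get_element_number below.  */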
15316 /* Access to the vec_set patterns. */
15317 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15318 intHI_type_node,
15319 integer_type_node, NULL_TREE);
15320 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
15321 ftype, IX86_BUILTIN_VEC_SET_V8HI);
15323 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
15324 intHI_type_node,
15325 integer_type_node, NULL_TREE);
15326 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
15327 ftype, IX86_BUILTIN_VEC_SET_V4HI);
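/* Editorial note (illustrative, not from the original source): likewise the
   vec_set builtins back insertion intrinsics such as _mm_insert_pi16; they
   are expanded by ix86_expand_vec_set_builtin below.  */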
15329 #undef def_builtin
15331 /* Set up all the SSE ABI builtins that we may use to override
15332 the normal builtins. */
15333 static void
15334 ix86_init_sse_abi_builtins (void)
15336 tree dbl, flt, dbl2, flt2;
15338 /* Bail out in case the template definitions are not available. */
15339 if (! built_in_decls [BUILT_IN_SIN]
15340 || ! built_in_decls [BUILT_IN_SINF]
15341 || ! built_in_decls [BUILT_IN_ATAN2]
15342 || ! built_in_decls [BUILT_IN_ATAN2F])
15343 return;
15345 /* Build the function types as variants of the existing ones. */
15346 dbl = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SIN]));
15347 TYPE_ATTRIBUTES (dbl)
15348 = tree_cons (get_identifier ("sseregparm"),
15349 NULL_TREE, TYPE_ATTRIBUTES (dbl));
15350 flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
15351 TYPE_ATTRIBUTES (flt)
15352 = tree_cons (get_identifier ("sseregparm"),
15353 NULL_TREE, TYPE_ATTRIBUTES (flt));
15354 dbl2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2]));
15355 TYPE_ATTRIBUTES (dbl2)
15356 = tree_cons (get_identifier ("sseregparm"),
15357 NULL_TREE, TYPE_ATTRIBUTES (dbl2));
15358 flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
15359 TYPE_ATTRIBUTES (flt2)
15360 = tree_cons (get_identifier ("sseregparm"),
15361 NULL_TREE, TYPE_ATTRIBUTES (flt2));
15363 #define def_builtin(capname, name, type) \
15364 ix86_builtin_function_variants [BUILT_IN_ ## capname] \
15365 = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
15366 IX86_BUILTIN_SSE2_ ## capname, \
15367 BUILT_IN_NORMAL, \
15368 "__libm_sse2_" # name, NULL_TREE)
15370 def_builtin (ACOS, acos, dbl);
15371 def_builtin (ACOSF, acosf, flt);
15372 def_builtin (ASIN, asin, dbl);
15373 def_builtin (ASINF, asinf, flt);
15374 def_builtin (ATAN, atan, dbl);
15375 def_builtin (ATANF, atanf, flt);
15376 def_builtin (ATAN2, atan2, dbl2);
15377 def_builtin (ATAN2F, atan2f, flt2);
15378 def_builtin (COS, cos, dbl);
15379 def_builtin (COSF, cosf, flt);
15380 def_builtin (EXP, exp, dbl);
15381 def_builtin (EXPF, expf, flt);
15382 def_builtin (LOG10, log10, dbl);
15383 def_builtin (LOG10F, log10f, flt);
15384 def_builtin (LOG, log, dbl);
15385 def_builtin (LOGF, logf, flt);
15386 def_builtin (SIN, sin, dbl);
15387 def_builtin (SINF, sinf, flt);
15388 def_builtin (TAN, tan, dbl);
15389 def_builtin (TANF, tanf, flt);
15391 #undef def_builtin
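/* Editorial note (illustrative, not from the original source): each entry
   above creates a builtin such as __builtin_sse2_sin whose library name is
   __libm_sse2_sin and whose type carries the "sseregparm" attribute, so
   arguments are passed in SSE registers.  ix86_expand_library_builtin
   (further below) redirects ordinary calls like sin () to these variants
   when TARGET_SSELIBM is enabled.  */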
15394 /* Errors in the source file can cause expand_expr to return const0_rtx
15395 where we expect a vector. To avoid crashing, use one of the vector
15396 clear instructions. */
15397 static rtx
15398 safe_vector_operand (rtx x, enum machine_mode mode)
15400 if (x == const0_rtx)
15401 x = CONST0_RTX (mode);
15402 return x;
15405 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
15407 static rtx
15408 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
15410 rtx pat, xops[3];
15411 tree arg0 = TREE_VALUE (arglist);
15412 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15413 rtx op0 = expand_normal (arg0);
15414 rtx op1 = expand_normal (arg1);
15415 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15416 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15417 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15419 if (VECTOR_MODE_P (mode0))
15420 op0 = safe_vector_operand (op0, mode0);
15421 if (VECTOR_MODE_P (mode1))
15422 op1 = safe_vector_operand (op1, mode1);
15424 if (optimize || !target
15425 || GET_MODE (target) != tmode
15426 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15427 target = gen_reg_rtx (tmode);
15429 if (GET_MODE (op1) == SImode && mode1 == TImode)
15431 rtx x = gen_reg_rtx (V4SImode);
15432 emit_insn (gen_sse2_loadd (x, op1));
15433 op1 = gen_lowpart (TImode, x);
15436 /* The insn must want input operands in the same modes as the
15437 result. */
15438 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15439 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15441 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15442 op0 = copy_to_mode_reg (mode0, op0);
15443 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15444 op1 = copy_to_mode_reg (mode1, op1);
15446 /* ??? Using ix86_fixup_binary_operands is problematic when
15447 we've got mismatched modes. Fake it. */
15449 xops[0] = target;
15450 xops[1] = op0;
15451 xops[2] = op1;
15453 if (tmode == mode0 && tmode == mode1)
15455 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15456 op0 = xops[1];
15457 op1 = xops[2];
15459 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15461 op0 = force_reg (mode0, op0);
15462 op1 = force_reg (mode1, op1);
15463 target = gen_reg_rtx (tmode);
15466 pat = GEN_FCN (icode) (target, op0, op1);
15467 if (! pat)
15468 return 0;
15469 emit_insn (pat);
15470 return target;
15473 /* Subroutine of ix86_expand_builtin to take care of stores. */
15475 static rtx
15476 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15478 rtx pat;
15479 tree arg0 = TREE_VALUE (arglist);
15480 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15481 rtx op0 = expand_normal (arg0);
15482 rtx op1 = expand_normal (arg1);
15483 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15484 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15486 if (VECTOR_MODE_P (mode1))
15487 op1 = safe_vector_operand (op1, mode1);
15489 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15490 op1 = copy_to_mode_reg (mode1, op1);
15492 pat = GEN_FCN (icode) (op0, op1);
15493 if (pat)
15494 emit_insn (pat);
15495 return 0;
15498 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15500 static rtx
15501 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15502 rtx target, int do_load)
15504 rtx pat;
15505 tree arg0 = TREE_VALUE (arglist);
15506 rtx op0 = expand_normal (arg0);
15507 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15508 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15510 if (optimize || !target
15511 || GET_MODE (target) != tmode
15512 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15513 target = gen_reg_rtx (tmode);
15514 if (do_load)
15515 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15516 else
15518 if (VECTOR_MODE_P (mode0))
15519 op0 = safe_vector_operand (op0, mode0);
15521 if ((optimize && !register_operand (op0, mode0))
15522 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15523 op0 = copy_to_mode_reg (mode0, op0);
15526 pat = GEN_FCN (icode) (target, op0);
15527 if (! pat)
15528 return 0;
15529 emit_insn (pat);
15530 return target;
15533 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15534 sqrtss, rsqrtss, rcpss. */
15536 static rtx
15537 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15539 rtx pat;
15540 tree arg0 = TREE_VALUE (arglist);
15541 rtx op1, op0 = expand_normal (arg0);
15542 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15543 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15545 if (optimize || !target
15546 || GET_MODE (target) != tmode
15547 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15548 target = gen_reg_rtx (tmode);
15550 if (VECTOR_MODE_P (mode0))
15551 op0 = safe_vector_operand (op0, mode0);
15553 if ((optimize && !register_operand (op0, mode0))
15554 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15555 op0 = copy_to_mode_reg (mode0, op0);
15557 op1 = op0;
15558 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15559 op1 = copy_to_mode_reg (mode0, op1);
15561 pat = GEN_FCN (icode) (target, op0, op1);
15562 if (! pat)
15563 return 0;
15564 emit_insn (pat);
15565 return target;
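/* Editorial note (illustrative, not from the original source): the scalar
   "vm" patterns used by these builtins (e.g. sse_vmsqrtv4sf2) take a second
   vector operand that supplies the untouched upper elements of the result,
   which is why op0 is simply duplicated into op1 above.  */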
15568 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15570 static rtx
15571 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15572 rtx target)
15574 rtx pat;
15575 tree arg0 = TREE_VALUE (arglist);
15576 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15577 rtx op0 = expand_normal (arg0);
15578 rtx op1 = expand_normal (arg1);
15579 rtx op2;
15580 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15581 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15582 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15583 enum rtx_code comparison = d->comparison;
15585 if (VECTOR_MODE_P (mode0))
15586 op0 = safe_vector_operand (op0, mode0);
15587 if (VECTOR_MODE_P (mode1))
15588 op1 = safe_vector_operand (op1, mode1);
15590 /* Swap operands if we have a comparison that isn't available in
15591 hardware. */
15592 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15594 rtx tmp = gen_reg_rtx (mode1);
15595 emit_move_insn (tmp, op1);
15596 op1 = op0;
15597 op0 = tmp;
15600 if (optimize || !target
15601 || GET_MODE (target) != tmode
15602 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15603 target = gen_reg_rtx (tmode);
15605 if ((optimize && !register_operand (op0, mode0))
15606 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15607 op0 = copy_to_mode_reg (mode0, op0);
15608 if ((optimize && !register_operand (op1, mode1))
15609 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15610 op1 = copy_to_mode_reg (mode1, op1);
15612 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15613 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15614 if (! pat)
15615 return 0;
15616 emit_insn (pat);
15617 return target;
15620 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15622 static rtx
15623 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15624 rtx target)
15626 rtx pat;
15627 tree arg0 = TREE_VALUE (arglist);
15628 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15629 rtx op0 = expand_normal (arg0);
15630 rtx op1 = expand_normal (arg1);
15631 rtx op2;
15632 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15633 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15634 enum rtx_code comparison = d->comparison;
15636 if (VECTOR_MODE_P (mode0))
15637 op0 = safe_vector_operand (op0, mode0);
15638 if (VECTOR_MODE_P (mode1))
15639 op1 = safe_vector_operand (op1, mode1);
15641 /* Swap operands if we have a comparison that isn't available in
15642 hardware. */
15643 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15645 rtx tmp = op1;
15646 op1 = op0;
15647 op0 = tmp;
15650 target = gen_reg_rtx (SImode);
15651 emit_move_insn (target, const0_rtx);
15652 target = gen_rtx_SUBREG (QImode, target, 0);
15654 if ((optimize && !register_operand (op0, mode0))
15655 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15656 op0 = copy_to_mode_reg (mode0, op0);
15657 if ((optimize && !register_operand (op1, mode1))
15658 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15659 op1 = copy_to_mode_reg (mode1, op1);
15661 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15662 pat = GEN_FCN (d->icode) (op0, op1);
15663 if (! pat)
15664 return 0;
15665 emit_insn (pat);
15666 emit_insn (gen_rtx_SET (VOIDmode,
15667 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15668 gen_rtx_fmt_ee (comparison, QImode,
15669 SET_DEST (pat),
15670 const0_rtx)));
15672 return SUBREG_REG (target);
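/* Editorial note (illustrative, not from the original source): the comi
   pattern emitted above only sets the flags, so the sequence zeroes an
   SImode pseudo, stores the flag test into its low byte through
   STRICT_LOW_PART, and returns the full register, giving builtins such as
   __builtin_ia32_comieq a 0/1 integer result.  */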
15675 /* Return the integer constant in ARG. Constrain it to be in the range
15676 of the subparts of VEC_TYPE; issue an error if not. */
15678 static int
15679 get_element_number (tree vec_type, tree arg)
15681 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15683 if (!host_integerp (arg, 1)
15684 || (elt = tree_low_cst (arg, 1), elt > max))
15686 error ("selector must be an integer constant in the range 0..%wi", max);
15687 return 0;
15690 return elt;
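/* Editorial note (illustrative, not from the original source): e.g. for
   __builtin_ia32_vec_ext_v4sf the vector has four subparts, so any selector
   outside 0..3 triggers the error above and 0 is used as a safe fallback.  */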
15693 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15694 ix86_expand_vector_init. We DO have language-level syntax for this, in
15695 the form of (type){ init-list }. Except that since we can't place emms
15696 instructions from inside the compiler, we can't allow the use of MMX
15697 registers unless the user explicitly asks for it. So we do *not* define
15698 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15699 we have builtins invoked by mmintrin.h that give us license to emit
15700 these sorts of instructions. */
15702 static rtx
15703 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15705 enum machine_mode tmode = TYPE_MODE (type);
15706 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15707 int i, n_elt = GET_MODE_NUNITS (tmode);
15708 rtvec v = rtvec_alloc (n_elt);
15710 gcc_assert (VECTOR_MODE_P (tmode));
15712 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15714 rtx x = expand_normal (TREE_VALUE (arglist));
15715 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15718 gcc_assert (arglist == NULL);
15720 if (!target || !register_operand (target, tmode))
15721 target = gen_reg_rtx (tmode);
15723 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15724 return target;
15727 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15728 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15729 had a language-level syntax for referencing vector elements. */
15731 static rtx
15732 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15734 enum machine_mode tmode, mode0;
15735 tree arg0, arg1;
15736 int elt;
15737 rtx op0;
15739 arg0 = TREE_VALUE (arglist);
15740 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15742 op0 = expand_normal (arg0);
15743 elt = get_element_number (TREE_TYPE (arg0), arg1);
15745 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15746 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15747 gcc_assert (VECTOR_MODE_P (mode0));
15749 op0 = force_reg (mode0, op0);
15751 if (optimize || !target || !register_operand (target, tmode))
15752 target = gen_reg_rtx (tmode);
15754 ix86_expand_vector_extract (true, target, op0, elt);
15756 return target;
15759 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15760 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15761 a language-level syntax for referencing vector elements. */
15763 static rtx
15764 ix86_expand_vec_set_builtin (tree arglist)
15766 enum machine_mode tmode, mode1;
15767 tree arg0, arg1, arg2;
15768 int elt;
15769 rtx op0, op1;
15771 arg0 = TREE_VALUE (arglist);
15772 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15773 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15775 tmode = TYPE_MODE (TREE_TYPE (arg0));
15776 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15777 gcc_assert (VECTOR_MODE_P (tmode));
15779 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15780 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15781 elt = get_element_number (TREE_TYPE (arg0), arg2);
15783 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15784 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15786 op0 = force_reg (tmode, op0);
15787 op1 = force_reg (mode1, op1);
15789 ix86_expand_vector_set (true, op0, op1, elt);
15791 return op0;
15794 /* Expand an expression EXP that calls a built-in function,
15795 with result going to TARGET if that's convenient
15796 (and in mode MODE if that's convenient).
15797 SUBTARGET may be used as the target for computing one of EXP's operands.
15798 IGNORE is nonzero if the value is to be ignored. */
15800 static rtx
15801 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15802 enum machine_mode mode ATTRIBUTE_UNUSED,
15803 int ignore ATTRIBUTE_UNUSED)
15805 const struct builtin_description *d;
15806 size_t i;
15807 enum insn_code icode;
15808 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15809 tree arglist = TREE_OPERAND (exp, 1);
15810 tree arg0, arg1, arg2;
15811 rtx op0, op1, op2, pat;
15812 enum machine_mode tmode, mode0, mode1, mode2;
15813 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15815 switch (fcode)
15817 case IX86_BUILTIN_EMMS:
15818 emit_insn (gen_mmx_emms ());
15819 return 0;
15821 case IX86_BUILTIN_SFENCE:
15822 emit_insn (gen_sse_sfence ());
15823 return 0;
15825 case IX86_BUILTIN_MASKMOVQ:
15826 case IX86_BUILTIN_MASKMOVDQU:
15827 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15828 ? CODE_FOR_mmx_maskmovq
15829 : CODE_FOR_sse2_maskmovdqu);
15830 /* Note the arg order is different from the operand order. */
15831 arg1 = TREE_VALUE (arglist);
15832 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15833 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15834 op0 = expand_normal (arg0);
15835 op1 = expand_normal (arg1);
15836 op2 = expand_normal (arg2);
15837 mode0 = insn_data[icode].operand[0].mode;
15838 mode1 = insn_data[icode].operand[1].mode;
15839 mode2 = insn_data[icode].operand[2].mode;
15841 op0 = force_reg (Pmode, op0);
15842 op0 = gen_rtx_MEM (mode1, op0);
15844 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15845 op0 = copy_to_mode_reg (mode0, op0);
15846 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15847 op1 = copy_to_mode_reg (mode1, op1);
15848 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15849 op2 = copy_to_mode_reg (mode2, op2);
15850 pat = GEN_FCN (icode) (op0, op1, op2);
15851 if (! pat)
15852 return 0;
15853 emit_insn (pat);
15854 return 0;
15856 case IX86_BUILTIN_SQRTSS:
15857 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15858 case IX86_BUILTIN_RSQRTSS:
15859 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15860 case IX86_BUILTIN_RCPSS:
15861 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15863 case IX86_BUILTIN_LOADUPS:
15864 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15866 case IX86_BUILTIN_STOREUPS:
15867 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15869 case IX86_BUILTIN_LOADHPS:
15870 case IX86_BUILTIN_LOADLPS:
15871 case IX86_BUILTIN_LOADHPD:
15872 case IX86_BUILTIN_LOADLPD:
15873 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15874 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15875 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15876 : CODE_FOR_sse2_loadlpd);
15877 arg0 = TREE_VALUE (arglist);
15878 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15879 op0 = expand_normal (arg0);
15880 op1 = expand_normal (arg1);
15881 tmode = insn_data[icode].operand[0].mode;
15882 mode0 = insn_data[icode].operand[1].mode;
15883 mode1 = insn_data[icode].operand[2].mode;
15885 op0 = force_reg (mode0, op0);
15886 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15887 if (optimize || target == 0
15888 || GET_MODE (target) != tmode
15889 || !register_operand (target, tmode))
15890 target = gen_reg_rtx (tmode);
15891 pat = GEN_FCN (icode) (target, op0, op1);
15892 if (! pat)
15893 return 0;
15894 emit_insn (pat);
15895 return target;
15897 case IX86_BUILTIN_STOREHPS:
15898 case IX86_BUILTIN_STORELPS:
15899 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15900 : CODE_FOR_sse_storelps);
15901 arg0 = TREE_VALUE (arglist);
15902 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15903 op0 = expand_normal (arg0);
15904 op1 = expand_normal (arg1);
15905 mode0 = insn_data[icode].operand[0].mode;
15906 mode1 = insn_data[icode].operand[1].mode;
15908 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15909 op1 = force_reg (mode1, op1);
15911 pat = GEN_FCN (icode) (op0, op1);
15912 if (! pat)
15913 return 0;
15914 emit_insn (pat);
15915 return const0_rtx;
15917 case IX86_BUILTIN_MOVNTPS:
15918 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15919 case IX86_BUILTIN_MOVNTQ:
15920 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15922 case IX86_BUILTIN_LDMXCSR:
15923 op0 = expand_normal (TREE_VALUE (arglist));
15924 target = assign_386_stack_local (SImode, SLOT_TEMP);
15925 emit_move_insn (target, op0);
15926 emit_insn (gen_sse_ldmxcsr (target));
15927 return 0;
15929 case IX86_BUILTIN_STMXCSR:
15930 target = assign_386_stack_local (SImode, SLOT_TEMP);
15931 emit_insn (gen_sse_stmxcsr (target));
15932 return copy_to_mode_reg (SImode, target);
15934 case IX86_BUILTIN_SHUFPS:
15935 case IX86_BUILTIN_SHUFPD:
15936 icode = (fcode == IX86_BUILTIN_SHUFPS
15937 ? CODE_FOR_sse_shufps
15938 : CODE_FOR_sse2_shufpd);
15939 arg0 = TREE_VALUE (arglist);
15940 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15941 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15942 op0 = expand_normal (arg0);
15943 op1 = expand_normal (arg1);
15944 op2 = expand_normal (arg2);
15945 tmode = insn_data[icode].operand[0].mode;
15946 mode0 = insn_data[icode].operand[1].mode;
15947 mode1 = insn_data[icode].operand[2].mode;
15948 mode2 = insn_data[icode].operand[3].mode;
15950 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15951 op0 = copy_to_mode_reg (mode0, op0);
15952 if ((optimize && !register_operand (op1, mode1))
15953 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15954 op1 = copy_to_mode_reg (mode1, op1);
15955 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15957 /* @@@ better error message */
15958 error ("mask must be an immediate");
15959 return gen_reg_rtx (tmode);
15961 if (optimize || target == 0
15962 || GET_MODE (target) != tmode
15963 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15964 target = gen_reg_rtx (tmode);
15965 pat = GEN_FCN (icode) (target, op0, op1, op2);
15966 if (! pat)
15967 return 0;
15968 emit_insn (pat);
15969 return target;
15971 case IX86_BUILTIN_PSHUFW:
15972 case IX86_BUILTIN_PSHUFD:
15973 case IX86_BUILTIN_PSHUFHW:
15974 case IX86_BUILTIN_PSHUFLW:
15975 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15976 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15977 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15978 : CODE_FOR_mmx_pshufw);
15979 arg0 = TREE_VALUE (arglist);
15980 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15981 op0 = expand_normal (arg0);
15982 op1 = expand_normal (arg1);
15983 tmode = insn_data[icode].operand[0].mode;
15984 mode1 = insn_data[icode].operand[1].mode;
15985 mode2 = insn_data[icode].operand[2].mode;
15987 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15988 op0 = copy_to_mode_reg (mode1, op0);
15989 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15991 /* @@@ better error message */
15992 error ("mask must be an immediate");
15993 return const0_rtx;
15995 if (target == 0
15996 || GET_MODE (target) != tmode
15997 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15998 target = gen_reg_rtx (tmode);
15999 pat = GEN_FCN (icode) (target, op0, op1);
16000 if (! pat)
16001 return 0;
16002 emit_insn (pat);
16003 return target;
16005 case IX86_BUILTIN_PSLLDQI128:
16006 case IX86_BUILTIN_PSRLDQI128:
16007 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
16008 : CODE_FOR_sse2_lshrti3);
16009 arg0 = TREE_VALUE (arglist);
16010 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16011 op0 = expand_normal (arg0);
16012 op1 = expand_normal (arg1);
16013 tmode = insn_data[icode].operand[0].mode;
16014 mode1 = insn_data[icode].operand[1].mode;
16015 mode2 = insn_data[icode].operand[2].mode;
16017 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16019 op0 = copy_to_reg (op0);
16020 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
16022 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16024 error ("shift must be an immediate");
16025 return const0_rtx;
16027 target = gen_reg_rtx (V2DImode);
16028 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
16029 if (! pat)
16030 return 0;
16031 emit_insn (pat);
16032 return target;
16034 case IX86_BUILTIN_FEMMS:
16035 emit_insn (gen_mmx_femms ());
16036 return NULL_RTX;
16038 case IX86_BUILTIN_PAVGUSB:
16039 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
16041 case IX86_BUILTIN_PF2ID:
16042 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
16044 case IX86_BUILTIN_PFACC:
16045 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
16047 case IX86_BUILTIN_PFADD:
16048 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
16050 case IX86_BUILTIN_PFCMPEQ:
16051 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
16053 case IX86_BUILTIN_PFCMPGE:
16054 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
16056 case IX86_BUILTIN_PFCMPGT:
16057 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
16059 case IX86_BUILTIN_PFMAX:
16060 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
16062 case IX86_BUILTIN_PFMIN:
16063 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
16065 case IX86_BUILTIN_PFMUL:
16066 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
16068 case IX86_BUILTIN_PFRCP:
16069 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
16071 case IX86_BUILTIN_PFRCPIT1:
16072 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
16074 case IX86_BUILTIN_PFRCPIT2:
16075 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
16077 case IX86_BUILTIN_PFRSQIT1:
16078 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
16080 case IX86_BUILTIN_PFRSQRT:
16081 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
16083 case IX86_BUILTIN_PFSUB:
16084 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
16086 case IX86_BUILTIN_PFSUBR:
16087 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
16089 case IX86_BUILTIN_PI2FD:
16090 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
16092 case IX86_BUILTIN_PMULHRW:
16093 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
16095 case IX86_BUILTIN_PF2IW:
16096 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
16098 case IX86_BUILTIN_PFNACC:
16099 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
16101 case IX86_BUILTIN_PFPNACC:
16102 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
16104 case IX86_BUILTIN_PI2FW:
16105 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
16107 case IX86_BUILTIN_PSWAPDSI:
16108 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
16110 case IX86_BUILTIN_PSWAPDSF:
16111 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
16113 case IX86_BUILTIN_SQRTSD:
16114 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
16115 case IX86_BUILTIN_LOADUPD:
16116 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
16117 case IX86_BUILTIN_STOREUPD:
16118 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
16120 case IX86_BUILTIN_MFENCE:
16121 emit_insn (gen_sse2_mfence ());
16122 return 0;
16123 case IX86_BUILTIN_LFENCE:
16124 emit_insn (gen_sse2_lfence ());
16125 return 0;
16127 case IX86_BUILTIN_CLFLUSH:
16128 arg0 = TREE_VALUE (arglist);
16129 op0 = expand_normal (arg0);
16130 icode = CODE_FOR_sse2_clflush;
16131 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
16132 op0 = copy_to_mode_reg (Pmode, op0);
16134 emit_insn (gen_sse2_clflush (op0));
16135 return 0;
16137 case IX86_BUILTIN_MOVNTPD:
16138 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
16139 case IX86_BUILTIN_MOVNTDQ:
16140 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
16141 case IX86_BUILTIN_MOVNTI:
16142 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
16144 case IX86_BUILTIN_LOADDQU:
16145 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
16146 case IX86_BUILTIN_STOREDQU:
16147 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
16149 case IX86_BUILTIN_MONITOR:
16150 arg0 = TREE_VALUE (arglist);
16151 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16152 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
16153 op0 = expand_normal (arg0);
16154 op1 = expand_normal (arg1);
16155 op2 = expand_normal (arg2);
16156 if (!REG_P (op0))
16157 op0 = copy_to_mode_reg (SImode, op0);
16158 if (!REG_P (op1))
16159 op1 = copy_to_mode_reg (SImode, op1);
16160 if (!REG_P (op2))
16161 op2 = copy_to_mode_reg (SImode, op2);
16162 emit_insn (gen_sse3_monitor (op0, op1, op2));
16163 return 0;
16165 case IX86_BUILTIN_MWAIT:
16166 arg0 = TREE_VALUE (arglist);
16167 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16168 op0 = expand_normal (arg0);
16169 op1 = expand_normal (arg1);
16170 if (!REG_P (op0))
16171 op0 = copy_to_mode_reg (SImode, op0);
16172 if (!REG_P (op1))
16173 op1 = copy_to_mode_reg (SImode, op1);
16174 emit_insn (gen_sse3_mwait (op0, op1));
16175 return 0;
16177 case IX86_BUILTIN_LDDQU:
16178 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
16179 target, 1);
16181 case IX86_BUILTIN_VEC_INIT_V2SI:
16182 case IX86_BUILTIN_VEC_INIT_V4HI:
16183 case IX86_BUILTIN_VEC_INIT_V8QI:
16184 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
16186 case IX86_BUILTIN_VEC_EXT_V2DF:
16187 case IX86_BUILTIN_VEC_EXT_V2DI:
16188 case IX86_BUILTIN_VEC_EXT_V4SF:
16189 case IX86_BUILTIN_VEC_EXT_V4SI:
16190 case IX86_BUILTIN_VEC_EXT_V8HI:
16191 case IX86_BUILTIN_VEC_EXT_V2SI:
16192 case IX86_BUILTIN_VEC_EXT_V4HI:
16193 return ix86_expand_vec_ext_builtin (arglist, target);
16195 case IX86_BUILTIN_VEC_SET_V8HI:
16196 case IX86_BUILTIN_VEC_SET_V4HI:
16197 return ix86_expand_vec_set_builtin (arglist);
16199 default:
16200 break;
16203 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16204 if (d->code == fcode)
16206 /* Compares are treated specially. */
16207 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16208 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
16209 || d->icode == CODE_FOR_sse2_maskcmpv2df3
16210 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16211 return ix86_expand_sse_compare (d, arglist, target);
16213 return ix86_expand_binop_builtin (d->icode, arglist, target);
16216 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16217 if (d->code == fcode)
16218 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
16220 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16221 if (d->code == fcode)
16222 return ix86_expand_sse_comi (d, arglist, target);
16224 gcc_unreachable ();
16227 /* Expand an expression EXP that calls a built-in library function,
16228 with result going to TARGET if that's convenient
16229 (and in mode MODE if that's convenient).
16230 SUBTARGET may be used as the target for computing one of EXP's operands.
16231 IGNORE is nonzero if the value is to be ignored. */
16233 static rtx
16234 ix86_expand_library_builtin (tree exp, rtx target,
16235 rtx subtarget ATTRIBUTE_UNUSED,
16236 enum machine_mode mode ATTRIBUTE_UNUSED,
16237 int ignore)
16239 enum built_in_function fncode;
16240 tree fndecl, newfn, call;
16242 /* Try expanding builtin math functions to the SSE2 ABI variants. */
16243 if (!TARGET_SSELIBM)
16244 return NULL_RTX;
16246 fncode = builtin_mathfn_code (exp);
16247 if (!ix86_builtin_function_variants [(int)fncode])
16248 return NULL_RTX;
16250 fndecl = get_callee_fndecl (exp);
16251 if (DECL_RTL_SET_P (fndecl))
16252 return NULL_RTX;
16254 /* Build the redirected call and expand it. */
16255 newfn = ix86_builtin_function_variants [(int)fncode];
16256 call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
16257 return expand_call (call, target, ignore);
16260 /* Store OPERAND to the memory after reload is completed. This means
16261 that we can't easily use assign_stack_local. */
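/* In outline (descriptive note, derived from the code below): with a red
   zone the value is simply stored below the stack pointer without
   adjusting it; on 64-bit without a red zone it is pushed as a DImode
   value; on 32-bit it is pushed as one or two SImode words.  In every
   case the returned MEM is only valid until ix86_free_from_memory
   releases the slot.  */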
16263 ix86_force_to_memory (enum machine_mode mode, rtx operand)
16265 rtx result;
16267 gcc_assert (reload_completed);
16268 if (TARGET_RED_ZONE)
16270 result = gen_rtx_MEM (mode,
16271 gen_rtx_PLUS (Pmode,
16272 stack_pointer_rtx,
16273 GEN_INT (-RED_ZONE_SIZE)));
16274 emit_move_insn (result, operand);
16276 else if (!TARGET_RED_ZONE && TARGET_64BIT)
16278 switch (mode)
16280 case HImode:
16281 case SImode:
16282 operand = gen_lowpart (DImode, operand);
16283 /* FALLTHRU */
16284 case DImode:
16285 emit_insn (
16286 gen_rtx_SET (VOIDmode,
16287 gen_rtx_MEM (DImode,
16288 gen_rtx_PRE_DEC (DImode,
16289 stack_pointer_rtx)),
16290 operand));
16291 break;
16292 default:
16293 gcc_unreachable ();
16295 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16297 else
16299 switch (mode)
16301 case DImode:
16303 rtx operands[2];
16304 split_di (&operand, 1, operands, operands + 1);
16305 emit_insn (
16306 gen_rtx_SET (VOIDmode,
16307 gen_rtx_MEM (SImode,
16308 gen_rtx_PRE_DEC (Pmode,
16309 stack_pointer_rtx)),
16310 operands[1]));
16311 emit_insn (
16312 gen_rtx_SET (VOIDmode,
16313 gen_rtx_MEM (SImode,
16314 gen_rtx_PRE_DEC (Pmode,
16315 stack_pointer_rtx)),
16316 operands[0]));
16318 break;
16319 case HImode:
16320 /* Store HImodes as SImodes. */
16321 operand = gen_lowpart (SImode, operand);
16322 /* FALLTHRU */
16323 case SImode:
16324 emit_insn (
16325 gen_rtx_SET (VOIDmode,
16326 gen_rtx_MEM (GET_MODE (operand),
16327 gen_rtx_PRE_DEC (SImode,
16328 stack_pointer_rtx)),
16329 operand));
16330 break;
16331 default:
16332 gcc_unreachable ();
16334 result = gen_rtx_MEM (mode, stack_pointer_rtx);
16336 return result;
16339 /* Free operand from the memory. */
16340 void
16341 ix86_free_from_memory (enum machine_mode mode)
16343 if (!TARGET_RED_ZONE)
16345 int size;
16347 if (mode == DImode || TARGET_64BIT)
16348 size = 8;
16349 else
16350 size = 4;
16351 /* Use LEA to deallocate stack space. In peephole2 it will be converted
16352 to a pop or add instruction if registers are available. */
16353 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16354 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16355 GEN_INT (size))));
16359 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16360 QImode must go into class Q_REGS.
16361 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16362 movdf to do mem-to-mem moves through integer regs. */
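/* For example (illustrative, when the i387 is usable for the mode):
   reloading the constant 1.0 into a FLOAT_SSE_REGS pseudo is narrowed
   below to FLOAT_REGS so that fld1 can be used, while a constant that
   only an SSE class could hold yields NO_REGS and is therefore loaded
   from the constant pool instead.  */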
16363 enum reg_class
16364 ix86_preferred_reload_class (rtx x, enum reg_class class)
16366 /* We're only allowed to return a subclass of CLASS. Many of the
16367 following checks fail for NO_REGS, so eliminate that early. */
16368 if (class == NO_REGS)
16369 return NO_REGS;
16371 /* All classes can load zeros. */
16372 if (x == CONST0_RTX (GET_MODE (x)))
16373 return class;
16375 /* Floating-point constants need more complex checks. */
16376 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16378 /* General regs can load everything. */
16379 if (reg_class_subset_p (class, GENERAL_REGS))
16380 return class;
16382 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16383 zero above. We only want to wind up preferring 80387 registers if
16384 we plan on doing computation with them. */
16385 if (TARGET_80387
16386 && (TARGET_MIX_SSE_I387
16387 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
16388 && standard_80387_constant_p (x))
16390 /* Limit class to non-sse. */
16391 if (class == FLOAT_SSE_REGS)
16392 return FLOAT_REGS;
16393 if (class == FP_TOP_SSE_REGS)
16394 return FP_TOP_REG;
16395 if (class == FP_SECOND_SSE_REGS)
16396 return FP_SECOND_REG;
16397 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16398 return class;
16401 return NO_REGS;
16403 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
16404 return NO_REGS;
16405 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
16406 return NO_REGS;
16408 /* Generally when we see PLUS here, it's the function invariant
16409 (plus soft-fp const_int), which can only be computed into general
16410 regs. */
16411 if (GET_CODE (x) == PLUS)
16412 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16414 /* QImode constants are easy to load, but non-constant QImode data
16415 must go into Q_REGS. */
16416 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16418 if (reg_class_subset_p (class, Q_REGS))
16419 return class;
16420 if (reg_class_subset_p (Q_REGS, class))
16421 return Q_REGS;
16422 return NO_REGS;
16425 return class;
16428 /* If we are copying between general and FP registers, we need a memory
16429 location. The same is true for SSE and MMX registers.
16431 The macro can't work reliably when one of the CLASSES is a class containing
16432 registers from multiple units (SSE, MMX, integer). We avoid this by never
16433 combining those units in a single alternative in the machine description.
16434 Ensure that this constraint holds to avoid unexpected surprises.
16436 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16437 enforce these sanity checks. */
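/* For example, a DFmode copy between FLOAT_REGS and SSE_REGS always goes
   through memory, and so does an SSE <-> general copy on SSE1-only
   targets or when inter-unit moves are disabled.  */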
16440 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16441 enum machine_mode mode, int strict)
16443 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16444 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16445 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16446 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16447 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16448 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16450 gcc_assert (!strict);
16451 return true;
16454 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16455 return true;
16457 /* ??? This is a lie. We do have moves between mmx/general, and for
16458 mmx/sse2. But by saying we need secondary memory we discourage the
16459 register allocator from using the mmx registers unless needed. */
16460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16461 return true;
16463 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16465 /* SSE1 doesn't have any direct moves from other classes. */
16466 if (!TARGET_SSE2)
16467 return true;
16469 /* If the target says that inter-unit moves are more expensive
16470 than moving through memory, then don't generate them. */
16471 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16472 return true;
16474 /* Between SSE and general, we have moves no larger than word size. */
16475 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16476 return true;
16478 /* ??? For the cost of one register reformat penalty, we could use
16479 the same instructions to move SFmode and DFmode data, but the
16480 relevant move patterns don't support those alternatives. */
16481 if (mode == SFmode || mode == DFmode)
16482 return true;
16485 return false;
16488 /* Return true if the registers in CLASS cannot represent the change from
16489 modes FROM to TO. */
16491 bool
16492 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16493 enum reg_class class)
16495 if (from == to)
16496 return false;
16498 /* x87 registers can't do subreg at all, as all values are reformatted
16499 to extended precision. */
16500 if (MAYBE_FLOAT_CLASS_P (class))
16501 return true;
16503 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16505 /* Vector registers do not support QI or HImode loads. If we don't
16506 disallow a change to these modes, reload will assume it's ok to
16507 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16508 the vec_dupv4hi pattern. */
16509 if (GET_MODE_SIZE (from) < 4)
16510 return true;
16512 /* Vector registers do not support subreg with nonzero offsets, which
16513 are otherwise valid for integer registers. Since we can't see
16514 whether we have a nonzero offset from here, prohibit all
16515 nonparadoxical subregs changing size. */
16516 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16517 return true;
16520 return false;
16523 /* Return the cost of moving data from a register in class CLASS1 to
16524 one in class CLASS2.
16526 It is not required that the cost always equal 2 when FROM is the same as TO;
16527 on some machines it is expensive to move between registers if they are not
16528 general registers. */
16531 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16532 enum reg_class class2)
16534 /* In case we require secondary memory, compute the cost of the store
16535 followed by the load. In order to avoid bad register allocation choices,
16536 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16538 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16540 int cost = 1;
16542 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16543 MEMORY_MOVE_COST (mode, class1, 1));
16544 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16545 MEMORY_MOVE_COST (mode, class2, 1));
16547 /* In case of copying from a general purpose register we may emit multiple
16548 stores followed by a single load, causing a memory size mismatch stall.
16549 Count this as an arbitrarily high cost of 20. */
16550 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16551 cost += 20;
16553 /* In the case of FP/MMX moves, the registers actually overlap, and we
16554 have to switch modes in order to treat them differently. */
16555 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16556 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16557 cost += 20;
16559 return cost;
16562 /* Moves between SSE/MMX and integer unit are expensive. */
16563 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16564 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16565 return ix86_cost->mmxsse_to_integer;
16566 if (MAYBE_FLOAT_CLASS_P (class1))
16567 return ix86_cost->fp_move;
16568 if (MAYBE_SSE_CLASS_P (class1))
16569 return ix86_cost->sse_move;
16570 if (MAYBE_MMX_CLASS_P (class1))
16571 return ix86_cost->mmx_move;
16572 return 2;
16575 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16577 bool
16578 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16580 /* Flags and only flags can only hold CCmode values. */
16581 if (CC_REGNO_P (regno))
16582 return GET_MODE_CLASS (mode) == MODE_CC;
16583 if (GET_MODE_CLASS (mode) == MODE_CC
16584 || GET_MODE_CLASS (mode) == MODE_RANDOM
16585 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16586 return 0;
16587 if (FP_REGNO_P (regno))
16588 return VALID_FP_MODE_P (mode);
16589 if (SSE_REGNO_P (regno))
16591 /* We implement the move patterns for all vector modes into and
16592 out of SSE registers, even when no operation instructions
16593 are available. */
16594 return (VALID_SSE_REG_MODE (mode)
16595 || VALID_SSE2_REG_MODE (mode)
16596 || VALID_MMX_REG_MODE (mode)
16597 || VALID_MMX_REG_MODE_3DNOW (mode));
16599 if (MMX_REGNO_P (regno))
16601 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16602 so if the register is available at all, then we can move data of
16603 the given mode into or out of it. */
16604 return (VALID_MMX_REG_MODE (mode)
16605 || VALID_MMX_REG_MODE_3DNOW (mode));
16608 if (mode == QImode)
16610 /* Take care with QImode values - they can be in non-QI regs,
16611 but then they do cause partial register stalls. */
16612 if (regno < 4 || TARGET_64BIT)
16613 return 1;
16614 if (!TARGET_PARTIAL_REG_STALL)
16615 return 1;
16616 return reload_in_progress || reload_completed;
16618 /* We handle both integer and floats in the general purpose registers. */
16619 else if (VALID_INT_MODE_P (mode))
16620 return 1;
16621 else if (VALID_FP_MODE_P (mode))
16622 return 1;
16623 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16624 on to use that value in smaller contexts, this can easily force a
16625 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16626 supporting DImode, allow it. */
16627 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16628 return 1;
16630 return 0;
16633 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16634 tieable integer mode. */
16636 static bool
16637 ix86_tieable_integer_mode_p (enum machine_mode mode)
16639 switch (mode)
16641 case HImode:
16642 case SImode:
16643 return true;
16645 case QImode:
16646 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16648 case DImode:
16649 return TARGET_64BIT;
16651 default:
16652 return false;
16656 /* Return true if MODE1 is accessible in a register that can hold MODE2
16657 without copying. That is, all register classes that can hold MODE2
16658 can also hold MODE1. */
16660 bool
16661 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16663 if (mode1 == mode2)
16664 return true;
16666 if (ix86_tieable_integer_mode_p (mode1)
16667 && ix86_tieable_integer_mode_p (mode2))
16668 return true;
16670 /* MODE2 being XFmode implies fp stack or general regs, which means we
16671 can tie any smaller floating point modes to it. Note that we do not
16672 tie this with TFmode. */
16673 if (mode2 == XFmode)
16674 return mode1 == SFmode || mode1 == DFmode;
16676 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16677 that we can tie it with SFmode. */
16678 if (mode2 == DFmode)
16679 return mode1 == SFmode;
16681 /* If MODE2 is only appropriate for an SSE register, then tie with
16682 any other mode acceptable to SSE registers. */
16683 if (GET_MODE_SIZE (mode2) >= 8
16684 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16685 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16687 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16688 with any other mode acceptable to MMX registers. */
16689 if (GET_MODE_SIZE (mode2) == 8
16690 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16691 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16693 return false;
16696 /* Return the cost of moving data of mode M between a
16697 register and memory. A value of 2 is the default; this cost is
16698 relative to those in `REGISTER_MOVE_COST'.
16700 If moving between registers and memory is more expensive than
16701 between two registers, you should define this macro to express the
16702 relative cost.
16704 Also model the increased cost of moving QImode registers in non-Q_REGS
16705 classes.
16708 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16710 if (FLOAT_CLASS_P (class))
16712 int index;
16713 switch (mode)
16715 case SFmode:
16716 index = 0;
16717 break;
16718 case DFmode:
16719 index = 1;
16720 break;
16721 case XFmode:
16722 index = 2;
16723 break;
16724 default:
16725 return 100;
16727 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16729 if (SSE_CLASS_P (class))
16731 int index;
16732 switch (GET_MODE_SIZE (mode))
16734 case 4:
16735 index = 0;
16736 break;
16737 case 8:
16738 index = 1;
16739 break;
16740 case 16:
16741 index = 2;
16742 break;
16743 default:
16744 return 100;
16746 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16748 if (MMX_CLASS_P (class))
16750 int index;
16751 switch (GET_MODE_SIZE (mode))
16753 case 4:
16754 index = 0;
16755 break;
16756 case 8:
16757 index = 1;
16758 break;
16759 default:
16760 return 100;
16762 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16764 switch (GET_MODE_SIZE (mode))
16766 case 1:
16767 if (in)
16768 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16769 : ix86_cost->movzbl_load);
16770 else
16771 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16772 : ix86_cost->int_store[0] + 4);
16773 break;
16774 case 2:
16775 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16776 default:
16777 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16778 if (mode == TFmode)
16779 mode = XFmode;
16780 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16781 * (((int) GET_MODE_SIZE (mode)
16782 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16786 /* Compute a (partial) cost for rtx X. Return true if the complete
16787 cost has been computed, and false if subexpressions should be
16788 scanned. In either case, *TOTAL contains the cost result. */
16790 static bool
16791 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16793 enum machine_mode mode = GET_MODE (x);
16795 switch (code)
16797 case CONST_INT:
16798 case CONST:
16799 case LABEL_REF:
16800 case SYMBOL_REF:
16801 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16802 *total = 3;
16803 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16804 *total = 2;
16805 else if (flag_pic && SYMBOLIC_CONST (x)
16806 && (!TARGET_64BIT
16807 || (GET_CODE (x) != LABEL_REF
16808 && (GET_CODE (x) != SYMBOL_REF
16809 || !SYMBOL_REF_LOCAL_P (x)))))
16810 *total = 1;
16811 else
16812 *total = 0;
16813 return true;
16815 case CONST_DOUBLE:
16816 if (mode == VOIDmode)
16817 *total = 0;
16818 else
16819 switch (standard_80387_constant_p (x))
16821 case 1: /* 0.0 */
16822 *total = 1;
16823 break;
16824 default: /* Other constants */
16825 *total = 2;
16826 break;
16827 case 0:
16828 case -1:
16829 /* Start with (MEM (SYMBOL_REF)), since that's where
16830 it'll probably end up. Add a penalty for size. */
16831 *total = (COSTS_N_INSNS (1)
16832 + (flag_pic != 0 && !TARGET_64BIT)
16833 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16834 break;
16836 return true;
16838 case ZERO_EXTEND:
16839 /* The zero extension is often completely free on x86_64, so make
16840 it as cheap as possible. */
16841 if (TARGET_64BIT && mode == DImode
16842 && GET_MODE (XEXP (x, 0)) == SImode)
16843 *total = 1;
16844 else if (TARGET_ZERO_EXTEND_WITH_AND)
16845 *total = ix86_cost->add;
16846 else
16847 *total = ix86_cost->movzx;
16848 return false;
16850 case SIGN_EXTEND:
16851 *total = ix86_cost->movsx;
16852 return false;
16854 case ASHIFT:
16855 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16856 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16858 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16859 if (value == 1)
16861 *total = ix86_cost->add;
16862 return false;
16864 if ((value == 2 || value == 3)
16865 && ix86_cost->lea <= ix86_cost->shift_const)
16867 *total = ix86_cost->lea;
16868 return false;
16871 /* FALLTHRU */
16873 case ROTATE:
16874 case ASHIFTRT:
16875 case LSHIFTRT:
16876 case ROTATERT:
16877 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16879 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16881 if (INTVAL (XEXP (x, 1)) > 32)
16882 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
16883 else
16884 *total = ix86_cost->shift_const * 2;
16886 else
16888 if (GET_CODE (XEXP (x, 1)) == AND)
16889 *total = ix86_cost->shift_var * 2;
16890 else
16891 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
16894 else
16896 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16897 *total = ix86_cost->shift_const;
16898 else
16899 *total = ix86_cost->shift_var;
16901 return false;
16903 case MULT:
16904 if (FLOAT_MODE_P (mode))
16906 *total = ix86_cost->fmul;
16907 return false;
16909 else
16911 rtx op0 = XEXP (x, 0);
16912 rtx op1 = XEXP (x, 1);
16913 int nbits;
16914 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16916 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16917 for (nbits = 0; value != 0; value &= value - 1)
16918 nbits++;
16920 else
16921 /* This is arbitrary. */
16922 nbits = 7;
16924 /* Compute costs correctly for widening multiplication. */
16925 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
16926 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16927 == GET_MODE_SIZE (mode))
16929 int is_mulwiden = 0;
16930 enum machine_mode inner_mode = GET_MODE (op0);
16932 if (GET_CODE (op0) == GET_CODE (op1))
16933 is_mulwiden = 1, op1 = XEXP (op1, 0);
16934 else if (GET_CODE (op1) == CONST_INT)
16936 if (GET_CODE (op0) == SIGN_EXTEND)
16937 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16938 == INTVAL (op1);
16939 else
16940 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16943 if (is_mulwiden)
16944 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16947 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
16948 + nbits * ix86_cost->mult_bit
16949 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
16951 return true;
16954 case DIV:
16955 case UDIV:
16956 case MOD:
16957 case UMOD:
16958 if (FLOAT_MODE_P (mode))
16959 *total = ix86_cost->fdiv;
16960 else
16961 *total = ix86_cost->divide[MODE_INDEX (mode)];
16962 return false;
16964 case PLUS:
16965 if (FLOAT_MODE_P (mode))
16966 *total = ix86_cost->fadd;
16967 else if (GET_MODE_CLASS (mode) == MODE_INT
16968 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
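/* The three cases below recognize address-like forms that a single lea
   can compute, e.g. (plus (plus (mult reg 4) reg) disp), and cost them
   as one lea plus the cost of the operands rather than a chain of adds.  */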
16970 if (GET_CODE (XEXP (x, 0)) == PLUS
16971 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16973 && CONSTANT_P (XEXP (x, 1)))
16975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16976 if (val == 2 || val == 4 || val == 8)
16978 *total = ix86_cost->lea;
16979 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16980 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16981 outer_code);
16982 *total += rtx_cost (XEXP (x, 1), outer_code);
16983 return true;
16986 else if (GET_CODE (XEXP (x, 0)) == MULT
16987 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16989 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16990 if (val == 2 || val == 4 || val == 8)
16992 *total = ix86_cost->lea;
16993 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16994 *total += rtx_cost (XEXP (x, 1), outer_code);
16995 return true;
16998 else if (GET_CODE (XEXP (x, 0)) == PLUS)
17000 *total = ix86_cost->lea;
17001 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
17002 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
17003 *total += rtx_cost (XEXP (x, 1), outer_code);
17004 return true;
17007 /* FALLTHRU */
17009 case MINUS:
17010 if (FLOAT_MODE_P (mode))
17012 *total = ix86_cost->fadd;
17013 return false;
17015 /* FALLTHRU */
17017 case AND:
17018 case IOR:
17019 case XOR:
17020 if (!TARGET_64BIT && mode == DImode)
17022 *total = (ix86_cost->add * 2
17023 + (rtx_cost (XEXP (x, 0), outer_code)
17024 << (GET_MODE (XEXP (x, 0)) != DImode))
17025 + (rtx_cost (XEXP (x, 1), outer_code)
17026 << (GET_MODE (XEXP (x, 1)) != DImode)));
17027 return true;
17029 /* FALLTHRU */
17031 case NEG:
17032 if (FLOAT_MODE_P (mode))
17034 *total = ix86_cost->fchs;
17035 return false;
17037 /* FALLTHRU */
17039 case NOT:
17040 if (!TARGET_64BIT && mode == DImode)
17041 *total = ix86_cost->add * 2;
17042 else
17043 *total = ix86_cost->add;
17044 return false;
17046 case COMPARE:
17047 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
17048 && XEXP (XEXP (x, 0), 1) == const1_rtx
17049 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
17050 && XEXP (x, 1) == const0_rtx)
17052 /* This kind of construct is implemented using test[bwl].
17053 Treat it as if we had an AND. */
17054 *total = (ix86_cost->add
17055 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
17056 + rtx_cost (const1_rtx, outer_code));
17057 return true;
17059 return false;
17061 case FLOAT_EXTEND:
17062 if (!TARGET_SSE_MATH
17063 || mode == XFmode
17064 || (mode == DFmode && !TARGET_SSE2))
17065 *total = 0;
17066 return false;
17068 case ABS:
17069 if (FLOAT_MODE_P (mode))
17070 *total = ix86_cost->fabs;
17071 return false;
17073 case SQRT:
17074 if (FLOAT_MODE_P (mode))
17075 *total = ix86_cost->fsqrt;
17076 return false;
17078 case UNSPEC:
17079 if (XINT (x, 1) == UNSPEC_TP)
17080 *total = 0;
17081 return false;
17083 default:
17084 return false;
17088 #if TARGET_MACHO
17090 static int current_machopic_label_num;
17092 /* Given a symbol name and its associated stub, write out the
17093 definition of the stub. */
17095 void
17096 machopic_output_stub (FILE *file, const char *symb, const char *stub)
17098 unsigned int length;
17099 char *binder_name, *symbol_name, lazy_ptr_name[32];
17100 int label = ++current_machopic_label_num;
17102 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
17103 symb = (*targetm.strip_name_encoding) (symb);
17105 length = strlen (stub);
17106 binder_name = alloca (length + 32);
17107 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
17109 length = strlen (symb);
17110 symbol_name = alloca (length + 32);
17111 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
17113 sprintf (lazy_ptr_name, "L%d$lz", label);
17115 if (MACHOPIC_PURE)
17116 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
17117 else
17118 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
17120 fprintf (file, "%s:\n", stub);
17121 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17123 if (MACHOPIC_PURE)
17125 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
17126 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
17127 fprintf (file, "\tjmp %%edx\n");
17129 else
17130 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
17132 fprintf (file, "%s:\n", binder_name);
17134 if (MACHOPIC_PURE)
17136 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
17137 fprintf (file, "\tpushl %%eax\n");
17139 else
17140 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
17142 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
17144 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
17145 fprintf (file, "%s:\n", lazy_ptr_name);
17146 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
17147 fprintf (file, "\t.long %s\n", binder_name);
17150 void
17151 darwin_x86_file_end (void)
17153 darwin_file_end ();
17154 ix86_file_end ();
17156 #endif /* TARGET_MACHO */
17158 /* Order the registers for register allocator. */
17160 void
17161 x86_order_regs_for_local_alloc (void)
17163 int pos = 0;
17164 int i;
17166 /* First allocate the local general purpose registers. */
17167 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17168 if (GENERAL_REGNO_P (i) && call_used_regs[i])
17169 reg_alloc_order [pos++] = i;
17171 /* Global general purpose registers. */
17172 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
17173 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
17174 reg_alloc_order [pos++] = i;
17176 /* x87 registers come first in case we are doing FP math
17177 using them. */
17178 if (!TARGET_SSE_MATH)
17179 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17180 reg_alloc_order [pos++] = i;
17182 /* SSE registers. */
17183 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17184 reg_alloc_order [pos++] = i;
17185 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17186 reg_alloc_order [pos++] = i;
17188 /* x87 registers. */
17189 if (TARGET_SSE_MATH)
17190 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
17191 reg_alloc_order [pos++] = i;
17193 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
17194 reg_alloc_order [pos++] = i;
17196 /* Initialize the rest of the array, as we do not allocate some registers
17197 at all. */
17198 while (pos < FIRST_PSEUDO_REGISTER)
17199 reg_alloc_order [pos++] = 0;
17202 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
17203 struct attribute_spec.handler. */
17204 static tree
17205 ix86_handle_struct_attribute (tree *node, tree name,
17206 tree args ATTRIBUTE_UNUSED,
17207 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
17209 tree *type = NULL;
17210 if (DECL_P (*node))
17212 if (TREE_CODE (*node) == TYPE_DECL)
17213 type = &TREE_TYPE (*node);
17215 else
17216 type = node;
17218 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
17219 || TREE_CODE (*type) == UNION_TYPE)))
17221 warning (OPT_Wattributes, "%qs attribute ignored",
17222 IDENTIFIER_POINTER (name));
17223 *no_add_attrs = true;
17226 else if ((is_attribute_p ("ms_struct", name)
17227 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
17228 || ((is_attribute_p ("gcc_struct", name)
17229 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
17231 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
17232 IDENTIFIER_POINTER (name));
17233 *no_add_attrs = true;
17236 return NULL_TREE;
17239 static bool
17240 ix86_ms_bitfield_layout_p (tree record_type)
17242 return (TARGET_MS_BITFIELD_LAYOUT &&
17243 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
17244 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
17247 /* Returns an expression indicating where the this parameter is
17248 located on entry to the FUNCTION. */
17250 static rtx
17251 x86_this_parameter (tree function)
17253 tree type = TREE_TYPE (function);
17255 if (TARGET_64BIT)
17257 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
17258 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
17261 if (ix86_function_regparm (type, function) > 0)
17263 tree parm;
17265 parm = TYPE_ARG_TYPES (type);
17266 /* Figure out whether or not the function has a variable number of
17267 arguments. */
17268 for (; parm; parm = TREE_CHAIN (parm))
17269 if (TREE_VALUE (parm) == void_type_node)
17270 break;
17271 /* If not, the this parameter is in the first argument. */
17272 if (parm)
17274 int regno = 0;
17275 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
17276 regno = 2;
17277 return gen_rtx_REG (SImode, regno);
17281 if (aggregate_value_p (TREE_TYPE (type), type))
17282 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
17283 else
17284 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
17287 /* Determine whether x86_output_mi_thunk can succeed. */
17289 static bool
17290 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
17291 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
17292 HOST_WIDE_INT vcall_offset, tree function)
17294 /* 64-bit can handle anything. */
17295 if (TARGET_64BIT)
17296 return true;
17298 /* For 32-bit, everything's fine if we have one free register. */
17299 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
17300 return true;
17302 /* Need a free register for vcall_offset. */
17303 if (vcall_offset)
17304 return false;
17306 /* Need a free register for GOT references. */
17307 if (flag_pic && !(*targetm.binds_local_p) (function))
17308 return false;
17310 /* Otherwise ok. */
17311 return true;
17314 /* Output the assembler code for a thunk function. THUNK_DECL is the
17315 declaration for the thunk function itself, FUNCTION is the decl for
17316 the target function. DELTA is an immediate constant offset to be
17317 added to THIS. If VCALL_OFFSET is nonzero, the word at
17318 *(*this + vcall_offset) should be added to THIS. */
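/* For instance, for a 32-bit thunk that only needs a small DELTA and whose
   target binds locally, the output is roughly:
       addl $DELTA, 4(%esp)
       jmp  function
   The vcall_offset and PIC paths below add the extra register moves and
   GOT access that those cases require.  */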
17320 static void
17321 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
17322 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
17323 HOST_WIDE_INT vcall_offset, tree function)
17325 rtx xops[3];
17326 rtx this = x86_this_parameter (function);
17327 rtx this_reg, tmp;
17329 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
17330 pull it in now and let DELTA benefit. */
17331 if (REG_P (this))
17332 this_reg = this;
17333 else if (vcall_offset)
17335 /* Put the this parameter into %eax. */
17336 xops[0] = this;
17337 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
17338 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17340 else
17341 this_reg = NULL_RTX;
17343 /* Adjust the this parameter by a fixed constant. */
17344 if (delta)
17346 xops[0] = GEN_INT (delta);
17347 xops[1] = this_reg ? this_reg : this;
17348 if (TARGET_64BIT)
17350 if (!x86_64_general_operand (xops[0], DImode))
17352 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17353 xops[1] = tmp;
17354 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
17355 xops[0] = tmp;
17356 xops[1] = this;
17358 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17360 else
17361 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17364 /* Adjust the this parameter by a value stored in the vtable. */
17365 if (vcall_offset)
17367 if (TARGET_64BIT)
17368 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17369 else
17371 int tmp_regno = 2 /* ECX */;
17372 if (lookup_attribute ("fastcall",
17373 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17374 tmp_regno = 0 /* EAX */;
17375 tmp = gen_rtx_REG (SImode, tmp_regno);
17378 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17379 xops[1] = tmp;
17380 if (TARGET_64BIT)
17381 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17382 else
17383 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17385 /* Adjust the this parameter. */
17386 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17387 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17389 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17390 xops[0] = GEN_INT (vcall_offset);
17391 xops[1] = tmp2;
17392 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17393 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17395 xops[1] = this_reg;
17396 if (TARGET_64BIT)
17397 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17398 else
17399 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17402 /* If necessary, drop THIS back to its stack slot. */
17403 if (this_reg && this_reg != this)
17405 xops[0] = this_reg;
17406 xops[1] = this;
17407 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17410 xops[0] = XEXP (DECL_RTL (function), 0);
17411 if (TARGET_64BIT)
17413 if (!flag_pic || (*targetm.binds_local_p) (function))
17414 output_asm_insn ("jmp\t%P0", xops);
17415 else
17417 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17418 tmp = gen_rtx_CONST (Pmode, tmp);
17419 tmp = gen_rtx_MEM (QImode, tmp);
17420 xops[0] = tmp;
17421 output_asm_insn ("jmp\t%A0", xops);
17424 else
17426 if (!flag_pic || (*targetm.binds_local_p) (function))
17427 output_asm_insn ("jmp\t%P0", xops);
17428 else
17429 #if TARGET_MACHO
17430 if (TARGET_MACHO)
17432 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17433 tmp = (gen_rtx_SYMBOL_REF
17434 (Pmode,
17435 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17436 tmp = gen_rtx_MEM (QImode, tmp);
17437 xops[0] = tmp;
17438 output_asm_insn ("jmp\t%0", xops);
17440 else
17441 #endif /* TARGET_MACHO */
17443 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17444 output_set_got (tmp, NULL_RTX);
17446 xops[1] = tmp;
17447 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17448 output_asm_insn ("jmp\t{*}%1", xops);
17453 static void
17454 x86_file_start (void)
17456 default_file_start ();
17457 if (X86_FILE_START_VERSION_DIRECTIVE)
17458 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17459 if (X86_FILE_START_FLTUSED)
17460 fputs ("\t.global\t__fltused\n", asm_out_file);
17461 if (ix86_asm_dialect == ASM_INTEL)
17462 fputs ("\t.intel_syntax\n", asm_out_file);
17466 x86_field_alignment (tree field, int computed)
17468 enum machine_mode mode;
17469 tree type = TREE_TYPE (field);
17471 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17472 return computed;
17473 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17474 ? get_inner_array_type (type) : type);
17475 if (mode == DFmode || mode == DCmode
17476 || GET_MODE_CLASS (mode) == MODE_INT
17477 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17478 return MIN (32, computed);
17479 return computed;
17482 /* Output assembler code to FILE to increment profiler label # LABELNO
17483 for profiling a function entry. */
17484 void
17485 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17487 if (TARGET_64BIT)
17488 if (flag_pic)
17490 #ifndef NO_PROFILE_COUNTERS
17491 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17492 #endif
17493 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17495 else
17497 #ifndef NO_PROFILE_COUNTERS
17498 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17499 #endif
17500 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17502 else if (flag_pic)
17504 #ifndef NO_PROFILE_COUNTERS
17505 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17506 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17507 #endif
17508 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17510 else
17512 #ifndef NO_PROFILE_COUNTERS
17513 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17514 PROFILE_COUNT_REGISTER);
17515 #endif
17516 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17520 /* We don't have exact information about the insn sizes, but we may assume
17521 quite safely that we are informed about all 1 byte insns and memory
17522 address sizes. This is enough to eliminate unnecessary padding in
17523 99% of cases. */
17525 static int
17526 min_insn_size (rtx insn)
17528 int l = 0;
17530 if (!INSN_P (insn) || !active_insn_p (insn))
17531 return 0;
17533 /* Discard alignments we've emitted, and jump tables. */
17534 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17535 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17536 return 0;
17537 if (GET_CODE (insn) == JUMP_INSN
17538 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17539 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17540 return 0;
17542 /* Important case - calls are always 5 bytes.
17543 It is common to have many calls in a row. */
17544 if (GET_CODE (insn) == CALL_INSN
17545 && symbolic_reference_mentioned_p (PATTERN (insn))
17546 && !SIBLING_CALL_P (insn))
17547 return 5;
17548 if (get_attr_length (insn) <= 1)
17549 return 1;
17551 /* For normal instructions we may rely on the sizes of addresses and
17552 on the presence of a symbol to require 4 bytes of encoding.
17553 This is not the case for jumps, where references are PC relative. */
17554 if (GET_CODE (insn) != JUMP_INSN)
17556 l = get_attr_length_address (insn);
17557 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17558 l = 4;
17560 if (l)
17561 return 1+l;
17562 else
17563 return 2;
17566 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
17567 16 byte window. */
17569 static void
17570 ix86_avoid_jump_misspredicts (void)
17572 rtx insn, start = get_insns ();
17573 int nbytes = 0, njumps = 0;
17574 int isjump = 0;
17576 /* Look for all minimal intervals of instructions containing 4 jumps.
17577 The intervals are bounded by START and INSN. NBYTES is the total
17578 size of instructions in the interval including INSN and not including
17579 START. When NBYTES is smaller than 16 bytes, it is possible
17580 that the ends of START and INSN end up in the same 16 byte page.
17582 The smallest offset in the page at which INSN can start is the case where
17583 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
17584 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
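/* In other words, padding is inserted before INSN whenever INSN would
   otherwise be the fourth jump or call ending inside a single 16 byte
   stretch of code.  */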
17586 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17589 nbytes += min_insn_size (insn);
17590 if (dump_file)
17591 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17592 INSN_UID (insn), min_insn_size (insn));
17593 if ((GET_CODE (insn) == JUMP_INSN
17594 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17595 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17596 || GET_CODE (insn) == CALL_INSN)
17597 njumps++;
17598 else
17599 continue;
17601 while (njumps > 3)
17603 start = NEXT_INSN (start);
17604 if ((GET_CODE (start) == JUMP_INSN
17605 && GET_CODE (PATTERN (start)) != ADDR_VEC
17606 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17607 || GET_CODE (start) == CALL_INSN)
17608 njumps--, isjump = 1;
17609 else
17610 isjump = 0;
17611 nbytes -= min_insn_size (start);
17613 gcc_assert (njumps >= 0);
17614 if (dump_file)
17615 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17616 INSN_UID (start), INSN_UID (insn), nbytes);
17618 if (njumps == 3 && isjump && nbytes < 16)
17620 int padsize = 15 - nbytes + min_insn_size (insn);
17622 if (dump_file)
17623 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17624 INSN_UID (insn), padsize);
17625 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17630 /* The AMD Athlon works faster
17631 when RET is not the destination of a conditional jump or directly preceded
17632 by another jump instruction. We avoid the penalty by inserting a NOP just
17633 before the RET instruction in such cases. */
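/* Note (descriptive): gen_return_internal_long below is expected to emit
   the longer "rep ; ret" encoding rather than a literal separate NOP; the
   extra prefix byte is what keeps the RET from being the direct target of
   the preceding branch.  */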
17634 static void
17635 ix86_pad_returns (void)
17637 edge e;
17638 edge_iterator ei;
17640 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17642 basic_block bb = e->src;
17643 rtx ret = BB_END (bb);
17644 rtx prev;
17645 bool replace = false;
17647 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17648 || !maybe_hot_bb_p (bb))
17649 continue;
17650 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17651 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17652 break;
17653 if (prev && GET_CODE (prev) == CODE_LABEL)
17655 edge e;
17656 edge_iterator ei;
17658 FOR_EACH_EDGE (e, ei, bb->preds)
17659 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17660 && !(e->flags & EDGE_FALLTHRU))
17661 replace = true;
17663 if (!replace)
17665 prev = prev_active_insn (ret);
17666 if (prev
17667 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17668 || GET_CODE (prev) == CALL_INSN))
17669 replace = true;
17670 /* Empty functions get a branch mispredict even when the jump destination
17671 is not visible to us. */
17672 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17673 replace = true;
17675 if (replace)
17677 emit_insn_before (gen_return_internal_long (), ret);
17678 delete_insn (ret);
17683 /* Implement machine specific optimizations. We implement padding of returns
17684 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
17685 static void
17686 ix86_reorg (void)
17688 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
17689 ix86_pad_returns ();
17690 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17691 ix86_avoid_jump_misspredicts ();
17694 /* Return nonzero when a QImode register that must be represented via a REX
17695 prefix is used. */
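/* In 64-bit mode any QImode register other than the low bytes of
   %eax, %ebx, %ecx and %edx (i.e. hard register numbers 4 and up, such
   as %sil, %dil or %r8b) needs a REX prefix, which is what the
   REGNO >= 4 check below detects.  */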
17696 bool
17697 x86_extended_QIreg_mentioned_p (rtx insn)
17699 int i;
17700 extract_insn_cached (insn);
17701 for (i = 0; i < recog_data.n_operands; i++)
17702 if (REG_P (recog_data.operand[i])
17703 && REGNO (recog_data.operand[i]) >= 4)
17704 return true;
17705 return false;
17708 /* Return nonzero when P points to a register encoded via a REX prefix.
17709 Called via for_each_rtx. */
17710 static int
17711 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17713 unsigned int regno;
17714 if (!REG_P (*p))
17715 return 0;
17716 regno = REGNO (*p);
17717 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17720 /* Return true when INSN mentions a register that must be encoded using a
17721 REX prefix. */
17722 bool
17723 x86_extended_reg_mentioned_p (rtx insn)
17725 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17728 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17729 optabs would emit if we didn't have TFmode patterns. */
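/* The expansion below branches on the sign bit: nonnegative inputs are
   converted directly, while inputs with the top bit set are halved with
   the lost low bit OR-ed back in (to keep rounding correct), converted,
   and then doubled again.  */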
17731 void
17732 x86_emit_floatuns (rtx operands[2])
17734 rtx neglab, donelab, i0, i1, f0, in, out;
17735 enum machine_mode mode, inmode;
17737 inmode = GET_MODE (operands[1]);
17738 gcc_assert (inmode == SImode || inmode == DImode);
17740 out = operands[0];
17741 in = force_reg (inmode, operands[1]);
17742 mode = GET_MODE (out);
17743 neglab = gen_label_rtx ();
17744 donelab = gen_label_rtx ();
17745 i1 = gen_reg_rtx (Pmode);
17746 f0 = gen_reg_rtx (mode);
17748 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17750 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17751 emit_jump_insn (gen_jump (donelab));
17752 emit_barrier ();
17754 emit_label (neglab);
17756 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17757 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17758 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17759 expand_float (f0, i0, 0);
17760 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17762 emit_label (donelab);
17765 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17766 with all elements equal to VAR. Return true if successful. */
17768 static bool
17769 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17770 rtx target, rtx val)
17772 enum machine_mode smode, wsmode, wvmode;
17773 rtx x;
17775 switch (mode)
17777 case V2SImode:
17778 case V2SFmode:
17779 if (!mmx_ok && !TARGET_SSE)
17780 return false;
17781 /* FALLTHRU */
17783 case V2DFmode:
17784 case V2DImode:
17785 case V4SFmode:
17786 case V4SImode:
17787 val = force_reg (GET_MODE_INNER (mode), val);
17788 x = gen_rtx_VEC_DUPLICATE (mode, val);
17789 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17790 return true;
17792 case V4HImode:
17793 if (!mmx_ok)
17794 return false;
17795 if (TARGET_SSE || TARGET_3DNOW_A)
17797 val = gen_lowpart (SImode, val);
17798 x = gen_rtx_TRUNCATE (HImode, val);
17799 x = gen_rtx_VEC_DUPLICATE (mode, x);
17800 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17801 return true;
17803 else
17805 smode = HImode;
17806 wsmode = SImode;
17807 wvmode = V2SImode;
17808 goto widen;
17811 case V8QImode:
17812 if (!mmx_ok)
17813 return false;
17814 smode = QImode;
17815 wsmode = HImode;
17816 wvmode = V4HImode;
17817 goto widen;
17818 case V8HImode:
17819 smode = HImode;
17820 wsmode = SImode;
17821 wvmode = V4SImode;
17822 goto widen;
17823 case V16QImode:
17824 smode = QImode;
17825 wsmode = HImode;
17826 wvmode = V8HImode;
17827 goto widen;
17828 widen:
17829 /* Replicate the value once into the next wider mode and recurse. */
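/* For example, to broadcast a QImode value into V16QImode we first form
   (v << 8) | v in HImode, broadcast that into a V8HImode register, and
   take the result as a V16QImode lowpart.  */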
17830 val = convert_modes (wsmode, smode, val, true);
17831 x = expand_simple_binop (wsmode, ASHIFT, val,
17832 GEN_INT (GET_MODE_BITSIZE (smode)),
17833 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17834 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17836 x = gen_reg_rtx (wvmode);
17837 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17838 gcc_unreachable ();
17839 emit_move_insn (target, gen_lowpart (mode, x));
17840 return true;
17842 default:
17843 return false;
17847 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17848 whose low element is VAR, and other elements are zero. Return true
17849 if successful. */
17851 static bool
17852 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17853 rtx target, rtx var)
17855 enum machine_mode vsimode;
17856 rtx x;
17858 switch (mode)
17860 case V2SFmode:
17861 case V2SImode:
17862 if (!mmx_ok && !TARGET_SSE)
17863 return false;
17864 /* FALLTHRU */
17866 case V2DFmode:
17867 case V2DImode:
17868 var = force_reg (GET_MODE_INNER (mode), var);
17869 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17870 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17871 return true;
17873 case V4SFmode:
17874 case V4SImode:
17875 var = force_reg (GET_MODE_INNER (mode), var);
17876 x = gen_rtx_VEC_DUPLICATE (mode, var);
17877 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17878 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17879 return true;
17881 case V8HImode:
17882 case V16QImode:
17883 vsimode = V4SImode;
17884 goto widen;
17885 case V4HImode:
17886 case V8QImode:
17887 if (!mmx_ok)
17888 return false;
17889 vsimode = V2SImode;
17890 goto widen;
17891 widen:
17892 /* Zero extend the variable element to SImode and recurse. */
17893 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17895 x = gen_reg_rtx (vsimode);
17896 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17897 gcc_unreachable ();
17899 emit_move_insn (target, gen_lowpart (mode, x));
17900 return true;
17902 default:
17903 return false;
17907 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17908 consisting of the values in VALS. It is known that all elements
17909 except ONE_VAR are constants. Return true if successful. */
17911 static bool
17912 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17913 rtx target, rtx vals, int one_var)
17915 rtx var = XVECEXP (vals, 0, one_var);
17916 enum machine_mode wmode;
17917 rtx const_vec, x;
17919 const_vec = copy_rtx (vals);
17920 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17921 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17923 switch (mode)
17925 case V2DFmode:
17926 case V2DImode:
17927 case V2SFmode:
17928 case V2SImode:
17929 /* For the two element vectors, it's just as easy to use
17930 the general case. */
17931 return false;
17933 case V4SFmode:
17934 case V4SImode:
17935 case V8HImode:
17936 case V4HImode:
17937 break;
17939 case V16QImode:
17940 wmode = V8HImode;
17941 goto widen;
17942 case V8QImode:
17943 wmode = V4HImode;
17944 goto widen;
17945 widen:
17946 /* There's no way to set one QImode entry easily. Combine
17947 the variable value with its adjacent constant value, and
17948 promote to an HImode set. */
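/* Since the target is little-endian, an even ONE_VAR supplies the low
   byte of the combined HImode element and an odd ONE_VAR the high byte;
   the adjacent constant fills the other half.  */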
17949 x = XVECEXP (vals, 0, one_var ^ 1);
17950 if (one_var & 1)
17952 var = convert_modes (HImode, QImode, var, true);
17953 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17954 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17955 x = GEN_INT (INTVAL (x) & 0xff);
17957 else
17959 var = convert_modes (HImode, QImode, var, true);
17960 x = gen_int_mode (INTVAL (x) << 8, HImode);
17962 if (x != const0_rtx)
17963 var = expand_simple_binop (HImode, IOR, var, x, var,
17964 1, OPTAB_LIB_WIDEN);
17966 x = gen_reg_rtx (wmode);
17967 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17968 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17970 emit_move_insn (target, gen_lowpart (mode, x));
17971 return true;
17973 default:
17974 return false;
17977 emit_move_insn (target, const_vec);
17978 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17979 return true;
17982 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17983 all values variable, and none identical. */
17985 static void
17986 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17987 rtx target, rtx vals)
17989 enum machine_mode half_mode = GET_MODE_INNER (mode);
17990 rtx op0 = NULL, op1 = NULL;
17991 bool use_vec_concat = false;
17993 switch (mode)
17995 case V2SFmode:
17996 case V2SImode:
17997 if (!mmx_ok && !TARGET_SSE)
17998 break;
17999 /* FALLTHRU */
18001 case V2DFmode:
18002 case V2DImode:
18003 /* For the two element vectors, we always implement VEC_CONCAT. */
18004 op0 = XVECEXP (vals, 0, 0);
18005 op1 = XVECEXP (vals, 0, 1);
18006 use_vec_concat = true;
18007 break;
18009 case V4SFmode:
18010 half_mode = V2SFmode;
18011 goto half;
18012 case V4SImode:
18013 half_mode = V2SImode;
18014 goto half;
18015 half:
18017 rtvec v;
18019 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
18020 Recurse to load the two halves. */
18022 op0 = gen_reg_rtx (half_mode);
18023 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
18024 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
18026 op1 = gen_reg_rtx (half_mode);
18027 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
18028 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
18030 use_vec_concat = true;
18032 break;
18034 case V8HImode:
18035 case V16QImode:
18036 case V4HImode:
18037 case V8QImode:
18038 break;
18040 default:
18041 gcc_unreachable ();
18044 if (use_vec_concat)
18046 if (!register_operand (op0, half_mode))
18047 op0 = force_reg (half_mode, op0);
18048 if (!register_operand (op1, half_mode))
18049 op1 = force_reg (half_mode, op1);
18051 emit_insn (gen_rtx_SET (VOIDmode, target,
18052 gen_rtx_VEC_CONCAT (mode, op0, op1)));
18054 else
18056 int i, j, n_elts, n_words, n_elt_per_word;
18057 enum machine_mode inner_mode;
18058 rtx words[4], shift;
18060 inner_mode = GET_MODE_INNER (mode);
18061 n_elts = GET_MODE_NUNITS (mode);
18062 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
18063 n_elt_per_word = n_elts / n_words;
18064 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
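/* Pack each word by iterating over its elements from the highest vector
   index down, shifting the accumulated word left and OR-ing the next
   element in, so that the lowest-indexed element ends up in the least
   significant bits, as the little-endian layout requires.  */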
18066 for (i = 0; i < n_words; ++i)
18068 rtx word = NULL_RTX;
18070 for (j = 0; j < n_elt_per_word; ++j)
18072 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
18073 elt = convert_modes (word_mode, inner_mode, elt, true);
18075 if (j == 0)
18076 word = elt;
18077 else
18079 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
18080 word, 1, OPTAB_LIB_WIDEN);
18081 word = expand_simple_binop (word_mode, IOR, word, elt,
18082 word, 1, OPTAB_LIB_WIDEN);
18086 words[i] = word;
18089 if (n_words == 1)
18090 emit_move_insn (target, gen_lowpart (mode, words[0]));
18091 else if (n_words == 2)
18093 rtx tmp = gen_reg_rtx (mode);
18094 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
18095 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
18096 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
18097 emit_move_insn (target, tmp);
18099 else if (n_words == 4)
18101 rtx tmp = gen_reg_rtx (V4SImode);
18102 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
18103 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
18104 emit_move_insn (target, gen_lowpart (mode, tmp));
18106 else
18107 gcc_unreachable ();
18111 /* Initialize vector TARGET via VALS. Suppress the use of MMX
18112 instructions unless MMX_OK is true. */
18114 void
18115 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
18117 enum machine_mode mode = GET_MODE (target);
18118 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18119 int n_elts = GET_MODE_NUNITS (mode);
18120 int n_var = 0, one_var = -1;
18121 bool all_same = true, all_const_zero = true;
18122 int i;
18123 rtx x;
18125 for (i = 0; i < n_elts; ++i)
18127 x = XVECEXP (vals, 0, i);
18128 if (!CONSTANT_P (x))
18129 n_var++, one_var = i;
18130 else if (x != CONST0_RTX (inner_mode))
18131 all_const_zero = false;
18132 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
18133 all_same = false;
18136 /* Constants are best loaded from the constant pool. */
18137 if (n_var == 0)
18139 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
18140 return;
18143 /* If all values are identical, broadcast the value. */
18144 if (all_same
18145 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
18146 XVECEXP (vals, 0, 0)))
18147 return;
18149 /* Values where only one field is non-constant are best loaded from
18150 the pool and overwritten via move later. */
18151 if (n_var == 1)
18153 if (all_const_zero && one_var == 0
18154 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
18155 XVECEXP (vals, 0, 0)))
18156 return;
18158 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
18159 return;
18162 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
18165 void
18166 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
18168 enum machine_mode mode = GET_MODE (target);
18169 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18170 bool use_vec_merge = false;
18171 rtx tmp;
18173 switch (mode)
18175 case V2SFmode:
18176 case V2SImode:
18177 if (mmx_ok)
18179 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
18180 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
18181 if (elt == 0)
18182 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
18183 else
18184 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
18185 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18186 return;
18188 break;
18190 case V2DFmode:
18191 case V2DImode:
18193 rtx op0, op1;
18195 /* For the two element vectors, we implement a VEC_CONCAT with
18196 the extraction of the other element. */
18198 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
18199 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
18201 if (elt == 0)
18202 op0 = val, op1 = tmp;
18203 else
18204 op0 = tmp, op1 = val;
18206 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
18207 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18209 return;
18211 case V4SFmode:
18212 switch (elt)
18214 case 0:
18215 use_vec_merge = true;
18216 break;
18218 case 1:
18219 /* tmp = target = A B C D */
18220 tmp = copy_to_reg (target);
18221 /* target = A A B B */
18222 emit_insn (gen_sse_unpcklps (target, target, target));
18223 /* target = X A B B */
18224 ix86_expand_vector_set (false, target, val, 0);
18225 /* target = A X C D */
18226 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18227 GEN_INT (1), GEN_INT (0),
18228 GEN_INT (2+4), GEN_INT (3+4)));
18229 return;
18231 case 2:
18232 /* tmp = target = A B C D */
18233 tmp = copy_to_reg (target);
18234 /* tmp = X B C D */
18235 ix86_expand_vector_set (false, tmp, val, 0);
18236 /* target = A B X D */
18237 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18238 GEN_INT (0), GEN_INT (1),
18239 GEN_INT (0+4), GEN_INT (3+4)));
18240 return;
18242 case 3:
18243 /* tmp = target = A B C D */
18244 tmp = copy_to_reg (target);
18245 /* tmp = X B C D */
18246 ix86_expand_vector_set (false, tmp, val, 0);
18247 /* target = A B C X */
18248 emit_insn (gen_sse_shufps_1 (target, target, tmp,
18249 GEN_INT (0), GEN_INT (1),
18250 GEN_INT (2+4), GEN_INT (0+4)));
18251 return;
18253 default:
18254 gcc_unreachable ();
18256 break;
18258 case V4SImode:
18259 /* Element 0 handled by vec_merge below. */
18260 if (elt == 0)
18262 use_vec_merge = true;
18263 break;
18266 if (TARGET_SSE2)
18268 /* With SSE2, use integer shuffles to swap element 0 and ELT,
18269 store into element 0, then shuffle them back. */
18271 rtx order[4];
18273 order[0] = GEN_INT (elt);
18274 order[1] = const1_rtx;
18275 order[2] = const2_rtx;
18276 order[3] = GEN_INT (3);
18277 order[elt] = const0_rtx;
18279 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18280 order[1], order[2], order[3]));
18282 ix86_expand_vector_set (false, target, val, 0);
18284 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
18285 order[1], order[2], order[3]));
18287 else
18289 /* For SSE1, we have to reuse the V4SF code. */
18290 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
18291 gen_lowpart (SFmode, val), elt);
18293 return;
18295 case V8HImode:
18296 use_vec_merge = TARGET_SSE2;
18297 break;
18298 case V4HImode:
18299 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18300 break;
18302 case V16QImode:
18303 case V8QImode:
18304 default:
18305 break;
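/* Where the target supports it, express the insertion as a VEC_MERGE
   of the duplicated value under a one-hot selector; otherwise spill
   TARGET to a stack temporary, store the element in memory and reload.  */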
18308 if (use_vec_merge)
18310 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
18311 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
18312 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18314 else
18316 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18318 emit_move_insn (mem, target);
18320 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18321 emit_move_insn (tmp, val);
18323 emit_move_insn (target, mem);
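/* Extract element ELT of vector VEC into the scalar TARGET.  Use MMX
   instructions only if MMX_OK is true.  */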
18327 void
18328 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
18330 enum machine_mode mode = GET_MODE (vec);
18331 enum machine_mode inner_mode = GET_MODE_INNER (mode);
18332 bool use_vec_extr = false;
18333 rtx tmp;
18335 switch (mode)
18337 case V2SImode:
18338 case V2SFmode:
18339 if (!mmx_ok)
18340 break;
18341 /* FALLTHRU */
18343 case V2DFmode:
18344 case V2DImode:
18345 use_vec_extr = true;
18346 break;
18348 case V4SFmode:
18349 switch (elt)
18351 case 0:
18352 tmp = vec;
18353 break;
18355 case 1:
18356 case 3:
18357 tmp = gen_reg_rtx (mode);
18358 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
18359 GEN_INT (elt), GEN_INT (elt),
18360 GEN_INT (elt+4), GEN_INT (elt+4)));
18361 break;
18363 case 2:
18364 tmp = gen_reg_rtx (mode);
18365 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18366 break;
18368 default:
18369 gcc_unreachable ();
18371 vec = tmp;
18372 use_vec_extr = true;
18373 elt = 0;
18374 break;
18376 case V4SImode:
18377 if (TARGET_SSE2)
18379 switch (elt)
18381 case 0:
18382 tmp = vec;
18383 break;
18385 case 1:
18386 case 3:
18387 tmp = gen_reg_rtx (mode);
18388 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18389 GEN_INT (elt), GEN_INT (elt),
18390 GEN_INT (elt), GEN_INT (elt)));
18391 break;
18393 case 2:
18394 tmp = gen_reg_rtx (mode);
18395 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18396 break;
18398 default:
18399 gcc_unreachable ();
18401 vec = tmp;
18402 use_vec_extr = true;
18403 elt = 0;
18405 else
18407 /* For SSE1, we have to reuse the V4SF code. */
18408 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18409 gen_lowpart (V4SFmode, vec), elt);
18410 return;
18412 break;
18414 case V8HImode:
18415 use_vec_extr = TARGET_SSE2;
18416 break;
18417 case V4HImode:
18418 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18419 break;
18421 case V16QImode:
18422 case V8QImode:
18423 /* ??? Could extract the appropriate HImode element and shift. */
18424 default:
18425 break;
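/* Either select the element directly with a VEC_SELECT, or spill the
   vector to a stack temporary and load the element back from memory.  */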
18428 if (use_vec_extr)
18430 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18431 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18433 /* Let the rtl optimizers know about the zero extension performed. */
18434 if (inner_mode == HImode)
18436 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18437 target = gen_lowpart (SImode, target);
18440 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18442 else
18444 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18446 emit_move_insn (mem, vec);
18448 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18449 emit_move_insn (target, tmp);
18453 /* Expand a vector reduction on V4SFmode for SSE1. FN generates the
18454 binary operation used to reduce; DEST is the destination; IN is the input vector. */
18456 void
18457 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18459 rtx tmp1, tmp2, tmp3;
18461 tmp1 = gen_reg_rtx (V4SFmode);
18462 tmp2 = gen_reg_rtx (V4SFmode);
18463 tmp3 = gen_reg_rtx (V4SFmode);
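/* Fold the high half of IN onto the low half with FN, then fold the
   two surviving elements together; the full reduction ends up in
   element 0 of DEST.  */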
18465 emit_insn (gen_sse_movhlps (tmp1, in, in));
18466 emit_insn (fn (tmp2, tmp1, in));
18468 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18469 GEN_INT (1), GEN_INT (1),
18470 GEN_INT (1+4), GEN_INT (1+4)));
18471 emit_insn (fn (dest, tmp2, tmp3));
18474 /* Target hook for scalar_mode_supported_p. */
18475 static bool
18476 ix86_scalar_mode_supported_p (enum machine_mode mode)
18478 if (DECIMAL_FLOAT_MODE_P (mode))
18479 return true;
18480 else
18481 return default_scalar_mode_supported_p (mode);
18484 /* Implements target hook vector_mode_supported_p. */
18485 static bool
18486 ix86_vector_mode_supported_p (enum machine_mode mode)
18488 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18489 return true;
18490 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18491 return true;
18492 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18493 return true;
18494 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18495 return true;
18496 return false;
18499 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18501 We do this in the new i386 backend to maintain source compatibility
18502 with the old cc0-based compiler. */
18504 static tree
18505 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18506 tree inputs ATTRIBUTE_UNUSED,
18507 tree clobbers)
18509 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18510 clobbers);
18511 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18512 clobbers);
18513 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18514 clobbers);
18515 return clobbers;
18518 /* Return true if this goes in large data/bss. */
18520 static bool
18521 ix86_in_large_data_p (tree exp)
18523 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18524 return false;
18526 /* Functions are never large data. */
18527 if (TREE_CODE (exp) == FUNCTION_DECL)
18528 return false;
18530 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18532 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18533 if (strcmp (section, ".ldata") == 0
18534 || strcmp (section, ".lbss") == 0)
18535 return true;
18536 return false;
18538 else
18540 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18542 /* If this is an incomplete type with size 0, then we can't put it
18543 in data because it might be too big when completed. */
18544 if (!size || size > ix86_section_threshold)
18545 return true;
18548 return false;
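/* Encode section info: on top of the default handling, mark static
   variables that ix86_in_large_data_p places in the large data/bss
   sections with SYMBOL_FLAG_FAR_ADDR, so they are addressed with the
   far (full 64-bit) addressing forms.  */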
18550 static void
18551 ix86_encode_section_info (tree decl, rtx rtl, int first)
18553 default_encode_section_info (decl, rtl, first);
18555 if (TREE_CODE (decl) == VAR_DECL
18556 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18557 && ix86_in_large_data_p (decl))
18558 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18561 /* Worker function for REVERSE_CONDITION. */
18563 enum rtx_code
18564 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18566 return (mode != CCFPmode && mode != CCFPUmode
18567 ? reverse_condition (code)
18568 : reverse_condition_maybe_unordered (code));
18571 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18572 to OPERANDS[0]. */
18574 const char *
18575 output_387_reg_move (rtx insn, rtx *operands)
18577 if (REG_P (operands[1])
18578 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18580 if (REGNO (operands[0]) == FIRST_STACK_REG
18581 && TARGET_USE_FFREEP)
18582 return "ffreep\t%y0";
18583 return "fstp\t%y0";
18585 if (STACK_TOP_P (operands[0]))
18586 return "fld%z1\t%y1";
18587 return "fst\t%y0";
18590 /* Output code to perform a conditional jump to LABEL if the C2 flag
18591 in the FP status register is set. */
18593 void
18594 ix86_emit_fp_unordered_jump (rtx label)
18596 rtx reg = gen_reg_rtx (HImode);
18597 rtx temp;
18599 emit_insn (gen_x86_fnstsw_1 (reg));
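/* C2 is bit 10 of the FP status word, i.e. 0x04 in the byte that
   SAHF copies into the flags, where it shows up as the parity flag;
   hence the UNORDERED test below.  Without SAHF, test the bit in the
   status word directly.  */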
18601 if (TARGET_USE_SAHF)
18603 emit_insn (gen_x86_sahf_1 (reg));
18605 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18606 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18608 else
18610 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18612 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18613 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18616 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18617 gen_rtx_LABEL_REF (VOIDmode, label),
18618 pc_rtx);
18619 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18620 emit_jump_insn (temp);
18623 /* Output code to perform a log1p XFmode calculation. */
18625 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18627 rtx label1 = gen_label_rtx ();
18628 rtx label2 = gen_label_rtx ();
18630 rtx tmp = gen_reg_rtx (XFmode);
18631 rtx tmp2 = gen_reg_rtx (XFmode);
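/* fyl2xp1 is only valid for |op1| < 1 - sqrt(2)/2 (about 0.29289), so
   compare |op1| against that bound; above it, compute the result as
   fyl2x of 1 + op1 instead.  */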
18633 emit_insn (gen_absxf2 (tmp, op1));
18634 emit_insn (gen_cmpxf (tmp,
18635 CONST_DOUBLE_FROM_REAL_VALUE (
18636 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18637 XFmode)));
18638 emit_jump_insn (gen_bge (label1));
18640 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18641 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18642 emit_jump (label2);
18644 emit_label (label1);
18645 emit_move_insn (tmp, CONST1_RTX (XFmode));
18646 emit_insn (gen_addxf3 (tmp, op1, tmp));
18647 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18648 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18650 emit_label (label2);
18653 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
18655 static void
18656 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18657 tree decl)
18659 /* With Binutils 2.15, the "@unwind" marker must be specified on
18660 every occurrence of the ".eh_frame" section, not just the first
18661 one. */
18662 if (TARGET_64BIT
18663 && strcmp (name, ".eh_frame") == 0)
18665 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18666 flags & SECTION_WRITE ? "aw" : "a");
18667 return;
18669 default_elf_asm_named_section (name, flags, decl);
18672 /* Return the mangling of TYPE if it is an extended fundamental type. */
18674 static const char *
18675 ix86_mangle_fundamental_type (tree type)
18677 switch (TYPE_MODE (type))
18679 case TFmode:
18680 /* __float128 is "g". */
18681 return "g";
18682 case XFmode:
18683 /* "long double" or __float80 is "e". */
18684 return "e";
18685 default:
18686 return NULL;
18690 /* For 32-bit code we can save PIC register setup by using the
18691 __stack_chk_fail_local hidden function instead of calling
18692 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
18693 register, so it is better to call __stack_chk_fail directly. */
18695 static tree
18696 ix86_stack_protect_fail (void)
18698 return TARGET_64BIT
18699 ? default_external_stack_protect_fail ()
18700 : default_hidden_stack_protect_fail ();
18703 /* Select a format to encode pointers in exception handling data. CODE
18704 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18705 true if the symbol may be affected by dynamic relocations.
18707 ??? All x86 object file formats are capable of representing this.
18708 After all, the relocation needed is the same as for the call insn.
18709 Whether or not a particular assembler allows us to enter such, I
18710 guess we'll have to see. */
int
18712 asm_preferred_eh_data_format (int code, int global)
18714 if (flag_pic)
18716 int type = DW_EH_PE_sdata8;
18717 if (!TARGET_64BIT
18718 || ix86_cmodel == CM_SMALL_PIC
18719 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18720 type = DW_EH_PE_sdata4;
18721 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18723 if (ix86_cmodel == CM_SMALL
18724 || (ix86_cmodel == CM_MEDIUM && code))
18725 return DW_EH_PE_udata4;
18726 return DW_EH_PE_absptr;
18729 #include "gt-i386.h"